raggrep 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/LICENSE +22 -0
  2. package/README.md +15 -0
  3. package/dist/application/index.d.ts +7 -0
  4. package/dist/application/usecases/cleanupIndex.d.ts +54 -0
  5. package/dist/application/usecases/index.d.ts +9 -0
  6. package/dist/application/usecases/indexDirectory.d.ts +54 -0
  7. package/dist/application/usecases/searchIndex.d.ts +48 -0
  8. package/dist/cli/main.d.ts +1 -0
  9. package/dist/cli/main.js +1596 -0
  10. package/dist/cli/main.js.map +22 -0
  11. package/dist/composition.d.ts +52 -0
  12. package/dist/domain/entities/chunk.d.ts +41 -0
  13. package/dist/domain/entities/config.d.ts +43 -0
  14. package/dist/domain/entities/fileIndex.d.ts +58 -0
  15. package/dist/domain/entities/fileSummary.d.ts +61 -0
  16. package/dist/domain/entities/index.d.ts +14 -0
  17. package/dist/domain/entities/searchResult.d.ts +36 -0
  18. package/dist/domain/index.d.ts +11 -0
  19. package/dist/domain/ports/embedding.d.ts +60 -0
  20. package/dist/domain/ports/filesystem.d.ts +78 -0
  21. package/dist/domain/ports/index.d.ts +10 -0
  22. package/dist/domain/ports/storage.d.ts +79 -0
  23. package/dist/domain/services/bm25.d.ts +82 -0
  24. package/dist/domain/services/bm25.test.d.ts +4 -0
  25. package/dist/domain/services/index.d.ts +8 -0
  26. package/dist/domain/services/keywords.d.ts +27 -0
  27. package/dist/index.d.ts +98 -0
  28. package/dist/index.js +1378 -0
  29. package/dist/index.js.map +22 -0
  30. package/dist/indexer/index.d.ts +33 -0
  31. package/dist/infrastructure/embeddings/index.d.ts +4 -0
  32. package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +34 -0
  33. package/dist/infrastructure/filesystem/index.d.ts +4 -0
  34. package/dist/infrastructure/filesystem/nodeFileSystem.d.ts +28 -0
  35. package/dist/infrastructure/index.d.ts +9 -0
  36. package/dist/infrastructure/storage/fileIndexStorage.d.ts +68 -0
  37. package/dist/infrastructure/storage/index.d.ts +4 -0
  38. package/dist/modules/registry.d.ts +3 -0
  39. package/dist/modules/semantic/index.d.ts +55 -0
  40. package/dist/modules/semantic/parseCode.d.ts +44 -0
  41. package/dist/modules/semantic/parseCode.test.d.ts +4 -0
  42. package/dist/search/index.d.ts +11 -0
  43. package/dist/types.d.ts +84 -0
  44. package/dist/utils/bm25.d.ts +9 -0
  45. package/dist/utils/config.d.ts +45 -0
  46. package/dist/utils/embeddings.d.ts +46 -0
  47. package/dist/utils/embeddings.test.d.ts +4 -0
  48. package/dist/utils/tieredIndex.d.ts +100 -0
  49. package/package.json +66 -0
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Embedding Port
3
+ *
4
+ * Abstract interface for embedding generation.
5
+ * This allows the domain to remain independent of the actual embedding implementation
6
+ * (e.g., Transformers.js, OpenAI API, local models).
7
+ */
8
+ /**
9
+ * Available embedding model names
10
+ */
11
+ export type EmbeddingModelName = 'all-MiniLM-L6-v2' | 'all-MiniLM-L12-v2' | 'bge-small-en-v1.5' | 'paraphrase-MiniLM-L3-v2';
12
+ /**
13
+ * Configuration for embedding provider
14
+ */
15
+ export interface EmbeddingConfig {
16
+ /** Model name to use */
17
+ model: EmbeddingModelName;
18
+ /** Whether to show progress during model loading */
19
+ showProgress?: boolean;
20
+ }
21
+ /**
22
+ * Abstract embedding provider interface.
23
+ *
24
+ * Implementations might use:
25
+ * - Local models (Transformers.js)
26
+ * - Remote APIs (OpenAI, Cohere)
27
+ * - Custom models
28
+ */
29
+ export interface EmbeddingProvider {
30
+ /**
31
+ * Generate embedding for a single text
32
+ * @returns Embedding vector (typically 384 dimensions for MiniLM)
33
+ */
34
+ getEmbedding(text: string): Promise<number[]>;
35
+ /**
36
+ * Generate embeddings for multiple texts (batched for efficiency)
37
+ * @returns Array of embedding vectors
38
+ */
39
+ getEmbeddings(texts: string[]): Promise<number[][]>;
40
+ /**
41
+ * Get the dimension of embeddings produced by this provider
42
+ */
43
+ getDimension(): number;
44
+ /**
45
+ * Get the current model name
46
+ */
47
+ getModelName(): string;
48
+ /**
49
+ * Initialize the provider (e.g., load model)
50
+ */
51
+ initialize?(config: EmbeddingConfig): Promise<void>;
52
+ /**
53
+ * Cleanup resources
54
+ */
55
+ dispose?(): Promise<void>;
56
+ }
57
+ /**
58
+ * Calculate cosine similarity between two vectors
59
+ */
60
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
@@ -0,0 +1,78 @@
1
+ /**
2
+ * FileSystem Port
3
+ *
4
+ * Abstract interface for filesystem operations.
5
+ * This allows the domain to remain independent of the actual filesystem implementation.
6
+ */
7
+ /**
8
+ * File statistics
9
+ */
10
+ export interface FileStats {
11
+ /** ISO timestamp of last modification */
12
+ lastModified: string;
13
+ /** File size in bytes */
14
+ size?: number;
15
+ }
16
+ /**
17
+ * Abstract filesystem interface.
18
+ *
19
+ * All filesystem operations should go through this interface
20
+ * to maintain domain independence from Node.js fs module.
21
+ */
22
+ export interface FileSystem {
23
+ /**
24
+ * Read a file's content as UTF-8 string
25
+ */
26
+ readFile(filepath: string): Promise<string>;
27
+ /**
28
+ * Write content to a file (creates directories if needed)
29
+ */
30
+ writeFile(filepath: string, content: string): Promise<void>;
31
+ /**
32
+ * Delete a file
33
+ */
34
+ deleteFile(filepath: string): Promise<void>;
35
+ /**
36
+ * Get file statistics
37
+ */
38
+ getStats(filepath: string): Promise<FileStats>;
39
+ /**
40
+ * Check if a file exists
41
+ */
42
+ exists(filepath: string): Promise<boolean>;
43
+ /**
44
+ * Create directory (and parent directories)
45
+ */
46
+ mkdir(dirpath: string): Promise<void>;
47
+ /**
48
+ * List files in a directory
49
+ */
50
+ readDir(dirpath: string): Promise<string[]>;
51
+ /**
52
+ * Find files matching patterns
53
+ * @param rootDir - Root directory to search from
54
+ * @param patterns - Glob patterns to match (e.g., ['**\/*.ts'])
55
+ * @param ignore - Patterns to ignore
56
+ */
57
+ findFiles(rootDir: string, patterns: string[], ignore: string[]): Promise<string[]>;
58
+ /**
59
+ * Join path segments
60
+ */
61
+ join(...segments: string[]): string;
62
+ /**
63
+ * Get relative path from one path to another
64
+ */
65
+ relative(from: string, to: string): string;
66
+ /**
67
+ * Resolve to absolute path
68
+ */
69
+ resolve(...segments: string[]): string;
70
+ /**
71
+ * Get directory name from path
72
+ */
73
+ dirname(filepath: string): string;
74
+ /**
75
+ * Get file extension
76
+ */
77
+ extname(filepath: string): string;
78
+ }
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Domain Ports
3
+ *
4
+ * Interfaces defining what the domain needs from external systems.
5
+ * These are implemented by infrastructure adapters.
6
+ */
7
+ export type { FileSystem, FileStats } from './filesystem';
8
+ export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName } from './embedding';
9
+ export { cosineSimilarity } from './embedding';
10
+ export type { IndexStorage } from './storage';
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Index Storage Port
3
+ *
4
+ * Abstract interface for storing and retrieving index data.
5
+ * This allows the domain to remain independent of the actual storage implementation.
6
+ */
7
+ import type { FileIndex, ModuleManifest, GlobalManifest, Config } from '../entities';
8
+ import type { Tier1Manifest } from '../entities';
9
+ /**
10
+ * Abstract index storage interface.
11
+ *
12
+ * Handles persistence of index data (Tier 1 and Tier 2).
13
+ * Implementations might use:
14
+ * - Filesystem (current)
15
+ * - SQLite
16
+ * - IndexedDB (for browser)
17
+ */
18
+ export interface IndexStorage {
19
+ /**
20
+ * Load configuration from storage
21
+ */
22
+ loadConfig(): Promise<Config>;
23
+ /**
24
+ * Save configuration to storage
25
+ */
26
+ saveConfig(config: Config): Promise<void>;
27
+ /**
28
+ * Load global manifest
29
+ */
30
+ loadGlobalManifest(): Promise<GlobalManifest | null>;
31
+ /**
32
+ * Save global manifest
33
+ */
34
+ saveGlobalManifest(manifest: GlobalManifest): Promise<void>;
35
+ /**
36
+ * Load module manifest
37
+ */
38
+ loadModuleManifest(moduleId: string): Promise<ModuleManifest | null>;
39
+ /**
40
+ * Save module manifest
41
+ */
42
+ saveModuleManifest(moduleId: string, manifest: ModuleManifest): Promise<void>;
43
+ /**
44
+ * Load Tier 1 index for a module
45
+ */
46
+ loadTier1Index(moduleId: string): Promise<Tier1Manifest | null>;
47
+ /**
48
+ * Save Tier 1 index for a module
49
+ */
50
+ saveTier1Index(moduleId: string, manifest: Tier1Manifest): Promise<void>;
51
+ /**
52
+ * Load file index (Tier 2)
53
+ */
54
+ loadFileIndex(moduleId: string, filepath: string): Promise<FileIndex | null>;
55
+ /**
56
+ * Save file index (Tier 2)
57
+ */
58
+ saveFileIndex(moduleId: string, filepath: string, index: FileIndex): Promise<void>;
59
+ /**
60
+ * Delete file index
61
+ */
62
+ deleteFileIndex(moduleId: string, filepath: string): Promise<void>;
63
+ /**
64
+ * List all indexed files for a module
65
+ */
66
+ listIndexedFiles(moduleId: string): Promise<string[]>;
67
+ /**
68
+ * Check if index exists for this project
69
+ */
70
+ indexExists(): Promise<boolean>;
71
+ /**
72
+ * Delete entire index
73
+ */
74
+ deleteIndex(): Promise<void>;
75
+ /**
76
+ * Get the root directory being indexed
77
+ */
78
+ getRootDir(): string;
79
+ }
@@ -0,0 +1,82 @@
1
+ /**
2
+ * BM25 (Best Matching 25) Implementation
3
+ *
4
+ * A ranking function for keyword-based search. This is a pure domain service
5
+ * with no external dependencies - just algorithms operating on data.
6
+ *
7
+ * BM25 estimates relevance of documents to a search query using term frequency
8
+ * and inverse document frequency with length normalization.
9
+ */
10
+ /**
11
+ * Tokenize text into normalized terms.
12
+ *
13
+ * @param text - Text to tokenize
14
+ * @returns Array of lowercase tokens
15
+ */
16
+ export declare function tokenize(text: string): string[];
17
+ /**
18
+ * Document data for BM25 scoring.
19
+ */
20
+ export interface BM25Document {
21
+ id: string;
22
+ content: string;
23
+ /** Pre-computed tokens (optional, computed from content if not provided) */
24
+ tokens?: string[];
25
+ }
26
+ /**
27
+ * BM25 search result.
28
+ */
29
+ export interface BM25Result {
30
+ id: string;
31
+ score: number;
32
+ }
33
+ /**
34
+ * BM25 search index.
35
+ *
36
+ * This is a pure in-memory data structure with no I/O operations.
37
+ * Build the index by adding documents, then search against it.
38
+ */
39
+ export declare class BM25Index {
40
+ private documents;
41
+ private avgDocLength;
42
+ private documentFrequencies;
43
+ private totalDocs;
44
+ /**
45
+ * Add documents to the index.
46
+ *
47
+ * @param documents - Array of documents to index
48
+ */
49
+ addDocuments(documents: BM25Document[]): void;
50
+ /**
51
+ * Calculate IDF (Inverse Document Frequency) for a term.
52
+ */
53
+ private idf;
54
+ /**
55
+ * Calculate BM25 score for a document given query terms.
56
+ */
57
+ private score;
58
+ /**
59
+ * Search the index with a query.
60
+ *
61
+ * @param query - Search query
62
+ * @param topK - Maximum number of results to return
63
+ * @returns Sorted array of results (highest score first)
64
+ */
65
+ search(query: string, topK?: number): BM25Result[];
66
+ /**
67
+ * Get the number of indexed documents.
68
+ */
69
+ get size(): number;
70
+ /**
71
+ * Clear the index.
72
+ */
73
+ clear(): void;
74
+ }
75
+ /**
76
+ * Normalize a raw score to 0-1 range using sigmoid function.
77
+ *
78
+ * @param score - Raw score
79
+ * @param midpoint - Score at which output is 0.5
80
+ * @returns Normalized score between 0 and 1
81
+ */
82
+ export declare function normalizeScore(score: number, midpoint?: number): number;
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Tests for BM25 search implementation
3
+ */
4
+ export {};
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Domain Services
3
+ *
4
+ * Pure algorithms and business logic with no external dependencies.
5
+ * These services operate only on domain entities and primitive data.
6
+ */
7
+ export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, } from './bm25';
8
+ export { extractKeywords, extractPathKeywords, COMMON_KEYWORDS, } from './keywords';
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Keyword Extraction Service
3
+ *
4
+ * Pure domain service for extracting keywords from code.
5
+ * No external dependencies - operates only on string data.
6
+ */
7
+ /**
8
+ * Common programming keywords to exclude from keyword extraction.
9
+ * These appear in almost every code file and don't add search value.
10
+ */
11
+ export declare const COMMON_KEYWORDS: Set<string>;
12
+ /**
13
+ * Extract keywords from code content and optional name.
14
+ *
15
+ * @param content - Code content to extract keywords from
16
+ * @param name - Optional name (function name, class name, etc.)
17
+ * @param maxKeywords - Maximum keywords to return (default: 50)
18
+ * @returns Array of unique lowercase keywords
19
+ */
20
+ export declare function extractKeywords(content: string, name?: string, maxKeywords?: number): string[];
21
+ /**
22
+ * Extract keywords from a file path.
23
+ *
24
+ * @param filepath - File path to extract keywords from
25
+ * @returns Array of keywords from path segments
26
+ */
27
+ export declare function extractPathKeywords(filepath: string): string[];
@@ -0,0 +1,98 @@
1
+ /**
2
+ * RAGgrep - Local filesystem-based RAG system for codebases
3
+ *
4
+ * Provides semantic search over code using local embeddings.
5
+ *
6
+ * @example
7
+ * ```ts
8
+ * import raggrep from 'raggrep';
9
+ *
10
+ * // Index a directory
11
+ * await raggrep.index('/path/to/project');
12
+ *
13
+ * // Search the index
14
+ * const results = await raggrep.search('/path/to/project', 'user authentication');
15
+ *
16
+ * // Clean up stale entries
17
+ * await raggrep.cleanup('/path/to/project');
18
+ * ```
19
+ */
20
+ import type { IndexResult, IndexOptions, CleanupResult } from './indexer';
21
+ import { formatSearchResults } from './search';
22
+ import type { SearchOptions, SearchResult } from './types';
23
+ export type { IndexResult, IndexOptions, CleanupResult } from './indexer';
24
+ export type { SearchOptions, SearchResult, Chunk, FileIndex } from './types';
25
+ /**
26
+ * Index a directory for semantic search.
27
+ *
28
+ * Creates a `.raggrep/` folder with the index data.
29
+ *
30
+ * @param directory - Path to the directory to index
31
+ * @param options - Index options
32
+ * @returns Array of results per module
33
+ *
34
+ * @example
35
+ * ```ts
36
+ * // Basic indexing
37
+ * await raggrep.index('./my-project');
38
+ *
39
+ * // With options
40
+ * await raggrep.index('./my-project', {
41
+ * model: 'bge-small-en-v1.5',
42
+ * verbose: true
43
+ * });
44
+ * ```
45
+ */
46
+ export declare function index(directory: string, options?: IndexOptions): Promise<IndexResult[]>;
47
+ /**
48
+ * Search the indexed codebase.
49
+ *
50
+ * @param directory - Path to the indexed directory
51
+ * @param query - Natural language search query
52
+ * @param options - Search options
53
+ * @returns Array of search results sorted by relevance
54
+ *
55
+ * @example
56
+ * ```ts
57
+ * // Basic search
58
+ * const results = await raggrep.search('./my-project', 'user login');
59
+ *
60
+ * // With options
61
+ * const results = await raggrep.search('./my-project', 'database query', {
62
+ * topK: 5,
63
+ * minScore: 0.2,
64
+ * filePatterns: ['*.ts']
65
+ * });
66
+ * ```
67
+ */
68
+ export declare function search(directory: string, query: string, options?: SearchOptions): Promise<SearchResult[]>;
69
+ /**
70
+ * Clean up stale index entries for files that no longer exist.
71
+ *
72
+ * @param directory - Path to the indexed directory
73
+ * @param options - Cleanup options
74
+ * @returns Array of cleanup results per module
75
+ *
76
+ * @example
77
+ * ```ts
78
+ * const results = await raggrep.cleanup('./my-project');
79
+ * console.log(`Removed ${results[0].removed} stale entries`);
80
+ * ```
81
+ */
82
+ export declare function cleanup(directory: string, options?: {
83
+ verbose?: boolean;
84
+ }): Promise<CleanupResult[]>;
85
+ /**
86
+ * Format search results for display.
87
+ *
88
+ * @param results - Array of search results
89
+ * @returns Formatted string for console output
90
+ */
91
+ export { formatSearchResults };
92
+ declare const raggrep: {
93
+ index: typeof index;
94
+ search: typeof search;
95
+ cleanup: typeof cleanup;
96
+ formatSearchResults: typeof formatSearchResults;
97
+ };
98
+ export default raggrep;