raggrep 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +138 -6
  2. package/dist/{indexer → app/indexer}/index.d.ts +26 -1
  3. package/dist/app/indexer/watcher.d.ts +33 -0
  4. package/dist/{search → app/search}/index.d.ts +1 -1
  5. package/dist/cli/main.js +1802 -222
  6. package/dist/cli/main.js.map +26 -15
  7. package/dist/composition.d.ts +7 -7
  8. package/dist/domain/entities/fileSummary.d.ts +18 -0
  9. package/dist/domain/entities/index.d.ts +1 -1
  10. package/dist/domain/entities/searchResult.d.ts +47 -2
  11. package/dist/domain/index.d.ts +5 -3
  12. package/dist/domain/ports/embedding.d.ts +0 -4
  13. package/dist/domain/ports/index.d.ts +3 -4
  14. package/dist/domain/services/bm25.d.ts +24 -0
  15. package/dist/domain/services/index.d.ts +3 -2
  16. package/dist/domain/services/keywords.d.ts +45 -0
  17. package/dist/domain/services/similarity.d.ts +23 -0
  18. package/dist/{application → domain}/usecases/cleanupIndex.d.ts +2 -2
  19. package/dist/{application → domain}/usecases/indexDirectory.d.ts +2 -2
  20. package/dist/{application → domain}/usecases/searchIndex.d.ts +2 -2
  21. package/dist/index.d.ts +5 -5
  22. package/dist/index.js +1444 -244
  23. package/dist/index.js.map +26 -15
  24. package/dist/{utils/config.d.ts → infrastructure/config/configLoader.d.ts} +7 -4
  25. package/dist/infrastructure/config/index.d.ts +6 -0
  26. package/dist/infrastructure/embeddings/index.d.ts +3 -1
  27. package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +16 -0
  28. package/dist/infrastructure/index.d.ts +4 -3
  29. package/dist/infrastructure/storage/index.d.ts +4 -1
  30. package/dist/{utils/tieredIndex.d.ts → infrastructure/storage/symbolicIndex.d.ts} +7 -18
  31. package/dist/introspection/fileIntrospector.d.ts +14 -0
  32. package/dist/introspection/index.d.ts +68 -0
  33. package/dist/introspection/introspection.test.d.ts +4 -0
  34. package/dist/introspection/projectDetector.d.ts +27 -0
  35. package/dist/introspection/types.d.ts +70 -0
  36. package/dist/modules/core/index.d.ts +69 -0
  37. package/dist/modules/core/symbols.d.ts +27 -0
  38. package/dist/modules/core/symbols.test.d.ts +4 -0
  39. package/dist/modules/{semantic → language/typescript}/index.d.ts +11 -12
  40. package/dist/types.d.ts +4 -1
  41. package/package.json +7 -6
  42. package/dist/application/index.d.ts +0 -7
  43. package/dist/utils/bm25.d.ts +0 -9
  44. package/dist/utils/embeddings.d.ts +0 -46
  45. /package/dist/{cli → app/cli}/main.d.ts +0 -0
  46. /package/dist/{application → domain}/usecases/index.d.ts +0 -0
  47. /package/dist/{utils → infrastructure/embeddings}/embeddings.test.d.ts +0 -0
  48. /package/dist/modules/{semantic → language/typescript}/parseCode.d.ts +0 -0
  49. /package/dist/modules/{semantic → language/typescript}/parseCode.test.d.ts +0 -0
@@ -8,10 +8,10 @@
8
8
  * This is the only file that knows about concrete implementations.
9
9
  * Everything else depends only on interfaces (ports).
10
10
  */
11
- import type { Config } from './domain/entities';
12
- import type { FileSystem } from './domain/ports';
13
- import type { IndexModule, IndexContext, SearchContext } from './types';
14
- import { FileIndexStorage } from './infrastructure/storage';
11
+ import type { Config } from "./domain/entities";
12
+ import type { FileSystem } from "./domain/ports";
13
+ import type { IndexModule, IndexContext, SearchContext } from "./types";
14
+ import { FileIndexStorage } from "./infrastructure/storage";
15
15
  /**
16
16
  * Container for all application services.
17
17
  * Created once and passed to use cases.
@@ -27,9 +27,9 @@ export interface ServiceContainer {
27
27
  * Create a service container for a specific project directory.
28
28
  */
29
29
  export declare function createServiceContainer(rootDir: string): Promise<ServiceContainer>;
30
- import type { IndexDirectoryDependencies } from './application/usecases/indexDirectory';
31
- import type { SearchIndexDependencies } from './application/usecases/searchIndex';
32
- import type { CleanupIndexDependencies } from './application/usecases/cleanupIndex';
30
+ import type { IndexDirectoryDependencies } from "./domain/usecases/indexDirectory";
31
+ import type { SearchIndexDependencies } from "./domain/usecases/searchIndex";
32
+ import type { CleanupIndexDependencies } from "./domain/usecases/cleanupIndex";
33
33
  /**
34
34
  * Create dependencies for the indexDirectory use case.
35
35
  */
@@ -7,6 +7,19 @@
7
7
  * Stored as individual files in: .raggrep/index/<module>/symbolic/<filepath>.json
8
8
  */
9
9
  import type { ChunkType } from "./chunk";
10
+ /**
11
+ * Path context information for structural search boosting.
12
+ */
13
+ export interface PathContext {
14
+ /** Directory segments (excluding filename) */
15
+ segments: string[];
16
+ /** Detected architectural layer (service, controller, repository, etc.) */
17
+ layer?: string;
18
+ /** Detected feature domain (auth, users, payments, etc.) */
19
+ domain?: string;
20
+ /** Path depth (number of directory levels) */
21
+ depth: number;
22
+ }
10
23
  /**
11
24
  * Lightweight file summary for fast filtering.
12
25
  *
@@ -26,6 +39,11 @@ export interface FileSummary {
26
39
  exports: string[];
27
40
  /** ISO timestamp of when the file was last modified */
28
41
  lastModified: string;
42
+ /**
43
+ * Parsed path context for structural boosting.
44
+ * Includes detected layer, domain, and path depth.
45
+ */
46
+ pathContext?: PathContext;
29
47
  }
30
48
  /**
31
49
  * Metadata for the symbolic index.
@@ -8,7 +8,7 @@ export type { Chunk, ChunkType } from "./chunk";
8
8
  export { createChunkId } from "./chunk";
9
9
  export type { FileIndex, FileManifestEntry, ModuleManifest, GlobalManifest, } from "./fileIndex";
10
10
  export type { FileSummary, SymbolicIndexMeta, Tier1Manifest, } from "./fileSummary";
11
- export type { SearchResult, SearchOptions } from "./searchResult";
11
+ export type { SearchResult, SearchOptions, SearchContributions, CoreContribution, LanguageContribution, IntrospectionContribution, } from "./searchResult";
12
12
  export { DEFAULT_SEARCH_OPTIONS } from "./searchResult";
13
13
  export type { Config, ModuleConfig } from "./config";
14
14
  export { DEFAULT_IGNORE_PATHS, DEFAULT_EXTENSIONS, createDefaultConfig, } from "./config";
@@ -3,7 +3,50 @@
3
3
  *
4
4
  * Represents a single result from a search query.
5
5
  */
6
- import type { Chunk } from './chunk';
6
+ import type { Chunk } from "./chunk";
7
+ /**
8
+ * Contribution from the core index.
9
+ */
10
+ export interface CoreContribution {
11
+ /** Symbol name match score (0-1) */
12
+ symbolMatch: number;
13
+ /** BM25 keyword match score (0-1) */
14
+ keywordMatch: number;
15
+ }
16
+ /**
17
+ * Contribution from a language-specific index.
18
+ */
19
+ export interface LanguageContribution {
20
+ /** Semantic embedding similarity (0-1) */
21
+ semanticMatch: number;
22
+ /** BM25 keyword match score (0-1) */
23
+ keywordMatch: number;
24
+ }
25
+ /**
26
+ * Contribution from introspection boosting.
27
+ */
28
+ export interface IntrospectionContribution {
29
+ /** Boost from domain match */
30
+ domainBoost: number;
31
+ /** Boost from layer match */
32
+ layerBoost: number;
33
+ /** Boost from scope match */
34
+ scopeBoost: number;
35
+ /** Boost from path segment match */
36
+ pathBoost: number;
37
+ }
38
+ /**
39
+ * Tracks which indexes contributed to a search result's score.
40
+ * Used for learning and tuning.
41
+ */
42
+ export interface SearchContributions {
43
+ /** Core index contribution */
44
+ core?: CoreContribution;
45
+ /** Language-specific index contribution (keyed by module ID) */
46
+ language?: Record<string, LanguageContribution>;
47
+ /** Introspection boost contribution */
48
+ introspection?: IntrospectionContribution;
49
+ }
7
50
  /**
8
51
  * A search result with relevance score and source information.
9
52
  */
@@ -12,10 +55,12 @@ export interface SearchResult {
12
55
  filepath: string;
13
56
  /** The matching chunk */
14
57
  chunk: Chunk;
15
- /** Relevance score (0-1, higher is better) */
58
+ /** Final relevance score (0-1, higher is better) */
16
59
  score: number;
17
60
  /** ID of the module that produced this result */
18
61
  moduleId: string;
62
+ /** Contribution tracking for learning */
63
+ contributions?: SearchContributions;
19
64
  /** Additional context from the search (e.g., semantic vs keyword scores) */
20
65
  context?: Record<string, unknown>;
21
66
  }
@@ -5,7 +5,9 @@
5
5
  * - Entities: Core data structures
6
6
  * - Ports: Interfaces for external dependencies
7
7
  * - Services: Pure business logic and algorithms
8
+ * - Use Cases: Application business logic
8
9
  */
9
- export * from './entities';
10
- export * from './ports';
11
- export * from './services';
10
+ export * from "./entities";
11
+ export * from "./ports";
12
+ export * from "./services";
13
+ export * from "./usecases";
@@ -54,7 +54,3 @@ export interface EmbeddingProvider {
54
54
  */
55
55
  dispose?(): Promise<void>;
56
56
  }
57
- /**
58
- * Calculate cosine similarity between two vectors
59
- */
60
- export declare function cosineSimilarity(a: number[], b: number[]): number;
@@ -4,7 +4,6 @@
4
4
  * Interfaces defining what the domain needs from external systems.
5
5
  * These are implemented by infrastructure adapters.
6
6
  */
7
- export type { FileSystem, FileStats } from './filesystem';
8
- export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName } from './embedding';
9
- export { cosineSimilarity } from './embedding';
10
- export type { IndexStorage } from './storage';
7
+ export type { FileSystem, FileStats } from "./filesystem";
8
+ export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName } from "./embedding";
9
+ export type { IndexStorage } from "./storage";
@@ -71,6 +71,30 @@ export declare class BM25Index {
71
71
  * Clear the index.
72
72
  */
73
73
  clear(): void;
74
+ /**
75
+ * Add a single document by ID and pre-computed tokens.
76
+ *
77
+ * @param id - Document identifier
78
+ * @param tokens - Pre-computed tokens
79
+ */
80
+ addDocument(id: string, tokens: string[]): void;
81
+ /**
82
+ * Serialize the index to a JSON-compatible object.
83
+ */
84
+ serialize(): BM25SerializedData;
85
+ /**
86
+ * Deserialize a BM25 index from saved data.
87
+ */
88
+ static deserialize(data: BM25SerializedData): BM25Index;
89
+ }
90
+ /**
91
+ * Serialized BM25 index data.
92
+ */
93
+ export interface BM25SerializedData {
94
+ documents: Record<string, string[]>;
95
+ avgDocLength: number;
96
+ documentFrequencies: Record<string, number>;
97
+ totalDocs: number;
74
98
  }
75
99
  /**
76
100
  * Normalize a raw score to 0-1 range using sigmoid function.
@@ -4,5 +4,6 @@
4
4
  * Pure algorithms and business logic with no external dependencies.
5
5
  * These services operate only on domain entities and primitive data.
6
6
  */
7
- export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, } from './bm25';
8
- export { extractKeywords, extractPathKeywords, COMMON_KEYWORDS, } from './keywords';
7
+ export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
8
+ export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
9
+ export { cosineSimilarity, euclideanDistance } from "./similarity";
@@ -9,6 +9,11 @@
9
9
  * These appear in almost every code file and don't add search value.
10
10
  */
11
11
  export declare const COMMON_KEYWORDS: Set<string>;
12
+ /**
13
+ * Common architectural layer patterns in file names/paths.
14
+ * Used to detect the layer a file belongs to.
15
+ */
16
+ export declare const LAYER_PATTERNS: Record<string, string[]>;
12
17
  /**
13
18
  * Extract keywords from code content and optional name.
14
19
  *
@@ -21,7 +26,47 @@ export declare function extractKeywords(content: string, name?: string, maxKeywo
21
26
  /**
22
27
  * Extract keywords from a file path.
23
28
  *
29
+ * Enhanced extraction that:
30
+ * - Splits camelCase/PascalCase filenames
31
+ * - Extracts directory segments
32
+ * - Recognizes common patterns (Service, Controller, etc.)
33
+ *
24
34
  * @param filepath - File path to extract keywords from
25
35
  * @returns Array of keywords from path segments
26
36
  */
27
37
  export declare function extractPathKeywords(filepath: string): string[];
38
+ /**
39
+ * Path context information extracted from a file path.
40
+ */
41
+ export interface PathContext {
42
+ /** Directory segments (excluding filename) */
43
+ segments: string[];
44
+ /** Detected architectural layer (service, controller, repository, etc.) */
45
+ layer?: string;
46
+ /** Detected feature domain (auth, users, payments, etc.) */
47
+ domain?: string;
48
+ /** Path depth (number of directory levels) */
49
+ depth: number;
50
+ /** Keywords extracted from the path */
51
+ keywords: string[];
52
+ }
53
+ /**
54
+ * Parse a file path and extract structural context.
55
+ *
56
+ * This helps with:
57
+ * - Boosting files in related directories
58
+ * - Understanding architectural layer
59
+ * - Grouping by feature domain
60
+ *
61
+ * @param filepath - File path to parse
62
+ * @returns Parsed path context
63
+ */
64
+ export declare function parsePathContext(filepath: string): PathContext;
65
+ /**
66
+ * Generate a path context string for embedding.
67
+ * This is prepended to content to give the embedding model path awareness.
68
+ *
69
+ * @param pathContext - Parsed path context
70
+ * @returns A string representation of the path context
71
+ */
72
+ export declare function formatPathContextForEmbedding(pathContext: PathContext): string;
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Similarity Service
3
+ *
4
+ * Pure mathematical functions for computing vector similarity.
5
+ * No external dependencies.
6
+ */
7
+ /**
8
+ * Calculate cosine similarity between two vectors.
9
+ *
10
+ * @param a - First vector
11
+ * @param b - Second vector
12
+ * @returns Similarity score between -1 and 1 (1 = identical, 0 = orthogonal, -1 = opposite)
13
+ * @throws Error if vectors have different lengths
14
+ */
15
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
16
+ /**
17
+ * Calculate Euclidean distance between two vectors.
18
+ *
19
+ * @param a - First vector
20
+ * @param b - Second vector
21
+ * @returns Distance (0 = identical, larger = more different)
22
+ */
23
+ export declare function euclideanDistance(a: number[], b: number[]): number;
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * Removes stale index entries for files that no longer exist.
5
5
  */
6
- import type { Config, ModuleManifest } from '../../domain/entities';
7
- import type { FileSystem } from '../../domain/ports';
6
+ import type { Config, ModuleManifest } from '../entities';
7
+ import type { FileSystem } from '../ports';
8
8
  import type { IndexModule } from '../../types';
9
9
  /**
10
10
  * Result of cleanup for a single module
@@ -5,8 +5,8 @@
5
5
  * This is an application-level use case that coordinates domain entities
6
6
  * and infrastructure services.
7
7
  */
8
- import type { Config } from '../../domain/entities';
9
- import type { FileSystem } from '../../domain/ports';
8
+ import type { Config } from '../entities';
9
+ import type { FileSystem } from '../ports';
10
10
  import type { IndexModule } from '../../types';
11
11
  /**
12
12
  * Result of indexing with a single module
@@ -3,8 +3,8 @@
3
3
  *
4
4
  * Orchestrates searching the indexed codebase.
5
5
  */
6
- import type { Config, SearchResult, SearchOptions } from '../../domain/entities';
7
- import type { FileSystem } from '../../domain/ports';
6
+ import type { Config, SearchResult, SearchOptions } from '../entities';
7
+ import type { FileSystem } from '../ports';
8
8
  import type { IndexModule, FileIndex } from '../../types';
9
9
  /**
10
10
  * Options for the search use case
package/dist/index.d.ts CHANGED
@@ -17,11 +17,11 @@
17
17
  * await raggrep.cleanup('/path/to/project');
18
18
  * ```
19
19
  */
20
- import type { IndexResult, IndexOptions, CleanupResult } from './indexer';
21
- import { formatSearchResults } from './search';
22
- import type { SearchOptions, SearchResult } from './types';
23
- export type { IndexResult, IndexOptions, CleanupResult } from './indexer';
24
- export type { SearchOptions, SearchResult, Chunk, FileIndex } from './types';
20
+ import type { IndexResult, IndexOptions, CleanupResult } from "./app/indexer";
21
+ import { formatSearchResults } from "./app/search";
22
+ import type { SearchOptions, SearchResult } from "./types";
23
+ export type { IndexResult, IndexOptions, CleanupResult } from "./app/indexer";
24
+ export type { SearchOptions, SearchResult, Chunk, FileIndex } from "./types";
25
25
  /**
26
26
  * Index a directory for semantic search.
27
27
  *