raggrep 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +32 -4
- package/dist/cli/main.js +1459 -635
- package/dist/cli/main.js.map +17 -11
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/ports/embedding.d.ts +4 -1
- package/dist/domain/ports/index.d.ts +2 -1
- package/dist/domain/ports/logger.d.ts +66 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.d.ts +45 -8
- package/dist/index.js +1500 -679
- package/dist/index.js.map +17 -11
- package/dist/infrastructure/index.d.ts +1 -0
- package/dist/infrastructure/logger/index.d.ts +6 -0
- package/dist/infrastructure/logger/loggers.d.ts +75 -0
- package/dist/modules/data/json/index.d.ts +47 -0
- package/dist/modules/docs/markdown/index.d.ts +47 -0
- package/dist/modules/language/typescript/index.d.ts +10 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/package.json +1 -1
|
@@ -35,6 +35,12 @@ export interface Config {
|
|
|
35
35
|
export declare const DEFAULT_IGNORE_PATHS: string[];
|
|
36
36
|
/**
|
|
37
37
|
* Default file extensions to index.
|
|
38
|
+
*
|
|
39
|
+
* Note: Each module filters for its own supported extensions.
|
|
40
|
+
* - language/typescript: .ts, .tsx, .js, .jsx, .mjs, .cjs, .mts, .cts
|
|
41
|
+
* - data/json: .json
|
|
42
|
+
* - docs/markdown: .md
|
|
43
|
+
* - core: all remaining extensions
|
|
38
44
|
*/
|
|
39
45
|
export declare const DEFAULT_EXTENSIONS: string[];
|
|
40
46
|
/**
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
* This allows the domain to remain independent of the actual embedding implementation
|
|
6
6
|
* (e.g., Transformers.js, OpenAI API, local models).
|
|
7
7
|
*/
|
|
8
|
+
import type { Logger } from "./logger";
|
|
8
9
|
/**
|
|
9
10
|
* Available embedding model names
|
|
10
11
|
*/
|
|
@@ -15,8 +16,10 @@ export type EmbeddingModelName = "all-MiniLM-L6-v2" | "all-MiniLM-L12-v2" | "bge
|
|
|
15
16
|
export interface EmbeddingConfig {
|
|
16
17
|
/** Model name to use */
|
|
17
18
|
model: EmbeddingModelName;
|
|
18
|
-
/** Whether to show progress during model loading */
|
|
19
|
+
/** Whether to show progress during model loading (deprecated, use logger instead) */
|
|
19
20
|
showProgress?: boolean;
|
|
21
|
+
/** Logger for reporting download progress */
|
|
22
|
+
logger?: Logger;
|
|
20
23
|
}
|
|
21
24
|
/**
|
|
22
25
|
* Abstract embedding provider interface.
|
|
@@ -5,5 +5,6 @@
|
|
|
5
5
|
* These are implemented by infrastructure adapters.
|
|
6
6
|
*/
|
|
7
7
|
export type { FileSystem, FileStats } from "./filesystem";
|
|
8
|
-
export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName } from "./embedding";
|
|
8
|
+
export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, } from "./embedding";
|
|
9
9
|
export type { IndexStorage } from "./storage";
|
|
10
|
+
export type { Logger, ProgressInfo, LoggerFactory } from "./logger";
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Logger Port
|
|
3
|
+
*
|
|
4
|
+
* Abstract interface for logging progress and messages.
|
|
5
|
+
* This allows the domain and application layers to remain independent
|
|
6
|
+
* of the actual logging implementation (console, file, etc.).
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Progress information for long-running operations
|
|
10
|
+
*/
|
|
11
|
+
export interface ProgressInfo {
|
|
12
|
+
/** Current item being processed */
|
|
13
|
+
current: number;
|
|
14
|
+
/** Total number of items */
|
|
15
|
+
total: number;
|
|
16
|
+
/** Optional descriptive message */
|
|
17
|
+
message?: string;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Abstract logger interface.
|
|
21
|
+
*
|
|
22
|
+
* Implementations might:
|
|
23
|
+
* - Log to console (ConsoleLogger)
|
|
24
|
+
* - Log to console with inline replacement for progress (InlineProgressLogger)
|
|
25
|
+
* - Be silent (SilentLogger)
|
|
26
|
+
* - Log to a file or external service
|
|
27
|
+
*/
|
|
28
|
+
export interface Logger {
|
|
29
|
+
/**
|
|
30
|
+
* Log an info message (general progress updates)
|
|
31
|
+
*/
|
|
32
|
+
info(message: string): void;
|
|
33
|
+
/**
|
|
34
|
+
* Log a warning message
|
|
35
|
+
*/
|
|
36
|
+
warn(message: string): void;
|
|
37
|
+
/**
|
|
38
|
+
* Log an error message
|
|
39
|
+
*/
|
|
40
|
+
error(message: string): void;
|
|
41
|
+
/**
|
|
42
|
+
* Log a debug message (only shown in verbose mode)
|
|
43
|
+
*/
|
|
44
|
+
debug(message: string): void;
|
|
45
|
+
/**
|
|
46
|
+
* Log a progress update that can replace the current line.
|
|
47
|
+
* Used for download progress, file processing counters, etc.
|
|
48
|
+
*
|
|
49
|
+
* In terminal environments, this may overwrite the current line.
|
|
50
|
+
* In non-terminal environments (SDK), this may just log normally.
|
|
51
|
+
*
|
|
52
|
+
* @param message - Progress message to display
|
|
53
|
+
*/
|
|
54
|
+
progress(message: string): void;
|
|
55
|
+
/**
|
|
56
|
+
* Clear any inline progress output.
|
|
57
|
+
* Call this before switching from progress() to info/warn/error.
|
|
58
|
+
*/
|
|
59
|
+
clearProgress(): void;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Factory function type for creating loggers
|
|
63
|
+
*/
|
|
64
|
+
export type LoggerFactory = (options?: {
|
|
65
|
+
verbose?: boolean;
|
|
66
|
+
}) => Logger;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunking Service
|
|
3
|
+
*
|
|
4
|
+
* Provides generic text chunking strategies for indexing.
|
|
5
|
+
* These are language-agnostic and work with any text content.
|
|
6
|
+
*/
|
|
7
|
+
import type { ChunkType } from "../entities";
|
|
8
|
+
/**
|
|
9
|
+
* Represents a chunk of text with location information.
|
|
10
|
+
*/
|
|
11
|
+
export interface TextChunk {
|
|
12
|
+
/** The text content */
|
|
13
|
+
content: string;
|
|
14
|
+
/** 1-based start line number */
|
|
15
|
+
startLine: number;
|
|
16
|
+
/** 1-based end line number */
|
|
17
|
+
endLine: number;
|
|
18
|
+
/** The type of chunk */
|
|
19
|
+
type: ChunkType;
|
|
20
|
+
/** Optional name for the chunk */
|
|
21
|
+
name?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Options for line-based chunking.
|
|
25
|
+
*/
|
|
26
|
+
export interface ChunkingOptions {
|
|
27
|
+
/** Lines per chunk (default: 30) */
|
|
28
|
+
chunkSize?: number;
|
|
29
|
+
/** Overlap between chunks (default: 5) */
|
|
30
|
+
overlap?: number;
|
|
31
|
+
/** Minimum lines to create multiple chunks (default: chunkSize) */
|
|
32
|
+
minLinesForMultipleChunks?: number;
|
|
33
|
+
}
|
|
34
|
+
/** Default lines per chunk */
|
|
35
|
+
export declare const DEFAULT_CHUNK_SIZE = 30;
|
|
36
|
+
/** Default overlap between chunks */
|
|
37
|
+
export declare const DEFAULT_OVERLAP = 5;
|
|
38
|
+
/**
|
|
39
|
+
* Split text into overlapping chunks based on line boundaries.
|
|
40
|
+
*
|
|
41
|
+
* This is a generic chunking strategy that works with any text content.
|
|
42
|
+
* It creates overlapping chunks to ensure context is preserved across
|
|
43
|
+
* chunk boundaries.
|
|
44
|
+
*
|
|
45
|
+
* @param content - The text content to chunk
|
|
46
|
+
* @param options - Chunking options
|
|
47
|
+
* @returns Array of text chunks
|
|
48
|
+
*/
|
|
49
|
+
export declare function createLineBasedChunks(content: string, options?: ChunkingOptions): TextChunk[];
|
|
50
|
+
/**
|
|
51
|
+
* Create a single chunk from entire content.
|
|
52
|
+
* Useful for small files or when chunking isn't needed.
|
|
53
|
+
*
|
|
54
|
+
* @param content - The text content
|
|
55
|
+
* @returns A single file chunk
|
|
56
|
+
*/
|
|
57
|
+
export declare function createSingleChunk(content: string): TextChunk;
|
|
58
|
+
/**
|
|
59
|
+
* Generate a unique chunk ID from filepath and line numbers.
|
|
60
|
+
*
|
|
61
|
+
* @param filepath - The source file path
|
|
62
|
+
* @param startLine - Start line number
|
|
63
|
+
* @param endLine - End line number
|
|
64
|
+
* @returns Unique chunk identifier
|
|
65
|
+
*/
|
|
66
|
+
export declare function generateChunkId(filepath: string, startLine: number, endLine: number): string;
|
|
@@ -7,3 +7,5 @@
|
|
|
7
7
|
export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
|
|
8
8
|
export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
|
|
9
9
|
export { cosineSimilarity, euclideanDistance } from "./similarity";
|
|
10
|
+
export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
|
|
11
|
+
export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query Intent Detection Service
|
|
3
|
+
*
|
|
4
|
+
* Detects whether a search query is looking for implementation code
|
|
5
|
+
* or documentation, and calculates appropriate boosts.
|
|
6
|
+
*/
|
|
7
|
+
/** Implementation-related query terms that boost source code files */
|
|
8
|
+
export declare const IMPLEMENTATION_TERMS: string[];
|
|
9
|
+
/** Documentation-related query terms that boost documentation files */
|
|
10
|
+
export declare const DOCUMENTATION_TERMS: string[];
|
|
11
|
+
/** Source code file extensions */
|
|
12
|
+
export declare const SOURCE_CODE_EXTENSIONS: string[];
|
|
13
|
+
/** Documentation file extensions */
|
|
14
|
+
export declare const DOC_EXTENSIONS: string[];
|
|
15
|
+
/** Data/config file extensions */
|
|
16
|
+
export declare const DATA_EXTENSIONS: string[];
|
|
17
|
+
/** Query intent types */
|
|
18
|
+
export type QueryIntent = "implementation" | "documentation" | "neutral";
|
|
19
|
+
/**
|
|
20
|
+
* Detect query intent based on terms.
|
|
21
|
+
* Returns: 'implementation' | 'documentation' | 'neutral'
|
|
22
|
+
*
|
|
23
|
+
* @param queryTerms - Array of query terms (lowercase)
|
|
24
|
+
* @returns The detected intent
|
|
25
|
+
*/
|
|
26
|
+
export declare function detectQueryIntent(queryTerms: string[]): QueryIntent;
|
|
27
|
+
/**
|
|
28
|
+
* Extract query terms from a search query.
|
|
29
|
+
*
|
|
30
|
+
* @param query - The search query string
|
|
31
|
+
* @returns Array of lowercase terms (length > 2)
|
|
32
|
+
*/
|
|
33
|
+
export declare function extractQueryTerms(query: string): string[];
|
|
34
|
+
/**
|
|
35
|
+
* Determine if a file is a source code file based on extension.
|
|
36
|
+
*/
|
|
37
|
+
export declare function isSourceCodeFile(filepath: string): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* Determine if a file is a documentation file based on extension.
|
|
40
|
+
*/
|
|
41
|
+
export declare function isDocFile(filepath: string): boolean;
|
|
42
|
+
/**
|
|
43
|
+
* Determine if a file is a data/config file based on extension.
|
|
44
|
+
*/
|
|
45
|
+
export declare function isDataFile(filepath: string): boolean;
|
|
46
|
+
/**
|
|
47
|
+
* Calculate boost based on file type and query context.
|
|
48
|
+
* Bidirectional: boosts code for implementation queries, docs for documentation queries.
|
|
49
|
+
* Only applies when query intent is clear.
|
|
50
|
+
*
|
|
51
|
+
* @param filepath - The file path
|
|
52
|
+
* @param queryTerms - Array of query terms (lowercase)
|
|
53
|
+
* @returns Boost value (0 to ~0.1)
|
|
54
|
+
*/
|
|
55
|
+
export declare function calculateFileTypeBoost(filepath: string, queryTerms: string[]): number;
|
package/dist/index.d.ts
CHANGED
|
@@ -7,25 +7,42 @@
|
|
|
7
7
|
* ```ts
|
|
8
8
|
* import raggrep from 'raggrep';
|
|
9
9
|
*
|
|
10
|
-
* // Index a directory
|
|
10
|
+
* // Index a directory (automatically cleans up deleted files)
|
|
11
11
|
* await raggrep.index('/path/to/project');
|
|
12
12
|
*
|
|
13
13
|
* // Search the index
|
|
14
14
|
* const results = await raggrep.search('/path/to/project', 'user authentication');
|
|
15
15
|
*
|
|
16
|
-
* // Clean up stale index entries
|
|
17
|
-
* await raggrep.cleanup('/path/to/project');
|
|
16
|
+
* // Reset (clear) the index completely
|
|
17
|
+
* await raggrep.reset('/path/to/project');
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @example With custom logger
|
|
21
|
+
* ```ts
|
|
22
|
+
* import raggrep, { createLogger, createInlineLogger } from 'raggrep';
|
|
23
|
+
*
|
|
24
|
+
* // Create a logger (defaults to console)
|
|
25
|
+
* const logger = createLogger({ verbose: true });
|
|
26
|
+
*
|
|
27
|
+
* // Or use inline logger for CLI-style progress
|
|
28
|
+
* const inlineLogger = createInlineLogger({ verbose: false });
|
|
29
|
+
*
|
|
30
|
+
* await raggrep.index('/path/to/project', { logger: inlineLogger });
|
|
18
31
|
* ```
|
|
19
32
|
*/
|
|
20
|
-
import type { IndexResult, IndexOptions, CleanupResult } from "./app/indexer";
|
|
33
|
+
import type { IndexResult, IndexOptions, CleanupResult, CleanupOptions, ResetResult } from "./app/indexer";
|
|
21
34
|
import { formatSearchResults } from "./app/search";
|
|
22
35
|
import type { SearchOptions, SearchResult } from "./types";
|
|
23
|
-
|
|
36
|
+
import { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger } from "./infrastructure/logger";
|
|
37
|
+
export type { IndexResult, IndexOptions, CleanupResult, CleanupOptions, ResetResult, } from "./app/indexer";
|
|
24
38
|
export type { SearchOptions, SearchResult, Chunk, FileIndex } from "./types";
|
|
39
|
+
export type { Logger, LoggerFactory } from "./domain/ports";
|
|
40
|
+
export { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger, };
|
|
25
41
|
/**
|
|
26
42
|
* Index a directory for semantic search.
|
|
27
43
|
*
|
|
28
44
|
* Creates a `.raggrep/` folder with the index data.
|
|
45
|
+
* Automatically cleans up stale entries for deleted files.
|
|
29
46
|
*
|
|
30
47
|
* @param directory - Path to the directory to index
|
|
31
48
|
* @param options - Index options
|
|
@@ -69,6 +86,9 @@ export declare function search(directory: string, query: string, options?: Searc
|
|
|
69
86
|
/**
|
|
70
87
|
* Clean up stale index entries for files that no longer exist.
|
|
71
88
|
*
|
|
89
|
+
* Note: Cleanup is now automatic during indexing. This function is provided
|
|
90
|
+
* for explicit cleanup without re-indexing.
|
|
91
|
+
*
|
|
72
92
|
* @param directory - Path to the indexed directory
|
|
73
93
|
* @param options - Cleanup options
|
|
74
94
|
* @returns Array of cleanup results per module
|
|
@@ -79,9 +99,25 @@ export declare function search(directory: string, query: string, options?: Searc
|
|
|
79
99
|
* console.log(`Removed ${results[0].removed} stale entries`);
|
|
80
100
|
* ```
|
|
81
101
|
*/
|
|
82
|
-
export declare function cleanup(directory: string, options?:
|
|
83
|
-
|
|
84
|
-
|
|
102
|
+
export declare function cleanup(directory: string, options?: CleanupOptions): Promise<CleanupResult[]>;
|
|
103
|
+
/**
|
|
104
|
+
* Reset (completely clear) the index for a directory.
|
|
105
|
+
*
|
|
106
|
+
* @param directory - Path to the indexed directory
|
|
107
|
+
* @returns Result with success status and removed index path
|
|
108
|
+
* @throws Error if no index exists for the directory
|
|
109
|
+
*
|
|
110
|
+
* @example
|
|
111
|
+
* ```ts
|
|
112
|
+
* try {
|
|
113
|
+
* const result = await raggrep.reset('./my-project');
|
|
114
|
+
* console.log(`Cleared index at: ${result.indexDir}`);
|
|
115
|
+
* } catch (error) {
|
|
116
|
+
* console.error('No index found');
|
|
117
|
+
* }
|
|
118
|
+
* ```
|
|
119
|
+
*/
|
|
120
|
+
export declare function reset(directory: string): Promise<ResetResult>;
|
|
85
121
|
/**
|
|
86
122
|
* Format search results for display.
|
|
87
123
|
*
|
|
@@ -93,6 +129,7 @@ declare const raggrep: {
|
|
|
93
129
|
index: typeof index;
|
|
94
130
|
search: typeof search;
|
|
95
131
|
cleanup: typeof cleanup;
|
|
132
|
+
reset: typeof reset;
|
|
96
133
|
formatSearchResults: typeof formatSearchResults;
|
|
97
134
|
};
|
|
98
135
|
export default raggrep;
|