raggrep 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,6 +35,12 @@ export interface Config {
35
35
  export declare const DEFAULT_IGNORE_PATHS: string[];
36
36
  /**
37
37
  * Default file extensions to index.
38
+ *
39
+ * Note: Each module filters for its own supported extensions.
40
+ * - language/typescript: .ts, .tsx, .js, .jsx, .mjs, .cjs, .mts, .cts
41
+ * - data/json: .json
42
+ * - docs/markdown: .md
43
+ * - core: all remaining extensions
38
44
  */
39
45
  export declare const DEFAULT_EXTENSIONS: string[];
40
46
  /**
@@ -5,6 +5,7 @@
5
5
  * This allows the domain to remain independent of the actual embedding implementation
6
6
  * (e.g., Transformers.js, OpenAI API, local models).
7
7
  */
8
+ import type { Logger } from "./logger";
8
9
  /**
9
10
  * Available embedding model names
10
11
  */
@@ -15,8 +16,10 @@ export type EmbeddingModelName = "all-MiniLM-L6-v2" | "all-MiniLM-L12-v2" | "bge
15
16
  export interface EmbeddingConfig {
16
17
  /** Model name to use */
17
18
  model: EmbeddingModelName;
18
- /** Whether to show progress during model loading */
19
+ /** Whether to show progress during model loading. @deprecated Use `logger` instead. */
19
20
  showProgress?: boolean;
21
+ /** Logger for reporting download progress */
22
+ logger?: Logger;
20
23
  }
21
24
  /**
22
25
  * Abstract embedding provider interface.
@@ -5,5 +5,6 @@
5
5
  * These are implemented by infrastructure adapters.
6
6
  */
7
7
  export type { FileSystem, FileStats } from "./filesystem";
8
- export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName } from "./embedding";
8
+ export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, } from "./embedding";
9
9
  export type { IndexStorage } from "./storage";
10
+ export type { Logger, ProgressInfo, LoggerFactory } from "./logger";
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Logger Port
3
+ *
4
+ * Abstract interface for logging progress and messages.
5
+ * This allows the domain and application layers to remain independent
6
+ * of the actual logging implementation (console, file, etc.).
7
+ */
8
+ /**
9
+ * Progress information for long-running operations
10
+ */
11
+ export interface ProgressInfo {
12
+ /** Current item being processed */
13
+ current: number;
14
+ /** Total number of items */
15
+ total: number;
16
+ /** Optional descriptive message */
17
+ message?: string;
18
+ }
19
+ /**
20
+ * Abstract logger interface.
21
+ *
22
+ * Implementations might:
23
+ * - Log to console (ConsoleLogger)
24
+ * - Log to console with inline replacement for progress (InlineProgressLogger)
25
+ * - Be silent (SilentLogger)
26
+ * - Log to a file or external service
27
+ */
28
+ export interface Logger {
29
+ /**
30
+ * Log an info message (general progress updates)
31
+ */
32
+ info(message: string): void;
33
+ /**
34
+ * Log a warning message
35
+ */
36
+ warn(message: string): void;
37
+ /**
38
+ * Log an error message
39
+ */
40
+ error(message: string): void;
41
+ /**
42
+ * Log a debug message (only shown in verbose mode)
43
+ */
44
+ debug(message: string): void;
45
+ /**
46
+ * Log a progress update that can replace the current line.
47
+ * Used for download progress, file processing counters, etc.
48
+ *
49
+ * In terminal environments, this may overwrite the current line.
50
+ * In non-terminal environments (SDK), this may just log normally.
51
+ *
52
+ * @param message - Progress message to display
53
+ */
54
+ progress(message: string): void;
55
+ /**
56
+ * Clear any inline progress output.
57
+ * Call this before switching from progress() to info/warn/error.
58
+ */
59
+ clearProgress(): void;
60
+ }
61
+ /**
62
+ * Factory function type for creating loggers
63
+ */
64
+ export type LoggerFactory = (options?: {
65
+ verbose?: boolean;
66
+ }) => Logger;
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Text Chunking Service
3
+ *
4
+ * Provides generic text chunking strategies for indexing.
5
+ * These are language-agnostic and work with any text content.
6
+ */
7
+ import type { ChunkType } from "../entities";
8
+ /**
9
+ * Represents a chunk of text with location information.
10
+ */
11
+ export interface TextChunk {
12
+ /** The text content */
13
+ content: string;
14
+ /** 1-based start line number */
15
+ startLine: number;
16
+ /** 1-based end line number */
17
+ endLine: number;
18
+ /** The type of chunk */
19
+ type: ChunkType;
20
+ /** Optional name for the chunk */
21
+ name?: string;
22
+ }
23
+ /**
24
+ * Options for line-based chunking.
25
+ */
26
+ export interface ChunkingOptions {
27
+ /** Lines per chunk (default: 30) */
28
+ chunkSize?: number;
29
+ /** Overlap between consecutive chunks, in lines (default: 5) */
30
+ overlap?: number;
31
+ /** Minimum lines to create multiple chunks (default: chunkSize) */
32
+ minLinesForMultipleChunks?: number;
33
+ }
34
+ /** Default lines per chunk */
35
+ export declare const DEFAULT_CHUNK_SIZE = 30;
36
+ /** Default overlap between chunks */
37
+ export declare const DEFAULT_OVERLAP = 5;
38
+ /**
39
+ * Split text into overlapping chunks based on line boundaries.
40
+ *
41
+ * This is a generic chunking strategy that works with any text content.
42
+ * It creates overlapping chunks to ensure context is preserved across
43
+ * chunk boundaries.
44
+ *
45
+ * @param content - The text content to chunk
46
+ * @param options - Chunking options
47
+ * @returns Array of text chunks
48
+ */
49
+ export declare function createLineBasedChunks(content: string, options?: ChunkingOptions): TextChunk[];
50
+ /**
51
+ * Create a single chunk from entire content.
52
+ * Useful for small files or when chunking isn't needed.
53
+ *
54
+ * @param content - The text content
55
+ * @returns A single file chunk
56
+ */
57
+ export declare function createSingleChunk(content: string): TextChunk;
58
+ /**
59
+ * Generate a unique chunk ID from filepath and line numbers.
60
+ *
61
+ * @param filepath - The source file path
62
+ * @param startLine - Start line number
63
+ * @param endLine - End line number
64
+ * @returns Unique chunk identifier
65
+ */
66
+ export declare function generateChunkId(filepath: string, startLine: number, endLine: number): string;
@@ -7,3 +7,5 @@
7
7
  export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
8
8
  export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
9
9
  export { cosineSimilarity, euclideanDistance } from "./similarity";
10
+ export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
11
+ export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Query Intent Detection Service
3
+ *
4
+ * Detects whether a search query is looking for implementation code
5
+ * or documentation, and calculates appropriate boosts.
6
+ */
7
+ /** Implementation-related query terms that boost source code files */
8
+ export declare const IMPLEMENTATION_TERMS: string[];
9
+ /** Documentation-related query terms that boost documentation files */
10
+ export declare const DOCUMENTATION_TERMS: string[];
11
+ /** Source code file extensions */
12
+ export declare const SOURCE_CODE_EXTENSIONS: string[];
13
+ /** Documentation file extensions */
14
+ export declare const DOC_EXTENSIONS: string[];
15
+ /** Data/config file extensions */
16
+ export declare const DATA_EXTENSIONS: string[];
17
+ /** Query intent types */
18
+ export type QueryIntent = "implementation" | "documentation" | "neutral";
19
+ /**
20
+ * Detect query intent based on terms.
21
+ * Returns: 'implementation' | 'documentation' | 'neutral'
22
+ *
23
+ * @param queryTerms - Array of query terms (lowercase)
24
+ * @returns The detected intent
25
+ */
26
+ export declare function detectQueryIntent(queryTerms: string[]): QueryIntent;
27
+ /**
28
+ * Extract query terms from a search query.
29
+ *
30
+ * @param query - The search query string
31
+ * @returns Array of lowercase terms, each longer than 2 characters
32
+ */
33
+ export declare function extractQueryTerms(query: string): string[];
34
+ /**
35
+ * Determine if a file is a source code file based on extension.
36
+ */
37
+ export declare function isSourceCodeFile(filepath: string): boolean;
38
+ /**
39
+ * Determine if a file is a documentation file based on extension.
40
+ */
41
+ export declare function isDocFile(filepath: string): boolean;
42
+ /**
43
+ * Determine if a file is a data/config file based on extension.
44
+ */
45
+ export declare function isDataFile(filepath: string): boolean;
46
+ /**
47
+ * Calculate boost based on file type and query context.
48
+ * Bidirectional: boosts code for implementation queries, docs for documentation queries.
49
+ * Only applies when query intent is clear.
50
+ *
51
+ * @param filepath - The file path
52
+ * @param queryTerms - Array of query terms (lowercase)
53
+ * @returns Boost value (0 to ~0.1)
54
+ */
55
+ export declare function calculateFileTypeBoost(filepath: string, queryTerms: string[]): number;
package/dist/index.d.ts CHANGED
@@ -7,25 +7,42 @@
7
7
  * ```ts
8
8
  * import raggrep from 'raggrep';
9
9
  *
10
- * // Index a directory
10
+ * // Index a directory (automatically cleans up stale entries for deleted files)
11
11
  * await raggrep.index('/path/to/project');
12
12
  *
13
13
  * // Search the index
14
14
  * const results = await raggrep.search('/path/to/project', 'user authentication');
15
15
  *
16
- * // Clean up stale entries
17
- * await raggrep.cleanup('/path/to/project');
16
+ * // Reset (clear) the index completely
17
+ * await raggrep.reset('/path/to/project');
18
+ * ```
19
+ *
20
+ * @example With custom logger
21
+ * ```ts
22
+ * import raggrep, { createLogger, createInlineLogger } from 'raggrep';
23
+ *
24
+ * // Create a logger (defaults to console)
25
+ * const logger = createLogger({ verbose: true });
26
+ *
27
+ * // Or use inline logger for CLI-style progress
28
+ * const inlineLogger = createInlineLogger({ verbose: false });
29
+ *
30
+ * await raggrep.index('/path/to/project', { logger: inlineLogger });
18
31
  * ```
19
32
  */
20
- import type { IndexResult, IndexOptions, CleanupResult } from "./app/indexer";
33
+ import type { IndexResult, IndexOptions, CleanupResult, CleanupOptions, ResetResult } from "./app/indexer";
21
34
  import { formatSearchResults } from "./app/search";
22
35
  import type { SearchOptions, SearchResult } from "./types";
23
- export type { IndexResult, IndexOptions, CleanupResult } from "./app/indexer";
36
+ import { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger } from "./infrastructure/logger";
37
+ export type { IndexResult, IndexOptions, CleanupResult, CleanupOptions, ResetResult, } from "./app/indexer";
24
38
  export type { SearchOptions, SearchResult, Chunk, FileIndex } from "./types";
39
+ export type { Logger, LoggerFactory } from "./domain/ports";
40
+ export { ConsoleLogger, InlineProgressLogger, SilentLogger, createLogger, createInlineLogger, createSilentLogger, };
25
41
  /**
26
42
  * Index a directory for semantic search.
27
43
  *
28
44
  * Creates a `.raggrep/` folder with the index data.
45
+ * Automatically cleans up stale entries for deleted files.
29
46
  *
30
47
  * @param directory - Path to the directory to index
31
48
  * @param options - Index options
@@ -69,6 +86,9 @@ export declare function search(directory: string, query: string, options?: Searc
69
86
  /**
70
87
  * Clean up stale index entries for files that no longer exist.
71
88
  *
89
+ * Note: Cleanup is now automatic during indexing. This function is provided
90
+ * for explicit cleanup without re-indexing.
91
+ *
72
92
  * @param directory - Path to the indexed directory
73
93
  * @param options - Cleanup options
74
94
  * @returns Array of cleanup results per module
@@ -79,9 +99,25 @@ export declare function search(directory: string, query: string, options?: Searc
79
99
  * console.log(`Removed ${results[0].removed} stale entries`);
80
100
  * ```
81
101
  */
82
- export declare function cleanup(directory: string, options?: {
83
- verbose?: boolean;
84
- }): Promise<CleanupResult[]>;
102
+ export declare function cleanup(directory: string, options?: CleanupOptions): Promise<CleanupResult[]>;
103
+ /**
104
+ * Reset (completely clear) the index for a directory.
105
+ *
106
+ * @param directory - Path to the indexed directory
107
+ * @returns Result with success status and removed index path
108
+ * @throws Error if no index exists for the directory
109
+ *
110
+ * @example
111
+ * ```ts
112
+ * try {
113
+ * const result = await raggrep.reset('./my-project');
114
+ * console.log(`Cleared index at: ${result.indexDir}`);
115
+ * } catch (error) {
116
+ * console.error('No index found');
117
+ * }
118
+ * ```
119
+ */
120
+ export declare function reset(directory: string): Promise<ResetResult>;
85
121
  /**
86
122
  * Format search results for display.
87
123
  *
@@ -93,6 +129,7 @@ declare const raggrep: {
93
129
  index: typeof index;
94
130
  search: typeof search;
95
131
  cleanup: typeof cleanup;
132
+ reset: typeof reset;
96
133
  formatSearchResults: typeof formatSearchResults;
97
134
  };
98
135
  export default raggrep;