rag-lite-ts 1.0.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/README.md +605 -93
  2. package/dist/cli/indexer.js +192 -4
  3. package/dist/cli/search.js +50 -11
  4. package/dist/cli.js +183 -26
  5. package/dist/core/abstract-embedder.d.ts +125 -0
  6. package/dist/core/abstract-embedder.js +264 -0
  7. package/dist/core/actionable-error-messages.d.ts +60 -0
  8. package/dist/core/actionable-error-messages.js +397 -0
  9. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  10. package/dist/core/batch-processing-optimizer.js +541 -0
  11. package/dist/core/binary-index-format.d.ts +52 -0
  12. package/dist/core/binary-index-format.js +122 -0
  13. package/dist/core/chunker.d.ts +2 -0
  14. package/dist/core/cli-database-utils.d.ts +53 -0
  15. package/dist/core/cli-database-utils.js +239 -0
  16. package/dist/core/config.js +10 -3
  17. package/dist/core/content-errors.d.ts +111 -0
  18. package/dist/core/content-errors.js +362 -0
  19. package/dist/core/content-manager.d.ts +343 -0
  20. package/dist/core/content-manager.js +1504 -0
  21. package/dist/core/content-performance-optimizer.d.ts +150 -0
  22. package/dist/core/content-performance-optimizer.js +516 -0
  23. package/dist/core/content-resolver.d.ts +104 -0
  24. package/dist/core/content-resolver.js +285 -0
  25. package/dist/core/cross-modal-search.d.ts +164 -0
  26. package/dist/core/cross-modal-search.js +342 -0
  27. package/dist/core/database-connection-manager.d.ts +109 -0
  28. package/dist/core/database-connection-manager.js +304 -0
  29. package/dist/core/db.d.ts +141 -2
  30. package/dist/core/db.js +631 -89
  31. package/dist/core/embedder-factory.d.ts +176 -0
  32. package/dist/core/embedder-factory.js +338 -0
  33. package/dist/core/index.d.ts +3 -1
  34. package/dist/core/index.js +4 -1
  35. package/dist/core/ingestion.d.ts +85 -15
  36. package/dist/core/ingestion.js +510 -45
  37. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  38. package/dist/core/lazy-dependency-loader.js +453 -0
  39. package/dist/core/mode-detection-service.d.ts +150 -0
  40. package/dist/core/mode-detection-service.js +565 -0
  41. package/dist/core/mode-model-validator.d.ts +92 -0
  42. package/dist/core/mode-model-validator.js +203 -0
  43. package/dist/core/model-registry.d.ts +120 -0
  44. package/dist/core/model-registry.js +415 -0
  45. package/dist/core/model-validator.d.ts +217 -0
  46. package/dist/core/model-validator.js +782 -0
  47. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  48. package/dist/core/polymorphic-search-factory.js +344 -0
  49. package/dist/core/raglite-paths.d.ts +121 -0
  50. package/dist/core/raglite-paths.js +145 -0
  51. package/dist/core/reranking-config.d.ts +42 -0
  52. package/dist/core/reranking-config.js +156 -0
  53. package/dist/core/reranking-factory.d.ts +92 -0
  54. package/dist/core/reranking-factory.js +591 -0
  55. package/dist/core/reranking-strategies.d.ts +325 -0
  56. package/dist/core/reranking-strategies.js +720 -0
  57. package/dist/core/resource-cleanup.d.ts +163 -0
  58. package/dist/core/resource-cleanup.js +371 -0
  59. package/dist/core/resource-manager.d.ts +212 -0
  60. package/dist/core/resource-manager.js +564 -0
  61. package/dist/core/search.d.ts +28 -1
  62. package/dist/core/search.js +83 -5
  63. package/dist/core/streaming-operations.d.ts +145 -0
  64. package/dist/core/streaming-operations.js +409 -0
  65. package/dist/core/types.d.ts +3 -0
  66. package/dist/core/universal-embedder.d.ts +177 -0
  67. package/dist/core/universal-embedder.js +139 -0
  68. package/dist/core/validation-messages.d.ts +99 -0
  69. package/dist/core/validation-messages.js +334 -0
  70. package/dist/core/vector-index.d.ts +1 -1
  71. package/dist/core/vector-index.js +37 -39
  72. package/dist/factories/index.d.ts +3 -1
  73. package/dist/factories/index.js +2 -0
  74. package/dist/factories/polymorphic-factory.d.ts +50 -0
  75. package/dist/factories/polymorphic-factory.js +159 -0
  76. package/dist/factories/text-factory.d.ts +128 -34
  77. package/dist/factories/text-factory.js +346 -97
  78. package/dist/file-processor.d.ts +88 -2
  79. package/dist/file-processor.js +720 -17
  80. package/dist/index.d.ts +32 -0
  81. package/dist/index.js +29 -0
  82. package/dist/ingestion.d.ts +16 -0
  83. package/dist/ingestion.js +21 -0
  84. package/dist/mcp-server.d.ts +35 -3
  85. package/dist/mcp-server.js +1107 -31
  86. package/dist/multimodal/clip-embedder.d.ts +327 -0
  87. package/dist/multimodal/clip-embedder.js +992 -0
  88. package/dist/multimodal/index.d.ts +6 -0
  89. package/dist/multimodal/index.js +6 -0
  90. package/dist/run-error-recovery-tests.d.ts +7 -0
  91. package/dist/run-error-recovery-tests.js +101 -0
  92. package/dist/search.d.ts +60 -9
  93. package/dist/search.js +82 -11
  94. package/dist/test-utils.d.ts +8 -26
  95. package/dist/text/chunker.d.ts +1 -0
  96. package/dist/text/embedder.js +15 -8
  97. package/dist/text/index.d.ts +1 -0
  98. package/dist/text/index.js +1 -0
  99. package/dist/text/reranker.d.ts +1 -2
  100. package/dist/text/reranker.js +17 -47
  101. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  102. package/dist/text/sentence-transformer-embedder.js +340 -0
  103. package/dist/types.d.ts +39 -0
  104. package/dist/utils/vector-math.d.ts +31 -0
  105. package/dist/utils/vector-math.js +70 -0
  106. package/package.json +27 -6
  107. package/dist/api-errors.d.ts.map +0 -1
  108. package/dist/api-errors.js.map +0 -1
  109. package/dist/cli/indexer.d.ts.map +0 -1
  110. package/dist/cli/indexer.js.map +0 -1
  111. package/dist/cli/search.d.ts.map +0 -1
  112. package/dist/cli/search.js.map +0 -1
  113. package/dist/cli.d.ts.map +0 -1
  114. package/dist/cli.js.map +0 -1
  115. package/dist/config.d.ts.map +0 -1
  116. package/dist/config.js.map +0 -1
  117. package/dist/core/adapters.d.ts.map +0 -1
  118. package/dist/core/adapters.js.map +0 -1
  119. package/dist/core/chunker.d.ts.map +0 -1
  120. package/dist/core/chunker.js.map +0 -1
  121. package/dist/core/config.d.ts.map +0 -1
  122. package/dist/core/config.js.map +0 -1
  123. package/dist/core/db.d.ts.map +0 -1
  124. package/dist/core/db.js.map +0 -1
  125. package/dist/core/error-handler.d.ts.map +0 -1
  126. package/dist/core/error-handler.js.map +0 -1
  127. package/dist/core/index.d.ts.map +0 -1
  128. package/dist/core/index.js.map +0 -1
  129. package/dist/core/ingestion.d.ts.map +0 -1
  130. package/dist/core/ingestion.js.map +0 -1
  131. package/dist/core/interfaces.d.ts.map +0 -1
  132. package/dist/core/interfaces.js.map +0 -1
  133. package/dist/core/path-manager.d.ts.map +0 -1
  134. package/dist/core/path-manager.js.map +0 -1
  135. package/dist/core/search-example.d.ts +0 -25
  136. package/dist/core/search-example.d.ts.map +0 -1
  137. package/dist/core/search-example.js +0 -138
  138. package/dist/core/search-example.js.map +0 -1
  139. package/dist/core/search-pipeline-example.d.ts +0 -21
  140. package/dist/core/search-pipeline-example.d.ts.map +0 -1
  141. package/dist/core/search-pipeline-example.js +0 -188
  142. package/dist/core/search-pipeline-example.js.map +0 -1
  143. package/dist/core/search-pipeline.d.ts.map +0 -1
  144. package/dist/core/search-pipeline.js.map +0 -1
  145. package/dist/core/search.d.ts.map +0 -1
  146. package/dist/core/search.js.map +0 -1
  147. package/dist/core/types.d.ts.map +0 -1
  148. package/dist/core/types.js.map +0 -1
  149. package/dist/core/vector-index.d.ts.map +0 -1
  150. package/dist/core/vector-index.js.map +0 -1
  151. package/dist/dom-polyfills.d.ts.map +0 -1
  152. package/dist/dom-polyfills.js.map +0 -1
  153. package/dist/examples/clean-api-examples.d.ts +0 -44
  154. package/dist/examples/clean-api-examples.d.ts.map +0 -1
  155. package/dist/examples/clean-api-examples.js +0 -206
  156. package/dist/examples/clean-api-examples.js.map +0 -1
  157. package/dist/factories/index.d.ts.map +0 -1
  158. package/dist/factories/index.js.map +0 -1
  159. package/dist/factories/text-factory.d.ts.map +0 -1
  160. package/dist/factories/text-factory.js.map +0 -1
  161. package/dist/file-processor.d.ts.map +0 -1
  162. package/dist/file-processor.js.map +0 -1
  163. package/dist/index-manager.d.ts.map +0 -1
  164. package/dist/index-manager.js.map +0 -1
  165. package/dist/index.d.ts.map +0 -1
  166. package/dist/index.js.map +0 -1
  167. package/dist/indexer.d.ts.map +0 -1
  168. package/dist/indexer.js.map +0 -1
  169. package/dist/ingestion.d.ts.map +0 -1
  170. package/dist/ingestion.js.map +0 -1
  171. package/dist/mcp-server.d.ts.map +0 -1
  172. package/dist/mcp-server.js.map +0 -1
  173. package/dist/preprocess.d.ts.map +0 -1
  174. package/dist/preprocess.js.map +0 -1
  175. package/dist/preprocessors/index.d.ts.map +0 -1
  176. package/dist/preprocessors/index.js.map +0 -1
  177. package/dist/preprocessors/mdx.d.ts.map +0 -1
  178. package/dist/preprocessors/mdx.js.map +0 -1
  179. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  180. package/dist/preprocessors/mermaid.js.map +0 -1
  181. package/dist/preprocessors/registry.d.ts.map +0 -1
  182. package/dist/preprocessors/registry.js.map +0 -1
  183. package/dist/search-standalone.d.ts.map +0 -1
  184. package/dist/search-standalone.js.map +0 -1
  185. package/dist/search.d.ts.map +0 -1
  186. package/dist/search.js.map +0 -1
  187. package/dist/test-utils.d.ts.map +0 -1
  188. package/dist/test-utils.js.map +0 -1
  189. package/dist/text/chunker.d.ts.map +0 -1
  190. package/dist/text/chunker.js.map +0 -1
  191. package/dist/text/embedder.d.ts.map +0 -1
  192. package/dist/text/embedder.js.map +0 -1
  193. package/dist/text/index.d.ts.map +0 -1
  194. package/dist/text/index.js.map +0 -1
  195. package/dist/text/preprocessors/index.d.ts.map +0 -1
  196. package/dist/text/preprocessors/index.js.map +0 -1
  197. package/dist/text/preprocessors/mdx.d.ts.map +0 -1
  198. package/dist/text/preprocessors/mdx.js.map +0 -1
  199. package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
  200. package/dist/text/preprocessors/mermaid.js.map +0 -1
  201. package/dist/text/preprocessors/registry.d.ts.map +0 -1
  202. package/dist/text/preprocessors/registry.js.map +0 -1
  203. package/dist/text/reranker.d.ts.map +0 -1
  204. package/dist/text/reranker.js.map +0 -1
  205. package/dist/text/tokenizer.d.ts.map +0 -1
  206. package/dist/text/tokenizer.js.map +0 -1
  207. package/dist/types.d.ts.map +0 -1
  208. package/dist/types.js.map +0 -1
@@ -0,0 +1,122 @@
1
+ /**
2
+ * Binary Index Format Module
3
+ *
4
+ * Provides efficient binary serialization for HNSW vector indices.
5
+ *
6
+ * Format Specification:
7
+ * - Header: 24 bytes (6 × uint32)
8
+ * - Vectors: N × (4 + D × 4) bytes
9
+ * - Little-endian encoding for cross-platform compatibility
10
+ * - 4-byte alignment for Float32Array zero-copy views
11
+ *
12
+ * Performance:
13
+ * - 3.66x smaller than JSON format
14
+ * - 3.5x faster loading
15
+ * - Vectors decoded straight into Float32Arrays (one copy per vector on load, no JSON parsing)
16
+ */
17
+ import { readFileSync, writeFileSync } from 'fs';
18
+ export class BinaryIndexFormat {
19
+ /**
20
+ * Save index data to binary format
21
+ *
22
+ * File structure:
23
+ * - Header (24 bytes): dimensions, maxElements, M, efConstruction, seed, currentSize
24
+ * - Vectors: For each vector: id (4 bytes) + vector data (dimensions × 4 bytes)
25
+ *
26
+ * @param indexPath Path to save the binary index file
27
+ * @param data Index data to serialize
28
+ */
29
+ static async save(indexPath, data) {
30
+ // Calculate total size
31
+ const headerSize = 24; // 6 uint32 fields
32
+ const vectorSize = 4 + (data.dimensions * 4); // id + vector
33
+ const totalSize = headerSize + (data.currentSize * vectorSize);
34
+ const buffer = new ArrayBuffer(totalSize);
35
+ const view = new DataView(buffer);
36
+ let offset = 0;
37
+ // Write header (24 bytes, all little-endian)
38
+ view.setUint32(offset, data.dimensions, true);
39
+ offset += 4;
40
+ view.setUint32(offset, data.maxElements, true);
41
+ offset += 4;
42
+ view.setUint32(offset, data.M, true);
43
+ offset += 4;
44
+ view.setUint32(offset, data.efConstruction, true);
45
+ offset += 4;
46
+ view.setUint32(offset, data.seed, true);
47
+ offset += 4;
48
+ view.setUint32(offset, data.currentSize, true);
49
+ offset += 4;
50
+ // Write vectors
51
+ for (const item of data.vectors) {
52
+ // Ensure 4-byte alignment (should always be true with our format)
53
+ if (offset % 4 !== 0) {
54
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
55
+ }
56
+ // Write vector ID
57
+ view.setUint32(offset, item.id, true);
58
+ offset += 4;
59
+ // Write vector data
60
+ for (let i = 0; i < item.vector.length; i++) {
61
+ view.setFloat32(offset, item.vector[i], true);
62
+ offset += 4;
63
+ }
64
+ }
65
+ // Write to file
66
+ writeFileSync(indexPath, Buffer.from(buffer));
67
+ }
68
+ /**
69
+ * Load index data from binary format
70
+ *
71
+ * Uses zero-copy Float32Array views for efficient loading.
72
+ * Copies the views to ensure data persistence after buffer lifecycle.
73
+ *
74
+ * @param indexPath Path to the binary index file
75
+ * @returns Deserialized index data
76
+ */
77
+ static async load(indexPath) {
78
+ const buffer = readFileSync(indexPath);
79
+ const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
80
+ let offset = 0;
81
+ // Read header (24 bytes, all little-endian)
82
+ const dimensions = view.getUint32(offset, true);
83
+ offset += 4;
84
+ const maxElements = view.getUint32(offset, true);
85
+ offset += 4;
86
+ const M = view.getUint32(offset, true);
87
+ offset += 4;
88
+ const efConstruction = view.getUint32(offset, true);
89
+ offset += 4;
90
+ const seed = view.getUint32(offset, true);
91
+ offset += 4;
92
+ const currentSize = view.getUint32(offset, true);
93
+ offset += 4;
94
+ // Read vectors
95
+ const vectors = [];
96
+ for (let i = 0; i < currentSize; i++) {
97
+ // Ensure 4-byte alignment (should always be true with our format)
98
+ if (offset % 4 !== 0) {
99
+ throw new Error(`Offset ${offset} is not 4-byte aligned`);
100
+ }
101
+ // Read vector ID
102
+ const id = view.getUint32(offset, true);
103
+ offset += 4;
104
+ // Zero-copy Float32Array view (fast!)
105
+ const vectorView = new Float32Array(buffer.buffer, buffer.byteOffset + offset, dimensions);
106
+ // Copy to avoid buffer lifecycle issues
107
+ const vector = new Float32Array(vectorView);
108
+ offset += dimensions * 4;
109
+ vectors.push({ id, vector });
110
+ }
111
+ return {
112
+ dimensions,
113
+ maxElements,
114
+ M,
115
+ efConstruction,
116
+ seed,
117
+ currentSize,
118
+ vectors
119
+ };
120
+ }
121
+ }
122
+ //# sourceMappingURL=binary-index-format.js.map
@@ -92,6 +92,8 @@ export interface Document {
92
92
  title: string;
93
93
  /** Full text content */
94
94
  content: string;
95
+ /** Optional metadata */
96
+ metadata?: Record<string, any>;
95
97
  }
96
98
  /**
97
99
  * Chunk interface for text chunking results
@@ -0,0 +1,53 @@
1
+ /**
2
+ * CLI Database Utilities - Database access helpers for CLI commands
3
+ * Provides database locking detection and retry mechanisms for CLI operations
4
+ * Prevents conflicts between CLI commands and long-running processes like MCP server
5
+ */
6
+ /**
7
+ * CLI-specific database access options
+ * Every field is optional; the implementation merges the supplied object over
+ * its DEFAULT_CLI_OPTIONS, whose values are noted on each field below.
8
+ */
9
+ export interface CLIDatabaseOptions {
10
+ /** Maximum total time to wait for database access (ms). Implementation default: 10000 */
11
+ maxWaitMs?: number;
12
+ /** Pause between retry attempts while the database is busy (ms). Implementation default: 500 */
13
+ retryIntervalMs?: number;
14
+ /** Show progress messages to user via console.log. Implementation default: true */
15
+ showProgress?: boolean;
16
+ /** Command name used as the prefix of wrapped error messages. Implementation default: 'CLI command' */
17
+ commandName?: string;
18
+ }
19
+ /**
20
+ * Wait for database to become available for CLI operations
21
+ * Provides user-friendly progress messages and error handling
+ *
+ * @param dbPath Path to the database file
+ * @param options Optional overrides for the wait budget, retry interval and progress output
+ * @returns Resolves (with no value) once DatabaseConnectionManager.waitForDatabaseAccess succeeds
+ * @throws Error if the database file does not exist, if access fails with an error whose
+ *         message mentions neither 'SQLITE_BUSY' nor 'busy', or if the database is still busy
+ *         when the wait budget is used up
22
+ */
23
+ export declare function waitForCLIDatabaseAccess(dbPath: string, options?: CLIDatabaseOptions): Promise<void>;
24
+ /**
25
+ * Execute a CLI operation with database access protection
26
+ * Automatically handles database locking and provides user feedback
+ *
+ * @param dbPath Path to the database file, forwarded to waitForCLIDatabaseAccess
+ * @param operation Callback invoked after database access has been obtained
+ * @param options Optional overrides; commandName is used as the prefix of the error message
+ * @returns The value the operation resolves with
+ * @throws Error Failures that are Error instances (from waiting or from the operation) are
+ *         re-thrown as a new Error with a "<commandName> failed: ..." message; any other
+ *         thrown value is re-thrown unchanged
27
+ */
28
+ export declare function withCLIDatabaseAccess<T>(dbPath: string, operation: () => Promise<T>, options?: CLIDatabaseOptions): Promise<T>;
29
+ /**
30
+ * Check if database is currently busy (non-blocking)
31
+ * Useful for showing warnings or status information
+ *
+ * The implementation probes DatabaseConnectionManager.waitForDatabaseAccess with a
+ * timeout argument of 100 and reports failures through the result instead of throwing.
+ * NOTE: every failed probe yields isBusy: true, including failures that are not lock
+ * contention; in that case `reason` starts with "Database access error:" - inspect
+ * `reason` to tell the two apart.
+ *
+ * @param dbPath Path to the database file
+ * @returns isBusy plus, when busy, a human-readable reason and a list of suggestions
32
+ */
33
+ export declare function isDatabaseBusy(dbPath: string): Promise<{
34
+ isBusy: boolean;
35
+ reason?: string;
36
+ suggestions?: string[];
37
+ }>;
38
+ /**
39
+ * Show database status information for debugging
40
+ * Useful for troubleshooting CLI issues
+ *
+ * Writes to console.log: whether the file exists, its size and last-modified time,
+ * the busy/available status from isDatabaseBusy, and the connections reported by
+ * DatabaseConnectionManager.getConnectionStats. Returns early when the file is missing.
+ *
+ * @param dbPath Path to the database file
41
+ */
42
+ export declare function showDatabaseStatus(dbPath: string): Promise<void>;
43
+ /**
44
+ * Force cleanup of database connections (emergency use only)
45
+ * Use with caution - only for recovery from stuck states
+ *
+ * Delegates to DatabaseConnectionManager.forceCloseConnection. A failure there is
+ * logged to the console and not re-thrown, so the returned promise resolves either way.
+ *
+ * @param dbPath Path of the database whose connection should be force-closed
46
+ */
47
+ export declare function forceCleanupDatabase(dbPath: string): Promise<void>;
48
+ /**
49
+ * Graceful shutdown helper for CLI commands
50
+ * Ensures proper cleanup when CLI commands are interrupted
+ *
+ * Registers a SIGINT and a SIGTERM handler, each only when the process has no listener
+ * for that signal yet. The handler releases the connection for dbPath (when given),
+ * closes all connections through DatabaseConnectionManager and then exits the process.
+ *
+ * @param dbPath Optional database path whose connection is released first on shutdown
51
+ */
52
+ export declare function setupCLICleanup(dbPath?: string): void;
53
+ //# sourceMappingURL=cli-database-utils.d.ts.map
@@ -0,0 +1,239 @@
1
+ /**
2
+ * CLI Database Utilities - Database access helpers for CLI commands
3
+ * Provides database locking detection and retry mechanisms for CLI operations
4
+ * Prevents conflicts between CLI commands and long-running processes like MCP server
5
+ */
6
+ import { DatabaseConnectionManager } from './database-connection-manager.js';
7
+ import { existsSync } from 'fs';
8
/**
 * Fallback settings used by the CLI database helpers for any option
 * the caller leaves out.
 */
const DEFAULT_CLI_OPTIONS = {
    // Total wait budget: ten seconds
    maxWaitMs: 10 * 1000,
    // Pause between attempts: half a second
    retryIntervalMs: 500,
    // Print progress to the console by default
    showProgress: true,
    // Generic label used in error messages when no command name is given
    commandName: 'CLI command'
};
17
+ /**
18
+ * Wait for database to become available for CLI operations
19
+ * Provides user-friendly progress messages and error handling
20
+ */
21
+ export async function waitForCLIDatabaseAccess(dbPath, options = {}) {
22
+ const opts = { ...DEFAULT_CLI_OPTIONS, ...options };
23
+ // Check if database file exists
24
+ if (!existsSync(dbPath)) {
25
+ throw new Error(`Database file not found: ${dbPath}\n` +
26
+ `Please run 'raglite ingest <path>' first to create the database.`);
27
+ }
28
+ const startTime = Date.now();
29
+ let attempts = 0;
30
+ let lastError = null;
31
+ while (Date.now() - startTime < opts.maxWaitMs) {
32
+ attempts++;
33
+ try {
34
+ // Try to get database access
35
+ await DatabaseConnectionManager.waitForDatabaseAccess(dbPath, 1000);
36
+ if (opts.showProgress && attempts > 1) {
37
+ console.log(`✅ Database is now available (after ${attempts} attempts)`);
38
+ }
39
+ return; // Success!
40
+ }
41
+ catch (error) {
42
+ lastError = error;
43
+ if (lastError.message.includes('SQLITE_BUSY') || lastError.message.includes('busy')) {
44
+ // Database is busy - show progress and retry
45
+ if (opts.showProgress) {
46
+ if (attempts === 1) {
47
+ console.log(`⏳ Database is busy, waiting for access...`);
48
+ console.log(` This usually happens when another process is using the database.`);
49
+ console.log(` Common causes:`);
50
+ console.log(` • MCP server is running`);
51
+ console.log(` • Another CLI command is in progress`);
52
+ console.log(` • Long-running ingestion process`);
53
+ console.log('');
54
+ }
55
+ else if (attempts % 4 === 0) {
56
+ const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
57
+ console.log(` Still waiting... (${elapsed}s elapsed, attempt ${attempts})`);
58
+ }
59
+ }
60
+ // Wait before retrying
61
+ await new Promise(resolve => setTimeout(resolve, opts.retryIntervalMs));
62
+ continue;
63
+ }
64
+ else {
65
+ // Other error - don't retry
66
+ throw new Error(`Failed to access database: ${lastError.message}\n` +
67
+ `Please check that the database file is not corrupted and you have proper permissions.`);
68
+ }
69
+ }
70
+ }
71
+ // Timeout reached
72
+ const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
73
+ throw new Error(`Database is still busy after ${elapsed} seconds.\n` +
74
+ `\n` +
75
+ `This might be because:\n` +
76
+ `• Another process is using the database (MCP server, long ingestion, etc.)\n` +
77
+ `• The database is locked due to an interrupted operation\n` +
78
+ `\n` +
79
+ `Solutions:\n` +
80
+ `• Wait for other operations to complete\n` +
81
+ `• Stop the MCP server if running\n` +
82
+ `• Restart your terminal/process\n` +
83
+ `• As a last resort, restart your computer\n` +
84
+ `\n` +
85
+ `Last error: ${lastError?.message || 'Unknown error'}`);
86
+ }
87
+ /**
88
+ * Execute a CLI operation with database access protection
89
+ * Automatically handles database locking and provides user feedback
90
+ */
91
+ export async function withCLIDatabaseAccess(dbPath, operation, options = {}) {
92
+ const opts = { ...DEFAULT_CLI_OPTIONS, ...options };
93
+ try {
94
+ // Wait for database access
95
+ await waitForCLIDatabaseAccess(dbPath, opts);
96
+ // Execute the operation
97
+ return await operation();
98
+ }
99
+ catch (error) {
100
+ if (error instanceof Error) {
101
+ // Enhance error message with CLI context
102
+ const enhancedMessage = `${opts.commandName} failed: ${error.message}\n` +
103
+ `\n` +
104
+ `If this error persists:\n` +
105
+ `• Check that no other RAG-lite processes are running\n` +
106
+ `• Verify database file permissions\n` +
107
+ `• Try running the command again\n`;
108
+ throw new Error(enhancedMessage);
109
+ }
110
+ throw error;
111
+ }
112
+ }
113
+ /**
114
+ * Check if database is currently busy (non-blocking)
115
+ * Useful for showing warnings or status information
116
+ */
117
+ export async function isDatabaseBusy(dbPath) {
118
+ try {
119
+ await DatabaseConnectionManager.waitForDatabaseAccess(dbPath, 100);
120
+ return { isBusy: false };
121
+ }
122
+ catch (error) {
123
+ if (error instanceof Error && (error.message.includes('SQLITE_BUSY') ||
124
+ error.message.includes('busy'))) {
125
+ return {
126
+ isBusy: true,
127
+ reason: 'Database is currently in use by another process',
128
+ suggestions: [
129
+ 'Wait for other operations to complete',
130
+ 'Stop MCP server if running',
131
+ 'Check for other CLI commands in progress'
132
+ ]
133
+ };
134
+ }
135
+ return {
136
+ isBusy: true,
137
+ reason: `Database access error: ${error instanceof Error ? error.message : String(error)}`,
138
+ suggestions: [
139
+ 'Check database file permissions',
140
+ 'Verify database file is not corrupted',
141
+ 'Ensure you have read/write access to the database directory'
142
+ ]
143
+ };
144
+ }
145
+ }
146
/**
 * Print a diagnostic summary for a database file to the console.
 *
 * Reports, in order: whether the file exists (stopping there if it does not),
 * its size and last-modified time, whether isDatabaseBusy considers it busy,
 * and any connections listed by DatabaseConnectionManager.getConnectionStats.
 *
 * @param dbPath Path to the database file
 */
export async function showDatabaseStatus(dbPath) {
    console.log(`📊 Database Status: ${dbPath}`);
    console.log('');
    // Nothing more to report when the file is missing
    if (!existsSync(dbPath)) {
        console.log('❌ Database file does not exist');
        console.log(' Run "raglite ingest <path>" to create the database');
        return;
    }
    // File metadata; a failure here is reported but does not stop the report
    try {
        const { statSync } = await import('fs');
        const fileInfo = statSync(dbPath);
        const sizeInKb = (fileInfo.size / 1024).toFixed(1);
        console.log(`📁 File size: ${sizeInKb} KB`);
        console.log(`📅 Last modified: ${fileInfo.mtime.toLocaleString()}`);
    }
    catch (statFailure) {
        console.log(`⚠️ Cannot read file stats: ${statFailure}`);
    }
    // Availability
    const { isBusy, reason, suggestions } = await isDatabaseBusy(dbPath);
    if (!isBusy) {
        console.log(`✅ Status: AVAILABLE`);
    }
    else {
        console.log(`🔒 Status: BUSY`);
        console.log(` Reason: ${reason}`);
        if (suggestions) {
            console.log(' Suggestions:');
            for (const hint of suggestions) {
                console.log(` • ${hint}`);
            }
        }
    }
    // Connections tracked by the connection manager in this process
    const stats = DatabaseConnectionManager.getConnectionStats();
    if (stats.totalConnections > 0) {
        console.log('');
        console.log(`🔗 Active connections: ${stats.totalConnections}`);
        stats.connections.forEach((entry, position) => {
            const idleSeconds = (entry.idleTime / 1000).toFixed(1);
            console.log(` ${position + 1}. ${entry.path}`);
            console.log(` References: ${entry.refCount}`);
            console.log(` Last accessed: ${entry.lastAccessed.toLocaleString()}`);
            console.log(` Idle time: ${idleSeconds}s`);
        });
    }
    console.log('');
}
198
+ /**
199
+ * Force cleanup of database connections (emergency use only)
200
+ * Use with caution - only for recovery from stuck states
201
+ */
202
+ export async function forceCleanupDatabase(dbPath) {
203
+ console.log(`🚨 Force cleaning up database connections: ${dbPath}`);
204
+ try {
205
+ await DatabaseConnectionManager.forceCloseConnection(dbPath);
206
+ console.log('✅ Force cleanup completed');
207
+ }
208
+ catch (error) {
209
+ console.log(`⚠️ Force cleanup failed: ${error}`);
210
+ console.log('You may need to restart the process or reboot your system');
211
+ }
212
+ }
213
+ /**
214
+ * Graceful shutdown helper for CLI commands
215
+ * Ensures proper cleanup when CLI commands are interrupted
216
+ */
217
+ export function setupCLICleanup(dbPath) {
218
+ const cleanup = async () => {
219
+ console.log('\n🛑 Shutting down gracefully...');
220
+ if (dbPath) {
221
+ try {
222
+ await DatabaseConnectionManager.releaseConnection(dbPath);
223
+ }
224
+ catch (error) {
225
+ // Ignore cleanup errors during shutdown
226
+ }
227
+ }
228
+ await DatabaseConnectionManager.closeAllConnections();
229
+ process.exit(0);
230
+ };
231
+ // Only set up handlers if they haven't been set up already
232
+ if (!process.listenerCount('SIGINT')) {
233
+ process.on('SIGINT', cleanup);
234
+ }
235
+ if (!process.listenerCount('SIGTERM')) {
236
+ process.on('SIGTERM', cleanup);
237
+ }
238
+ }
239
+ //# sourceMappingURL=cli-database-utils.js.map
@@ -101,11 +101,16 @@ export function getModelDefaults(modelName) {
101
101
  chunk_overlap: 50,
102
102
  batch_size: 16
103
103
  };
104
- // Model-specific overrides
104
+ // Model-specific overrides based on model name heuristics
105
105
  if (modelName) {
106
106
  const normalizedName = modelName.toLowerCase();
107
- // Specific model configurations
108
- if (normalizedName.includes('all-mpnet-base-v2')) {
107
+ // CLIP models - 512 dimensions
108
+ if (normalizedName.includes('clip')) {
109
+ defaults.dimensions = 512;
110
+ defaults.batch_size = 8;
111
+ }
112
+ // MPNet models - 768 dimensions
113
+ else if (normalizedName.includes('all-mpnet-base-v2')) {
109
114
  defaults.dimensions = 768;
110
115
  defaults.chunk_size = 400;
111
116
  defaults.chunk_overlap = 80;
@@ -114,9 +119,11 @@ export function getModelDefaults(modelName) {
114
119
  else if (normalizedName.includes('mpnet') || normalizedName.includes('768')) {
115
120
  defaults.dimensions = 768;
116
121
  }
122
+ // Models with 512 in the name
117
123
  else if (normalizedName.includes('512')) {
118
124
  defaults.dimensions = 512;
119
125
  }
126
+ // MiniLM and other 384-dimensional models (default)
120
127
  else if (normalizedName.includes('384') || normalizedName.includes('minilm')) {
121
128
  defaults.dimensions = 384;
122
129
  }
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Content System Error Handling - Comprehensive error handling for unified content system
3
+ * Implements task 8.1: Add specific error types for content operations with clear guidance
4
+ * Requirements: 8.1, 8.2, 8.3, 8.6
5
+ */
6
+ import { APIError, ErrorFactory } from '../api-errors.js';
7
+ import { ErrorSeverity } from './error-handler.js';
8
+ /**
9
+ * Content-specific error types for unified content system operations
10
+ */
11
+ export declare class ContentNotFoundError extends APIError {
12
+ constructor(contentId: string, displayName?: string, context?: string);
13
+ }
14
+ /**
15
+ * Error when storage limits are exceeded
16
+ */
17
+ export declare class StorageLimitExceededError extends APIError {
18
+ constructor(currentUsageMB: number, limitMB: number, contentSizeMB: number, context?: string);
19
+ }
20
+ /**
21
+ * Error when content format is invalid or unsupported
22
+ */
23
+ export declare class InvalidContentFormatError extends APIError {
24
+ constructor(contentType: string, reason: string, context?: string);
25
+ private static getSuggestionsForContentType;
26
+ }
27
+ /**
28
+ * Error when content ingestion fails
29
+ */
30
+ export declare class ContentIngestionError extends APIError {
31
+ constructor(operation: string, reason: string, context?: string);
32
+ }
33
+ /**
34
+ * Error when content retrieval fails
35
+ */
36
+ export declare class ContentRetrievalError extends APIError {
37
+ constructor(contentId: string, format: string, reason: string, context?: string);
38
+ }
39
+ /**
40
+ * Error when content directory operations fail
41
+ */
42
+ export declare class ContentDirectoryError extends APIError {
43
+ constructor(operation: string, reason: string, context?: string);
44
+ }
45
+ /**
46
+ * Error when content deduplication fails
47
+ */
48
+ export declare class ContentDeduplicationError extends APIError {
49
+ constructor(reason: string, context?: string);
50
+ }
51
+ /**
52
+ * Enhanced error factory for content system operations
53
+ * Extends the base ErrorFactory with content-specific error handling
54
+ */
55
+ export declare class ContentErrorFactory extends ErrorFactory {
56
+ /**
57
+ * Create content-specific error from generic error
58
+ */
59
+ static createContentError(error: unknown, operation: string, context: string): APIError;
60
+ /**
61
+ * Create storage-related error with enhanced guidance
62
+ */
63
+ static createStorageError(error: unknown, context: string): APIError;
64
+ /**
65
+ * Create format validation error with specific guidance
66
+ */
67
+ static createFormatError(contentType: string, reason: string, context: string): InvalidContentFormatError;
68
+ }
69
+ /**
70
+ * Content system error handler with categorized error management
71
+ */
72
+ export declare class ContentErrorHandler {
73
+ /**
74
+ * Handle content operation errors with appropriate categorization
75
+ */
76
+ static handleContentError(error: unknown, operation: string, context: string, options?: {
77
+ severity?: ErrorSeverity;
78
+ skipError?: boolean;
79
+ showStack?: boolean;
80
+ }): never;
81
+ /**
82
+ * Get appropriate error category for content errors
83
+ */
84
+ private static getCategoryForError;
85
+ /**
86
+ * Validate content operation parameters and throw appropriate errors
87
+ */
88
+ static validateContentOperation(contentId?: string, format?: string, contentType?: string): void;
89
+ /**
90
+ * Create user-friendly error message for common content scenarios
91
+ */
92
+ static createUserFriendlyMessage(error: APIError): string;
93
+ }
94
+ /**
95
+ * Utility functions for content error handling
96
+ */
97
+ export declare const ContentErrorUtils: {
98
+ /**
99
+ * Check if an error is content-related
100
+ */
101
+ isContentError(error: unknown): error is APIError;
102
+ /**
103
+ * Extract content ID from error message
104
+ */
105
+ extractContentId(error: APIError): string | null;
106
+ /**
107
+ * Get recovery action for content error
108
+ */
109
+ getRecoveryAction(error: APIError): string;
110
+ };
111
+ //# sourceMappingURL=content-errors.d.ts.map