rag-lite-ts 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204) hide show
  1. package/README.md +651 -109
  2. package/dist/cli/indexer.js +262 -46
  3. package/dist/cli/search.js +54 -32
  4. package/dist/cli.js +185 -28
  5. package/dist/config.d.ts +34 -73
  6. package/dist/config.js +50 -255
  7. package/dist/core/abstract-embedder.d.ts +125 -0
  8. package/dist/core/abstract-embedder.js +264 -0
  9. package/dist/core/actionable-error-messages.d.ts +60 -0
  10. package/dist/core/actionable-error-messages.js +397 -0
  11. package/dist/core/adapters.d.ts +93 -0
  12. package/dist/core/adapters.js +139 -0
  13. package/dist/core/batch-processing-optimizer.d.ts +155 -0
  14. package/dist/core/batch-processing-optimizer.js +541 -0
  15. package/dist/core/chunker.d.ts +119 -0
  16. package/dist/core/chunker.js +73 -0
  17. package/dist/core/cli-database-utils.d.ts +53 -0
  18. package/dist/core/cli-database-utils.js +239 -0
  19. package/dist/core/config.d.ts +102 -0
  20. package/dist/core/config.js +247 -0
  21. package/dist/core/content-errors.d.ts +111 -0
  22. package/dist/core/content-errors.js +362 -0
  23. package/dist/core/content-manager.d.ts +343 -0
  24. package/dist/core/content-manager.js +1504 -0
  25. package/dist/core/content-performance-optimizer.d.ts +150 -0
  26. package/dist/core/content-performance-optimizer.js +516 -0
  27. package/dist/core/content-resolver.d.ts +104 -0
  28. package/dist/core/content-resolver.js +285 -0
  29. package/dist/core/cross-modal-search.d.ts +164 -0
  30. package/dist/core/cross-modal-search.js +342 -0
  31. package/dist/core/database-connection-manager.d.ts +109 -0
  32. package/dist/core/database-connection-manager.js +304 -0
  33. package/dist/core/db.d.ts +245 -0
  34. package/dist/core/db.js +952 -0
  35. package/dist/core/embedder-factory.d.ts +176 -0
  36. package/dist/core/embedder-factory.js +338 -0
  37. package/dist/{error-handler.d.ts → core/error-handler.d.ts} +23 -2
  38. package/dist/{error-handler.js → core/error-handler.js} +51 -8
  39. package/dist/core/index.d.ts +59 -0
  40. package/dist/core/index.js +69 -0
  41. package/dist/core/ingestion.d.ts +213 -0
  42. package/dist/core/ingestion.js +812 -0
  43. package/dist/core/interfaces.d.ts +408 -0
  44. package/dist/core/interfaces.js +106 -0
  45. package/dist/core/lazy-dependency-loader.d.ts +152 -0
  46. package/dist/core/lazy-dependency-loader.js +453 -0
  47. package/dist/core/mode-detection-service.d.ts +150 -0
  48. package/dist/core/mode-detection-service.js +565 -0
  49. package/dist/core/mode-model-validator.d.ts +92 -0
  50. package/dist/core/mode-model-validator.js +203 -0
  51. package/dist/core/model-registry.d.ts +120 -0
  52. package/dist/core/model-registry.js +415 -0
  53. package/dist/core/model-validator.d.ts +217 -0
  54. package/dist/core/model-validator.js +782 -0
  55. package/dist/{path-manager.d.ts → core/path-manager.d.ts} +5 -0
  56. package/dist/{path-manager.js → core/path-manager.js} +5 -0
  57. package/dist/core/polymorphic-search-factory.d.ts +154 -0
  58. package/dist/core/polymorphic-search-factory.js +344 -0
  59. package/dist/core/raglite-paths.d.ts +121 -0
  60. package/dist/core/raglite-paths.js +145 -0
  61. package/dist/core/reranking-config.d.ts +42 -0
  62. package/dist/core/reranking-config.js +156 -0
  63. package/dist/core/reranking-factory.d.ts +92 -0
  64. package/dist/core/reranking-factory.js +591 -0
  65. package/dist/core/reranking-strategies.d.ts +325 -0
  66. package/dist/core/reranking-strategies.js +720 -0
  67. package/dist/core/resource-cleanup.d.ts +163 -0
  68. package/dist/core/resource-cleanup.js +371 -0
  69. package/dist/core/resource-manager.d.ts +212 -0
  70. package/dist/core/resource-manager.js +564 -0
  71. package/dist/core/search-pipeline.d.ts +111 -0
  72. package/dist/core/search-pipeline.js +287 -0
  73. package/dist/core/search.d.ts +131 -0
  74. package/dist/core/search.js +296 -0
  75. package/dist/core/streaming-operations.d.ts +145 -0
  76. package/dist/core/streaming-operations.js +409 -0
  77. package/dist/core/types.d.ts +66 -0
  78. package/dist/core/types.js +6 -0
  79. package/dist/core/universal-embedder.d.ts +177 -0
  80. package/dist/core/universal-embedder.js +139 -0
  81. package/dist/core/validation-messages.d.ts +99 -0
  82. package/dist/core/validation-messages.js +334 -0
  83. package/dist/{vector-index.d.ts → core/vector-index.d.ts} +4 -0
  84. package/dist/{vector-index.js → core/vector-index.js} +21 -3
  85. package/dist/dom-polyfills.d.ts +6 -0
  86. package/dist/dom-polyfills.js +40 -0
  87. package/dist/factories/index.d.ts +43 -0
  88. package/dist/factories/index.js +44 -0
  89. package/dist/factories/text-factory.d.ts +560 -0
  90. package/dist/factories/text-factory.js +968 -0
  91. package/dist/file-processor.d.ts +90 -4
  92. package/dist/file-processor.js +723 -20
  93. package/dist/index-manager.d.ts +3 -2
  94. package/dist/index-manager.js +13 -11
  95. package/dist/index.d.ts +72 -8
  96. package/dist/index.js +102 -16
  97. package/dist/indexer.js +1 -1
  98. package/dist/ingestion.d.ts +44 -154
  99. package/dist/ingestion.js +75 -671
  100. package/dist/mcp-server.d.ts +35 -3
  101. package/dist/mcp-server.js +1186 -79
  102. package/dist/multimodal/clip-embedder.d.ts +314 -0
  103. package/dist/multimodal/clip-embedder.js +945 -0
  104. package/dist/multimodal/index.d.ts +6 -0
  105. package/dist/multimodal/index.js +6 -0
  106. package/dist/preprocess.js +1 -1
  107. package/dist/run-error-recovery-tests.d.ts +7 -0
  108. package/dist/run-error-recovery-tests.js +101 -0
  109. package/dist/search-standalone.js +1 -1
  110. package/dist/search.d.ts +51 -69
  111. package/dist/search.js +117 -412
  112. package/dist/test-utils.d.ts +8 -26
  113. package/dist/text/chunker.d.ts +33 -0
  114. package/dist/{chunker.js → text/chunker.js} +98 -75
  115. package/dist/{embedder.d.ts → text/embedder.d.ts} +22 -1
  116. package/dist/{embedder.js → text/embedder.js} +84 -10
  117. package/dist/text/index.d.ts +8 -0
  118. package/dist/text/index.js +9 -0
  119. package/dist/text/preprocessors/index.d.ts +17 -0
  120. package/dist/text/preprocessors/index.js +38 -0
  121. package/dist/text/preprocessors/mdx.d.ts +25 -0
  122. package/dist/text/preprocessors/mdx.js +101 -0
  123. package/dist/text/preprocessors/mermaid.d.ts +68 -0
  124. package/dist/text/preprocessors/mermaid.js +330 -0
  125. package/dist/text/preprocessors/registry.d.ts +56 -0
  126. package/dist/text/preprocessors/registry.js +180 -0
  127. package/dist/text/reranker.d.ts +59 -0
  128. package/dist/{reranker.js → text/reranker.js} +138 -53
  129. package/dist/text/sentence-transformer-embedder.d.ts +96 -0
  130. package/dist/text/sentence-transformer-embedder.js +340 -0
  131. package/dist/{tokenizer.d.ts → text/tokenizer.d.ts} +1 -0
  132. package/dist/{tokenizer.js → text/tokenizer.js} +7 -2
  133. package/dist/types.d.ts +40 -1
  134. package/dist/utils/vector-math.d.ts +31 -0
  135. package/dist/utils/vector-math.js +70 -0
  136. package/package.json +16 -4
  137. package/dist/api-errors.d.ts.map +0 -1
  138. package/dist/api-errors.js.map +0 -1
  139. package/dist/chunker.d.ts +0 -47
  140. package/dist/chunker.d.ts.map +0 -1
  141. package/dist/chunker.js.map +0 -1
  142. package/dist/cli/indexer.d.ts.map +0 -1
  143. package/dist/cli/indexer.js.map +0 -1
  144. package/dist/cli/search.d.ts.map +0 -1
  145. package/dist/cli/search.js.map +0 -1
  146. package/dist/cli.d.ts.map +0 -1
  147. package/dist/cli.js.map +0 -1
  148. package/dist/config.d.ts.map +0 -1
  149. package/dist/config.js.map +0 -1
  150. package/dist/db.d.ts +0 -90
  151. package/dist/db.d.ts.map +0 -1
  152. package/dist/db.js +0 -340
  153. package/dist/db.js.map +0 -1
  154. package/dist/embedder.d.ts.map +0 -1
  155. package/dist/embedder.js.map +0 -1
  156. package/dist/error-handler.d.ts.map +0 -1
  157. package/dist/error-handler.js.map +0 -1
  158. package/dist/file-processor.d.ts.map +0 -1
  159. package/dist/file-processor.js.map +0 -1
  160. package/dist/index-manager.d.ts.map +0 -1
  161. package/dist/index-manager.js.map +0 -1
  162. package/dist/index.d.ts.map +0 -1
  163. package/dist/index.js.map +0 -1
  164. package/dist/indexer.d.ts.map +0 -1
  165. package/dist/indexer.js.map +0 -1
  166. package/dist/ingestion.d.ts.map +0 -1
  167. package/dist/ingestion.js.map +0 -1
  168. package/dist/mcp-server.d.ts.map +0 -1
  169. package/dist/mcp-server.js.map +0 -1
  170. package/dist/path-manager.d.ts.map +0 -1
  171. package/dist/path-manager.js.map +0 -1
  172. package/dist/preprocess.d.ts.map +0 -1
  173. package/dist/preprocess.js.map +0 -1
  174. package/dist/preprocessors/index.d.ts.map +0 -1
  175. package/dist/preprocessors/index.js.map +0 -1
  176. package/dist/preprocessors/mdx.d.ts.map +0 -1
  177. package/dist/preprocessors/mdx.js.map +0 -1
  178. package/dist/preprocessors/mermaid.d.ts.map +0 -1
  179. package/dist/preprocessors/mermaid.js.map +0 -1
  180. package/dist/preprocessors/registry.d.ts.map +0 -1
  181. package/dist/preprocessors/registry.js.map +0 -1
  182. package/dist/reranker.d.ts +0 -40
  183. package/dist/reranker.d.ts.map +0 -1
  184. package/dist/reranker.js.map +0 -1
  185. package/dist/resource-manager-demo.d.ts +0 -7
  186. package/dist/resource-manager-demo.d.ts.map +0 -1
  187. package/dist/resource-manager-demo.js +0 -52
  188. package/dist/resource-manager-demo.js.map +0 -1
  189. package/dist/resource-manager.d.ts +0 -129
  190. package/dist/resource-manager.d.ts.map +0 -1
  191. package/dist/resource-manager.js +0 -389
  192. package/dist/resource-manager.js.map +0 -1
  193. package/dist/search-standalone.d.ts.map +0 -1
  194. package/dist/search-standalone.js.map +0 -1
  195. package/dist/search.d.ts.map +0 -1
  196. package/dist/search.js.map +0 -1
  197. package/dist/test-utils.d.ts.map +0 -1
  198. package/dist/test-utils.js.map +0 -1
  199. package/dist/tokenizer.d.ts.map +0 -1
  200. package/dist/tokenizer.js.map +0 -1
  201. package/dist/types.d.ts.map +0 -1
  202. package/dist/types.js.map +0 -1
  203. package/dist/vector-index.d.ts.map +0 -1
  204. package/dist/vector-index.js.map +0 -1
@@ -0,0 +1,952 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import sqlite3 from 'sqlite3';
6
+ import { promisify } from 'util';
7
+ import { handleError, ErrorSeverity, createError } from './error-handler.js';
/**
 * Opens a SQLite database and wraps it in a promise-based connection object.
 *
 * After the handle is opened, `PRAGMA foreign_keys = ON` is executed; the
 * returned promise settles only once that pragma has completed.
 *
 * @param dbPath - Path to the SQLite database file
 * @returns Promise resolving to `{ db, run, get, all, close }`, where `db` is
 *          the raw sqlite3 handle and the other members are promise-returning
 *          wrappers around the handle's methods of the same name
 */
export function openDatabase(dbPath) {
    return new Promise((resolveOpen, rejectOpen) => {
        const handle = new sqlite3.Database(dbPath, (openErr) => {
            if (openErr) {
                const reason = openErr.message;
                if (reason.includes('ENOENT')) {
                    // Reported at INFO severity only; execution deliberately continues
                    // to the pragma step below instead of rejecting.
                    handleError(createError.fileSystem(`Database file not found: ${dbPath}. It will be created automatically.`), 'Database Connection', { severity: ErrorSeverity.INFO });
                }
                else {
                    // Every other open failure is fatal; pick the most specific message.
                    let failure;
                    if (reason.includes('EACCES') || reason.includes('permission')) {
                        failure = createError.database(`Permission denied accessing database: ${dbPath}. Check file permissions.`);
                    }
                    else if (reason.includes('SQLITE_CORRUPT')) {
                        failure = createError.database(`Database file is corrupted: ${dbPath}. Try running 'raglite rebuild'.`);
                    }
                    else {
                        failure = createError.database(`Failed to open database at ${dbPath}: ${reason}`);
                    }
                    rejectOpen(failure);
                    return;
                }
            }
            // Enable foreign key constraints before handing the connection out
            handle.run('PRAGMA foreign_keys = ON', (pragmaErr) => {
                if (pragmaErr) {
                    rejectOpen(createError.database(`Failed to enable foreign keys: ${pragmaErr.message}`));
                    return;
                }
                // `run` is wrapped by hand because its result is delivered as the
                // callback's `this` value rather than as a callback argument.
                const runStatement = (sql, params) => new Promise((resolveRun, rejectRun) => {
                    handle.run(sql, params || [], function (runErr) {
                        if (!runErr) {
                            resolveRun(this);
                            return;
                        }
                        // Enhance SQLite error messages
                        rejectRun(enhanceSQLiteError(runErr, sql));
                    });
                });
                resolveOpen({
                    db: handle,
                    run: runStatement,
                    get: promisify(handle.get.bind(handle)),
                    all: promisify(handle.all.bind(handle)),
                    close: promisify(handle.close.bind(handle))
                });
            });
        });
    });
}
/**
 * Builds a more descriptive Error from a SQLite driver error.
 *
 * Known failure signatures in `error.message` are mapped to actionable text;
 * any other message is kept as is. When `sql` is provided and shorter than
 * 200 characters it is appended on a new line.
 *
 * The original error is preserved as `cause`, and its `code` / `errno`
 * properties (when present) are copied over, so callers can still inspect
 * the driver-level failure and its stack.
 *
 * @param error - Error reported by the SQLite driver
 * @param sql - SQL text that was being executed (optional)
 * @returns A new Error carrying the enhanced message
 */
function enhanceSQLiteError(error, sql) {
    let enhancedMessage = error.message;
    if (error.message.includes('SQLITE_BUSY')) {
        enhancedMessage = 'Database is locked by another process. Ensure no other RAG-lite instances are running.';
    }
    else if (error.message.includes('SQLITE_FULL')) {
        enhancedMessage = 'Database disk is full. Free up disk space and try again.';
    }
    else if (error.message.includes('SQLITE_CORRUPT')) {
        enhancedMessage = 'Database file is corrupted. Try running "raglite rebuild" to recreate it.';
    }
    else if (error.message.includes('UNIQUE constraint failed')) {
        // Original text is kept inside the message: callers match on this substring.
        enhancedMessage = `Duplicate entry detected: ${error.message}. This item may already exist.`;
    }
    else if (error.message.includes('FOREIGN KEY constraint failed')) {
        // Original text is kept inside the message: callers match on this substring.
        enhancedMessage = `Foreign key constraint violation: ${error.message}. Referenced record may not exist.`;
    }
    if (sql && sql.length < 200) {
        enhancedMessage += `\nSQL: ${sql}`;
    }
    // Keep the driver error reachable instead of discarding it.
    const enhanced = new Error(enhancedMessage, { cause: error });
    if (error.code !== undefined) {
        enhanced.code = error.code;
    }
    if (error.errno !== undefined) {
        enhanced.errno = error.errno;
    }
    return enhanced;
}
/**
 * Creates every table and index the store needs, skipping those that already
 * exist (all statements use IF NOT EXISTS).
 *
 * Statements are executed one at a time, in the order listed: tables first,
 * then indexes. There is no migration logic; the schema assumes a fresh
 * ingestion with the current architecture.
 *
 * @param connection - Database connection object
 * @throws Error prefixed with "Failed to initialize database schema:" when
 *         any statement fails
 */
export async function initializeSchema(connection) {
    const tableStatements = [
        // documents: content type support and content_id reference
        `
      CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        content_id TEXT, -- References content_metadata.id
        source TEXT NOT NULL UNIQUE,
        title TEXT NOT NULL,
        content_type TEXT DEFAULT 'text',
        metadata TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (content_id) REFERENCES content_metadata(id)
      )
    `,
        // chunks: content type and metadata support
        `
      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        embedding_id TEXT NOT NULL UNIQUE,
        document_id INTEGER NOT NULL,
        content TEXT NOT NULL,
        content_type TEXT DEFAULT 'text',
        chunk_index INTEGER NOT NULL,
        metadata TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
      )
    `,
        // content_metadata: unified content system
        `
      CREATE TABLE IF NOT EXISTS content_metadata (
        id TEXT PRIMARY KEY, -- Hash-based content ID
        storage_type TEXT NOT NULL CHECK (storage_type IN ('filesystem', 'content_dir')),
        original_path TEXT, -- Original file path (filesystem only)
        content_path TEXT NOT NULL, -- Actual storage path
        display_name TEXT NOT NULL, -- User-friendly name
        content_type TEXT NOT NULL, -- MIME type
        file_size INTEGER NOT NULL, -- Size in bytes
        content_hash TEXT NOT NULL, -- SHA-256 hash
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // storage_stats: basic content directory tracking (single row, id = 1)
        `
      CREATE TABLE IF NOT EXISTS storage_stats (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        content_dir_files INTEGER DEFAULT 0,
        content_dir_size INTEGER DEFAULT 0,
        filesystem_refs INTEGER DEFAULT 0,
        last_cleanup DATETIME,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // system_info: mode persistence and model tracking (single row, id = 1)
        `
      CREATE TABLE IF NOT EXISTS system_info (
        id INTEGER PRIMARY KEY CHECK (id = 1),

        -- Core mode and model information
        mode TEXT NOT NULL DEFAULT 'text' CHECK (mode IN ('text', 'multimodal')),
        model_name TEXT NOT NULL DEFAULT 'sentence-transformers/all-MiniLM-L6-v2',
        model_type TEXT NOT NULL DEFAULT 'sentence-transformer' CHECK (model_type IN ('sentence-transformer', 'clip')),
        model_dimensions INTEGER NOT NULL DEFAULT 384,
        model_version TEXT NOT NULL DEFAULT '',

        -- Content type support (JSON array)
        supported_content_types TEXT NOT NULL DEFAULT '["text"]',

        -- Reranking configuration
        reranking_strategy TEXT DEFAULT 'cross-encoder' CHECK (
          reranking_strategy IN ('cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled')
        ),
        reranking_model TEXT,
        reranking_config TEXT, -- JSON configuration for strategy-specific settings

        -- Timestamps
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `
    ];
    const indexStatements = [
        // Indexes for performance
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_embedding_id ON chunks(embedding_id)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_content_type ON chunks(content_type)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_content_type ON documents(content_type)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_content_id ON documents(content_id)
    `,
        // Indexes on content_metadata for efficient lookup
        `
      CREATE INDEX IF NOT EXISTS idx_content_hash ON content_metadata(content_hash)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_storage_type ON content_metadata(storage_type)
    `
    ];
    try {
        // Sequential on purpose: each statement completes before the next starts.
        for (const statement of [...tableStatements, ...indexStatements]) {
            await connection.run(statement);
        }
        console.log('Database schema initialized successfully');
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to initialize database schema: ${detail}`);
    }
}
/**
 * Adds a row to the `documents` table and returns its generated ID.
 *
 * @param connection - Database connection object
 * @param source - Source path of the document (unique per document)
 * @param title - Title of the document
 * @param contentType - Type of content ('text', 'image', etc.); validated before insertion
 * @param metadata - Optional metadata object, stored as a JSON string
 * @param contentId - Optional content ID referencing the content_metadata table
 * @returns Promise that resolves to the new document ID
 * @throws Error when the source already exists, or when validation or insertion fails
 */
export async function insertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
    const insertSql = 'INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)';
    try {
        // Validate content type
        validateContentType(contentType);
        const serializedMetadata = metadata ? JSON.stringify(metadata) : null;
        const bindings = [contentId || null, source, title, contentType, serializedMetadata];
        const outcome = await connection.run(insertSql, bindings);
        if (typeof outcome.lastID !== 'number' || outcome.lastID <= 0) {
            throw new Error('Failed to get document ID after insertion');
        }
        return outcome.lastID;
    }
    catch (error) {
        const isError = error instanceof Error;
        // A uniqueness violation gets its own, friendlier message.
        if (isError && error.message.includes('UNIQUE constraint failed')) {
            throw new Error(`Document with source '${source}' already exists`);
        }
        const detail = isError ? error.message : 'Unknown error';
        throw new Error(`Failed to insert document: ${detail}`);
    }
}
/**
 * Writes a chunk row using `INSERT OR REPLACE`, so a chunk whose embedding ID
 * already exists is replaced rather than rejected.
 *
 * @param connection - Database connection object
 * @param embeddingId - Unique embedding ID for the chunk
 * @param documentId - ID of the parent document
 * @param content - Content of the chunk (text, image path, etc.)
 * @param chunkIndex - Index of the chunk within the document
 * @param contentType - Type of content ('text', 'image', etc.); validated before insertion
 * @param metadata - Optional metadata object, stored as a JSON string
 * @throws Error when the parent document does not exist, or when validation or the write fails
 */
export async function insertChunk(connection, embeddingId, documentId, content, chunkIndex, contentType = 'text', metadata) {
    // INSERT OR REPLACE handles duplicates gracefully
    const upsertSql = 'INSERT OR REPLACE INTO chunks (embedding_id, document_id, content, chunk_index, content_type, metadata) VALUES (?, ?, ?, ?, ?, ?)';
    try {
        // Validate content type
        validateContentType(contentType);
        const serializedMetadata = metadata ? JSON.stringify(metadata) : null;
        const bindings = [embeddingId, documentId, content, chunkIndex, contentType, serializedMetadata];
        await connection.run(upsertSql, bindings);
    }
    catch (error) {
        const isError = error instanceof Error;
        // A foreign-key violation means the parent document row is missing.
        if (isError && error.message.includes('FOREIGN KEY constraint failed')) {
            throw new Error(`Document with ID ${documentId} does not exist`);
        }
        const detail = isError ? error.message : 'Unknown error';
        throw new Error(`Failed to insert/update chunk: ${detail}`);
    }
}
/**
 * Returns the ID of the document stored under `source`, inserting a new row
 * first when none exists. An existing row is returned untouched; its title,
 * content type, metadata and content ID are not updated.
 *
 * @param connection - Database connection object
 * @param source - Source path of the document
 * @param title - Title of the document
 * @param contentType - Type of content ('text', 'image', etc.); validated before any query
 * @param metadata - Optional metadata object, stored as a JSON string on insert
 * @param contentId - Optional content ID referencing the content_metadata table
 * @returns Promise that resolves to the document ID
 * @throws Error prefixed with "Failed to upsert document:" on any failure
 */
export async function upsertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
    const lookupSql = 'SELECT id FROM documents WHERE source = ?';
    const insertSql = 'INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)';
    try {
        // Validate content type
        validateContentType(contentType);
        // Reuse the existing row when the source is already known
        const known = await connection.get(lookupSql, [source]);
        if (known) {
            return known.id;
        }
        // Otherwise insert a new document
        const serializedMetadata = metadata ? JSON.stringify(metadata) : null;
        const bindings = [contentId || null, source, title, contentType, serializedMetadata];
        const outcome = await connection.run(insertSql, bindings);
        if (typeof outcome.lastID !== 'number' || outcome.lastID <= 0) {
            throw new Error('Failed to get document ID after insertion');
        }
        return outcome.lastID;
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to upsert document: ${detail}`);
    }
}
/**
 * Loads the chunks matching the given embedding IDs, joined with selected
 * columns of their parent document, ordered by chunk index.
 *
 * Each returned row's `metadata` is parsed from JSON; rows without metadata
 * get `metadata: undefined`.
 *
 * @param connection - Database connection object
 * @param embeddingIds - Array of embedding IDs to retrieve
 * @returns Promise that resolves to an array of chunk rows with document fields
 *          (`document_source`, `document_title`, `document_content_type`,
 *          `document_content_id`); an empty input yields an empty array
 *          without querying
 * @throws Error prefixed with "Failed to retrieve chunks:" on query or parse failure
 */
export async function getChunksByEmbeddingIds(connection, embeddingIds) {
    // Nothing requested: skip the query entirely.
    if (embeddingIds.length === 0) {
        return [];
    }
    try {
        // One bound parameter per requested ID
        const placeholders = embeddingIds.map(() => '?').join(',');
        const sql = `
      SELECT
        c.id,
        c.embedding_id,
        c.document_id,
        c.content,
        c.content_type,
        c.chunk_index,
        c.metadata,
        c.created_at,
        d.source as document_source,
        d.title as document_title,
        d.content_type as document_content_type,
        d.content_id as document_content_id
      FROM chunks c
      JOIN documents d ON c.document_id = d.id
      WHERE c.embedding_id IN (${placeholders})
      ORDER BY c.chunk_index
    `;
        const rows = await connection.all(sql, embeddingIds);
        // Turn the stored JSON metadata strings back into objects
        const withParsedMetadata = (row) => {
            const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
            return { ...row, metadata: parsed };
        };
        return rows.map(withParsedMetadata);
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to retrieve chunks: ${detail}`);
    }
}
/**
 * Ensures `mode` is one of the supported operating modes.
 * @param mode - Value to check
 * @throws Error naming the allowed values when `mode` is not 'text' or 'multimodal'
 */
function validateMode(mode) {
    const allowed = ['text', 'multimodal'];
    if (allowed.includes(mode)) {
        return;
    }
    const choices = allowed.join(', ');
    throw new Error(`Invalid mode '${mode}'. Must be one of: ${choices}`);
}
/**
 * Ensures `modelType` is one of the supported model families.
 * @param modelType - Value to check
 * @throws Error naming the allowed values when `modelType` is not
 *         'sentence-transformer' or 'clip'
 */
function validateModelType(modelType) {
    const allowed = ['sentence-transformer', 'clip'];
    if (allowed.includes(modelType)) {
        return;
    }
    const choices = allowed.join(', ');
    throw new Error(`Invalid model type '${modelType}'. Must be one of: ${choices}`);
}
/**
 * Ensures `strategy` is one of the supported reranking strategies.
 * @param strategy - Value to check
 * @throws Error naming the allowed values when `strategy` is not recognised
 */
function validateRerankingStrategy(strategy) {
    const allowed = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
    if (allowed.includes(strategy)) {
        return;
    }
    const choices = allowed.join(', ');
    throw new Error(`Invalid reranking strategy '${strategy}'. Must be one of: ${choices}`);
}
/**
 * Ensures `contentType` is one of the content types the store accepts.
 * @param contentType - Value to check
 * @throws Error naming the allowed values when `contentType` is not
 *         'text', 'image', 'pdf' or 'docx'
 */
function validateContentType(contentType) {
    const allowed = ['text', 'image', 'pdf', 'docx'];
    if (allowed.includes(contentType)) {
        return;
    }
    const choices = allowed.join(', ');
    throw new Error(`Invalid content type '${contentType}'. Must be one of: ${choices}`);
}
/**
 * Converts a timestamp column value into a Date.
 *
 * Strings in the zone-less `YYYY-MM-DD HH:MM:SS[.fff]` form (the form SQLite's
 * CURRENT_TIMESTAMP default produces, which is UTC) are parsed explicitly as
 * UTC. Handing that form straight to `new Date()` would interpret it as local
 * time and shift the result by the host's UTC offset. Any other value is
 * passed to `new Date()` unchanged.
 */
function parseSqliteTimestamp(value) {
    const sqliteUtcPattern = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/;
    if (typeof value === 'string' && sqliteUtcPattern.test(value)) {
        return new Date(`${value.replace(' ', 'T')}Z`);
    }
    return new Date(value);
}
/**
 * Gets the complete system information from the single-row system_info table.
 *
 * JSON columns (`supported_content_types`, `reranking_config`) are parsed, and
 * the timestamp columns are returned as Date objects interpreted as UTC.
 *
 * @param connection - Database connection object
 * @returns Promise that resolves to a SystemInfo object, or null if no row is stored
 * @throws Error prefixed with "Failed to get system info:" on query or parse failure
 */
export async function getSystemInfo(connection) {
    try {
        const result = await connection.get(`
      SELECT
        mode, model_name, model_type, model_dimensions, model_version,
        supported_content_types, reranking_strategy, reranking_model,
        reranking_config, created_at, updated_at
      FROM system_info WHERE id = 1
    `);
        if (!result) {
            return null;
        }
        // Parse JSON fields and convert to proper types
        const supportedContentTypes = result.supported_content_types
            ? JSON.parse(result.supported_content_types)
            : ['text'];
        const rerankingConfig = result.reranking_config
            ? JSON.parse(result.reranking_config)
            : undefined;
        return {
            mode: result.mode,
            modelName: result.model_name,
            modelType: result.model_type,
            modelDimensions: result.model_dimensions,
            modelVersion: result.model_version,
            supportedContentTypes,
            rerankingStrategy: result.reranking_strategy,
            rerankingModel: result.reranking_model,
            rerankingConfig,
            // Stored timestamps are UTC; parse them as such rather than as local time.
            createdAt: parseSqliteTimestamp(result.created_at),
            updatedAt: parseSqliteTimestamp(result.updated_at)
        };
    }
    catch (error) {
        throw new Error(`Failed to get system info: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
/**
 * Stores system information in the single-row system_info table.
 *
 * When the row already exists, only the fields present on `systemInfo`
 * (i.e. not `undefined`) are updated, along with `updated_at`; if no field is
 * provided nothing is written. When the row does not exist, it is inserted
 * with defaults filling in for missing fields.
 *
 * @param connection - Database connection object
 * @param systemInfo - SystemInfo object (possibly partial) to store
 * @throws Error prefixed with "Failed to set system info:" on validation or write failure
 */
export async function setSystemInfo(connection, systemInfo) {
    try {
        // Validate enum values if provided
        if (systemInfo.mode) {
            validateMode(systemInfo.mode);
        }
        if (systemInfo.modelType) {
            validateModelType(systemInfo.modelType);
        }
        if (systemInfo.rerankingStrategy) {
            validateRerankingStrategy(systemInfo.rerankingStrategy);
        }
        // Check if there's already a row
        const currentRow = await connection.get('SELECT id FROM system_info WHERE id = 1');
        // JSON-encoded columns; left undefined when the field is absent/falsy
        let supportedContentTypesJson;
        if (systemInfo.supportedContentTypes) {
            supportedContentTypesJson = JSON.stringify(systemInfo.supportedContentTypes);
        }
        let rerankingConfigJson;
        if (systemInfo.rerankingConfig) {
            rerankingConfigJson = JSON.stringify(systemInfo.rerankingConfig);
        }
        if (!currentRow) {
            // Insert new row with provided values and defaults
            const insertSql = `
        INSERT INTO system_info (
          id, mode, model_name, model_type, model_dimensions, model_version,
          supported_content_types, reranking_strategy, reranking_model, reranking_config,
          created_at, updated_at
        ) VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
      `;
            await connection.run(insertSql, [
                systemInfo.mode || 'text',
                systemInfo.modelName || 'sentence-transformers/all-MiniLM-L6-v2',
                systemInfo.modelType || 'sentence-transformer',
                systemInfo.modelDimensions || 384,
                systemInfo.modelVersion || '',
                supportedContentTypesJson || '["text"]',
                systemInfo.rerankingStrategy || 'cross-encoder',
                systemInfo.rerankingModel || null,
                rerankingConfigJson || null
            ]);
            return;
        }
        // Column/value pairs in the order they appear in the UPDATE statement
        const candidates = [
            ['mode', systemInfo.mode],
            ['model_name', systemInfo.modelName],
            ['model_type', systemInfo.modelType],
            ['model_dimensions', systemInfo.modelDimensions],
            ['model_version', systemInfo.modelVersion],
            ['supported_content_types', supportedContentTypesJson],
            ['reranking_strategy', systemInfo.rerankingStrategy],
            ['reranking_model', systemInfo.rerankingModel],
            ['reranking_config', rerankingConfigJson]
        ];
        const assignments = [];
        const bindings = [];
        for (const [column, value] of candidates) {
            if (value === undefined) {
                continue;
            }
            assignments.push(`${column} = ?`);
            bindings.push(value);
        }
        // Skip the write when nothing but the timestamp would change
        if (assignments.length === 0) {
            return;
        }
        // Always update the timestamp
        assignments.push('updated_at = CURRENT_TIMESTAMP');
        bindings.push(1); // WHERE clause parameter
        const updateSql = `UPDATE system_info SET ${assignments.join(', ')} WHERE id = ?`;
        await connection.run(updateSql, bindings);
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to set system info: ${detail}`);
    }
}
519
/**
 * Reads the model version through getSystemInfo().
 * @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
 * @param connection - Database connection object
 * @returns Promise that resolves to the `modelVersion` field of the system info,
 *          or null when getSystemInfo() yields nothing
 * @throws Error prefixed with "Failed to get model version: " when the lookup fails
 */
export async function getModelVersion(connection) {
    try {
        const info = await getSystemInfo(connection);
        if (!info) {
            return null;
        }
        return info.modelVersion;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get model version: ${reason}`);
    }
}
534
/**
 * Stores the model version by delegating to setSystemInfo() with a
 * single-field update.
 * @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
 * @param connection - Database connection object
 * @param modelVersion - Model version string to store
 * @throws Error prefixed with "Failed to set model version: " when the write fails
 */
export async function setModelVersion(connection, modelVersion) {
    const patch = { modelVersion };
    try {
        await setSystemInfo(connection, patch);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to set model version: ${reason}`);
    }
}
548
/**
 * Returns the stored model name and dimensions, read through getSystemInfo().
 * @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
 * @param connection - Database connection object
 * @returns Promise that resolves to `{ modelName, dimensions }`, or null when the
 *          system info is missing or either of the two fields is falsy
 * @throws Error prefixed with "Failed to get stored model info: " when the lookup fails
 */
export async function getStoredModelInfo(connection) {
    try {
        const info = await getSystemInfo(connection);
        // Both fields must be present (truthy) for the record to count as set.
        const isComplete = Boolean(info && info.modelName && info.modelDimensions);
        return isComplete
            ? { modelName: info.modelName, dimensions: info.modelDimensions }
            : null;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get stored model info: ${reason}`);
    }
}
569
/**
 * Stores the model name and dimensions by delegating to setSystemInfo().
 * @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
 * @param connection - Database connection object
 * @param modelName - Name of the embedding model
 * @param dimensions - Number of dimensions for the model (passed on as `modelDimensions`)
 * @throws Error prefixed with "Failed to set stored model info: " when the write fails
 */
export async function setStoredModelInfo(connection, modelName, dimensions) {
    const patch = { modelName, modelDimensions: dimensions };
    try {
        await setSystemInfo(connection, patch);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to set stored model info: ${reason}`);
    }
}
587
/**
 * Lists the documents of one content type, newest first.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by (checked with validateContentType first)
 * @returns Promise that resolves to an array of document rows whose `metadata`
 *          JSON string has been parsed (undefined when the column is empty)
 * @throws Error prefixed with "Failed to get documents by content type: " on any failure
 */
export async function getDocumentsByContentType(connection, contentType) {
    // Metadata is stored as a JSON string; hand callers the parsed object.
    const withParsedMetadata = (row) => {
        const metadata = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata };
    };
    try {
        validateContentType(contentType);
        const query = 'SELECT id, source, title, content_type, metadata, created_at FROM documents WHERE content_type = ? ORDER BY created_at DESC';
        const rows = await connection.all(query, [contentType]);
        return rows.map(withParsedMetadata);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get documents by content type: ${reason}`);
    }
}
607
/**
 * Lists the chunks of one content type, joined with their parent document and
 * ordered by document source and then chunk index.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by (checked with validateContentType first)
 * @returns Promise that resolves to an array of chunk rows carrying the
 *          `document_*` columns, with `metadata` parsed from JSON
 *          (undefined when the column is empty)
 * @throws Error prefixed with "Failed to get chunks by content type: " on any failure
 */
export async function getChunksByContentType(connection, contentType) {
    // Chunk metadata is stored as a JSON string; hand callers the parsed object.
    const withParsedMetadata = (row) => {
        const metadata = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata };
    };
    try {
        validateContentType(contentType);
        const query = `
      SELECT
        c.id,
        c.embedding_id,
        c.document_id,
        c.content,
        c.content_type,
        c.chunk_index,
        c.metadata,
        c.created_at,
        d.source as document_source,
        d.title as document_title,
        d.content_type as document_content_type,
        d.content_id as document_content_id
      FROM chunks c
      JOIN documents d ON c.document_id = d.id
      WHERE c.content_type = ?
      ORDER BY d.source, c.chunk_index
    `;
        const rows = await connection.all(query, [contentType]);
        return rows.map(withParsedMetadata);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get chunks by content type: ${reason}`);
    }
}
646
/**
 * Counts documents and chunks, both per content type and overall.
 * @param connection - Database connection object
 * @returns Promise that resolves to
 *          `{ documents, chunks, total: { documents, chunks } }`, where
 *          `documents` and `chunks` map each content type to its row count
 * @throws Error prefixed with "Failed to get content type statistics: " on any failure
 */
export async function getContentTypeStatistics(connection) {
    // Turns [{ content_type, count }, ...] into { [content_type]: count }.
    const toCountMap = (rows) => {
        const counts = {};
        rows.forEach((entry) => {
            counts[entry.content_type] = entry.count;
        });
        return counts;
    };
    try {
        // The queries run one after another, in the same order as before.
        const perTypeDocuments = await connection.all(`
      SELECT content_type, COUNT(*) as count
      FROM documents
      GROUP BY content_type
    `);
        const perTypeChunks = await connection.all(`
      SELECT content_type, COUNT(*) as count
      FROM chunks
      GROUP BY content_type
    `);
        const documentTotal = await connection.get('SELECT COUNT(*) as count FROM documents');
        const chunkTotal = await connection.get('SELECT COUNT(*) as count FROM chunks');
        const documents = toCountMap(perTypeDocuments);
        const chunks = toCountMap(perTypeChunks);
        const total = {
            documents: documentTotal.count,
            chunks: chunkTotal.count
        };
        return { documents, chunks, total };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get content type statistics: ${reason}`);
    }
}
689
/**
 * Replaces a document's metadata and refreshes its `updated_at` column.
 * @param connection - Database connection object
 * @param documentId - ID of the document to update
 * @param metadata - New metadata object (stored as a JSON string)
 * @throws Error prefixed with "Failed to update document metadata: " when the
 *         write fails or when no row matched `documentId`
 */
export async function updateDocumentMetadata(connection, documentId, metadata) {
    try {
        const serialized = JSON.stringify(metadata);
        const outcome = await connection.run('UPDATE documents SET metadata = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?', [serialized, documentId]);
        const rowWasTouched = outcome.changes !== 0;
        if (!rowWasTouched) {
            // Thrown inside the try on purpose: the catch below wraps it with the common prefix.
            throw new Error(`Document with ID ${documentId} not found`);
        }
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to update document metadata: ${reason}`);
    }
}
707
/**
 * Replaces a chunk's metadata.
 * @param connection - Database connection object
 * @param chunkId - ID of the chunk to update
 * @param metadata - New metadata object (stored as a JSON string)
 * @throws Error prefixed with "Failed to update chunk metadata: " when the
 *         write fails or when no row matched `chunkId`
 */
export async function updateChunkMetadata(connection, chunkId, metadata) {
    try {
        const serialized = JSON.stringify(metadata);
        const outcome = await connection.run('UPDATE chunks SET metadata = ? WHERE id = ?', [serialized, chunkId]);
        const rowWasTouched = outcome.changes !== 0;
        if (!rowWasTouched) {
            // Thrown inside the try on purpose: the catch below wraps it with the common prefix.
            throw new Error(`Chunk with ID ${chunkId} not found`);
        }
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to update chunk metadata: ${reason}`);
    }
}
725
/**
 * Adds one row to the content_metadata table.
 * @param connection - Database connection object
 * @param contentMetadata - Content metadata to insert; a falsy `originalPath` is stored as NULL
 * @throws Error "Content with ID '<id>' already exists" when the database reports
 *         a UNIQUE constraint failure; otherwise an Error prefixed with
 *         "Failed to insert content metadata: "
 */
export async function insertContentMetadata(connection, contentMetadata) {
    try {
        // Positional values, in the same order as the column list below.
        const values = [
            contentMetadata.id,
            contentMetadata.storageType,
            contentMetadata.originalPath || null,
            contentMetadata.contentPath,
            contentMetadata.displayName,
            contentMetadata.contentType,
            contentMetadata.fileSize,
            contentMetadata.contentHash
        ];
        await connection.run(`
      INSERT INTO content_metadata (
        id, storage_type, original_path, content_path, display_name,
        content_type, file_size, content_hash
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `, values);
    }
    catch (err) {
        const isError = err instanceof Error;
        // Give duplicate inserts a dedicated, friendlier message.
        if (isError && err.message.includes('UNIQUE constraint failed')) {
            throw new Error(`Content with ID '${contentMetadata.id}' already exists`);
        }
        const reason = isError ? err.message : 'Unknown error';
        throw new Error(`Failed to insert content metadata: ${reason}`);
    }
}
755
/**
 * Looks up a single content_metadata row by its ID.
 * @param connection - Database connection object
 * @param contentId - Content ID to retrieve
 * @returns Promise that resolves to the row mapped to camelCase fields
 *          (`createdAt` as a Date), or null if not found
 * @throws Error prefixed with "Failed to get content metadata: " when the query fails
 */
export async function getContentMetadata(connection, contentId) {
    try {
        const row = await connection.get(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata
      WHERE id = ?
    `, [contentId]);
        // Translate snake_case columns into the camelCase shape callers expect.
        return row
            ? {
                id: row.id,
                storageType: row.storage_type,
                originalPath: row.original_path,
                contentPath: row.content_path,
                displayName: row.display_name,
                contentType: row.content_type,
                fileSize: row.file_size,
                contentHash: row.content_hash,
                createdAt: new Date(row.created_at)
            }
            : null;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get content metadata: ${reason}`);
    }
}
788
/**
 * Looks up a content_metadata row by its content hash (used for deduplication).
 * @param connection - Database connection object
 * @param contentHash - Content hash to search for
 * @returns Promise that resolves to the row mapped to camelCase fields
 *          (`createdAt` as a Date), or null if not found
 * @throws Error prefixed with "Failed to get content metadata by hash: " when the query fails
 */
export async function getContentMetadataByHash(connection, contentHash) {
    const query = `
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata
      WHERE content_hash = ?
    `;
    try {
        const match = await connection.get(query, [contentHash]);
        if (match) {
            // Translate snake_case columns into the camelCase shape callers expect.
            const createdAt = new Date(match.created_at);
            return {
                id: match.id,
                storageType: match.storage_type,
                originalPath: match.original_path,
                contentPath: match.content_path,
                displayName: match.display_name,
                contentType: match.content_type,
                fileSize: match.file_size,
                contentHash: match.content_hash,
                createdAt
            };
        }
        return null;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get content metadata by hash: ${reason}`);
    }
}
821
/**
 * Lists every content_metadata row of one storage type, newest first.
 * @param connection - Database connection object
 * @param storageType - Storage type to filter by
 * @returns Promise that resolves to an array of rows mapped to camelCase fields
 *          (`createdAt` as a Date); empty when nothing matches
 * @throws Error prefixed with "Failed to get content metadata by storage type: " when the query fails
 */
export async function getContentMetadataByStorageType(connection, storageType) {
    // Translate snake_case columns into the camelCase shape callers expect.
    const toContentMetadata = (row) => {
        return {
            id: row.id,
            storageType: row.storage_type,
            originalPath: row.original_path,
            contentPath: row.content_path,
            displayName: row.display_name,
            contentType: row.content_type,
            fileSize: row.file_size,
            contentHash: row.content_hash,
            createdAt: new Date(row.created_at)
        };
    };
    try {
        const rows = await connection.all(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata
      WHERE storage_type = ?
      ORDER BY created_at DESC
    `, [storageType]);
        return rows.map((row) => toContentMetadata(row));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get content metadata by storage type: ${reason}`);
    }
}
852
/**
 * Removes the content_metadata row with the given ID.
 * @param connection - Database connection object
 * @param contentId - Content ID to delete
 * @returns Promise that resolves to true if a row was deleted, false if not found
 * @throws Error prefixed with "Failed to delete content metadata: " when the delete fails
 */
export async function deleteContentMetadata(connection, contentId) {
    try {
        const statement = 'DELETE FROM content_metadata WHERE id = ?';
        const outcome = await connection.run(statement, [contentId]);
        // `changes` is the affected-row count reported by the connection.
        const wasDeleted = outcome.changes > 0;
        return wasDeleted;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to delete content metadata: ${reason}`);
    }
}
867
/**
 * Reads the storage statistics row (id = 1) from the storage_stats table.
 * @param connection - Database connection object
 * @returns Promise that resolves to the statistics mapped to camelCase fields,
 *          with `lastCleanup` as a Date or null and `updatedAt` as a Date,
 *          or to null when the row does not exist
 * @throws Error prefixed with "Failed to get storage stats: " when the query fails
 */
export async function getStorageStats(connection) {
    try {
        const row = await connection.get(`
      SELECT content_dir_files, content_dir_size, filesystem_refs,
             last_cleanup, updated_at
      FROM storage_stats
      WHERE id = 1
    `);
        if (!row) {
            return null;
        }
        // A falsy last_cleanup means no cleanup has been recorded.
        const lastCleanup = row.last_cleanup ? new Date(row.last_cleanup) : null;
        const updatedAt = new Date(row.updated_at);
        return {
            contentDirFiles: row.content_dir_files,
            contentDirSize: row.content_dir_size,
            filesystemRefs: row.filesystem_refs,
            lastCleanup,
            updatedAt
        };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get storage stats: ${reason}`);
    }
}
895
/**
 * Updates storage statistics in the storage_stats table (single row, id = 1).
 *
 * When the row already exists, only the fields present on `stats` are written
 * and `updated_at` is refreshed; if `stats` carries none of the known fields,
 * no statement is executed. When the row does not exist it is inserted, with
 * missing counters defaulting to 0 and a missing cleanup time stored as NULL.
 *
 * `lastCleanup: null` is accepted on both paths and stored as NULL. Previously
 * the update path called `toISOString()` on it and threw, even though
 * getStorageStats() returns null for an unset cleanup time.
 *
 * @param connection - Database connection object
 * @param stats - Partial storage statistics to update (`contentDirFiles`,
 *                `contentDirSize`, `filesystemRefs`, `lastCleanup` as Date or null)
 * @throws Error prefixed with "Failed to update storage stats: " on any failure
 */
export async function updateStorageStats(connection, stats) {
    try {
        // Check if there's already a row
        const existing = await connection.get('SELECT id FROM storage_stats WHERE id = 1');
        // Shared by both paths: a missing or null cleanup time is stored as NULL.
        const lastCleanupValue = stats.lastCleanup ? stats.lastCleanup.toISOString() : null;
        if (!existing) {
            // Insert new row with provided values and defaults
            const insertSql = `
        INSERT INTO storage_stats (
          id, content_dir_files, content_dir_size, filesystem_refs,
          last_cleanup, updated_at
        ) VALUES (1, ?, ?, ?, ?, CURRENT_TIMESTAMP)
      `;
            await connection.run(insertSql, [
                stats.contentDirFiles || 0,
                stats.contentDirSize || 0,
                stats.filesystemRefs || 0,
                lastCleanupValue
            ]);
            return;
        }
        // Build dynamic UPDATE query based on provided fields
        const assignments = [];
        const values = [];
        if (stats.contentDirFiles !== undefined) {
            assignments.push('content_dir_files = ?');
            values.push(stats.contentDirFiles);
        }
        if (stats.contentDirSize !== undefined) {
            assignments.push('content_dir_size = ?');
            values.push(stats.contentDirSize);
        }
        if (stats.filesystemRefs !== undefined) {
            assignments.push('filesystem_refs = ?');
            values.push(stats.filesystemRefs);
        }
        if (stats.lastCleanup !== undefined) {
            assignments.push('last_cleanup = ?');
            values.push(lastCleanupValue);
        }
        if (assignments.length === 0) {
            // Nothing was supplied, so the row (including its timestamp) is left untouched.
            return;
        }
        assignments.push('updated_at = CURRENT_TIMESTAMP');
        const sql = `UPDATE storage_stats SET ${assignments.join(', ')} WHERE id = ?`;
        // The trailing 1 binds the WHERE id = ? placeholder.
        await connection.run(sql, [...values, 1]);
    }
    catch (error) {
        throw new Error(`Failed to update storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
952
+ //# sourceMappingURL=db.js.map