rag-lite-ts 1.0.2 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +605 -93
- package/dist/cli/indexer.js +192 -4
- package/dist/cli/search.js +50 -11
- package/dist/cli.js +183 -26
- package/dist/core/abstract-embedder.d.ts +125 -0
- package/dist/core/abstract-embedder.js +264 -0
- package/dist/core/actionable-error-messages.d.ts +60 -0
- package/dist/core/actionable-error-messages.js +397 -0
- package/dist/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/core/batch-processing-optimizer.js +541 -0
- package/dist/core/binary-index-format.d.ts +52 -0
- package/dist/core/binary-index-format.js +122 -0
- package/dist/core/chunker.d.ts +2 -0
- package/dist/core/cli-database-utils.d.ts +53 -0
- package/dist/core/cli-database-utils.js +239 -0
- package/dist/core/config.js +10 -3
- package/dist/core/content-errors.d.ts +111 -0
- package/dist/core/content-errors.js +362 -0
- package/dist/core/content-manager.d.ts +343 -0
- package/dist/core/content-manager.js +1504 -0
- package/dist/core/content-performance-optimizer.d.ts +150 -0
- package/dist/core/content-performance-optimizer.js +516 -0
- package/dist/core/content-resolver.d.ts +104 -0
- package/dist/core/content-resolver.js +285 -0
- package/dist/core/cross-modal-search.d.ts +164 -0
- package/dist/core/cross-modal-search.js +342 -0
- package/dist/core/database-connection-manager.d.ts +109 -0
- package/dist/core/database-connection-manager.js +304 -0
- package/dist/core/db.d.ts +141 -2
- package/dist/core/db.js +631 -89
- package/dist/core/embedder-factory.d.ts +176 -0
- package/dist/core/embedder-factory.js +338 -0
- package/dist/core/index.d.ts +3 -1
- package/dist/core/index.js +4 -1
- package/dist/core/ingestion.d.ts +85 -15
- package/dist/core/ingestion.js +510 -45
- package/dist/core/lazy-dependency-loader.d.ts +152 -0
- package/dist/core/lazy-dependency-loader.js +453 -0
- package/dist/core/mode-detection-service.d.ts +150 -0
- package/dist/core/mode-detection-service.js +565 -0
- package/dist/core/mode-model-validator.d.ts +92 -0
- package/dist/core/mode-model-validator.js +203 -0
- package/dist/core/model-registry.d.ts +120 -0
- package/dist/core/model-registry.js +415 -0
- package/dist/core/model-validator.d.ts +217 -0
- package/dist/core/model-validator.js +782 -0
- package/dist/core/polymorphic-search-factory.d.ts +154 -0
- package/dist/core/polymorphic-search-factory.js +344 -0
- package/dist/core/raglite-paths.d.ts +121 -0
- package/dist/core/raglite-paths.js +145 -0
- package/dist/core/reranking-config.d.ts +42 -0
- package/dist/core/reranking-config.js +156 -0
- package/dist/core/reranking-factory.d.ts +92 -0
- package/dist/core/reranking-factory.js +591 -0
- package/dist/core/reranking-strategies.d.ts +325 -0
- package/dist/core/reranking-strategies.js +720 -0
- package/dist/core/resource-cleanup.d.ts +163 -0
- package/dist/core/resource-cleanup.js +371 -0
- package/dist/core/resource-manager.d.ts +212 -0
- package/dist/core/resource-manager.js +564 -0
- package/dist/core/search.d.ts +28 -1
- package/dist/core/search.js +83 -5
- package/dist/core/streaming-operations.d.ts +145 -0
- package/dist/core/streaming-operations.js +409 -0
- package/dist/core/types.d.ts +3 -0
- package/dist/core/universal-embedder.d.ts +177 -0
- package/dist/core/universal-embedder.js +139 -0
- package/dist/core/validation-messages.d.ts +99 -0
- package/dist/core/validation-messages.js +334 -0
- package/dist/core/vector-index.d.ts +1 -1
- package/dist/core/vector-index.js +37 -39
- package/dist/factories/index.d.ts +3 -1
- package/dist/factories/index.js +2 -0
- package/dist/factories/polymorphic-factory.d.ts +50 -0
- package/dist/factories/polymorphic-factory.js +159 -0
- package/dist/factories/text-factory.d.ts +128 -34
- package/dist/factories/text-factory.js +346 -97
- package/dist/file-processor.d.ts +88 -2
- package/dist/file-processor.js +720 -17
- package/dist/index.d.ts +32 -0
- package/dist/index.js +29 -0
- package/dist/ingestion.d.ts +16 -0
- package/dist/ingestion.js +21 -0
- package/dist/mcp-server.d.ts +35 -3
- package/dist/mcp-server.js +1107 -31
- package/dist/multimodal/clip-embedder.d.ts +327 -0
- package/dist/multimodal/clip-embedder.js +992 -0
- package/dist/multimodal/index.d.ts +6 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/run-error-recovery-tests.d.ts +7 -0
- package/dist/run-error-recovery-tests.js +101 -0
- package/dist/search.d.ts +60 -9
- package/dist/search.js +82 -11
- package/dist/test-utils.d.ts +8 -26
- package/dist/text/chunker.d.ts +1 -0
- package/dist/text/embedder.js +15 -8
- package/dist/text/index.d.ts +1 -0
- package/dist/text/index.js +1 -0
- package/dist/text/reranker.d.ts +1 -2
- package/dist/text/reranker.js +17 -47
- package/dist/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/text/sentence-transformer-embedder.js +340 -0
- package/dist/types.d.ts +39 -0
- package/dist/utils/vector-math.d.ts +31 -0
- package/dist/utils/vector-math.js +70 -0
- package/package.json +27 -6
- package/dist/api-errors.d.ts.map +0 -1
- package/dist/api-errors.js.map +0 -1
- package/dist/cli/indexer.d.ts.map +0 -1
- package/dist/cli/indexer.js.map +0 -1
- package/dist/cli/search.d.ts.map +0 -1
- package/dist/cli/search.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/core/adapters.d.ts.map +0 -1
- package/dist/core/adapters.js.map +0 -1
- package/dist/core/chunker.d.ts.map +0 -1
- package/dist/core/chunker.js.map +0 -1
- package/dist/core/config.d.ts.map +0 -1
- package/dist/core/config.js.map +0 -1
- package/dist/core/db.d.ts.map +0 -1
- package/dist/core/db.js.map +0 -1
- package/dist/core/error-handler.d.ts.map +0 -1
- package/dist/core/error-handler.js.map +0 -1
- package/dist/core/index.d.ts.map +0 -1
- package/dist/core/index.js.map +0 -1
- package/dist/core/ingestion.d.ts.map +0 -1
- package/dist/core/ingestion.js.map +0 -1
- package/dist/core/interfaces.d.ts.map +0 -1
- package/dist/core/interfaces.js.map +0 -1
- package/dist/core/path-manager.d.ts.map +0 -1
- package/dist/core/path-manager.js.map +0 -1
- package/dist/core/search-example.d.ts +0 -25
- package/dist/core/search-example.d.ts.map +0 -1
- package/dist/core/search-example.js +0 -138
- package/dist/core/search-example.js.map +0 -1
- package/dist/core/search-pipeline-example.d.ts +0 -21
- package/dist/core/search-pipeline-example.d.ts.map +0 -1
- package/dist/core/search-pipeline-example.js +0 -188
- package/dist/core/search-pipeline-example.js.map +0 -1
- package/dist/core/search-pipeline.d.ts.map +0 -1
- package/dist/core/search-pipeline.js.map +0 -1
- package/dist/core/search.d.ts.map +0 -1
- package/dist/core/search.js.map +0 -1
- package/dist/core/types.d.ts.map +0 -1
- package/dist/core/types.js.map +0 -1
- package/dist/core/vector-index.d.ts.map +0 -1
- package/dist/core/vector-index.js.map +0 -1
- package/dist/dom-polyfills.d.ts.map +0 -1
- package/dist/dom-polyfills.js.map +0 -1
- package/dist/examples/clean-api-examples.d.ts +0 -44
- package/dist/examples/clean-api-examples.d.ts.map +0 -1
- package/dist/examples/clean-api-examples.js +0 -206
- package/dist/examples/clean-api-examples.js.map +0 -1
- package/dist/factories/index.d.ts.map +0 -1
- package/dist/factories/index.js.map +0 -1
- package/dist/factories/text-factory.d.ts.map +0 -1
- package/dist/factories/text-factory.js.map +0 -1
- package/dist/file-processor.d.ts.map +0 -1
- package/dist/file-processor.js.map +0 -1
- package/dist/index-manager.d.ts.map +0 -1
- package/dist/index-manager.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/indexer.d.ts.map +0 -1
- package/dist/indexer.js.map +0 -1
- package/dist/ingestion.d.ts.map +0 -1
- package/dist/ingestion.js.map +0 -1
- package/dist/mcp-server.d.ts.map +0 -1
- package/dist/mcp-server.js.map +0 -1
- package/dist/preprocess.d.ts.map +0 -1
- package/dist/preprocess.js.map +0 -1
- package/dist/preprocessors/index.d.ts.map +0 -1
- package/dist/preprocessors/index.js.map +0 -1
- package/dist/preprocessors/mdx.d.ts.map +0 -1
- package/dist/preprocessors/mdx.js.map +0 -1
- package/dist/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/preprocessors/mermaid.js.map +0 -1
- package/dist/preprocessors/registry.d.ts.map +0 -1
- package/dist/preprocessors/registry.js.map +0 -1
- package/dist/search-standalone.d.ts.map +0 -1
- package/dist/search-standalone.js.map +0 -1
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js.map +0 -1
- package/dist/test-utils.d.ts.map +0 -1
- package/dist/test-utils.js.map +0 -1
- package/dist/text/chunker.d.ts.map +0 -1
- package/dist/text/chunker.js.map +0 -1
- package/dist/text/embedder.d.ts.map +0 -1
- package/dist/text/embedder.js.map +0 -1
- package/dist/text/index.d.ts.map +0 -1
- package/dist/text/index.js.map +0 -1
- package/dist/text/preprocessors/index.d.ts.map +0 -1
- package/dist/text/preprocessors/index.js.map +0 -1
- package/dist/text/preprocessors/mdx.d.ts.map +0 -1
- package/dist/text/preprocessors/mdx.js.map +0 -1
- package/dist/text/preprocessors/mermaid.d.ts.map +0 -1
- package/dist/text/preprocessors/mermaid.js.map +0 -1
- package/dist/text/preprocessors/registry.d.ts.map +0 -1
- package/dist/text/preprocessors/registry.js.map +0 -1
- package/dist/text/reranker.d.ts.map +0 -1
- package/dist/text/reranker.js.map +0 -1
- package/dist/text/tokenizer.d.ts.map +0 -1
- package/dist/text/tokenizer.js.map +0 -1
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js.map +0 -1
package/dist/core/db.js
CHANGED
|
@@ -96,15 +96,18 @@ function enhanceSQLiteError(error, sql) {
|
|
|
96
96
|
*/
|
|
97
97
|
export async function initializeSchema(connection) {
|
|
98
98
|
try {
|
|
99
|
-
// Create documents table with content type support
|
|
99
|
+
// Create documents table with content type support and content_id reference
|
|
100
100
|
await connection.run(`
|
|
101
101
|
CREATE TABLE IF NOT EXISTS documents (
|
|
102
102
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
103
|
+
content_id TEXT, -- References content_metadata.id
|
|
103
104
|
source TEXT NOT NULL UNIQUE,
|
|
104
105
|
title TEXT NOT NULL,
|
|
105
106
|
content_type TEXT DEFAULT 'text',
|
|
106
107
|
metadata TEXT,
|
|
107
|
-
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
108
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
109
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
110
|
+
FOREIGN KEY (content_id) REFERENCES content_metadata(id)
|
|
108
111
|
)
|
|
109
112
|
`);
|
|
110
113
|
// Create chunks table with content type and metadata support
|
|
@@ -121,72 +124,60 @@ export async function initializeSchema(connection) {
|
|
|
121
124
|
FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
|
|
122
125
|
)
|
|
123
126
|
`);
|
|
124
|
-
// Create
|
|
127
|
+
// Create content_metadata table for unified content system
|
|
128
|
+
await connection.run(`
|
|
129
|
+
CREATE TABLE IF NOT EXISTS content_metadata (
|
|
130
|
+
id TEXT PRIMARY KEY, -- Hash-based content ID
|
|
131
|
+
storage_type TEXT NOT NULL CHECK (storage_type IN ('filesystem', 'content_dir')),
|
|
132
|
+
original_path TEXT, -- Original file path (filesystem only)
|
|
133
|
+
content_path TEXT NOT NULL, -- Actual storage path
|
|
134
|
+
display_name TEXT NOT NULL, -- User-friendly name
|
|
135
|
+
content_type TEXT NOT NULL, -- MIME type
|
|
136
|
+
file_size INTEGER NOT NULL, -- Size in bytes
|
|
137
|
+
content_hash TEXT NOT NULL, -- SHA-256 hash
|
|
138
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
139
|
+
)
|
|
140
|
+
`);
|
|
141
|
+
// Create storage_stats table for basic content directory tracking
|
|
142
|
+
await connection.run(`
|
|
143
|
+
CREATE TABLE IF NOT EXISTS storage_stats (
|
|
144
|
+
id INTEGER PRIMARY KEY CHECK (id = 1),
|
|
145
|
+
content_dir_files INTEGER DEFAULT 0,
|
|
146
|
+
content_dir_size INTEGER DEFAULT 0,
|
|
147
|
+
filesystem_refs INTEGER DEFAULT 0,
|
|
148
|
+
last_cleanup DATETIME,
|
|
149
|
+
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
150
|
+
)
|
|
151
|
+
`);
|
|
152
|
+
// Create system_info table for mode persistence and model tracking
|
|
125
153
|
await connection.run(`
|
|
126
154
|
CREATE TABLE IF NOT EXISTS system_info (
|
|
127
155
|
id INTEGER PRIMARY KEY CHECK (id = 1),
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
156
|
+
|
|
157
|
+
-- Core mode and model information
|
|
158
|
+
mode TEXT NOT NULL DEFAULT 'text' CHECK (mode IN ('text', 'multimodal')),
|
|
159
|
+
model_name TEXT NOT NULL DEFAULT 'sentence-transformers/all-MiniLM-L6-v2',
|
|
160
|
+
model_type TEXT NOT NULL DEFAULT 'sentence-transformer' CHECK (model_type IN ('sentence-transformer', 'clip')),
|
|
161
|
+
model_dimensions INTEGER NOT NULL DEFAULT 384,
|
|
162
|
+
model_version TEXT NOT NULL DEFAULT '',
|
|
163
|
+
|
|
164
|
+
-- Content type support (JSON array)
|
|
165
|
+
supported_content_types TEXT NOT NULL DEFAULT '["text"]',
|
|
166
|
+
|
|
167
|
+
-- Reranking configuration
|
|
168
|
+
reranking_strategy TEXT DEFAULT 'cross-encoder' CHECK (
|
|
169
|
+
reranking_strategy IN ('cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled')
|
|
170
|
+
),
|
|
171
|
+
reranking_model TEXT,
|
|
172
|
+
reranking_config TEXT, -- JSON configuration for strategy-specific settings
|
|
173
|
+
|
|
174
|
+
-- Timestamps
|
|
175
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
131
176
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
132
177
|
)
|
|
133
178
|
`);
|
|
134
|
-
//
|
|
135
|
-
|
|
136
|
-
await connection.run(`ALTER TABLE documents ADD COLUMN content_type TEXT DEFAULT 'text'`);
|
|
137
|
-
}
|
|
138
|
-
catch (error) {
|
|
139
|
-
// Column already exists, ignore error
|
|
140
|
-
if (error instanceof Error && !error.message.includes('duplicate column name')) {
|
|
141
|
-
throw error;
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
try {
|
|
145
|
-
await connection.run(`ALTER TABLE documents ADD COLUMN metadata TEXT`);
|
|
146
|
-
}
|
|
147
|
-
catch (error) {
|
|
148
|
-
// Column already exists, ignore error
|
|
149
|
-
if (error instanceof Error && !error.message.includes('duplicate column name')) {
|
|
150
|
-
throw error;
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
try {
|
|
154
|
-
await connection.run(`ALTER TABLE chunks ADD COLUMN content_type TEXT DEFAULT 'text'`);
|
|
155
|
-
}
|
|
156
|
-
catch (error) {
|
|
157
|
-
// Column already exists, ignore error
|
|
158
|
-
if (error instanceof Error && !error.message.includes('duplicate column name')) {
|
|
159
|
-
throw error;
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
try {
|
|
163
|
-
await connection.run(`ALTER TABLE chunks ADD COLUMN metadata TEXT`);
|
|
164
|
-
}
|
|
165
|
-
catch (error) {
|
|
166
|
-
// Column already exists, ignore error
|
|
167
|
-
if (error instanceof Error && !error.message.includes('duplicate column name')) {
|
|
168
|
-
throw error;
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
// Add model tracking columns if they don't exist
|
|
172
|
-
try {
|
|
173
|
-
await connection.run(`ALTER TABLE system_info ADD COLUMN model_name TEXT`);
|
|
174
|
-
}
|
|
175
|
-
catch (error) {
|
|
176
|
-
// Column already exists, ignore error
|
|
177
|
-
if (error instanceof Error && !error.message.includes('duplicate column name')) {
|
|
178
|
-
throw error;
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
try {
|
|
182
|
-
await connection.run(`ALTER TABLE system_info ADD COLUMN model_dimensions INTEGER`);
|
|
183
|
-
}
|
|
184
|
-
catch (error) {
|
|
185
|
-
// Column already exists, ignore error
|
|
186
|
-
if (error instanceof Error && !error.message.includes('duplicate column name')) {
|
|
187
|
-
throw error;
|
|
188
|
-
}
|
|
189
|
-
}
|
|
179
|
+
// Clean slate approach - no migration logic needed
|
|
180
|
+
// Users will perform fresh ingestion with the new architecture
|
|
190
181
|
// Create indexes for performance
|
|
191
182
|
await connection.run(`
|
|
192
183
|
CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)
|
|
@@ -202,6 +193,16 @@ export async function initializeSchema(connection) {
|
|
|
202
193
|
`);
|
|
203
194
|
await connection.run(`
|
|
204
195
|
CREATE INDEX IF NOT EXISTS idx_documents_content_type ON documents(content_type)
|
|
196
|
+
`);
|
|
197
|
+
await connection.run(`
|
|
198
|
+
CREATE INDEX IF NOT EXISTS idx_documents_content_id ON documents(content_id)
|
|
199
|
+
`);
|
|
200
|
+
// Create indexes for content metadata table for efficient lookup
|
|
201
|
+
await connection.run(`
|
|
202
|
+
CREATE INDEX IF NOT EXISTS idx_content_hash ON content_metadata(content_hash)
|
|
203
|
+
`);
|
|
204
|
+
await connection.run(`
|
|
205
|
+
CREATE INDEX IF NOT EXISTS idx_storage_type ON content_metadata(storage_type)
|
|
205
206
|
`);
|
|
206
207
|
console.log('Database schema initialized successfully');
|
|
207
208
|
}
|
|
@@ -216,12 +217,15 @@ export async function initializeSchema(connection) {
|
|
|
216
217
|
* @param title - Title of the document
|
|
217
218
|
* @param contentType - Type of content ('text', 'image', etc.)
|
|
218
219
|
* @param metadata - Optional metadata object
|
|
220
|
+
* @param contentId - Optional content ID referencing content_metadata table
|
|
219
221
|
* @returns Promise that resolves to the document ID
|
|
220
222
|
*/
|
|
221
|
-
export async function insertDocument(connection, source, title, contentType = 'text', metadata) {
|
|
223
|
+
export async function insertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
|
|
222
224
|
try {
|
|
225
|
+
// Validate content type
|
|
226
|
+
validateContentType(contentType);
|
|
223
227
|
const metadataJson = metadata ? JSON.stringify(metadata) : null;
|
|
224
|
-
const result = await connection.run('INSERT INTO documents (source, title, content_type, metadata) VALUES (?, ?, ?, ?)', [source, title, contentType, metadataJson]);
|
|
228
|
+
const result = await connection.run('INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)', [contentId || null, source, title, contentType, metadataJson]);
|
|
225
229
|
if (typeof result.lastID !== 'number' || result.lastID <= 0) {
|
|
226
230
|
throw new Error('Failed to get document ID after insertion');
|
|
227
231
|
}
|
|
@@ -246,6 +250,8 @@ export async function insertDocument(connection, source, title, contentType = 't
|
|
|
246
250
|
*/
|
|
247
251
|
export async function insertChunk(connection, embeddingId, documentId, content, chunkIndex, contentType = 'text', metadata) {
|
|
248
252
|
try {
|
|
253
|
+
// Validate content type
|
|
254
|
+
validateContentType(contentType);
|
|
249
255
|
const metadataJson = metadata ? JSON.stringify(metadata) : null;
|
|
250
256
|
// Use INSERT OR REPLACE to handle duplicates gracefully
|
|
251
257
|
await connection.run('INSERT OR REPLACE INTO chunks (embedding_id, document_id, content, chunk_index, content_type, metadata) VALUES (?, ?, ?, ?, ?, ?)', [embeddingId, documentId, content, chunkIndex, contentType, metadataJson]);
|
|
@@ -265,10 +271,13 @@ export async function insertChunk(connection, embeddingId, documentId, content,
|
|
|
265
271
|
* @param title - Title of the document
|
|
266
272
|
* @param contentType - Type of content ('text', 'image', etc.)
|
|
267
273
|
* @param metadata - Optional metadata object
|
|
274
|
+
* @param contentId - Optional content ID referencing content_metadata table
|
|
268
275
|
* @returns Promise that resolves to the document ID
|
|
269
276
|
*/
|
|
270
|
-
export async function upsertDocument(connection, source, title, contentType = 'text', metadata) {
|
|
277
|
+
export async function upsertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
|
|
271
278
|
try {
|
|
279
|
+
// Validate content type
|
|
280
|
+
validateContentType(contentType);
|
|
272
281
|
// First try to get existing document
|
|
273
282
|
const existing = await connection.get('SELECT id FROM documents WHERE source = ?', [source]);
|
|
274
283
|
if (existing) {
|
|
@@ -276,7 +285,7 @@ export async function upsertDocument(connection, source, title, contentType = 't
|
|
|
276
285
|
}
|
|
277
286
|
// Insert new document if it doesn't exist
|
|
278
287
|
const metadataJson = metadata ? JSON.stringify(metadata) : null;
|
|
279
|
-
const result = await connection.run('INSERT INTO documents (source, title, content_type, metadata) VALUES (?, ?, ?, ?)', [source, title, contentType, metadataJson]);
|
|
288
|
+
const result = await connection.run('INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)', [contentId || null, source, title, contentType, metadataJson]);
|
|
280
289
|
if (typeof result.lastID !== 'number' || result.lastID <= 0) {
|
|
281
290
|
throw new Error('Failed to get document ID after insertion');
|
|
282
291
|
}
|
|
@@ -311,7 +320,8 @@ export async function getChunksByEmbeddingIds(connection, embeddingIds) {
|
|
|
311
320
|
c.created_at,
|
|
312
321
|
d.source as document_source,
|
|
313
322
|
d.title as document_title,
|
|
314
|
-
d.content_type as document_content_type
|
|
323
|
+
d.content_type as document_content_type,
|
|
324
|
+
d.content_id as document_content_id
|
|
315
325
|
FROM chunks c
|
|
316
326
|
JOIN documents d ON c.document_id = d.id
|
|
317
327
|
WHERE c.embedding_id IN (${placeholders})
|
|
@@ -329,55 +339,227 @@ export async function getChunksByEmbeddingIds(connection, embeddingIds) {
|
|
|
329
339
|
}
|
|
330
340
|
}
|
|
331
341
|
/**
|
|
342
|
+
* Validates mode value against allowed enum values
|
|
343
|
+
*/
|
|
344
|
+
function validateMode(mode) {
|
|
345
|
+
const validModes = ['text', 'multimodal'];
|
|
346
|
+
if (!validModes.includes(mode)) {
|
|
347
|
+
throw new Error(`Invalid mode '${mode}'. Must be one of: ${validModes.join(', ')}`);
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
/**
|
|
351
|
+
* Validates model type value against allowed enum values
|
|
352
|
+
*/
|
|
353
|
+
function validateModelType(modelType) {
|
|
354
|
+
const validTypes = ['sentence-transformer', 'clip'];
|
|
355
|
+
if (!validTypes.includes(modelType)) {
|
|
356
|
+
throw new Error(`Invalid model type '${modelType}'. Must be one of: ${validTypes.join(', ')}`);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
/**
|
|
360
|
+
* Validates reranking strategy value against allowed enum values
|
|
361
|
+
*/
|
|
362
|
+
function validateRerankingStrategy(strategy) {
|
|
363
|
+
const validStrategies = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
|
|
364
|
+
if (!validStrategies.includes(strategy)) {
|
|
365
|
+
throw new Error(`Invalid reranking strategy '${strategy}'. Must be one of: ${validStrategies.join(', ')}`);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
/**
|
|
369
|
+
* Validates content type value against allowed types
|
|
370
|
+
*/
|
|
371
|
+
function validateContentType(contentType) {
|
|
372
|
+
const validTypes = ['text', 'image', 'pdf', 'docx'];
|
|
373
|
+
if (!validTypes.includes(contentType)) {
|
|
374
|
+
throw new Error(`Invalid content type '${contentType}'. Must be one of: ${validTypes.join(', ')}`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
/**
|
|
378
|
+
* Gets the complete system information from system_info table
|
|
379
|
+
* @param connection - Database connection object
|
|
380
|
+
* @returns Promise that resolves to SystemInfo object or null if not set
|
|
381
|
+
*/
|
|
382
|
+
export async function getSystemInfo(connection) {
|
|
383
|
+
try {
|
|
384
|
+
const result = await connection.get(`
|
|
385
|
+
SELECT
|
|
386
|
+
mode, model_name, model_type, model_dimensions, model_version,
|
|
387
|
+
supported_content_types, reranking_strategy, reranking_model,
|
|
388
|
+
reranking_config, created_at, updated_at
|
|
389
|
+
FROM system_info WHERE id = 1
|
|
390
|
+
`);
|
|
391
|
+
if (!result) {
|
|
392
|
+
return null;
|
|
393
|
+
}
|
|
394
|
+
// Parse JSON fields and convert to proper types
|
|
395
|
+
const supportedContentTypes = result.supported_content_types
|
|
396
|
+
? JSON.parse(result.supported_content_types)
|
|
397
|
+
: ['text'];
|
|
398
|
+
const rerankingConfig = result.reranking_config
|
|
399
|
+
? JSON.parse(result.reranking_config)
|
|
400
|
+
: undefined;
|
|
401
|
+
return {
|
|
402
|
+
mode: result.mode,
|
|
403
|
+
modelName: result.model_name,
|
|
404
|
+
modelType: result.model_type,
|
|
405
|
+
modelDimensions: result.model_dimensions,
|
|
406
|
+
modelVersion: result.model_version,
|
|
407
|
+
supportedContentTypes,
|
|
408
|
+
rerankingStrategy: result.reranking_strategy,
|
|
409
|
+
rerankingModel: result.reranking_model,
|
|
410
|
+
rerankingConfig,
|
|
411
|
+
createdAt: new Date(result.created_at),
|
|
412
|
+
updatedAt: new Date(result.updated_at)
|
|
413
|
+
};
|
|
414
|
+
}
|
|
415
|
+
catch (error) {
|
|
416
|
+
throw new Error(`Failed to get system info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
/**
|
|
420
|
+
* Sets the complete system information in system_info table
|
|
421
|
+
* @param connection - Database connection object
|
|
422
|
+
* @param systemInfo - SystemInfo object to store
|
|
423
|
+
*/
|
|
424
|
+
export async function setSystemInfo(connection, systemInfo) {
|
|
425
|
+
try {
|
|
426
|
+
// Validate enum values if provided
|
|
427
|
+
if (systemInfo.mode) {
|
|
428
|
+
validateMode(systemInfo.mode);
|
|
429
|
+
}
|
|
430
|
+
if (systemInfo.modelType) {
|
|
431
|
+
validateModelType(systemInfo.modelType);
|
|
432
|
+
}
|
|
433
|
+
if (systemInfo.rerankingStrategy) {
|
|
434
|
+
validateRerankingStrategy(systemInfo.rerankingStrategy);
|
|
435
|
+
}
|
|
436
|
+
// Check if there's already a row
|
|
437
|
+
const existing = await connection.get('SELECT id FROM system_info WHERE id = 1');
|
|
438
|
+
// Prepare JSON fields
|
|
439
|
+
const supportedContentTypesJson = systemInfo.supportedContentTypes
|
|
440
|
+
? JSON.stringify(systemInfo.supportedContentTypes)
|
|
441
|
+
: undefined;
|
|
442
|
+
const rerankingConfigJson = systemInfo.rerankingConfig
|
|
443
|
+
? JSON.stringify(systemInfo.rerankingConfig)
|
|
444
|
+
: undefined;
|
|
445
|
+
if (existing) {
|
|
446
|
+
// Build dynamic UPDATE query based on provided fields
|
|
447
|
+
const updateFields = [];
|
|
448
|
+
const updateValues = [];
|
|
449
|
+
if (systemInfo.mode !== undefined) {
|
|
450
|
+
updateFields.push('mode = ?');
|
|
451
|
+
updateValues.push(systemInfo.mode);
|
|
452
|
+
}
|
|
453
|
+
if (systemInfo.modelName !== undefined) {
|
|
454
|
+
updateFields.push('model_name = ?');
|
|
455
|
+
updateValues.push(systemInfo.modelName);
|
|
456
|
+
}
|
|
457
|
+
if (systemInfo.modelType !== undefined) {
|
|
458
|
+
updateFields.push('model_type = ?');
|
|
459
|
+
updateValues.push(systemInfo.modelType);
|
|
460
|
+
}
|
|
461
|
+
if (systemInfo.modelDimensions !== undefined) {
|
|
462
|
+
updateFields.push('model_dimensions = ?');
|
|
463
|
+
updateValues.push(systemInfo.modelDimensions);
|
|
464
|
+
}
|
|
465
|
+
if (systemInfo.modelVersion !== undefined) {
|
|
466
|
+
updateFields.push('model_version = ?');
|
|
467
|
+
updateValues.push(systemInfo.modelVersion);
|
|
468
|
+
}
|
|
469
|
+
if (supportedContentTypesJson !== undefined) {
|
|
470
|
+
updateFields.push('supported_content_types = ?');
|
|
471
|
+
updateValues.push(supportedContentTypesJson);
|
|
472
|
+
}
|
|
473
|
+
if (systemInfo.rerankingStrategy !== undefined) {
|
|
474
|
+
updateFields.push('reranking_strategy = ?');
|
|
475
|
+
updateValues.push(systemInfo.rerankingStrategy);
|
|
476
|
+
}
|
|
477
|
+
if (systemInfo.rerankingModel !== undefined) {
|
|
478
|
+
updateFields.push('reranking_model = ?');
|
|
479
|
+
updateValues.push(systemInfo.rerankingModel);
|
|
480
|
+
}
|
|
481
|
+
if (rerankingConfigJson !== undefined) {
|
|
482
|
+
updateFields.push('reranking_config = ?');
|
|
483
|
+
updateValues.push(rerankingConfigJson);
|
|
484
|
+
}
|
|
485
|
+
// Always update the timestamp
|
|
486
|
+
updateFields.push('updated_at = CURRENT_TIMESTAMP');
|
|
487
|
+
updateValues.push(1); // Add WHERE clause parameter
|
|
488
|
+
if (updateFields.length > 1) { // More than just the timestamp
|
|
489
|
+
const sql = `UPDATE system_info SET ${updateFields.join(', ')} WHERE id = ?`;
|
|
490
|
+
await connection.run(sql, updateValues);
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
else {
|
|
494
|
+
// Insert new row with provided values and defaults
|
|
495
|
+
const insertSql = `
|
|
496
|
+
INSERT INTO system_info (
|
|
497
|
+
id, mode, model_name, model_type, model_dimensions, model_version,
|
|
498
|
+
supported_content_types, reranking_strategy, reranking_model, reranking_config,
|
|
499
|
+
created_at, updated_at
|
|
500
|
+
) VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
|
501
|
+
`;
|
|
502
|
+
await connection.run(insertSql, [
|
|
503
|
+
systemInfo.mode || 'text',
|
|
504
|
+
systemInfo.modelName || 'sentence-transformers/all-MiniLM-L6-v2',
|
|
505
|
+
systemInfo.modelType || 'sentence-transformer',
|
|
506
|
+
systemInfo.modelDimensions || 384,
|
|
507
|
+
systemInfo.modelVersion || '',
|
|
508
|
+
supportedContentTypesJson || '["text"]',
|
|
509
|
+
systemInfo.rerankingStrategy || 'cross-encoder',
|
|
510
|
+
systemInfo.rerankingModel || null,
|
|
511
|
+
rerankingConfigJson || null
|
|
512
|
+
]);
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
catch (error) {
|
|
516
|
+
throw new Error(`Failed to set system info: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
/**
|
|
520
|
+
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
332
521
|
* Gets the current model version from system_info table
|
|
333
522
|
* @param connection - Database connection object
|
|
334
523
|
* @returns Promise that resolves to the model version string or null if not set
|
|
335
524
|
*/
|
|
336
525
|
export async function getModelVersion(connection) {
|
|
337
526
|
try {
|
|
338
|
-
const
|
|
339
|
-
return
|
|
527
|
+
const systemInfo = await getSystemInfo(connection);
|
|
528
|
+
return systemInfo ? systemInfo.modelVersion : null;
|
|
340
529
|
}
|
|
341
530
|
catch (error) {
|
|
342
531
|
throw new Error(`Failed to get model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
343
532
|
}
|
|
344
533
|
}
|
|
345
534
|
/**
|
|
535
|
+
* @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
|
|
346
536
|
* Sets the model version in system_info table
|
|
347
537
|
* @param connection - Database connection object
|
|
348
538
|
* @param modelVersion - Model version string to store
|
|
349
539
|
*/
|
|
350
540
|
export async function setModelVersion(connection, modelVersion) {
|
|
351
541
|
try {
|
|
352
|
-
|
|
353
|
-
const existing = await connection.get('SELECT model_name, model_dimensions FROM system_info WHERE id = 1');
|
|
354
|
-
if (existing) {
|
|
355
|
-
// Update only the model_version field, preserve existing model info
|
|
356
|
-
await connection.run('UPDATE system_info SET model_version = ?, updated_at = CURRENT_TIMESTAMP WHERE id = 1', [modelVersion]);
|
|
357
|
-
}
|
|
358
|
-
else {
|
|
359
|
-
// Insert new row with just model_version
|
|
360
|
-
await connection.run('INSERT INTO system_info (id, model_version, updated_at) VALUES (1, ?, CURRENT_TIMESTAMP)', [modelVersion]);
|
|
361
|
-
}
|
|
542
|
+
await setSystemInfo(connection, { modelVersion });
|
|
362
543
|
}
|
|
363
544
|
catch (error) {
|
|
364
545
|
throw new Error(`Failed to set model version: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
365
546
|
}
|
|
366
547
|
}
|
|
367
548
|
/**
|
|
549
|
+
* @deprecated Use getSystemInfo() instead. This function is kept for existing code compatibility.
|
|
368
550
|
* Gets the stored model information from system_info table
|
|
369
551
|
* @param connection - Database connection object
|
|
370
552
|
* @returns Promise that resolves to model info object or null if not set
|
|
371
553
|
*/
|
|
372
554
|
export async function getStoredModelInfo(connection) {
|
|
373
555
|
try {
|
|
374
|
-
const
|
|
375
|
-
if (!
|
|
556
|
+
const systemInfo = await getSystemInfo(connection);
|
|
557
|
+
if (!systemInfo || !systemInfo.modelName || !systemInfo.modelDimensions) {
|
|
376
558
|
return null;
|
|
377
559
|
}
|
|
378
560
|
return {
|
|
379
|
-
modelName:
|
|
380
|
-
dimensions:
|
|
561
|
+
modelName: systemInfo.modelName,
|
|
562
|
+
dimensions: systemInfo.modelDimensions
|
|
381
563
|
};
|
|
382
564
|
}
|
|
383
565
|
catch (error) {
|
|
@@ -385,26 +567,386 @@ export async function getStoredModelInfo(connection) {
|
|
|
385
567
|
}
|
|
386
568
|
}
|
|
387
569
|
/**
 * @deprecated Use setSystemInfo() instead. This function is kept for existing code compatibility.
 * Stores the model name and dimensions by delegating to setSystemInfo(),
 * mapping `dimensions` onto the `modelDimensions` field.
 * @param connection - Database connection object
 * @param modelName - Name of the embedding model
 * @param dimensions - Number of dimensions for the model
 * @throws Error prefixed with "Failed to set stored model info:" when the delegate fails
 */
export async function setStoredModelInfo(connection, modelName, dimensions) {
    const update = { modelName: modelName, modelDimensions: dimensions };
    try {
        await setSystemInfo(connection, update);
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to set stored model info: ${reason}`);
    }
}
|
|
587
|
+
/**
 * Lists the documents whose content_type equals the given value, newest first.
 * The content type is checked with validateContentType() before querying.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by
 * @returns Promise that resolves to an array of document rows; each row's
 *          `metadata` is the parsed JSON object, or undefined when the stored
 *          value is empty
 * @throws Error prefixed with "Failed to get documents by content type:" on
 *         validation, query, or JSON parsing failure
 */
export async function getDocumentsByContentType(connection, contentType) {
    try {
        validateContentType(contentType);
        const query = 'SELECT id, source, title, content_type, metadata, created_at FROM documents WHERE content_type = ? ORDER BY created_at DESC';
        const rows = await connection.all(query, [contentType]);
        const documents = [];
        for (const row of rows) {
            // metadata is stored as a JSON string; turn it back into an object
            const parsedMetadata = row.metadata ? JSON.parse(row.metadata) : undefined;
            documents.push({ ...row, metadata: parsedMetadata });
        }
        return documents;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to get documents by content type: ${reason}`);
    }
}
|
|
607
|
+
/**
 * Lists the chunks whose content_type equals the given value, joined with
 * their parent document and ordered by document source, then chunk index.
 * The content type is checked with validateContentType() before querying.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by
 * @returns Promise that resolves to an array of chunk rows carrying the
 *          document_source, document_title, document_content_type and
 *          document_content_id columns; each row's `metadata` is the parsed
 *          JSON object, or undefined when the stored value is empty
 * @throws Error prefixed with "Failed to get chunks by content type:" on
 *         validation, query, or JSON parsing failure
 */
export async function getChunksByContentType(connection, contentType) {
    try {
        validateContentType(contentType);
        const rows = await connection.all(`
      SELECT 
        c.id,
        c.embedding_id,
        c.document_id,
        c.content,
        c.content_type,
        c.chunk_index,
        c.metadata,
        c.created_at,
        d.source as document_source,
        d.title as document_title,
        d.content_type as document_content_type,
        d.content_id as document_content_id
      FROM chunks c
      JOIN documents d ON c.document_id = d.id
      WHERE c.content_type = ?
      ORDER BY d.source, c.chunk_index
    `, [contentType]);
        const chunks = [];
        for (const row of rows) {
            // metadata is stored as a JSON string; turn it back into an object
            const parsedMetadata = row.metadata ? JSON.parse(row.metadata) : undefined;
            chunks.push({ ...row, metadata: parsedMetadata });
        }
        return chunks;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to get chunks by content type: ${reason}`);
    }
}
|
|
646
|
+
/**
 * Counts documents and chunks per content type, plus overall totals.
 * Runs four queries in sequence: per-type document counts, per-type chunk
 * counts, total documents, total chunks.
 * @param connection - Database connection object
 * @returns Promise that resolves to
 *          `{ documents, chunks, total: { documents, chunks } }`, where
 *          `documents` and `chunks` map content_type to its row count
 * @throws Error prefixed with "Failed to get content type statistics:" on failure
 */
export async function getContentTypeStatistics(connection) {
    try {
        const perTypeDocuments = await connection.all(`
      SELECT content_type, COUNT(*) as count 
      FROM documents 
      GROUP BY content_type
    `);
        const perTypeChunks = await connection.all(`
      SELECT content_type, COUNT(*) as count 
      FROM chunks 
      GROUP BY content_type
    `);
        const documentTotal = await connection.get('SELECT COUNT(*) as count FROM documents');
        const chunkTotal = await connection.get('SELECT COUNT(*) as count FROM chunks');
        // Fold the grouped rows into content_type -> count lookup objects
        const documentCounts = {};
        for (const row of perTypeDocuments) {
            documentCounts[row.content_type] = row.count;
        }
        const chunkCounts = {};
        for (const row of perTypeChunks) {
            chunkCounts[row.content_type] = row.count;
        }
        const total = {
            documents: documentTotal.count,
            chunks: chunkTotal.count
        };
        return { documents: documentCounts, chunks: chunkCounts, total };
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to get content type statistics: ${reason}`);
    }
}
|
|
689
|
+
/**
 * Replaces a document's metadata with the JSON serialization of the given
 * object and refreshes its updated_at timestamp.
 * @param connection - Database connection object
 * @param documentId - ID of the document to update
 * @param metadata - New metadata object
 * @throws Error prefixed with "Failed to update document metadata:" when the
 *         statement fails or when no row matched the ID
 */
export async function updateDocumentMetadata(connection, documentId, metadata) {
    try {
        const serialized = JSON.stringify(metadata);
        const statement = 'UPDATE documents SET metadata = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?';
        const outcome = await connection.run(statement, [serialized, documentId]);
        // Zero affected rows means the ID does not exist
        const nothingUpdated = outcome.changes === 0;
        if (nothingUpdated) {
            throw new Error(`Document with ID ${documentId} not found`);
        }
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to update document metadata: ${reason}`);
    }
}
|
|
707
|
+
/**
 * Replaces a chunk's metadata with the JSON serialization of the given object.
 * @param connection - Database connection object
 * @param chunkId - ID of the chunk to update
 * @param metadata - New metadata object
 * @throws Error prefixed with "Failed to update chunk metadata:" when the
 *         statement fails or when no row matched the ID
 */
export async function updateChunkMetadata(connection, chunkId, metadata) {
    try {
        const serialized = JSON.stringify(metadata);
        const statement = 'UPDATE chunks SET metadata = ? WHERE id = ?';
        const outcome = await connection.run(statement, [serialized, chunkId]);
        // Zero affected rows means the ID does not exist
        const nothingUpdated = outcome.changes === 0;
        if (nothingUpdated) {
            throw new Error(`Chunk with ID ${chunkId} not found`);
        }
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to update chunk metadata: ${reason}`);
    }
}
|
|
725
|
+
/**
 * Inserts one row into the content_metadata table.
 * A falsy `originalPath` is stored as NULL; every other field is passed through as-is.
 * @param connection - Database connection object
 * @param contentMetadata - Content metadata to insert (id, storageType,
 *        originalPath, contentPath, displayName, contentType, fileSize, contentHash)
 * @throws Error "Content with ID '<id>' already exists" when the insert fails
 *         with a UNIQUE constraint error; otherwise an Error prefixed with
 *         "Failed to insert content metadata:"
 */
export async function insertContentMetadata(connection, contentMetadata) {
    try {
        const values = [
            contentMetadata.id,
            contentMetadata.storageType,
            contentMetadata.originalPath || null,
            contentMetadata.contentPath,
            contentMetadata.displayName,
            contentMetadata.contentType,
            contentMetadata.fileSize,
            contentMetadata.contentHash
        ];
        await connection.run(`
      INSERT INTO content_metadata (
        id, storage_type, original_path, content_path, display_name, 
        content_type, file_size, content_hash
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `, values);
    }
    catch (failure) {
        const isError = failure instanceof Error;
        // Report a duplicate ID with a dedicated, friendlier message
        if (isError && failure.message.includes('UNIQUE constraint failed')) {
            throw new Error(`Content with ID '${contentMetadata.id}' already exists`);
        }
        const reason = isError ? failure.message : 'Unknown error';
        throw new Error(`Failed to insert content metadata: ${reason}`);
    }
}
|
|
755
|
+
/**
 * Looks up a single content_metadata row by its ID and converts the
 * snake_case columns to a camelCase object.
 * @param connection - Database connection object
 * @param contentId - Content ID to retrieve
 * @returns Promise that resolves to the ContentMetadata object (with
 *          `createdAt` as a Date), or null when no row matches
 * @throws Error prefixed with "Failed to get content metadata:" on failure
 */
export async function getContentMetadata(connection, contentId) {
    try {
        const row = await connection.get(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata 
      WHERE id = ?
    `, [contentId]);
        if (row) {
            const { id, storage_type: storageType, original_path: originalPath, content_path: contentPath, display_name: displayName, content_type: contentType, file_size: fileSize, content_hash: contentHash, created_at: createdAtRaw } = row;
            return {
                id,
                storageType,
                originalPath,
                contentPath,
                displayName,
                contentType,
                fileSize,
                contentHash,
                createdAt: new Date(createdAtRaw)
            };
        }
        return null;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to get content metadata: ${reason}`);
    }
}
|
|
788
|
+
/**
 * Looks up a single content_metadata row by its content hash (used for
 * deduplication) and converts the snake_case columns to a camelCase object.
 * @param connection - Database connection object
 * @param contentHash - Content hash to search for
 * @returns Promise that resolves to the ContentMetadata object (with
 *          `createdAt` as a Date), or null when no row matches
 * @throws Error prefixed with "Failed to get content metadata by hash:" on failure
 */
export async function getContentMetadataByHash(connection, contentHash) {
    try {
        const match = await connection.get(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata 
      WHERE content_hash = ?
    `, [contentHash]);
        return match
            ? {
                id: match.id,
                storageType: match.storage_type,
                originalPath: match.original_path,
                contentPath: match.content_path,
                displayName: match.display_name,
                contentType: match.content_type,
                fileSize: match.file_size,
                contentHash: match.content_hash,
                createdAt: new Date(match.created_at)
            }
            : null;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to get content metadata by hash: ${reason}`);
    }
}
|
|
821
|
+
/**
 * Lists all content_metadata rows with the given storage type, newest first,
 * converting each row's snake_case columns to a camelCase object.
 * @param connection - Database connection object
 * @param storageType - Storage type to filter by
 * @returns Promise that resolves to an array of ContentMetadata objects
 *          (each with `createdAt` as a Date); empty when nothing matches
 * @throws Error prefixed with "Failed to get content metadata by storage type:" on failure
 */
export async function getContentMetadataByStorageType(connection, storageType) {
    // Converts one database row into the camelCase ContentMetadata shape
    const toContentMetadata = (row) => {
        return {
            id: row.id,
            storageType: row.storage_type,
            originalPath: row.original_path,
            contentPath: row.content_path,
            displayName: row.display_name,
            contentType: row.content_type,
            fileSize: row.file_size,
            contentHash: row.content_hash,
            createdAt: new Date(row.created_at)
        };
    };
    try {
        const rows = await connection.all(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata 
      WHERE storage_type = ?
      ORDER BY created_at DESC
    `, [storageType]);
        return rows.map((row) => toContentMetadata(row));
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to get content metadata by storage type: ${reason}`);
    }
}
|
|
852
|
+
/**
 * Removes the content_metadata row with the given ID.
 * @param connection - Database connection object
 * @param contentId - Content ID to delete
 * @returns Promise that resolves to true when at least one row was deleted,
 *          false when no row matched
 * @throws Error prefixed with "Failed to delete content metadata:" on failure
 */
export async function deleteContentMetadata(connection, contentId) {
    try {
        const statement = 'DELETE FROM content_metadata WHERE id = ?';
        const outcome = await connection.run(statement, [contentId]);
        const wasDeleted = outcome.changes > 0;
        return wasDeleted;
    }
    catch (failure) {
        const reason = failure instanceof Error ? failure.message : 'Unknown error';
        throw new Error(`Failed to delete content metadata: ${reason}`);
    }
}
|
|
867
|
+
/**
 * Reads the single storage_stats row (id = 1) and converts it to a camelCase
 * object with Date-typed timestamps.
 *
 * The updated_at column is written with SQLite's CURRENT_TIMESTAMP, which
 * produces a UTC value formatted as "YYYY-MM-DD HH:MM:SS" without a zone
 * marker. Passing that text straight to `new Date()` makes the engine read it
 * as local time, shifting the result by the host's UTC offset, so such values
 * are normalized to ISO-8601 UTC before parsing. Values that already carry
 * zone information (e.g. the ISO strings stored for last_cleanup) are parsed
 * unchanged.
 * @param connection - Database connection object
 * @returns Promise that resolves to the storage statistics
 *          `{ contentDirFiles, contentDirSize, filesystemRefs, lastCleanup, updatedAt }`
 *          (`lastCleanup` is null when unset), or null when the row does not exist
 * @throws Error prefixed with "Failed to get storage stats:" on failure
 */
export async function getStorageStats(connection) {
    // Matches SQLite's zone-less "YYYY-MM-DD HH:MM:SS[.fff]" UTC text form
    const sqliteUtcPattern = /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/;
    // Parses a stored timestamp, treating zone-less SQLite text as UTC
    const parseStoredTimestamp = (value) => {
        if (typeof value === 'string' && sqliteUtcPattern.test(value)) {
            return new Date(`${value.replace(' ', 'T')}Z`);
        }
        return new Date(value);
    };
    try {
        const row = await connection.get(`
      SELECT content_dir_files, content_dir_size, filesystem_refs,
             last_cleanup, updated_at
      FROM storage_stats 
      WHERE id = 1
    `);
        if (!row) {
            return null;
        }
        return {
            contentDirFiles: row.content_dir_files,
            contentDirSize: row.content_dir_size,
            filesystemRefs: row.filesystem_refs,
            lastCleanup: row.last_cleanup ? parseStoredTimestamp(row.last_cleanup) : null,
            updatedAt: parseStoredTimestamp(row.updated_at)
        };
    }
    catch (error) {
        throw new Error(`Failed to get storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
|
|
895
|
+
/**
 * Updates storage statistics in the storage_stats table (single row, id = 1).
 *
 * When the row exists, only the fields present on `stats` (i.e. not
 * undefined) are written, and updated_at is refreshed alongside them; if no
 * field is provided, nothing is written at all. When the row does not exist,
 * it is inserted with the provided values, defaulting the counters to 0 and
 * last_cleanup to NULL.
 *
 * `stats.lastCleanup` may be a Date (stored as an ISO-8601 string) or null
 * (stored as NULL). Accepting null lets callers pass back the value returned
 * by getStorageStats() for an unset cleanup time without a TypeError.
 * @param connection - Database connection object
 * @param stats - Partial storage statistics to update (contentDirFiles,
 *        contentDirSize, filesystemRefs, lastCleanup)
 * @throws Error prefixed with "Failed to update storage stats:" on failure
 */
export async function updateStorageStats(connection, stats) {
    try {
        // Check if there's already a row
        const existing = await connection.get('SELECT id FROM storage_stats WHERE id = 1');
        if (existing) {
            // Build dynamic UPDATE query based on provided fields
            const assignments = [];
            const values = [];
            if (stats.contentDirFiles !== undefined) {
                assignments.push('content_dir_files = ?');
                values.push(stats.contentDirFiles);
            }
            if (stats.contentDirSize !== undefined) {
                assignments.push('content_dir_size = ?');
                values.push(stats.contentDirSize);
            }
            if (stats.filesystemRefs !== undefined) {
                assignments.push('filesystem_refs = ?');
                values.push(stats.filesystemRefs);
            }
            if (stats.lastCleanup !== undefined) {
                assignments.push('last_cleanup = ?');
                // An explicit null clears the column instead of crashing on toISOString()
                values.push(stats.lastCleanup === null ? null : stats.lastCleanup.toISOString());
            }
            if (assignments.length === 0) {
                // Nothing to change; leave the row (including updated_at) untouched
                return;
            }
            // Always refresh the timestamp when something is written
            assignments.push('updated_at = CURRENT_TIMESTAMP');
            const sql = `UPDATE storage_stats SET ${assignments.join(', ')} WHERE id = ?`;
            await connection.run(sql, [...values, 1]);
            return;
        }
        // Insert new row with provided values and defaults
        const insertSql = `
        INSERT INTO storage_stats (
          id, content_dir_files, content_dir_size, filesystem_refs, 
          last_cleanup, updated_at
        ) VALUES (1, ?, ?, ?, ?, CURRENT_TIMESTAMP)
      `;
        await connection.run(insertSql, [
            stats.contentDirFiles || 0,
            stats.contentDirSize || 0,
            stats.filesystemRefs || 0,
            stats.lastCleanup ? stats.lastCleanup.toISOString() : null
        ]);
    }
    catch (error) {
        throw new Error(`Failed to update storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
|
|
410
952
|
//# sourceMappingURL=db.js.map
|