rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,1000 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
import sqlite3 from 'sqlite3';
|
|
6
|
+
import { promisify } from 'util';
|
|
7
|
+
import { handleError, ErrorSeverity, createError } from './error-handler.js';
|
|
8
|
+
/**
 * Opens a SQLite database connection with promisified methods.
 *
 * The returned promise settles only after the file has been opened and
 * `PRAGMA foreign_keys = ON` has been executed on the new handle.
 *
 * @param dbPath - Path to the SQLite database file
 * @returns Promise that resolves to a connection object exposing the raw `db`
 *   handle plus promise-returning `run`, `get`, `all` and `close`. `run`
 *   resolves to the `this` value sqlite3 passes to its completion callback
 *   (other functions in this module read `lastID` from it).
 * @throws Rejects with the value built by `createError.database` when the
 *   database cannot be opened or foreign keys cannot be enabled.
 */
export function openDatabase(dbPath) {
    return new Promise((resolve, reject) => {
        const db = new sqlite3.Database(dbPath, (openErr) => {
            if (openErr) {
                const reason = openErr.message;
                // Categorize database errors for better handling
                if (reason.includes('ENOENT')) {
                    // NOTE(review): the log wording predates this fix and is kept verbatim;
                    // the open has failed at this point, so the caller is told so below.
                    handleError(createError.fileSystem(`Database file not found: ${dbPath}. It will be created automatically.`), 'Database Connection', { severity: ErrorSeverity.INFO });
                    reject(createError.database(`Failed to open database at ${dbPath}: ${reason}`));
                }
                else if (reason.includes('EACCES') || reason.includes('permission')) {
                    reject(createError.database(`Permission denied accessing database: ${dbPath}. Check file permissions.`));
                }
                else if (reason.includes('SQLITE_CORRUPT')) {
                    reject(createError.database(`Database file is corrupted: ${dbPath}. Try running 'raglite rebuild'.`));
                }
                else {
                    reject(createError.database(`Failed to open database at ${dbPath}: ${reason}`));
                }
                // Never continue with a handle whose open failed: every branch above
                // has rejected, so stop before touching `db` again.
                return;
            }
            // Enable foreign key constraints
            db.run('PRAGMA foreign_keys = ON', (pragmaErr) => {
                if (pragmaErr) {
                    reject(createError.database(`Failed to enable foreign keys: ${pragmaErr.message}`));
                    return;
                }
                // Hand-rolled wrapper (instead of promisify) so that failures go through
                // enhanceSQLiteError and the callback's `this` can be returned to the caller.
                const run = (sql, params) => new Promise((resolveRun, rejectRun) => {
                    // Must be a classic function: an arrow function would not receive `this`.
                    db.run(sql, params || [], function (runErr) {
                        if (runErr) {
                            rejectRun(enhanceSQLiteError(runErr, sql));
                            return;
                        }
                        resolveRun(this);
                    });
                });
                resolve({
                    db,
                    run,
                    get: promisify(db.get.bind(db)),
                    all: promisify(db.all.bind(db)),
                    close: promisify(db.close.bind(db))
                });
            });
        });
    });
}
|
|
67
|
+
/**
 * Enhance SQLite error messages with more context.
 *
 * The first marker found in `error.message` selects a friendlier message;
 * messages matching no marker are passed through unchanged. Statements shorter
 * than 200 characters are appended on a `SQL:` line.
 *
 * @param error - Error reported by sqlite3; only `message`, `code` and `errno` are read
 * @param sql - SQL text that was being executed (optional)
 * @returns A new Error carrying the enhanced message. The original error is kept
 *   on `cause`, and its `code` / `errno` are copied over when present, so callers
 *   can still branch on them.
 */
function enhanceSQLiteError(error, sql) {
    const original = error.message;
    // Order matters: the first matching marker wins.
    const rewrites = [
        ['SQLITE_BUSY', 'Database is locked by another process. Ensure no other RAG-lite instances are running.'],
        ['SQLITE_FULL', 'Database disk is full. Free up disk space and try again.'],
        ['SQLITE_CORRUPT', 'Database file is corrupted. Try running "raglite rebuild" to recreate it.'],
        // The constraint messages embed the original text because other functions in
        // this module detect these failures by substring.
        ['UNIQUE constraint failed', `Duplicate entry detected: ${original}. This item may already exist.`],
        ['FOREIGN KEY constraint failed', `Foreign key constraint violation: ${original}. Referenced record may not exist.`]
    ];
    const match = rewrites.find(([marker]) => original.includes(marker));
    let enhancedMessage = match ? match[1] : original;
    if (sql && sql.length < 200) {
        enhancedMessage += `\nSQL: ${sql}`;
    }
    const enhanced = new Error(enhancedMessage);
    // Keep the underlying failure reachable instead of discarding it.
    enhanced.cause = error;
    if (error.code !== undefined) {
        enhanced.code = error.code;
    }
    if (error.errno !== undefined) {
        enhanced.errno = error.errno;
    }
    return enhanced;
}
|
|
92
|
+
/**
 * Initializes the database schema with all required tables and indexes.
 * Enhanced to support content types for multimodal use.
 *
 * Every statement uses `IF NOT EXISTS`. Statements are executed one at a time,
 * tables first and indexes afterwards, in the order listed below.
 *
 * @param connection - Database connection object; only its `run(sql)` method is used
 * @returns Promise that resolves (with no value) once every statement has run
 * @throws Error prefixed with "Failed to initialize database schema:"; the
 *   underlying failure is available on `cause`
 */
export async function initializeSchema(connection) {
    const tableStatements = [
        // documents table with content type support and content_id reference
        `
      CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        content_id TEXT,                        -- References content_metadata.id
        source TEXT NOT NULL UNIQUE,
        title TEXT NOT NULL,
        content_type TEXT DEFAULT 'text',
        metadata TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (content_id) REFERENCES content_metadata(id)
      )
    `,
        // chunks table with content type and metadata support
        `
      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        embedding_id TEXT NOT NULL UNIQUE,
        document_id INTEGER NOT NULL,
        content TEXT NOT NULL,
        content_type TEXT DEFAULT 'text',
        chunk_index INTEGER NOT NULL,
        metadata TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
      )
    `,
        // content_metadata table for unified content system
        `
      CREATE TABLE IF NOT EXISTS content_metadata (
        id TEXT PRIMARY KEY,                    -- Hash-based content ID
        storage_type TEXT NOT NULL CHECK (storage_type IN ('filesystem', 'content_dir')),
        original_path TEXT,                     -- Original file path (filesystem only)
        content_path TEXT NOT NULL,             -- Actual storage path
        display_name TEXT NOT NULL,             -- User-friendly name
        content_type TEXT NOT NULL,             -- MIME type
        file_size INTEGER NOT NULL,             -- Size in bytes
        content_hash TEXT NOT NULL,             -- SHA-256 hash
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // storage_stats table for basic content directory tracking
        `
      CREATE TABLE IF NOT EXISTS storage_stats (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        content_dir_files INTEGER DEFAULT 0,
        content_dir_size INTEGER DEFAULT 0,
        filesystem_refs INTEGER DEFAULT 0,
        last_cleanup DATETIME,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // system_info table for mode persistence and model tracking
        `
      CREATE TABLE IF NOT EXISTS system_info (
        id INTEGER PRIMARY KEY CHECK (id = 1),

        -- Core mode and model information
        mode TEXT NOT NULL DEFAULT 'text' CHECK (mode IN ('text', 'multimodal')),
        model_name TEXT NOT NULL DEFAULT 'sentence-transformers/all-MiniLM-L6-v2',
        model_type TEXT NOT NULL DEFAULT 'sentence-transformer' CHECK (model_type IN ('sentence-transformer', 'clip')),
        model_dimensions INTEGER NOT NULL DEFAULT 384,
        model_version TEXT NOT NULL DEFAULT '',

        -- Content type support (JSON array)
        supported_content_types TEXT NOT NULL DEFAULT '["text"]',

        -- Reranking configuration
        reranking_strategy TEXT DEFAULT 'cross-encoder' CHECK (
          reranking_strategy IN ('cross-encoder', 'text-derived', 'disabled')
        ),
        reranking_model TEXT,
        reranking_config TEXT,                  -- JSON configuration for strategy-specific settings

        -- Timestamps
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `
    ];
    // Clean slate approach - no migration logic needed
    // Users will perform fresh ingestion with the new architecture
    const indexStatements = [
        // Indexes for performance
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_embedding_id ON chunks(embedding_id)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_content_type ON chunks(content_type)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_content_type ON documents(content_type)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_content_id ON documents(content_id)
    `,
        // Indexes for content metadata table for efficient lookup
        `
      CREATE INDEX IF NOT EXISTS idx_content_hash ON content_metadata(content_hash)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_storage_type ON content_metadata(storage_type)
    `
    ];
    try {
        // Sequential on purpose: each statement is awaited before the next is issued.
        for (const statement of [...tableStatements, ...indexStatements]) {
            await connection.run(statement);
        }
        console.log('Database schema initialized successfully');
    }
    catch (error) {
        const wrapped = new Error(`Failed to initialize database schema: ${error instanceof Error ? error.message : 'Unknown error'}`);
        // Keep the underlying failure reachable for callers and logs.
        wrapped.cause = error;
        throw wrapped;
    }
}
|
|
213
|
+
/**
 * Inserts a new document into the database with content type support.
 *
 * @param connection - Database connection object; only its `run` method is used
 * @param source - Source path of the document
 * @param title - Title of the document
 * @param contentType - Type of content ('text', 'image', etc.); defaults to 'text'
 * @param metadata - Optional metadata object, stored as JSON (NULL when omitted)
 * @param contentId - Optional content ID referencing content_metadata table (NULL when omitted)
 * @returns Promise that resolves to the document ID (`lastID` of the INSERT)
 * @throws Error "Document with source '…' already exists" when the insert fails with a
 *   UNIQUE constraint message; otherwise an Error prefixed with
 *   "Failed to insert document:". Both carry the underlying failure on `cause`.
 *   Failures raised by content-type validation are wrapped the same way.
 */
export async function insertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
    try {
        // Validate content type
        validateContentType(contentType);
        const row = [
            contentId || null,
            source,
            title,
            contentType,
            metadata ? JSON.stringify(metadata) : null
        ];
        const result = await connection.run('INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)', row);
        if (typeof result.lastID !== 'number' || result.lastID <= 0) {
            throw new Error('Failed to get document ID after insertion');
        }
        return result.lastID;
    }
    catch (error) {
        const isDuplicate = error instanceof Error && error.message.includes('UNIQUE constraint failed');
        const wrapped = new Error(isDuplicate
            ? `Document with source '${source}' already exists`
            : `Failed to insert document: ${error instanceof Error ? error.message : 'Unknown error'}`);
        // Keep the underlying failure reachable instead of discarding it.
        wrapped.cause = error;
        throw wrapped;
    }
}
|
|
241
|
+
/**
 * Inserts or updates a chunk in the database with content type support (upsert operation).
 *
 * Uses `INSERT OR REPLACE`, so a chunk whose `embedding_id` already exists is
 * replaced rather than rejected.
 *
 * @param connection - Database connection object; only its `run` method is used
 * @param embeddingId - Unique embedding ID for the chunk
 * @param documentId - ID of the parent document
 * @param content - Content of the chunk (text, image path, etc.)
 * @param chunkIndex - Index of the chunk within the document
 * @param contentType - Type of content ('text', 'image', etc.); defaults to 'text'
 * @param metadata - Optional metadata object, stored as JSON (NULL when omitted)
 * @returns Promise that resolves with no value once the row is written
 * @throws Error "Document with ID … does not exist" when the write fails with a
 *   FOREIGN KEY constraint message; otherwise an Error prefixed with
 *   "Failed to insert/update chunk:". Both carry the underlying failure on `cause`.
 */
export async function insertChunk(connection, embeddingId, documentId, content, chunkIndex, contentType = 'text', metadata) {
    try {
        // Validate content type
        validateContentType(contentType);
        const row = [
            embeddingId,
            documentId,
            content,
            chunkIndex,
            contentType,
            metadata ? JSON.stringify(metadata) : null
        ];
        // Use INSERT OR REPLACE to handle duplicates gracefully
        await connection.run('INSERT OR REPLACE INTO chunks (embedding_id, document_id, content, chunk_index, content_type, metadata) VALUES (?, ?, ?, ?, ?, ?)', row);
    }
    catch (error) {
        const missingParent = error instanceof Error && error.message.includes('FOREIGN KEY constraint failed');
        const wrapped = new Error(missingParent
            ? `Document with ID ${documentId} does not exist`
            : `Failed to insert/update chunk: ${error instanceof Error ? error.message : 'Unknown error'}`);
        // Keep the underlying failure reachable instead of discarding it.
        wrapped.cause = error;
        throw wrapped;
    }
}
|
|
266
|
+
/**
 * Inserts a new document or returns existing document ID if it already exists.
 * Enhanced with content type support.
 *
 * An existing row is returned as-is; its title, content type, metadata and
 * content ID are not updated.
 *
 * @param connection - Database connection object; its `get` and `run` methods are used
 * @param source - Source path of the document (the lookup key)
 * @param title - Title of the document
 * @param contentType - Type of content ('text', 'image', etc.); defaults to 'text'
 * @param metadata - Optional metadata object, stored as JSON (NULL when omitted)
 * @param contentId - Optional content ID referencing content_metadata table (NULL when omitted)
 * @returns Promise that resolves to the document ID
 * @throws Error prefixed with "Failed to upsert document:"; the underlying
 *   failure is available on `cause`
 */
export async function upsertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
    const selectBySource = 'SELECT id FROM documents WHERE source = ?';
    try {
        // Validate content type
        validateContentType(contentType);
        // First try to get existing document
        const existing = await connection.get(selectBySource, [source]);
        if (existing) {
            return existing.id;
        }
        // Insert new document if it doesn't exist
        const metadataJson = metadata ? JSON.stringify(metadata) : null;
        let result;
        try {
            result = await connection.run('INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)', [contentId || null, source, title, contentType, metadataJson]);
        }
        catch (insertError) {
            // Another writer may have inserted the same source between the SELECT and
            // the INSERT. That is exactly the "already exists" case, so look it up again.
            if (insertError instanceof Error && insertError.message.includes('UNIQUE constraint failed')) {
                const winner = await connection.get(selectBySource, [source]);
                if (winner) {
                    return winner.id;
                }
            }
            throw insertError;
        }
        if (typeof result.lastID !== 'number' || result.lastID <= 0) {
            throw new Error('Failed to get document ID after insertion');
        }
        return result.lastID;
    }
    catch (error) {
        const wrapped = new Error(`Failed to upsert document: ${error instanceof Error ? error.message : 'Unknown error'}`);
        // Keep the underlying failure reachable instead of discarding it.
        wrapped.cause = error;
        throw wrapped;
    }
}
|
|
298
|
+
/**
 * Loads the chunks whose embedding_id is in `embeddingIds`, joined with the
 * source, title, content type and content ID of their parent document.
 * Rows come back ordered by chunk_index.
 * @param connection - Database connection object
 * @param embeddingIds - Array of embedding IDs to retrieve
 * @returns Promise that resolves to an array of chunk rows; each row's
 *          `metadata` is the parsed JSON object, or undefined when the column is empty
 * @throws Error prefixed with "Failed to retrieve chunks:" on query or JSON parse failure
 */
export async function getChunksByEmbeddingIds(connection, embeddingIds) {
    // Nothing to look up: skip the query (an empty IN () list is not valid SQL).
    if (embeddingIds.length === 0) {
        return [];
    }
    // Replaces the stored JSON text of a row's metadata with the parsed object.
    const withParsedMetadata = (row) => {
        const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata: parsed };
    };
    try {
        // One bound parameter per requested ID.
        const placeholders = embeddingIds.map(() => '?').join(',');
        const query = `
      SELECT
        c.id,
        c.embedding_id,
        c.document_id,
        c.content,
        c.content_type,
        c.chunk_index,
        c.metadata,
        c.created_at,
        d.source as document_source,
        d.title as document_title,
        d.content_type as document_content_type,
        d.content_id as document_content_id
      FROM chunks c
      JOIN documents d ON c.document_id = d.id
      WHERE c.embedding_id IN (${placeholders})
      ORDER BY c.chunk_index
    `;
        const rows = await connection.all(query, embeddingIds);
        return rows.map(withParsedMetadata);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to retrieve chunks: ${reason}`);
    }
}
|
|
341
|
+
/**
 * Ensures `mode` is one of the supported modes ('text' or 'multimodal').
 * @param mode - Mode value to check
 * @throws Error naming the rejected value and the allowed values
 */
function validateMode(mode) {
    const allowedModes = ['text', 'multimodal'];
    if (allowedModes.includes(mode)) {
        return;
    }
    throw new Error(`Invalid mode '${mode}'. Must be one of: ${allowedModes.join(', ')}`);
}
|
|
350
|
+
/**
 * Ensures `modelType` is one of the supported model types
 * ('sentence-transformer' or 'clip').
 * @param modelType - Model type value to check
 * @throws Error naming the rejected value and the allowed values
 */
function validateModelType(modelType) {
    const allowedTypes = ['sentence-transformer', 'clip'];
    if (allowedTypes.includes(modelType)) {
        return;
    }
    throw new Error(`Invalid model type '${modelType}'. Must be one of: ${allowedTypes.join(', ')}`);
}
|
|
359
|
+
/**
 * Ensures `strategy` is one of the supported reranking strategies
 * ('cross-encoder', 'text-derived', 'metadata', 'hybrid' or 'disabled').
 * @param strategy - Reranking strategy value to check
 * @throws Error naming the rejected value and the allowed values
 */
function validateRerankingStrategy(strategy) {
    const allowedStrategies = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
    if (allowedStrategies.includes(strategy)) {
        return;
    }
    throw new Error(`Invalid reranking strategy '${strategy}'. Must be one of: ${allowedStrategies.join(', ')}`);
}
|
|
368
|
+
/**
 * Ensures `contentType` is one of the supported content types
 * ('text', 'image', 'pdf' or 'docx').
 * @param contentType - Content type value to check
 * @throws Error naming the rejected value and the allowed values
 */
function validateContentType(contentType) {
    const allowedTypes = ['text', 'image', 'pdf', 'docx'];
    if (allowedTypes.includes(contentType)) {
        return;
    }
    throw new Error(`Invalid content type '${contentType}'. Must be one of: ${allowedTypes.join(', ')}`);
}
|
|
377
|
+
/**
 * Reads the single system_info row (id = 1) and converts it to a camelCase
 * SystemInfo object.
 * @param connection - Database connection object
 * @returns Promise that resolves to the SystemInfo object, or null when the row does not exist.
 *          `supportedContentTypes` falls back to ['text'] and `rerankingConfig` to undefined
 *          when their columns are empty; `createdAt`/`updatedAt` are Date objects.
 * @throws Error prefixed with "Failed to get system info:" on query or JSON parse failure
 */
export async function getSystemInfo(connection) {
    try {
        const row = await connection.get(`
      SELECT
        mode, model_name, model_type, model_dimensions, model_version,
        supported_content_types, reranking_strategy, reranking_model,
        reranking_config, created_at, updated_at
      FROM system_info WHERE id = 1
    `);
        if (!row) {
            return null;
        }
        // The two JSON columns are decoded; empty columns get their defaults.
        let supportedContentTypes = ['text'];
        if (row.supported_content_types) {
            supportedContentTypes = JSON.parse(row.supported_content_types);
        }
        let rerankingConfig;
        if (row.reranking_config) {
            rerankingConfig = JSON.parse(row.reranking_config);
        }
        return {
            mode: row.mode,
            modelName: row.model_name,
            modelType: row.model_type,
            modelDimensions: row.model_dimensions,
            modelVersion: row.model_version,
            supportedContentTypes,
            rerankingStrategy: row.reranking_strategy,
            rerankingModel: row.reranking_model,
            rerankingConfig,
            createdAt: new Date(row.created_at),
            updatedAt: new Date(row.updated_at)
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get system info: ${reason}`);
    }
}
|
|
419
|
+
/**
 * Writes system information to the single system_info row (id = 1).
 *
 * When the row exists, only the fields present on `systemInfo` (not undefined)
 * are updated, together with updated_at; if no field is provided nothing is
 * written. When the row does not exist, a new row is inserted and missing
 * (falsy) fields take the defaults listed in the INSERT parameters below.
 *
 * @param connection - Database connection object
 * @param systemInfo - SystemInfo object (possibly partial) to store
 * @throws Error prefixed with "Failed to set system info:" on validation or database failure
 */
export async function setSystemInfo(connection, systemInfo) {
    try {
        // Enum-like fields are validated only when they carry a truthy value.
        if (systemInfo.mode) {
            validateMode(systemInfo.mode);
        }
        if (systemInfo.modelType) {
            validateModelType(systemInfo.modelType);
        }
        if (systemInfo.rerankingStrategy) {
            validateRerankingStrategy(systemInfo.rerankingStrategy);
        }
        const currentRow = await connection.get('SELECT id FROM system_info WHERE id = 1');
        // Object-valued fields are stored as JSON text; undefined means "not provided".
        const supportedContentTypesJson = systemInfo.supportedContentTypes
            ? JSON.stringify(systemInfo.supportedContentTypes)
            : undefined;
        const rerankingConfigJson = systemInfo.rerankingConfig
            ? JSON.stringify(systemInfo.rerankingConfig)
            : undefined;
        if (!currentRow) {
            // No row yet: insert one, substituting defaults for anything missing.
            const insertSql = `
        INSERT INTO system_info (
          id, mode, model_name, model_type, model_dimensions, model_version,
          supported_content_types, reranking_strategy, reranking_model, reranking_config,
          created_at, updated_at
        ) VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
      `;
            await connection.run(insertSql, [
                systemInfo.mode || 'text',
                systemInfo.modelName || 'sentence-transformers/all-MiniLM-L6-v2',
                systemInfo.modelType || 'sentence-transformer',
                systemInfo.modelDimensions || 384,
                systemInfo.modelVersion || '',
                supportedContentTypesJson || '["text"]',
                systemInfo.rerankingStrategy || 'cross-encoder',
                systemInfo.rerankingModel || null,
                rerankingConfigJson || null
            ]);
            return;
        }
        // Row exists: build the SET clause from the provided fields, in column order.
        const columnCandidates = [
            ['mode', systemInfo.mode],
            ['model_name', systemInfo.modelName],
            ['model_type', systemInfo.modelType],
            ['model_dimensions', systemInfo.modelDimensions],
            ['model_version', systemInfo.modelVersion],
            ['supported_content_types', supportedContentTypesJson],
            ['reranking_strategy', systemInfo.rerankingStrategy],
            ['reranking_model', systemInfo.rerankingModel],
            ['reranking_config', rerankingConfigJson]
        ];
        const provided = columnCandidates.filter(([, value]) => value !== undefined);
        // With nothing to change, the timestamp alone is not worth a write.
        if (provided.length === 0) {
            return;
        }
        const assignments = provided.map(([column]) => `${column} = ?`);
        assignments.push('updated_at = CURRENT_TIMESTAMP');
        const params = provided.map(([, value]) => value);
        params.push(1); // bound to the WHERE id = ? placeholder
        const sql = `UPDATE system_info SET ${assignments.join(', ')} WHERE id = ?`;
        await connection.run(sql, params);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to set system info: ${reason}`);
    }
}
|
|
519
|
+
// =============================================================================
|
|
520
|
+
// REMOVED IN v3.0.0: Legacy database functions
|
|
521
|
+
// =============================================================================
|
|
522
|
+
// The following functions have been removed. Use getSystemInfo() and setSystemInfo() instead:
|
|
523
|
+
//
|
|
524
|
+
// - getModelVersion() → Use: const systemInfo = await getSystemInfo(db); const version = systemInfo?.modelVersion;
|
|
525
|
+
// - setModelVersion() → Use: await setSystemInfo(db, { modelVersion: 'version' });
|
|
526
|
+
// - getStoredModelInfo() → Use: const systemInfo = await getSystemInfo(db); access systemInfo.modelName and systemInfo.modelDimensions
|
|
527
|
+
// - setStoredModelInfo() → Use: await setSystemInfo(db, { modelName: 'name', modelDimensions: 384 });
|
|
528
|
+
//
|
|
529
|
+
// Migration guide: See CHANGELOG.md for v3.0.0 breaking changes
|
|
530
|
+
/**
 * Lists the documents stored with the given content type, newest first
 * (ordered by created_at descending).
 * @param connection - Database connection object
 * @param contentType - Content type to filter by; validated before the query runs
 * @returns Promise that resolves to an array of document rows whose `metadata`
 *          is the parsed JSON object, or undefined when the column is empty
 * @throws Error prefixed with "Failed to get documents by content type:" on any failure
 */
export async function getDocumentsByContentType(connection, contentType) {
    const query = 'SELECT id, source, title, content_type, metadata, created_at FROM documents WHERE content_type = ? ORDER BY created_at DESC';
    // Replaces the stored JSON text of a row's metadata with the parsed object.
    const withParsedMetadata = (row) => {
        const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata: parsed };
    };
    try {
        validateContentType(contentType);
        const rows = await connection.all(query, [contentType]);
        return rows.map(withParsedMetadata);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get documents by content type: ${reason}`);
    }
}
|
|
550
|
+
/**
 * Lists the chunks stored with the given content type, joined with the
 * source, title, content type and content ID of their parent document.
 * Rows are ordered by document source, then chunk_index.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by; validated before the query runs
 * @returns Promise that resolves to an array of chunk rows whose `metadata`
 *          is the parsed JSON object, or undefined when the column is empty
 * @throws Error prefixed with "Failed to get chunks by content type:" on any failure
 */
export async function getChunksByContentType(connection, contentType) {
    // Replaces the stored JSON text of a row's metadata with the parsed object.
    const withParsedMetadata = (row) => {
        const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata: parsed };
    };
    try {
        validateContentType(contentType);
        const query = `
      SELECT
        c.id,
        c.embedding_id,
        c.document_id,
        c.content,
        c.content_type,
        c.chunk_index,
        c.metadata,
        c.created_at,
        d.source as document_source,
        d.title as document_title,
        d.content_type as document_content_type,
        d.content_id as document_content_id
      FROM chunks c
      JOIN documents d ON c.document_id = d.id
      WHERE c.content_type = ?
      ORDER BY d.source, c.chunk_index
    `;
        const rows = await connection.all(query, [contentType]);
        return rows.map(withParsedMetadata);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get chunks by content type: ${reason}`);
    }
}
|
|
589
|
+
/**
 * Counts documents and chunks per content type, plus overall totals.
 * @param connection - Database connection object
 * @returns Promise that resolves to
 *          `{ documents: {type: count}, chunks: {type: count}, total: { documents, chunks } }`
 * @throws Error prefixed with "Failed to get content type statistics:" on any failure
 */
export async function getContentTypeStatistics(connection) {
    // Collapses GROUP BY rows into a { contentType: count } lookup object.
    const toCountMap = (rows) => {
        const counts = {};
        rows.forEach((row) => {
            counts[row.content_type] = row.count;
        });
        return counts;
    };
    try {
        // Per-type breakdowns first, then the grand totals.
        const documentRows = await connection.all(`
      SELECT content_type, COUNT(*) as count
      FROM documents
      GROUP BY content_type
    `);
        const chunkRows = await connection.all(`
      SELECT content_type, COUNT(*) as count
      FROM chunks
      GROUP BY content_type
    `);
        const documentTotal = await connection.get('SELECT COUNT(*) as count FROM documents');
        const chunkTotal = await connection.get('SELECT COUNT(*) as count FROM chunks');
        const documents = toCountMap(documentRows);
        const chunks = toCountMap(chunkRows);
        return {
            documents,
            chunks,
            total: {
                documents: documentTotal.count,
                chunks: chunkTotal.count
            }
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get content type statistics: ${reason}`);
    }
}
|
|
632
|
+
/**
 * Replaces the stored metadata of a document and refreshes its updated_at
 * timestamp.
 * @param connection - Database connection object
 * @param documentId - ID of the document to update
 * @param metadata - New metadata object (stored as JSON text)
 * @throws Error prefixed with "Failed to update document metadata:" when the
 *         statement fails or no row matches `documentId`
 */
export async function updateDocumentMetadata(connection, documentId, metadata) {
    const updateSql = 'UPDATE documents SET metadata = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?';
    try {
        const serialized = JSON.stringify(metadata);
        const outcome = await connection.run(updateSql, [serialized, documentId]);
        // Zero affected rows means the ID matched nothing.
        const nothingUpdated = outcome.changes === 0;
        if (nothingUpdated) {
            throw new Error(`Document with ID ${documentId} not found`);
        }
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to update document metadata: ${reason}`);
    }
}
|
|
650
|
+
/**
 * Replaces the stored metadata of a chunk.
 * @param connection - Database connection object
 * @param chunkId - ID of the chunk to update
 * @param metadata - New metadata object (stored as JSON text)
 * @throws Error prefixed with "Failed to update chunk metadata:" when the
 *         statement fails or no row matches `chunkId`
 */
export async function updateChunkMetadata(connection, chunkId, metadata) {
    const updateSql = 'UPDATE chunks SET metadata = ? WHERE id = ?';
    try {
        const serialized = JSON.stringify(metadata);
        const outcome = await connection.run(updateSql, [serialized, chunkId]);
        // Zero affected rows means the ID matched nothing.
        const nothingUpdated = outcome.changes === 0;
        if (nothingUpdated) {
            throw new Error(`Chunk with ID ${chunkId} not found`);
        }
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to update chunk metadata: ${reason}`);
    }
}
|
|
668
|
+
/**
 * Adds one row to the content_metadata table.
 * @param connection - Database connection object
 * @param contentMetadata - Content metadata to insert; a falsy `originalPath` is stored as NULL
 * @throws Error "Content with ID '<id>' already exists" on a UNIQUE constraint failure,
 *         otherwise an Error prefixed with "Failed to insert content metadata:"
 */
export async function insertContentMetadata(connection, contentMetadata) {
    const insertSql = `
      INSERT INTO content_metadata (
        id, storage_type, original_path, content_path, display_name,
        content_type, file_size, content_hash
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    `;
    try {
        // Values are listed in the same order as the column list above.
        const rowValues = [
            contentMetadata.id,
            contentMetadata.storageType,
            contentMetadata.originalPath || null,
            contentMetadata.contentPath,
            contentMetadata.displayName,
            contentMetadata.contentType,
            contentMetadata.fileSize,
            contentMetadata.contentHash
        ];
        await connection.run(insertSql, rowValues);
    }
    catch (error) {
        const isError = error instanceof Error;
        // A UNIQUE violation is reported to the caller as a duplicate content ID.
        if (isError && error.message.includes('UNIQUE constraint failed')) {
            throw new Error(`Content with ID '${contentMetadata.id}' already exists`);
        }
        const reason = isError ? error.message : 'Unknown error';
        throw new Error(`Failed to insert content metadata: ${reason}`);
    }
}
|
|
698
|
+
/**
 * Fetches the content_metadata row with the given ID.
 * @param connection - Database connection object
 * @param contentId - Content ID to retrieve
 * @returns Promise that resolves to a camelCase ContentMetadata object
 *          (`createdAt` as a Date), or null if no row matches
 * @throws Error prefixed with "Failed to get content metadata:" on query failure
 */
export async function getContentMetadata(connection, contentId) {
    // Translates a snake_case table row into the camelCase ContentMetadata shape.
    const toContentMetadata = (row) => ({
        id: row.id,
        storageType: row.storage_type,
        originalPath: row.original_path,
        contentPath: row.content_path,
        displayName: row.display_name,
        contentType: row.content_type,
        fileSize: row.file_size,
        contentHash: row.content_hash,
        createdAt: new Date(row.created_at)
    });
    try {
        const row = await connection.get(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata
      WHERE id = ?
    `, [contentId]);
        return row ? toContentMetadata(row) : null;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get content metadata: ${reason}`);
    }
}
|
|
731
|
+
/**
 * Fetches a content_metadata row by its content hash (used for deduplication).
 * @param connection - Database connection object
 * @param contentHash - Content hash to search for
 * @returns Promise that resolves to a camelCase ContentMetadata object
 *          (`createdAt` as a Date), or null if no row matches
 * @throws Error prefixed with "Failed to get content metadata by hash:" on query failure
 */
export async function getContentMetadataByHash(connection, contentHash) {
    // Translates a snake_case table row into the camelCase ContentMetadata shape.
    const toContentMetadata = (row) => ({
        id: row.id,
        storageType: row.storage_type,
        originalPath: row.original_path,
        contentPath: row.content_path,
        displayName: row.display_name,
        contentType: row.content_type,
        fileSize: row.file_size,
        contentHash: row.content_hash,
        createdAt: new Date(row.created_at)
    });
    try {
        const row = await connection.get(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata
      WHERE content_hash = ?
    `, [contentHash]);
        return row ? toContentMetadata(row) : null;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get content metadata by hash: ${reason}`);
    }
}
|
|
764
|
+
/**
 * Lists every content_metadata row with the given storage type, newest first
 * (ordered by created_at descending).
 * @param connection - Database connection object
 * @param storageType - Storage type to filter by
 * @returns Promise that resolves to an array of camelCase ContentMetadata
 *          objects (`createdAt` as a Date)
 * @throws Error prefixed with "Failed to get content metadata by storage type:" on query failure
 */
export async function getContentMetadataByStorageType(connection, storageType) {
    // Translates a snake_case table row into the camelCase ContentMetadata shape.
    const toContentMetadata = (row) => ({
        id: row.id,
        storageType: row.storage_type,
        originalPath: row.original_path,
        contentPath: row.content_path,
        displayName: row.display_name,
        contentType: row.content_type,
        fileSize: row.file_size,
        contentHash: row.content_hash,
        createdAt: new Date(row.created_at)
    });
    try {
        const rows = await connection.all(`
      SELECT id, storage_type, original_path, content_path, display_name,
             content_type, file_size, content_hash, created_at
      FROM content_metadata
      WHERE storage_type = ?
      ORDER BY created_at DESC
    `, [storageType]);
        return rows.map((row) => toContentMetadata(row));
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get content metadata by storage type: ${reason}`);
    }
}
|
|
795
|
+
/**
 * Removes the content_metadata row with the given ID.
 * @param connection - Database connection object
 * @param contentId - Content ID to delete
 * @returns Promise that resolves to true if at least one row was deleted, false otherwise
 * @throws Error prefixed with "Failed to delete content metadata:" on statement failure
 */
export async function deleteContentMetadata(connection, contentId) {
    const deleteSql = 'DELETE FROM content_metadata WHERE id = ?';
    try {
        const outcome = await connection.run(deleteSql, [contentId]);
        // The driver reports how many rows the statement affected.
        const removedAny = outcome.changes > 0;
        return removedAny;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to delete content metadata: ${reason}`);
    }
}
|
|
810
|
+
/**
 * Reads the single storage_stats row (id = 1).
 * @param connection - Database connection object
 * @returns Promise that resolves to
 *          `{ contentDirFiles, contentDirSize, filesystemRefs, lastCleanup, updatedAt }`,
 *          where `lastCleanup` is a Date or null and `updatedAt` is a Date;
 *          resolves to null when the row does not exist
 * @throws Error prefixed with "Failed to get storage stats:" on query failure
 */
export async function getStorageStats(connection) {
    try {
        const row = await connection.get(`
      SELECT content_dir_files, content_dir_size, filesystem_refs,
             last_cleanup, updated_at
      FROM storage_stats
      WHERE id = 1
    `);
        if (!row) {
            return null;
        }
        // last_cleanup stays null until a cleanup time has been recorded.
        let lastCleanup = null;
        if (row.last_cleanup) {
            lastCleanup = new Date(row.last_cleanup);
        }
        return {
            contentDirFiles: row.content_dir_files,
            contentDirSize: row.content_dir_size,
            filesystemRefs: row.filesystem_refs,
            lastCleanup,
            updatedAt: new Date(row.updated_at)
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to get storage stats: ${reason}`);
    }
}
|
|
838
|
+
/**
 * Writes storage statistics to the single storage_stats row (id = 1).
 *
 * When the row exists, only the fields present on `stats` (not undefined) are
 * updated, together with updated_at; if no field is provided nothing is
 * written. When the row does not exist, a new row is inserted and missing
 * counters default to 0.
 *
 * `lastCleanup` may be a Date (stored as an ISO-8601 string) or null (stored
 * as NULL). Null is accepted on both paths so that an object returned by
 * getStorageStats(), which reports an unset cleanup time as null, can be
 * passed straight back in.
 *
 * @param connection - Database connection object
 * @param stats - Partial storage statistics to update
 * @throws Error prefixed with "Failed to update storage stats:" on any failure
 */
export async function updateStorageStats(connection, stats) {
    try {
        // Check if there's already a row
        const existing = await connection.get('SELECT id FROM storage_stats WHERE id = 1');
        // Same conversion for UPDATE and INSERT: falsy (null/absent) -> NULL, Date -> ISO string.
        const lastCleanupValue = stats.lastCleanup ? stats.lastCleanup.toISOString() : null;
        if (existing) {
            // Candidate columns in table order; undefined marks "not provided".
            const columnCandidates = [
                ['content_dir_files', stats.contentDirFiles],
                ['content_dir_size', stats.contentDirSize],
                ['filesystem_refs', stats.filesystemRefs],
                ['last_cleanup', stats.lastCleanup === undefined ? undefined : lastCleanupValue]
            ];
            const provided = columnCandidates.filter(([, value]) => value !== undefined);
            // With nothing to change, the timestamp alone is not worth a write.
            if (provided.length === 0) {
                return;
            }
            const updateFields = provided.map(([column]) => `${column} = ?`);
            updateFields.push('updated_at = CURRENT_TIMESTAMP');
            const updateValues = provided.map(([, value]) => value);
            updateValues.push(1); // bound to the WHERE id = ? placeholder
            const sql = `UPDATE storage_stats SET ${updateFields.join(', ')} WHERE id = ?`;
            await connection.run(sql, updateValues);
            return;
        }
        // Insert new row with provided values and defaults
        const insertSql = `
        INSERT INTO storage_stats (
          id, content_dir_files, content_dir_size, filesystem_refs,
          last_cleanup, updated_at
        ) VALUES (1, ?, ?, ?, ?, CURRENT_TIMESTAMP)
      `;
        await connection.run(insertSql, [
            stats.contentDirFiles || 0,
            stats.contentDirSize || 0,
            stats.filesystemRefs || 0,
            lastCleanupValue
        ]);
    }
    catch (error) {
        throw new Error(`Failed to update storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
|
|
895
|
+
/**
 * Empties the database while leaving its schema in place. Intended as a safer
 * alternative to deleting the database file, which can hit file-locking
 * problems on Windows.
 *
 * Steps, in order:
 * 1. Counts the rows in documents, chunks and content_metadata (for the report)
 * 2. Deletes all rows from chunks, then documents, then content_metadata
 * 3. Zeroes the storage_stats counters
 * 4. Deletes the system_info row unless `preserveSystemInfo` is set
 * 5. Runs VACUUM unless `runVacuum` is false
 *
 * Progress is written to the console; failures are logged and rethrown.
 *
 * @param connection - Database connection object
 * @param options - Reset options: `preserveSystemInfo` (default false), `runVacuum` (default true)
 * @returns Promise resolving to
 *          `{ success, documentsDeleted, chunksDeleted, contentMetadataDeleted, systemInfoCleared, resetTimeMs }`
 * @throws Error prefixed with "Failed to reset database:" on any failure
 *
 * @example
 * ```typescript
 * const db = await openDatabase('./db.sqlite');
 * const result = await resetDatabase(db, { preserveSystemInfo: false });
 * console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
 * ```
 */
export async function resetDatabase(connection, options = {}) {
    const startTime = Date.now();
    const { preserveSystemInfo = false, runVacuum = true } = options;
    // Row count of a table before it is emptied; a missing result counts as 0.
    const countRows = async (table) => {
        const row = await connection.get(`SELECT COUNT(*) as count FROM ${table}`);
        return row?.count || 0;
    };
    // Children before parents: chunks reference documents, documents reference content_metadata.
    const deletionSteps = [
        [' Deleting chunks...', 'DELETE FROM chunks'],
        [' Deleting documents...', 'DELETE FROM documents'],
        [' Deleting content_metadata...', 'DELETE FROM content_metadata']
    ];
    try {
        console.log('🔄 Starting database reset...');
        const documentsDeleted = await countRows('documents');
        const chunksDeleted = await countRows('chunks');
        const contentMetadataDeleted = await countRows('content_metadata');
        for (const [progressMessage, deleteSql] of deletionSteps) {
            console.log(progressMessage);
            await connection.run(deleteSql);
        }
        console.log(' Resetting storage_stats...');
        await connection.run(`
      UPDATE storage_stats SET
        content_dir_files = 0,
        content_dir_size = 0,
        filesystem_refs = 0,
        updated_at = CURRENT_TIMESTAMP
      WHERE id = 1
    `);
        // The mode/model configuration is dropped unless the caller asked to keep it.
        const systemInfoCleared = !preserveSystemInfo;
        if (systemInfoCleared) {
            console.log(' Clearing system_info...');
            await connection.run('DELETE FROM system_info WHERE id = 1');
        }
        else {
            console.log(' Preserving system_info (mode/model configuration)');
        }
        if (runVacuum) {
            console.log(' Running VACUUM to reclaim disk space...');
            await connection.run('VACUUM');
        }
        const resetTimeMs = Date.now() - startTime;
        console.log(`✓ Database reset complete in ${resetTimeMs}ms`);
        console.log(` Documents deleted: ${documentsDeleted}`);
        console.log(` Chunks deleted: ${chunksDeleted}`);
        console.log(` Content metadata deleted: ${contentMetadataDeleted}`);
        console.log(` System info cleared: ${systemInfoCleared}`);
        return {
            success: true,
            documentsDeleted,
            chunksDeleted,
            contentMetadataDeleted,
            systemInfoCleared,
            resetTimeMs
        };
    }
    catch (error) {
        const resetTimeMs = Date.now() - startTime;
        console.error(`❌ Database reset failed after ${resetTimeMs}ms:`, error);
        const reason = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to reset database: ${reason}`);
    }
}
|
|
982
|
+
/**
 * Reports whether the documents or chunks table contains at least one row.
 * Useful for deciding whether a reset is needed.
 *
 * Any query error (for example when the tables do not exist yet) is treated
 * as "no data" rather than being propagated.
 *
 * @param connection - Database connection object
 * @returns Promise resolving to true if either table has rows, false if both are empty or a query fails
 */
export async function hasDatabaseData(connection) {
    try {
        const documentRow = await connection.get('SELECT COUNT(*) as count FROM documents');
        const chunkRow = await connection.get('SELECT COUNT(*) as count FROM chunks');
        // A missing result or count is counted as zero rows.
        const documentTotal = documentRow?.count || 0;
        const chunkTotal = chunkRow?.count || 0;
        return documentTotal > 0 || chunkTotal > 0;
    }
    catch (error) {
        // If tables don't exist, consider it empty
        return false;
    }
}
|
|
1000
|
+
//# sourceMappingURL=db.js.map
|