rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Streaming Operations for Large Content - Task 9.1 Implementation
|
|
3
|
+
* Provides memory-efficient streaming operations for content ingestion and retrieval
|
|
4
|
+
* Minimizes memory usage for large files through streaming algorithms
|
|
5
|
+
*/
|
|
6
|
+
import { createHash } from 'crypto';
|
|
7
|
+
import { createReadStream, createWriteStream, promises as fs } from 'fs';
|
|
8
|
+
import { pipeline } from 'stream/promises';
|
|
9
|
+
import { Transform, Readable } from 'stream';
|
|
10
|
+
import { dirname } from 'path';
|
|
11
|
+
/**
|
|
12
|
+
* Default streaming configuration
|
|
13
|
+
*/
|
|
14
|
+
const DEFAULT_STREAMING_CONFIG = {
    // Size of each read/hash/write chunk: 64 KiB.
    chunkSize: 65536,
    // Progress reporting and hashing are opt-in.
    enableProgress: false,
    enableHashing: false,
    // Upper bound for a single streaming operation: 5 minutes, in milliseconds.
    timeout: 5 * 60 * 1000
};
|
|
20
|
+
/**
|
|
21
|
+
* StreamingOperations class provides memory-efficient operations for large content
|
|
22
|
+
*/
|
|
23
|
+
export class StreamingOperations {
    /** Effective configuration: DEFAULT_STREAMING_CONFIG overlaid with caller overrides. */
    config;
    /**
     * @param config - Optional overrides (chunkSize, enableProgress, enableHashing, timeout)
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_STREAMING_CONFIG, ...config };
    }
    /**
     * Calculates the SHA-256 hash of a file by reading it as a stream of
     * `config.chunkSize`-sized chunks.
     *
     * Error messages: failures while preparing the operation (e.g. `fs.stat`) are
     * wrapped as "Failed to calculate file hash: ..."; errors emitted by the read
     * stream reject with "Failed to read file: ..."; exceeding `config.timeout`
     * rejects with "File hash calculation timed out".
     *
     * @param filePath - Path to the file to hash
     * @param progressCallback - Optional callback invoked as (bytesProcessed, totalBytes) per chunk
     * @returns Promise that resolves to { hash, bytesProcessed, processingTimeMs }
     */
    async calculateFileHashStreaming(filePath, progressCallback) {
        const startTime = Date.now();
        let bytesProcessed = 0;
        let totalBytes;
        try {
            // The file size is only needed to give progress reports a total.
            if (this.config.enableProgress || progressCallback) {
                const stats = await fs.stat(filePath);
                totalBytes = stats.size;
            }
            const hash = createHash('sha256');
            const readStream = createReadStream(filePath, {
                highWaterMark: this.config.chunkSize
            });
            // The promise is returned without `await`, so rejections raised inside it
            // are NOT re-wrapped by the catch block below.
            return new Promise((resolve, reject) => {
                const timeoutId = setTimeout(() => {
                    readStream.destroy();
                    reject(new Error('File hash calculation timed out'));
                }, this.config.timeout);
                readStream.on('data', (chunk) => {
                    try {
                        const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
                        hash.update(buffer);
                        bytesProcessed += buffer.length;
                        if (progressCallback) {
                            progressCallback(bytesProcessed, totalBytes);
                        }
                    }
                    catch (error) {
                        // A throwing progress callback must fail this operation rather than
                        // escape the event listener as an uncaught exception.
                        clearTimeout(timeoutId);
                        readStream.destroy();
                        reject(error instanceof Error ? error : new Error(String(error)));
                    }
                });
                readStream.on('end', () => {
                    clearTimeout(timeoutId);
                    const processingTimeMs = Date.now() - startTime;
                    resolve({
                        hash: hash.digest('hex'),
                        bytesProcessed,
                        processingTimeMs
                    });
                });
                readStream.on('error', (error) => {
                    clearTimeout(timeoutId);
                    reject(new Error(`Failed to read file: ${error.message}`));
                });
            });
        }
        catch (error) {
            throw new Error(`Failed to calculate file hash: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Calculates the SHA-256 hash of an in-memory buffer, feeding it to the hasher in
     * `config.chunkSize` slices and periodically yielding to the event loop.
     *
     * @param content - Buffer to hash
     * @param progressCallback - Optional callback invoked as (bytesProcessed, totalBytes) per slice
     * @returns Promise that resolves to { hash, bytesProcessed, processingTimeMs }
     * @throws Error prefixed with "Failed to calculate buffer hash: " on failure
     */
    async calculateBufferHashStreaming(content, progressCallback) {
        const startTime = Date.now();
        let bytesProcessed = 0;
        const totalBytes = content.length;
        try {
            const hash = createHash('sha256');
            const chunkSize = this.config.chunkSize;
            for (let offset = 0; offset < content.length; offset += chunkSize) {
                // subarray creates a view over the same memory, not a copy.
                const chunk = content.subarray(offset, Math.min(offset + chunkSize, content.length));
                hash.update(chunk);
                bytesProcessed += chunk.length;
                if (progressCallback) {
                    progressCallback(bytesProcessed, totalBytes);
                }
                // Every tenth chunk (including the first) let pending I/O callbacks run.
                if (offset % (chunkSize * 10) === 0) {
                    await new Promise(resolve => setImmediate(resolve));
                }
            }
            const processingTimeMs = Date.now() - startTime;
            return {
                hash: hash.digest('hex'),
                bytesProcessed,
                processingTimeMs
            };
        }
        catch (error) {
            throw new Error(`Failed to calculate buffer hash: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Copies a file through a read -> transform -> write pipeline, optionally computing
     * a SHA-256 hash of the copied bytes (`config.enableHashing`).
     *
     * The destination's parent directory is created if missing. If the copy fails after
     * the destination has been opened, the partial destination file is removed
     * (best-effort). A destination that was never opened is left untouched.
     *
     * @param sourcePath - Source file path
     * @param destinationPath - Destination file path
     * @param progressCallback - Optional callback invoked as (bytesWritten, totalBytes) per chunk
     * @returns Promise that resolves to { bytesWritten, processingTimeMs, hash } (hash is
     *          undefined unless hashing is enabled)
     * @throws Error prefixed with "Failed to copy file: " on failure or timeout
     */
    async copyFileStreaming(sourcePath, destinationPath, progressCallback) {
        const startTime = Date.now();
        let bytesWritten = 0;
        let totalBytes;
        let hash;
        // Only clean up a destination this call actually opened; otherwise an early
        // failure (e.g. missing source) would delete a pre-existing, untouched file.
        let destinationOpened = false;
        try {
            // The file size is only needed to give progress reports a total.
            if (this.config.enableProgress || progressCallback) {
                const stats = await fs.stat(sourcePath);
                totalBytes = stats.size;
            }
            await fs.mkdir(dirname(destinationPath), { recursive: true });
            const readStream = createReadStream(sourcePath, {
                highWaterMark: this.config.chunkSize
            });
            const writeStream = createWriteStream(destinationPath);
            destinationOpened = true;
            const hashCalculator = this.config.enableHashing ? createHash('sha256') : undefined;
            // Pass-through stage that counts bytes, feeds the hasher and reports progress.
            const progressTransform = new Transform({
                transform(chunk, _encoding, callback) {
                    bytesWritten += chunk.length;
                    if (hashCalculator) {
                        hashCalculator.update(chunk);
                    }
                    if (progressCallback) {
                        progressCallback(bytesWritten, totalBytes);
                    }
                    callback(null, chunk);
                }
            });
            await this.withTimeout(pipeline(readStream, progressTransform, writeStream), this.config.timeout, 'File copy operation timed out', () => {
                // Stop the transfer so nothing keeps writing while the partial file is removed.
                readStream.destroy();
                progressTransform.destroy();
                writeStream.destroy();
            });
            const processingTimeMs = Date.now() - startTime;
            if (hashCalculator) {
                hash = hashCalculator.digest('hex');
            }
            return {
                bytesWritten,
                processingTimeMs,
                hash
            };
        }
        catch (error) {
            if (destinationOpened) {
                try {
                    await fs.unlink(destinationPath);
                }
                catch {
                    // Cleanup is best-effort; the original failure is reported below.
                }
            }
            throw new Error(`Failed to copy file: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Writes a buffer to a file in `config.chunkSize` slices through a streaming
     * pipeline, optionally computing a SHA-256 hash of the written bytes
     * (`config.enableHashing`).
     *
     * The destination's parent directory is created if missing. If the write fails after
     * the destination has been opened, the partial file is removed (best-effort). A
     * destination that was never opened is left untouched.
     *
     * @param content - Buffer to write
     * @param destinationPath - Destination file path
     * @param progressCallback - Optional callback invoked as (bytesWritten, totalBytes) per chunk
     * @returns Promise that resolves to { bytesWritten, processingTimeMs, hash } (hash is
     *          undefined unless hashing is enabled)
     * @throws Error prefixed with "Failed to write buffer: " on failure or timeout
     */
    async writeBufferStreaming(content, destinationPath, progressCallback) {
        const startTime = Date.now();
        let bytesWritten = 0;
        const totalBytes = content.length;
        let hash;
        // Only clean up a destination this call actually opened (see copyFileStreaming).
        let destinationOpened = false;
        try {
            await fs.mkdir(dirname(destinationPath), { recursive: true });
            const writeStream = createWriteStream(destinationPath);
            destinationOpened = true;
            const hashCalculator = this.config.enableHashing ? createHash('sha256') : undefined;
            // Expose the buffer as a readable stream of chunk-sized views.
            const readableStream = Readable.from(this.bufferToChunks(content));
            // Pass-through stage that counts bytes, feeds the hasher and reports progress.
            const progressTransform = new Transform({
                transform(chunk, _encoding, callback) {
                    bytesWritten += chunk.length;
                    if (hashCalculator) {
                        hashCalculator.update(chunk);
                    }
                    if (progressCallback) {
                        progressCallback(bytesWritten, totalBytes);
                    }
                    callback(null, chunk);
                }
            });
            await this.withTimeout(pipeline(readableStream, progressTransform, writeStream), this.config.timeout, 'Buffer write operation timed out', () => {
                // Stop the transfer so nothing keeps writing while the partial file is removed.
                readableStream.destroy();
                progressTransform.destroy();
                writeStream.destroy();
            });
            const processingTimeMs = Date.now() - startTime;
            if (hashCalculator) {
                hash = hashCalculator.digest('hex');
            }
            return {
                bytesWritten,
                processingTimeMs,
                hash
            };
        }
        catch (error) {
            if (destinationOpened) {
                try {
                    await fs.unlink(destinationPath);
                }
                catch {
                    // Cleanup is best-effort; the original failure is reported below.
                }
            }
            throw new Error(`Failed to write buffer: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Reads a file and returns its content as a base64 string.
     *
     * Despite the name, the file is loaded fully into memory with `fs.readFile` before
     * being encoded; the progress callback therefore fires exactly once, after the read.
     *
     * @param filePath - Path to the file to read
     * @param progressCallback - Optional callback invoked once as (bytesProcessed, totalBytes)
     * @returns Promise that resolves to the base64-encoded file content
     * @throws Error prefixed with "Failed to read file as base64: " on failure
     */
    async readFileAsBase64Streaming(filePath, progressCallback) {
        let totalBytes;
        try {
            // The file size is only needed to give the progress report a total.
            if (this.config.enableProgress || progressCallback) {
                const stats = await fs.stat(filePath);
                totalBytes = stats.size;
            }
            const content = await fs.readFile(filePath);
            if (progressCallback) {
                progressCallback(content.length, totalBytes);
            }
            return content.toString('base64');
        }
        catch (error) {
            throw new Error(`Failed to read file as base64: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Validates file integrity by hashing the file with calculateFileHashStreaming and
     * comparing against the expected hash (compared case-insensitively by lower-casing
     * the expected value).
     *
     * @param filePath - Path to the file to validate
     * @param expectedHash - Expected SHA-256 hash as a hex string
     * @param progressCallback - Optional callback for progress reporting
     * @returns Promise that resolves to { isValid, actualHash, bytesProcessed }
     * @throws Error prefixed with "Failed to validate file integrity: " on failure
     */
    async validateFileIntegrityStreaming(filePath, expectedHash, progressCallback) {
        try {
            const result = await this.calculateFileHashStreaming(filePath, progressCallback);
            return {
                isValid: result.hash === expectedHash.toLowerCase(),
                actualHash: result.hash,
                bytesProcessed: result.bytesProcessed
            };
        }
        catch (error) {
            throw new Error(`Failed to validate file integrity: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Gets file metadata and access permissions without reading the file content.
     *
     * @param filePath - Path to the file
     * @returns Promise that resolves to { size, isFile, isDirectory, lastModified, canRead, canWrite }
     * @throws Error prefixed with "Failed to get file info: " when the path cannot be stat'ed
     */
    async getFileInfo(filePath) {
        try {
            const stats = await fs.stat(filePath);
            // fs.access rejects when the permission is missing; map that to a boolean.
            const hasAccess = async (mode) => {
                try {
                    await fs.access(filePath, mode);
                    return true;
                }
                catch {
                    return false;
                }
            };
            const canRead = await hasAccess(fs.constants.R_OK);
            const canWrite = await hasAccess(fs.constants.W_OK);
            return {
                size: stats.size,
                isFile: stats.isFile(),
                isDirectory: stats.isDirectory(),
                lastModified: stats.mtime,
                canRead,
                canWrite
            };
        }
        catch (error) {
            throw new Error(`Failed to get file info: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    // =============================================================================
    // PRIVATE HELPER METHODS
    // =============================================================================
    /**
     * Lazily splits a buffer into `config.chunkSize`-sized views for streaming.
     *
     * @param buffer - Buffer to chunk
     * @returns Generator that yields buffer slices (views over the same memory, not copies)
     */
    *bufferToChunks(buffer) {
        const chunkSize = this.config.chunkSize;
        for (let offset = 0; offset < buffer.length; offset += chunkSize) {
            yield buffer.subarray(offset, Math.min(offset + chunkSize, buffer.length));
        }
    }
    /**
     * Races a promise against a timer.
     *
     * The timer is always cleared once the race settles, so a finished operation does
     * not leave a pending timeout keeping the event loop alive.
     *
     * @param promise - Promise to wrap
     * @param timeoutMs - Timeout in milliseconds
     * @param errorMessage - Error message used when the timeout fires
     * @param onTimeout - Optional hook run just before the timeout rejection, e.g. to
     *                    destroy streams belonging to the abandoned operation
     * @returns Promise that settles like `promise`, or rejects with `errorMessage` on timeout
     */
    async withTimeout(promise, timeoutMs, errorMessage, onTimeout) {
        let timeoutId;
        const timeoutPromise = new Promise((_, reject) => {
            timeoutId = setTimeout(() => {
                if (onTimeout) {
                    try {
                        onTimeout();
                    }
                    catch {
                        // Cleanup is best-effort; the timeout error below is what matters.
                    }
                }
                reject(new Error(errorMessage));
            }, timeoutMs);
        });
        try {
            return await Promise.race([promise, timeoutPromise]);
        }
        finally {
            clearTimeout(timeoutId);
        }
    }
}
|
|
363
|
+
/**
 * Factory for StreamingOperations.
 *
 * The given configuration is passed straight to the StreamingOperations
 * constructor.
 *
 * @param config - Optional configuration overrides
 * @returns A new StreamingOperations instance
 */
export function createStreamingOperations(config) {
    const operations = new StreamingOperations(config);
    return operations;
}
|
|
371
|
+
/**
 * Utility function to format bytes for progress reporting.
 *
 * Uses binary units (1 KB = 1024 B) and at most two decimal places, with
 * trailing zeros dropped. The unit is chosen from the magnitude of the value
 * and clamped to the available units: values below 1 are reported in B, and
 * values of 1024 TB or more are reported in TB instead of an "undefined" unit.
 * Negative values keep their sign.
 *
 * @param bytes - Number of bytes
 * @returns Formatted string (e.g., "1.5 MB")
 */
export function formatBytes(bytes) {
    if (bytes === 0)
        return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
    const exponent = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k));
    // Clamp into the sizes table; a NaN exponent (NaN input) falls back to bytes.
    const i = Number.isNaN(exponent)
        ? 0
        : Math.min(Math.max(exponent, 0), sizes.length - 1);
    return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
}
|
|
384
|
+
/**
 * Utility function to format processing time.
 *
 * Durations that round to less than 1000 ms are shown as whole milliseconds;
 * everything else is shown in seconds with one decimal place. The threshold is
 * applied to the rounded value so that e.g. 999.6 ms is shown as "1.0s" rather
 * than "1000ms".
 *
 * @param milliseconds - Processing time in milliseconds
 * @returns Formatted string (e.g., "1.5s" or "150ms")
 */
export function formatProcessingTime(milliseconds) {
    const wholeMs = Math.round(milliseconds);
    if (wholeMs < 1000) {
        return `${wholeMs}ms`;
    }
    return `${(milliseconds / 1000).toFixed(1)}s`;
}
|
|
397
|
+
/**
 * Utility function to calculate processing speed.
 *
 * Returns 0 whenever the elapsed time is not a positive number (zero,
 * negative, or NaN), so a bad duration can never produce a negative, infinite
 * or NaN speed.
 *
 * @param bytes - Number of bytes processed
 * @param milliseconds - Processing time in milliseconds
 * @returns Speed in MB/s (1 MB = 1024 * 1024 bytes)
 */
export function calculateProcessingSpeed(bytes, milliseconds) {
    // `!(x > 0)` is also true for NaN, which a plain `x <= 0` would miss.
    if (!(milliseconds > 0))
        return 0;
    const bytesPerSecond = (bytes / milliseconds) * 1000;
    return bytesPerSecond / (1024 * 1024); // Convert to MB/s
}
|
|
409
|
+
//# sourceMappingURL=streaming-operations.js.map
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
|
|
3
|
+
* Model-agnostic. No transformer or modality-specific logic.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Shape of a content document: required string fields `source`, `title`,
 * `content` and `contentType`, plus an optional free-form `metadata` record.
 */
export interface ContentDocument {
|
|
6
|
+
source: string;
|
|
7
|
+
title: string;
|
|
8
|
+
content: string;
|
|
9
|
+
contentType: string;
|
|
10
|
+
metadata?: Record<string, any>;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Shape of a content chunk: its `text`, a numeric `chunkIndex`, a
 * `contentType` string, and an optional free-form `metadata` record.
 */
export interface ContentChunk {
|
|
13
|
+
text: string;
|
|
14
|
+
chunkIndex: number;
|
|
15
|
+
contentType: string;
|
|
16
|
+
metadata?: Record<string, any>;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Result of an embedding operation: a string `embedding_id` and the embedding
 * `vector` as a Float32Array. `contentType` and `metadata` are optional.
 */
export interface EmbeddingResult {
|
|
19
|
+
embedding_id: string;
|
|
20
|
+
vector: Float32Array;
|
|
21
|
+
contentType?: string;
|
|
22
|
+
metadata?: Record<string, any>;
|
|
23
|
+
}
|
|
24
|
+
/**
 * Function type for producing an embedding: takes a query string and an
 * optional content type string, and resolves to an EmbeddingResult.
 */
export type EmbedFunction = (query: string, contentType?: string) => Promise<EmbeddingResult>;
|
|
25
|
+
/**
 * Shape of a single search result: a `content` string, a numeric `score`, a
 * `contentType` string, a nested `document` descriptor (numeric `id`,
 * `source`, `title`, `contentType`, optional `contentId`), and an optional
 * free-form `metadata` record.
 */
export interface SearchResult {
|
|
26
|
+
content: string;
|
|
27
|
+
score: number;
|
|
28
|
+
contentType: string;
|
|
29
|
+
document: {
|
|
30
|
+
id: number;
|
|
31
|
+
source: string;
|
|
32
|
+
title: string;
|
|
33
|
+
contentType: string;
|
|
34
|
+
contentId?: string;
|
|
35
|
+
};
|
|
36
|
+
metadata?: Record<string, any>;
|
|
37
|
+
}
|
|
38
|
+
/**
 * Function type for reranking: takes the query string, the SearchResult list
 * and an optional content type string, and resolves to a SearchResult list.
 */
export type RerankFunction = (query: string, results: SearchResult[], contentType?: string) => Promise<SearchResult[]>;
|
|
39
|
+
/**
 * Bundles an EmbedFunction (`embedQuery`) with the list of content types it
 * supports and the numeric `embeddingDimensions`.
 */
export interface EmbeddingQueryInterface {
|
|
40
|
+
embedQuery: EmbedFunction;
|
|
41
|
+
supportedContentTypes: string[];
|
|
42
|
+
embeddingDimensions: number;
|
|
43
|
+
}
|
|
44
|
+
/**
 * Bundles a RerankFunction (`rerankResults`) with the list of content types it
 * supports and an `isEnabled` flag.
 */
export interface RerankingInterface {
|
|
45
|
+
rerankResults: RerankFunction;
|
|
46
|
+
supportedContentTypes: string[];
|
|
47
|
+
isEnabled: boolean;
|
|
48
|
+
}
|
|
49
|
+
/**
 * Optional search settings; every field may be omitted.
 * `contentType` is restricted to the literals 'text', 'image' or 'combined'.
 * NOTE(review): defaults for omitted fields are not visible here; check the
 * search implementation.
 */
export interface SearchOptions {
|
|
50
|
+
top_k?: number;
|
|
51
|
+
rerank?: boolean;
|
|
52
|
+
contentType?: 'text' | 'image' | 'combined';
|
|
53
|
+
}
|
|
54
|
+
/**
 * Minimal chunk shape: `text` plus a numeric `chunk_index`.
 * NOTE(review): overlaps with ContentChunk but uses a snake_case index and has
 * no contentType/metadata; presumably an older shape, confirm before merging.
 */
export interface Chunk {
|
|
55
|
+
text: string;
|
|
56
|
+
chunk_index: number;
|
|
57
|
+
}
|
|
58
|
+
/**
 * Document shape with required `source`, `title` and `content` strings and an
 * optional free-form `metadata` record. It has the same fields as
 * ContentDocument except that there is no `contentType`.
 */
export interface Document {
|
|
59
|
+
source: string;
|
|
60
|
+
title: string;
|
|
61
|
+
content: string;
|
|
62
|
+
metadata?: Record<string, any>;
|
|
63
|
+
}
|
|
64
|
+
export type { DatabaseConnection } from './db.js';
|
|
65
|
+
export type { ContentChunk as ChunkResult } from './db.js';
|
|
66
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CORE MODULE — Universal Embedder Interface for Chameleon Architecture
|
|
3
|
+
*
|
|
4
|
+
* Model-agnostic interfaces supporting both text and multimodal models.
|
|
5
|
+
* Designed for runtime polymorphism and extensibility.
|
|
6
|
+
*
|
|
7
|
+
* ARCHITECTURAL ROLE:
|
|
8
|
+
* This file contains ONLY interfaces, types, and utility functions - no implementation logic.
|
|
9
|
+
* It defines the contract that all embedder implementations must follow.
|
|
10
|
+
*
|
|
11
|
+
* CONTENTS:
|
|
12
|
+
* - UniversalEmbedder interface: Core contract for all embedders
|
|
13
|
+
* - Type definitions: ModelType, ContentType, etc.
|
|
14
|
+
* - Error classes: ModelValidationError, ContentTypeError, etc.
|
|
15
|
+
* - Utility functions: Type guards and validation helpers
|
|
16
|
+
* - Constants: Default capabilities and content types
|
|
17
|
+
*
|
|
18
|
+
* USAGE:
|
|
19
|
+
* - Implementation layers (text, multimodal) implement these interfaces
|
|
20
|
+
* - Core layer uses these types for dependency injection
|
|
21
|
+
* - Public API exports these types for external use
|
|
22
|
+
*/
|
|
23
|
+
import type { EmbeddingResult } from '../types.js';
|
|
24
|
+
/**
 * Universal embedder interface supporting both text and multimodal models.
 * Provides a consistent API across different model types and content types.
 *
 * Members fall into three groups:
 * - read-only descriptors: `modelName`, `modelType`, `dimensions`,
 *   `supportedContentTypes`;
 * - embedding methods resolving to EmbeddingResult values: `embedText`,
 *   `embedBatch`, and the optional `embedImage`;
 * - lifecycle and introspection: `loadModel`, `isLoaded`, `getModelInfo`,
 *   `cleanup`.
 *
 * `embedImage` is optional, so callers must check for its presence (for
 * example with the `supportsImages` type guard declared in this file).
 */
|
|
28
|
+
export interface UniversalEmbedder {
|
|
29
|
+
readonly modelName: string;
|
|
30
|
+
readonly modelType: ModelType;
|
|
31
|
+
readonly dimensions: number;
|
|
32
|
+
readonly supportedContentTypes: readonly string[];
|
|
33
|
+
embedText(text: string): Promise<EmbeddingResult>;
|
|
34
|
+
embedImage?(imagePath: string): Promise<EmbeddingResult>;
|
|
35
|
+
embedBatch(items: EmbeddingBatchItem[]): Promise<EmbeddingResult[]>;
|
|
36
|
+
loadModel(): Promise<void>;
|
|
37
|
+
isLoaded(): boolean;
|
|
38
|
+
getModelInfo(): ModelInfo;
|
|
39
|
+
cleanup(): Promise<void>;
|
|
40
|
+
}
|
|
41
|
+
/**
 * Enhanced embedding result with content type and metadata support.
 * Extends EmbeddingResult (imported from '../types.js') and declares
 * `contentType` as a required string and `metadata` as an optional record.
 */
|
|
45
|
+
export interface EnhancedEmbeddingResult extends EmbeddingResult {
|
|
46
|
+
contentType: string;
|
|
47
|
+
metadata?: Record<string, any>;
|
|
48
|
+
}
|
|
49
|
+
/**
 * Batch embedding item for efficient processing; the element type accepted by
 * `UniversalEmbedder.embedBatch`.
 * Carries a `content` string, its `contentType`, and optional `metadata`.
 * NOTE(review): whether `content` holds raw text or a path for non-text types
 * is not visible here; confirm against the implementations.
 */
|
|
52
|
+
export interface EmbeddingBatchItem {
|
|
53
|
+
content: string;
|
|
54
|
+
contentType: string;
|
|
55
|
+
metadata?: Record<string, any>;
|
|
56
|
+
}
|
|
57
|
+
/**
 * Model information interface for runtime introspection; the return type of
 * `UniversalEmbedder.getModelInfo`.
 * Combines identifying fields (`name`, `type`, `dimensions`, `version`,
 * `supportedContentTypes`) with the model's ModelCapabilities and
 * ModelRequirements.
 */
|
|
60
|
+
export interface ModelInfo {
|
|
61
|
+
name: string;
|
|
62
|
+
type: ModelType;
|
|
63
|
+
dimensions: number;
|
|
64
|
+
version: string;
|
|
65
|
+
supportedContentTypes: readonly string[];
|
|
66
|
+
capabilities: ModelCapabilities;
|
|
67
|
+
requirements: ModelRequirements;
|
|
68
|
+
}
|
|
69
|
+
/**
 * Model capabilities for feature detection.
 * Four boolean flags are required (`supportsText`, `supportsImages`,
 * `supportsBatchProcessing`, `supportsMetadata`). The numeric limits, the
 * image format list and the remaining flags are optional and may be absent.
 */
|
|
72
|
+
export interface ModelCapabilities {
|
|
73
|
+
supportsText: boolean;
|
|
74
|
+
supportsImages: boolean;
|
|
75
|
+
supportsBatchProcessing: boolean;
|
|
76
|
+
supportsMetadata: boolean;
|
|
77
|
+
maxBatchSize?: number;
|
|
78
|
+
maxTextLength?: number;
|
|
79
|
+
supportedImageFormats?: readonly string[];
|
|
80
|
+
supportsMultimodal?: boolean;
|
|
81
|
+
supportsCrossModalSearch?: boolean;
|
|
82
|
+
unifiedEmbeddingSpace?: boolean;
|
|
83
|
+
reliableImplementation?: boolean;
|
|
84
|
+
}
|
|
85
|
+
/**
 * Model requirements for validation and compatibility checking.
 * Only `transformersJsVersion` is required; the other fields are optional.
 * NOTE(review): the unit of `minimumMemory` and the version string format are
 * not visible here; confirm against the model registry.
 */
|
|
88
|
+
export interface ModelRequirements {
|
|
89
|
+
transformersJsVersion: string;
|
|
90
|
+
minimumMemory?: number;
|
|
91
|
+
requiredFeatures?: readonly string[];
|
|
92
|
+
platformSupport?: readonly string[];
|
|
93
|
+
}
|
|
94
|
+
/**
 * Supported model types in the Chameleon architecture.
 * A union of two string literals: 'sentence-transformer' and 'clip'.
 */
|
|
97
|
+
export type ModelType = 'sentence-transformer' | 'clip';
|
|
98
|
+
/**
 * Content types supported by the system.
 * A union of four string literals: 'text', 'image', 'pdf' and 'docx'.
 * Note that the content type fields in this file are typed as plain `string`,
 * not as this union.
 */
|
|
101
|
+
export type ContentType = 'text' | 'image' | 'pdf' | 'docx';
|
|
102
|
+
/**
 * Model validation result for compatibility checking.
 * Holds an overall `isValid` flag plus three string lists: `errors`,
 * `warnings` and `suggestions`. All four fields are required.
 */
|
|
105
|
+
export interface ModelValidationResult {
|
|
106
|
+
isValid: boolean;
|
|
107
|
+
errors: string[];
|
|
108
|
+
warnings: string[];
|
|
109
|
+
suggestions: string[];
|
|
110
|
+
}
|
|
111
|
+
/**
 * Simple embedder creation function type.
 * Replaces complex factory patterns with a straightforward function approach:
 * takes a model name and optional EmbedderCreationOptions, and resolves to a
 * UniversalEmbedder.
 */
|
|
115
|
+
export type CreateEmbedderFunction = (modelName: string, options?: EmbedderCreationOptions) => Promise<UniversalEmbedder>;
|
|
116
|
+
/**
 * Options for creating embedder instances; every field is optional.
 * NOTE(review): the unit of `timeout` and the defaults applied when a field is
 * omitted are not visible here; confirm against the embedder factory.
 */
|
|
119
|
+
export interface EmbedderCreationOptions {
|
|
120
|
+
cachePath?: string;
|
|
121
|
+
maxBatchSize?: number;
|
|
122
|
+
timeout?: number;
|
|
123
|
+
enableGPU?: boolean;
|
|
124
|
+
customConfig?: Record<string, any>;
|
|
125
|
+
}
|
|
126
|
+
/**
 * Model validation error for unsupported or incompatible models.
 * An Error subclass that exposes the offending `modelName` and the list of
 * `availableModels` as read-only properties. The constructor takes the model
 * name, the available models and the error message, in that order.
 */
|
|
129
|
+
export declare class ModelValidationError extends Error {
|
|
130
|
+
readonly modelName: string;
|
|
131
|
+
readonly availableModels: readonly string[];
|
|
132
|
+
constructor(modelName: string, availableModels: readonly string[], message: string);
|
|
133
|
+
}
|
|
134
|
+
/**
 * Transformers.js compatibility error for version mismatches.
 * An Error subclass that exposes `modelName`, `requiredVersion` and
 * `currentVersion` as read-only properties. The constructor takes those three
 * values followed by the error message.
 */
|
|
137
|
+
export declare class TransformersCompatibilityError extends Error {
|
|
138
|
+
readonly modelName: string;
|
|
139
|
+
readonly requiredVersion: string;
|
|
140
|
+
readonly currentVersion: string;
|
|
141
|
+
constructor(modelName: string, requiredVersion: string, currentVersion: string, message: string);
|
|
142
|
+
}
|
|
143
|
+
/**
 * Content type error for unsupported content types.
 * An Error subclass that exposes the rejected `contentType` and the list of
 * `supportedTypes` as read-only properties. The constructor takes the content
 * type, the supported types and the error message, in that order.
 */
|
|
146
|
+
export declare class ContentTypeError extends Error {
|
|
147
|
+
readonly contentType: string;
|
|
148
|
+
readonly supportedTypes: readonly string[];
|
|
149
|
+
constructor(contentType: string, supportedTypes: readonly string[], message: string);
|
|
150
|
+
}
|
|
151
|
+
/**
 * Type guard for checking if an embedder supports images.
 * When it returns true, the embedder is narrowed to a UniversalEmbedder whose
 * `embedImage` method is present (no longer optional).
 */
|
|
154
|
+
export declare function supportsImages(embedder: UniversalEmbedder): embedder is UniversalEmbedder & {
|
|
155
|
+
embedImage(imagePath: string): Promise<EmbeddingResult>;
|
|
156
|
+
};
|
|
157
|
+
/**
 * Checks whether an embedder supports a specific content type.
 * Returns a plain boolean: despite the "type guard" wording used previously,
 * the signature is not a type predicate and narrows nothing.
 */
|
|
160
|
+
export declare function supportsContentType(embedder: UniversalEmbedder, contentType: string): boolean;
|
|
161
|
+
/**
 * Utility function to create enhanced embedding results.
 * Takes the embedding id, the vector, the content type and optional metadata,
 * and returns an EnhancedEmbeddingResult.
 */
|
|
164
|
+
export declare function createEnhancedEmbeddingResult(embeddingId: string, vector: Float32Array, contentType: string, metadata?: Record<string, any>): EnhancedEmbeddingResult;
|
|
165
|
+
/**
 * Utility function to validate content type against supported types.
 * Returns nothing on success.
 * NOTE(review): presumably signals failure by throwing (likely a
 * ContentTypeError); confirm against the implementation.
 */
|
|
168
|
+
export declare function validateContentType(contentType: string, supportedTypes: readonly string[]): void;
|
|
169
|
+
/**
 * Default supported content types for different model types.
 * A record keyed by ModelType, so it has one read-only string list for each
 * of 'sentence-transformer' and 'clip'.
 */
|
|
172
|
+
export declare const DEFAULT_CONTENT_TYPES: Record<ModelType, readonly string[]>;
|
|
173
|
+
/**
 * Default model capabilities for different model types.
 * A record keyed by ModelType, so it has one ModelCapabilities entry for each
 * of 'sentence-transformer' and 'clip'.
 */
|
|
176
|
+
export declare const DEFAULT_CAPABILITIES: Record<ModelType, ModelCapabilities>;
|
|
177
|
+
//# sourceMappingURL=universal-embedder.d.ts.map
|