rag-lite-ts 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MULTIMODAL IMPLEMENTATION — CLIP Embedder Implementation
|
|
3
|
+
*
|
|
4
|
+
* Implements UniversalEmbedder interface for CLIP models with full multimodal support.
|
|
5
|
+
* Provides reliable text and image embedding using CLIPTextModelWithProjection and
|
|
6
|
+
* CLIPVisionModelWithProjection for true cross-modal search capabilities.
|
|
7
|
+
*
|
|
8
|
+
* Features:
|
|
9
|
+
* - Text embedding using CLIP text encoder (512-dimensional vectors)
|
|
10
|
+
* - Image embedding using CLIP vision encoder (512-dimensional vectors)
|
|
11
|
+
* - Unified embedding space enabling cross-modal similarity search
|
|
12
|
+
* - Text queries can find semantically similar images
|
|
13
|
+
* - Image queries can find semantically similar text
|
|
14
|
+
* - Batch processing optimization for both text and images
|
|
15
|
+
*
|
|
16
|
+
* Supported Models:
|
|
17
|
+
* - Xenova/clip-vit-base-patch32 (recommended, faster)
|
|
18
|
+
* - Xenova/clip-vit-base-patch16 (higher accuracy, slower)
|
|
19
|
+
*/
|
|
20
|
+
import { BaseUniversalEmbedder, type EmbedderOptions } from '../core/abstract-embedder.js';
|
|
21
|
+
import type { EmbeddingResult } from '../types.js';
|
|
22
|
+
/**
|
|
23
|
+
* CLIP embedder implementation for multimodal content
|
|
24
|
+
*
|
|
25
|
+
* Provides reliable text and image embedding using separate CLIP model components:
|
|
26
|
+
* - CLIPTextModelWithProjection for text-only embedding (no pixel_values errors)
|
|
27
|
+
* - CLIPVisionModelWithProjection for image embedding
|
|
28
|
+
* - AutoTokenizer for proper text tokenization with CLIP's 77 token limit
|
|
29
|
+
*
|
|
30
|
+
* All embeddings are 512-dimensional vectors in a unified embedding space,
|
|
31
|
+
* enabling true cross-modal search where text queries can find images and
|
|
32
|
+
* image queries can find text based on semantic similarity.
|
|
33
|
+
*
|
|
34
|
+
* Example Usage:
|
|
35
|
+
* ```typescript
|
|
36
|
+
* const embedder = await createEmbedder('Xenova/clip-vit-base-patch32');
|
|
37
|
+
*
|
|
38
|
+
* // Embed text
|
|
39
|
+
* const textResult = await embedder.embedText('a red sports car');
|
|
40
|
+
*
|
|
41
|
+
* // Embed image
|
|
42
|
+
* const imageResult = await embedder.embedImage('./car.jpg');
|
|
43
|
+
*
|
|
44
|
+
* // Calculate cross-modal similarity
|
|
45
|
+
* const similarity = cosineSimilarity(textResult.vector, imageResult.vector);
|
|
46
|
+
* ```
|
|
47
|
+
*/
|
|
48
|
+
export declare class CLIPEmbedder extends BaseUniversalEmbedder {
|
|
49
|
+
private tokenizer;
|
|
50
|
+
private textModel;
|
|
51
|
+
private imageModel;
|
|
52
|
+
private resourceManager;
|
|
53
|
+
private embedderResourceId?;
|
|
54
|
+
private tokenizerResourceId?;
|
|
55
|
+
private textModelResourceId?;
|
|
56
|
+
private imageModelResourceId?;
|
|
57
|
+
constructor(modelName: string, options?: EmbedderOptions);
|
|
58
|
+
/**
|
|
59
|
+
* Load the CLIP model components
|
|
60
|
+
*
|
|
61
|
+
* Loads three separate components for reliable multimodal embedding:
|
|
62
|
+
* 1. AutoTokenizer - Handles text tokenization with CLIP's 77 token limit
|
|
63
|
+
* 2. CLIPTextModelWithProjection - Generates text embeddings without pixel_values errors
|
|
64
|
+
* 3. CLIPVisionModelWithProjection - Generates image embeddings
|
|
65
|
+
*
|
|
66
|
+
* All components are registered with the resource manager for proper cleanup.
|
|
67
|
+
* Models are cached locally after first download for faster subsequent loads.
|
|
68
|
+
*
|
|
69
|
+
* @throws {Error} If model loading fails or components are not available
|
|
70
|
+
*/
|
|
71
|
+
loadModel(): Promise<void>;
|
|
72
|
+
/**
|
|
73
|
+
* Clean up model resources with comprehensive disposal
|
|
74
|
+
*
|
|
75
|
+
* Properly disposes of all CLIP model components:
|
|
76
|
+
* - Tokenizer resources
|
|
77
|
+
* - Text model resources
|
|
78
|
+
* - Vision model resources
|
|
79
|
+
*
|
|
80
|
+
* Uses the resource manager for coordinated cleanup and forces garbage
|
|
81
|
+
* collection to free memory from CLIP models which can be memory intensive.
|
|
82
|
+
*
|
|
83
|
+
* This method is safe to call multiple times and will not throw errors
|
|
84
|
+
* during cleanup - errors are logged but don't prevent cleanup completion.
|
|
85
|
+
*/
|
|
86
|
+
cleanup(): Promise<void>;
|
|
87
|
+
/**
|
|
88
|
+
* Apply L2-normalization to an embedding vector
|
|
89
|
+
*
|
|
90
|
+
* L2-normalization ensures that all embeddings have unit length (magnitude = 1),
|
|
91
|
+
* which is essential for CLIP models as they were trained with normalized embeddings.
|
|
92
|
+
* This normalization makes cosine similarity calculations more reliable and ensures
|
|
93
|
+
* that vector magnitudes don't affect similarity scores.
|
|
94
|
+
*
|
|
95
|
+
* @param embedding - The embedding vector to normalize (modified in-place)
|
|
96
|
+
* @returns The normalized embedding vector (same reference as input)
|
|
97
|
+
* @private
|
|
98
|
+
*/
|
|
99
|
+
private normalizeEmbedding;
|
|
100
|
+
/**
|
|
101
|
+
* Embed text using CLIP text encoder
|
|
102
|
+
*
|
|
103
|
+
* Uses CLIPTextModelWithProjection for reliable text-only embedding without
|
|
104
|
+
* pixel_values errors. Text is tokenized with CLIP's 77 token limit and
|
|
105
|
+
* automatically truncated if necessary.
|
|
106
|
+
*
|
|
107
|
+
* Returns a 512-dimensional L2-normalized embedding vector in the unified CLIP
|
|
108
|
+
* embedding space, which is directly comparable to image embeddings for cross-modal search.
|
|
109
|
+
*
|
|
110
|
+
* @param text - The text to embed (will be trimmed and validated)
|
|
111
|
+
* @returns EmbeddingResult with 512-dimensional normalized vector and metadata
|
|
112
|
+
* @throws {Error} If the text is empty, the model is not loaded, or embedding fails
|
|
113
|
+
*
|
|
114
|
+
* @example
|
|
115
|
+
* ```typescript
|
|
116
|
+
* const result = await embedder.embedText('a red sports car');
|
|
117
|
+
* console.log(result.vector.length); // 512
|
|
118
|
+
* console.log(result.contentType); // 'text'
|
|
119
|
+
* ```
|
|
120
|
+
*/
|
|
121
|
+
embedText(text: string): Promise<EmbeddingResult>;
|
|
122
|
+
/**
|
|
123
|
+
* Embed image using CLIP vision encoder
|
|
124
|
+
*
|
|
125
|
+
* Uses CLIPVisionModelWithProjection to generate image embeddings in the same
|
|
126
|
+
* unified embedding space as text embeddings, enabling true cross-modal search.
|
|
127
|
+
*
|
|
128
|
+
* Supports both local file paths and URLs. Images are automatically preprocessed:
|
|
129
|
+
* - Resized to 224x224 pixels (CLIP's expected input size)
|
|
130
|
+
* - Converted to proper pixel_values format using AutoProcessor
|
|
131
|
+
* - Normalized for CLIP vision model
|
|
132
|
+
*
|
|
133
|
+
* Returns a 512-dimensional L2-normalized embedding vector directly comparable to text embeddings.
|
|
134
|
+
*
|
|
135
|
+
* @param imagePath - Local file path or URL to the image
|
|
136
|
+
* @returns EmbeddingResult with 512-dimensional normalized vector and metadata
|
|
137
|
+
* @throws {Error} If the image is not found, its format is unsupported, or embedding fails
|
|
138
|
+
*
|
|
139
|
+
* @example
|
|
140
|
+
* ```typescript
|
|
141
|
+
* // Local file
|
|
142
|
+
* const result = await embedder.embedImage('./car.jpg');
|
|
143
|
+
*
|
|
144
|
+
* // URL
|
|
145
|
+
* const urlResult = await embedder.embedImage('https://example.com/car.jpg');
|
|
146
|
+
*
|
|
147
|
+
* console.log(result.vector.length); // 512
|
|
148
|
+
* console.log(result.contentType); // 'image'
|
|
149
|
+
* ```
|
|
150
|
+
*
|
|
151
|
+
* Supported formats: PNG, JPEG, GIF, BMP, WebP
|
|
152
|
+
*/
|
|
153
|
+
embedImage(imagePath: string): Promise<EmbeddingResult>;
|
|
154
|
+
/**
|
|
155
|
+
* Load and preprocess image for CLIP vision model
|
|
156
|
+
*
|
|
157
|
+
* Handles image loading from both local files and URLs with automatic format
|
|
158
|
+
* detection and preprocessing. Uses Sharp library when available for better
|
|
159
|
+
* Node.js support, and falls back to RawImage for browser compatibility.
|
|
160
|
+
*
|
|
161
|
+
* Preprocessing steps:
|
|
162
|
+
* 1. Load image from path or URL
|
|
163
|
+
* 2. Resize to 224x224 pixels (CLIP's expected input size)
|
|
164
|
+
* 3. Convert to RGB format if needed
|
|
165
|
+
* 4. Return RawImage object for AutoProcessor
|
|
166
|
+
*
|
|
167
|
+
* @param imagePath - Local file path or URL to the image
|
|
168
|
+
* @returns RawImage object ready for AutoProcessor
|
|
169
|
+
* @throws {Error} If image loading or preprocessing fails
|
|
170
|
+
* @private
|
|
171
|
+
*/
|
|
172
|
+
private loadAndPreprocessImage;
|
|
173
|
+
/**
|
|
174
|
+
* Optimized batch processing for CLIP models
|
|
175
|
+
*
|
|
176
|
+
* Processes mixed batches of text and image content efficiently using the
|
|
177
|
+
* BatchProcessingOptimizer for memory management and progress tracking.
|
|
178
|
+
*
|
|
179
|
+
* Features:
|
|
180
|
+
* - Automatic separation of text and image items
|
|
181
|
+
* - Memory-efficient processing for large batches
|
|
182
|
+
* - Progress reporting for batches > 20 items
|
|
183
|
+
* - Garbage collection between batches
|
|
184
|
+
* - Detailed statistics logging
|
|
185
|
+
*
|
|
186
|
+
* @param batch - Array of items with content, contentType, and optional metadata
|
|
187
|
+
* @returns Array of EmbeddingResult objects in the same order as input
|
|
188
|
+
* @throws {Error} If batch processing fails
|
|
189
|
+
* @protected
|
|
190
|
+
*/
|
|
191
|
+
protected processBatch(batch: Array<{
|
|
192
|
+
content: string;
|
|
193
|
+
contentType: string;
|
|
194
|
+
metadata?: Record<string, any>;
|
|
195
|
+
}>): Promise<EmbeddingResult[]>;
|
|
196
|
+
/**
|
|
197
|
+
* Process batch of text items using CLIPTextModelWithProjection
|
|
198
|
+
*
|
|
199
|
+
* Efficiently processes multiple text items by tokenizing all texts first,
|
|
200
|
+
* then generating embeddings sequentially. This approach balances memory
|
|
201
|
+
* usage with processing speed.
|
|
202
|
+
*
|
|
203
|
+
* @param textItems - Array of text items to process
|
|
204
|
+
* @returns Array of EmbeddingResult objects
|
|
205
|
+
* @throws {Error} If batch processing fails or dimension mismatch occurs
|
|
206
|
+
* @private
|
|
207
|
+
*/
|
|
208
|
+
private processBatchText;
|
|
209
|
+
/**
|
|
210
|
+
* Get comprehensive model information including CLIP-specific capabilities
|
|
211
|
+
*
|
|
212
|
+
* Extends base model info with CLIP-specific capabilities including multimodal
|
|
213
|
+
* support, zero-shot classification, and cross-modal retrieval features.
|
|
214
|
+
*
|
|
215
|
+
* @returns Object with model information and capabilities
|
|
216
|
+
*/
|
|
217
|
+
getModelInfo(): {
|
|
218
|
+
capabilities: {
|
|
219
|
+
supportsMultimodal: boolean;
|
|
220
|
+
supportsZeroShotClassification: boolean;
|
|
221
|
+
supportsImageTextSimilarity: boolean;
|
|
222
|
+
supportsTextImageRetrieval: boolean;
|
|
223
|
+
recommendedUseCase: string;
|
|
224
|
+
imageEmbeddingStatus: string;
|
|
225
|
+
supportsText: boolean;
|
|
226
|
+
supportsImages: boolean;
|
|
227
|
+
supportsBatchProcessing: boolean;
|
|
228
|
+
supportsMetadata: boolean;
|
|
229
|
+
maxBatchSize?: number;
|
|
230
|
+
maxTextLength?: number;
|
|
231
|
+
supportedImageFormats?: readonly string[];
|
|
232
|
+
supportsCrossModalSearch?: boolean;
|
|
233
|
+
unifiedEmbeddingSpace?: boolean;
|
|
234
|
+
reliableImplementation?: boolean;
|
|
235
|
+
};
|
|
236
|
+
name: string;
|
|
237
|
+
type: import("../core/universal-embedder.js").ModelType;
|
|
238
|
+
dimensions: number;
|
|
239
|
+
version: string;
|
|
240
|
+
supportedContentTypes: readonly string[];
|
|
241
|
+
requirements: import("../types.js").ModelRequirements;
|
|
242
|
+
};
|
|
243
|
+
/**
|
|
244
|
+
* Check if the model is suitable for a specific task
|
|
245
|
+
*
|
|
246
|
+
* CLIP models excel at similarity, classification, retrieval, and multimodal
|
|
247
|
+
* tasks due to their unified embedding space and zero-shot capabilities.
|
|
248
|
+
*
|
|
249
|
+
* @param task - The task type to check
|
|
250
|
+
* @returns true if CLIP is suitable for the task, false otherwise
|
|
251
|
+
*/
|
|
252
|
+
isSuitableForTask(task: 'similarity' | 'classification' | 'clustering' | 'retrieval' | 'multimodal'): boolean;
|
|
253
|
+
/**
|
|
254
|
+
* Get information about multimodal capabilities
|
|
255
|
+
*
|
|
256
|
+
* Returns detailed information about what content types are supported and
|
|
257
|
+
* what features are planned for future implementation.
|
|
258
|
+
*
|
|
259
|
+
* @returns Object describing multimodal support status
|
|
260
|
+
*/
|
|
261
|
+
getMultimodalCapabilities(): {
|
|
262
|
+
textSupport: boolean;
|
|
263
|
+
imageSupport: boolean;
|
|
264
|
+
videoSupport: boolean;
|
|
265
|
+
audioSupport: boolean;
|
|
266
|
+
plannedFeatures: string[];
|
|
267
|
+
};
|
|
268
|
+
/**
|
|
269
|
+
* Get CLIP model variant information
|
|
270
|
+
*
|
|
271
|
+
* Extracts architecture details from the model name to provide variant-specific
|
|
272
|
+
* configuration parameters like patch size, image size, and text length limits.
|
|
273
|
+
*
|
|
274
|
+
* @returns Object with architecture details
|
|
275
|
+
*/
|
|
276
|
+
getModelVariant(): {
|
|
277
|
+
architecture: string;
|
|
278
|
+
patchSize: number;
|
|
279
|
+
imageSize: number;
|
|
280
|
+
textMaxLength: number;
|
|
281
|
+
};
|
|
282
|
+
/**
|
|
283
|
+
* Check if text length is within CLIP's token limit
|
|
284
|
+
*
|
|
285
|
+
* Estimates token count based on character length (rough approximation of
|
|
286
|
+
* ~4 characters per token for English text). CLIP has a hard limit of 77 tokens.
|
|
287
|
+
*
|
|
288
|
+
* @param text - Text to validate
|
|
289
|
+
* @returns true if text is within token limit, false otherwise
|
|
290
|
+
*/
|
|
291
|
+
isTextLengthValid(text: string): boolean;
|
|
292
|
+
/**
|
|
293
|
+
* Get performance characteristics for this CLIP variant
|
|
294
|
+
*
|
|
295
|
+
* Provides guidance on speed, accuracy, memory usage, and recommended batch
|
|
296
|
+
* sizes based on the CLIP model variant (patch32 vs patch16).
|
|
297
|
+
*
|
|
298
|
+
* @returns Object with performance characteristics
|
|
299
|
+
*/
|
|
300
|
+
getPerformanceInfo(): {
|
|
301
|
+
speed: 'fast' | 'medium' | 'slow';
|
|
302
|
+
accuracy: 'good' | 'better' | 'best';
|
|
303
|
+
memoryUsage: 'low' | 'medium' | 'high';
|
|
304
|
+
recommendedBatchSize: number;
|
|
305
|
+
};
|
|
306
|
+
/**
|
|
307
|
+
* Check if all CLIP model components are loaded
|
|
308
|
+
*
|
|
309
|
+
* Verifies that tokenizer, text model, and vision model are all loaded and
|
|
310
|
+
* ready for use. All three components must be available for the embedder
|
|
311
|
+
* to be considered fully loaded.
|
|
312
|
+
*
|
|
313
|
+
* @returns true if all components are loaded, false otherwise
|
|
314
|
+
*/
|
|
315
|
+
isLoaded(): boolean;
|
|
316
|
+
/**
|
|
317
|
+
* Validate that this is a supported CLIP model
|
|
318
|
+
*
|
|
319
|
+
* Checks the model name against the list of supported CLIP models. Currently
|
|
320
|
+
* supports Xenova/clip-vit-base-patch32 and Xenova/clip-vit-base-patch16.
|
|
321
|
+
*
|
|
322
|
+
* @throws {Error} If model is not in the supported list
|
|
323
|
+
* @private
|
|
324
|
+
*/
|
|
325
|
+
private validateCLIPModel;
|
|
326
|
+
}
|
|
327
|
+
//# sourceMappingURL=clip-embedder.d.ts.map
|