rag-lite-ts 2.1.1 → 2.3.0
This diff shows the changes between publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +88 -5
- package/dist/{cli → cjs/cli}/indexer.js +73 -15
- package/dist/cjs/cli/ui-server.d.ts +5 -0
- package/dist/cjs/cli/ui-server.js +152 -0
- package/dist/{cli.js → cjs/cli.js} +25 -6
- package/dist/{core → cjs/core}/binary-index-format.js +6 -3
- package/dist/{core → cjs/core}/db.d.ts +56 -0
- package/dist/{core → cjs/core}/db.js +105 -0
- package/dist/{core → cjs/core}/ingestion.js +3 -0
- package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
- package/dist/cjs/core/knowledge-base-manager.js +256 -0
- package/dist/{core → cjs/core}/model-validator.js +1 -1
- package/dist/{core → cjs/core}/search-pipeline.js +1 -1
- package/dist/{core → cjs/core}/search.js +1 -1
- package/dist/cjs/core/vector-index-messages.d.ts +52 -0
- package/dist/cjs/core/vector-index-messages.js +5 -0
- package/dist/cjs/core/vector-index-worker.d.ts +6 -0
- package/dist/cjs/core/vector-index-worker.js +304 -0
- package/dist/cjs/core/vector-index.d.ts +107 -0
- package/dist/cjs/core/vector-index.js +344 -0
- package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
- package/dist/{factories → cjs/factories}/search-factory.js +11 -0
- package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
- package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
- package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
- package/dist/{index.js → cjs/index.js} +3 -1
- package/dist/esm/api-errors.d.ts +90 -0
- package/dist/esm/api-errors.js +320 -0
- package/dist/esm/cli/indexer.d.ts +11 -0
- package/dist/esm/cli/indexer.js +529 -0
- package/dist/esm/cli/search.d.ts +7 -0
- package/dist/esm/cli/search.js +332 -0
- package/dist/esm/cli/ui-server.d.ts +5 -0
- package/dist/esm/cli/ui-server.js +152 -0
- package/dist/esm/cli.d.ts +3 -0
- package/dist/esm/cli.js +548 -0
- package/dist/esm/config.d.ts +51 -0
- package/dist/esm/config.js +79 -0
- package/dist/esm/core/abstract-embedder.d.ts +125 -0
- package/dist/esm/core/abstract-embedder.js +264 -0
- package/dist/esm/core/actionable-error-messages.d.ts +60 -0
- package/dist/esm/core/actionable-error-messages.js +397 -0
- package/dist/esm/core/adapters.d.ts +93 -0
- package/dist/esm/core/adapters.js +139 -0
- package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
- package/dist/esm/core/batch-processing-optimizer.js +536 -0
- package/dist/esm/core/binary-index-format.d.ts +78 -0
- package/dist/esm/core/binary-index-format.js +294 -0
- package/dist/esm/core/chunker.d.ts +119 -0
- package/dist/esm/core/chunker.js +73 -0
- package/dist/esm/core/cli-database-utils.d.ts +53 -0
- package/dist/esm/core/cli-database-utils.js +239 -0
- package/dist/esm/core/config.d.ts +102 -0
- package/dist/esm/core/config.js +247 -0
- package/dist/esm/core/content-errors.d.ts +111 -0
- package/dist/esm/core/content-errors.js +362 -0
- package/dist/esm/core/content-manager.d.ts +335 -0
- package/dist/esm/core/content-manager.js +1476 -0
- package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
- package/dist/esm/core/content-performance-optimizer.js +516 -0
- package/dist/esm/core/content-resolver.d.ts +104 -0
- package/dist/esm/core/content-resolver.js +285 -0
- package/dist/esm/core/cross-modal-search.d.ts +164 -0
- package/dist/esm/core/cross-modal-search.js +342 -0
- package/dist/esm/core/database-connection-manager.d.ts +109 -0
- package/dist/esm/core/database-connection-manager.js +310 -0
- package/dist/esm/core/db.d.ts +269 -0
- package/dist/esm/core/db.js +1000 -0
- package/dist/esm/core/embedder-factory.d.ts +154 -0
- package/dist/esm/core/embedder-factory.js +311 -0
- package/dist/esm/core/error-handler.d.ts +112 -0
- package/dist/esm/core/error-handler.js +239 -0
- package/dist/esm/core/index.d.ts +59 -0
- package/dist/esm/core/index.js +69 -0
- package/dist/esm/core/ingestion.d.ts +202 -0
- package/dist/esm/core/ingestion.js +904 -0
- package/dist/esm/core/interfaces.d.ts +408 -0
- package/dist/esm/core/interfaces.js +106 -0
- package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
- package/dist/esm/core/knowledge-base-manager.js +256 -0
- package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
- package/dist/esm/core/lazy-dependency-loader.js +435 -0
- package/dist/esm/core/mode-detection-service.d.ts +150 -0
- package/dist/esm/core/mode-detection-service.js +565 -0
- package/dist/esm/core/mode-model-validator.d.ts +92 -0
- package/dist/esm/core/mode-model-validator.js +203 -0
- package/dist/esm/core/model-registry.d.ts +116 -0
- package/dist/esm/core/model-registry.js +411 -0
- package/dist/esm/core/model-validator.d.ts +217 -0
- package/dist/esm/core/model-validator.js +782 -0
- package/dist/esm/core/path-manager.d.ts +47 -0
- package/dist/esm/core/path-manager.js +71 -0
- package/dist/esm/core/raglite-paths.d.ts +121 -0
- package/dist/esm/core/raglite-paths.js +145 -0
- package/dist/esm/core/reranking-config.d.ts +42 -0
- package/dist/esm/core/reranking-config.js +147 -0
- package/dist/esm/core/reranking-factory.d.ts +92 -0
- package/dist/esm/core/reranking-factory.js +410 -0
- package/dist/esm/core/reranking-strategies.d.ts +310 -0
- package/dist/esm/core/reranking-strategies.js +650 -0
- package/dist/esm/core/resource-cleanup.d.ts +163 -0
- package/dist/esm/core/resource-cleanup.js +371 -0
- package/dist/esm/core/resource-manager.d.ts +212 -0
- package/dist/esm/core/resource-manager.js +564 -0
- package/dist/esm/core/search-pipeline.d.ts +111 -0
- package/dist/esm/core/search-pipeline.js +287 -0
- package/dist/esm/core/search.d.ts +141 -0
- package/dist/esm/core/search.js +320 -0
- package/dist/esm/core/streaming-operations.d.ts +145 -0
- package/dist/esm/core/streaming-operations.js +409 -0
- package/dist/esm/core/types.d.ts +66 -0
- package/dist/esm/core/types.js +6 -0
- package/dist/esm/core/universal-embedder.d.ts +177 -0
- package/dist/esm/core/universal-embedder.js +139 -0
- package/dist/esm/core/validation-messages.d.ts +99 -0
- package/dist/esm/core/validation-messages.js +334 -0
- package/dist/esm/core/vector-index-messages.d.ts +52 -0
- package/dist/esm/core/vector-index-messages.js +5 -0
- package/dist/esm/core/vector-index-worker.d.ts +6 -0
- package/dist/esm/core/vector-index-worker.js +304 -0
- package/dist/esm/core/vector-index.d.ts +107 -0
- package/dist/esm/core/vector-index.js +344 -0
- package/dist/esm/dom-polyfills.d.ts +6 -0
- package/dist/esm/dom-polyfills.js +37 -0
- package/dist/esm/factories/index.d.ts +27 -0
- package/dist/esm/factories/index.js +29 -0
- package/dist/esm/factories/ingestion-factory.d.ts +200 -0
- package/dist/esm/factories/ingestion-factory.js +473 -0
- package/dist/esm/factories/search-factory.d.ts +154 -0
- package/dist/esm/factories/search-factory.js +355 -0
- package/dist/esm/file-processor.d.ts +147 -0
- package/dist/esm/file-processor.js +963 -0
- package/dist/esm/index-manager.d.ts +136 -0
- package/dist/esm/index-manager.js +667 -0
- package/dist/esm/index.d.ts +76 -0
- package/dist/esm/index.js +112 -0
- package/dist/esm/indexer.d.ts +7 -0
- package/dist/esm/indexer.js +54 -0
- package/dist/esm/ingestion.d.ts +63 -0
- package/dist/esm/ingestion.js +124 -0
- package/dist/esm/mcp-server.d.ts +46 -0
- package/dist/esm/mcp-server.js +1820 -0
- package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
- package/dist/esm/multimodal/clip-embedder.js +996 -0
- package/dist/esm/multimodal/index.d.ts +6 -0
- package/dist/esm/multimodal/index.js +6 -0
- package/dist/esm/preprocess.d.ts +19 -0
- package/dist/esm/preprocess.js +203 -0
- package/dist/esm/preprocessors/index.d.ts +17 -0
- package/dist/esm/preprocessors/index.js +38 -0
- package/dist/esm/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/preprocessors/mdx.js +101 -0
- package/dist/esm/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/preprocessors/mermaid.js +329 -0
- package/dist/esm/preprocessors/registry.d.ts +56 -0
- package/dist/esm/preprocessors/registry.js +179 -0
- package/dist/esm/run-error-recovery-tests.d.ts +7 -0
- package/dist/esm/run-error-recovery-tests.js +101 -0
- package/dist/esm/search-standalone.d.ts +7 -0
- package/dist/esm/search-standalone.js +117 -0
- package/dist/esm/search.d.ts +99 -0
- package/dist/esm/search.js +177 -0
- package/dist/esm/test-utils.d.ts +18 -0
- package/dist/esm/test-utils.js +27 -0
- package/dist/esm/text/chunker.d.ts +33 -0
- package/dist/esm/text/chunker.js +279 -0
- package/dist/esm/text/embedder.d.ts +111 -0
- package/dist/esm/text/embedder.js +386 -0
- package/dist/esm/text/index.d.ts +8 -0
- package/dist/esm/text/index.js +9 -0
- package/dist/esm/text/preprocessors/index.d.ts +17 -0
- package/dist/esm/text/preprocessors/index.js +38 -0
- package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
- package/dist/esm/text/preprocessors/mdx.js +101 -0
- package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
- package/dist/esm/text/preprocessors/mermaid.js +330 -0
- package/dist/esm/text/preprocessors/registry.d.ts +56 -0
- package/dist/esm/text/preprocessors/registry.js +180 -0
- package/dist/esm/text/reranker.d.ts +49 -0
- package/dist/esm/text/reranker.js +274 -0
- package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
- package/dist/esm/text/sentence-transformer-embedder.js +340 -0
- package/dist/esm/text/tokenizer.d.ts +22 -0
- package/dist/esm/text/tokenizer.js +64 -0
- package/dist/esm/types.d.ts +83 -0
- package/dist/esm/types.js +3 -0
- package/dist/esm/utils/vector-math.d.ts +31 -0
- package/dist/esm/utils/vector-math.js +70 -0
- package/package.json +39 -14
- package/dist/core/vector-index.d.ts +0 -72
- package/dist/core/vector-index.js +0 -331
- /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
- /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
- /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
- /package/dist/{cli → cjs/cli}/search.js +0 -0
- /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
- /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
- /package/dist/{config.js → cjs/config.js} +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
- /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
- /package/dist/{core → cjs/core}/adapters.js +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
- /package/dist/{core → cjs/core}/chunker.js +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
- /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
- /package/dist/{core → cjs/core}/config.d.ts +0 -0
- /package/dist/{core → cjs/core}/config.js +0 -0
- /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-errors.js +0 -0
- /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-manager.js +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
- /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
- /package/dist/{core → cjs/core}/content-resolver.js +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
- /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
- /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
- /package/dist/{core → cjs/core}/error-handler.js +0 -0
- /package/dist/{core → cjs/core}/index.d.ts +0 -0
- /package/dist/{core → cjs/core}/index.js +0 -0
- /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
- /package/dist/{core → cjs/core}/interfaces.js +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
- /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
- /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
- /package/dist/{core → cjs/core}/model-registry.js +0 -0
- /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/path-manager.js +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
- /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
- /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-config.js +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
- /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
- /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
- /package/dist/{core → cjs/core}/resource-manager.js +0 -0
- /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
- /package/dist/{core → cjs/core}/search.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
- /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
- /package/dist/{core → cjs/core}/types.d.ts +0 -0
- /package/dist/{core → cjs/core}/types.js +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
- /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
- /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
- /package/dist/{core → cjs/core}/validation-messages.js +0 -0
- /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
- /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
- /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/index.js +0 -0
- /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
- /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
- /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
- /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
- /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
- /package/dist/{indexer.js → cjs/indexer.js} +0 -0
- /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
- /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
- /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
- /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
- /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
- /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
- /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
- /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
- /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
- /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
- /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
- /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
- /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
- /package/dist/{search.js → cjs/search.js} +0 -0
- /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
- /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
- /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
- /package/dist/{text → cjs/text}/chunker.js +0 -0
- /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/embedder.js +0 -0
- /package/dist/{text → cjs/text}/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
- /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
- /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
- /package/dist/{text → cjs/text}/reranker.js +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
- /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
- /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
- /package/dist/{text → cjs/text}/tokenizer.js +0 -0
- /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
- /package/dist/{types.js → cjs/types.js} +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
- /package/dist/{utils → cjs/utils}/vector-math.js +0 -0

--- /dev/null
+++ package/dist/esm/multimodal/clip-embedder.js
@@ -0,0 +1,996 @@
+/**
+ * MULTIMODAL IMPLEMENTATION — CLIP Embedder Implementation
+ *
+ * Implements UniversalEmbedder interface for CLIP models with full multimodal support.
+ * Provides reliable text and image embedding using CLIPTextModelWithProjection and
+ * CLIPVisionModelWithProjection for true cross-modal search capabilities.
+ *
+ * Features:
+ * - Text embedding using CLIP text encoder (512-dimensional vectors)
+ * - Image embedding using CLIP vision encoder (512-dimensional vectors)
+ * - Unified embedding space enabling cross-modal similarity search
+ * - Text queries can find semantically similar images
+ * - Image queries can find semantically similar text
+ * - Batch processing optimization for both text and images
+ *
+ * Supported Models:
+ * - Xenova/clip-vit-base-patch32 (recommended, faster)
+ * - Xenova/clip-vit-base-patch16 (higher accuracy, slower)
+ */
+import { BaseUniversalEmbedder } from '../core/abstract-embedder.js';
+import { getResourceManager } from '../core/resource-manager.js';
+// =============================================================================
+// CLIP EMBEDDER IMPLEMENTATION
+// =============================================================================
+/**
+ * CLIP embedder implementation for multimodal content
+ *
+ * Provides reliable text and image embedding using separate CLIP model components:
+ * - CLIPTextModelWithProjection for text-only embedding (no pixel_values errors)
+ * - CLIPVisionModelWithProjection for image embedding
+ * - AutoTokenizer for proper text tokenization with CLIP's 77 token limit
+ *
+ * All embeddings are 512-dimensional vectors in a unified embedding space,
+ * enabling true cross-modal search where text queries can find images and
+ * image queries can find text based on semantic similarity.
+ *
+ * Example Usage:
+ * ```typescript
+ * const embedder = await createEmbedder('Xenova/clip-vit-base-patch32');
+ *
+ * // Embed text
+ * const textResult = await embedder.embedText('a red sports car');
+ *
+ * // Embed image
+ * const imageResult = await embedder.embedImage('./car.jpg');
+ *
+ * // Calculate cross-modal similarity
+ * const similarity = cosineSimilarity(textResult.vector, imageResult.vector);
+ * ```
+ */
+export class CLIPEmbedder extends BaseUniversalEmbedder {
+    tokenizer = null;
+    textModel = null;
+    imageModel = null; // Placeholder for future image support
+    resourceManager = getResourceManager();
+    embedderResourceId;
+    tokenizerResourceId;
+    textModelResourceId;
+    imageModelResourceId;
+    constructor(modelName, options = {}) {
+        super(modelName, options);
+        // Validate that this is a supported CLIP model
+        this.validateCLIPModel();
+        // Register this embedder with the resource manager
+        this.embedderResourceId = this.resourceManager.registerEmbedder(this);
+    }
+    // =============================================================================
+    // MODEL LIFECYCLE METHODS
+    // =============================================================================
+    /**
+     * Load the CLIP model components
+     *
+     * Loads three separate components for reliable multimodal embedding:
+     * 1. AutoTokenizer - Handles text tokenization with CLIP's 77 token limit
+     * 2. CLIPTextModelWithProjection - Generates text embeddings without pixel_values errors
+     * 3. CLIPVisionModelWithProjection - Generates image embeddings
+     *
+     * All components are registered with the resource manager for proper cleanup.
+     * Models are cached locally after first download for faster subsequent loads.
+     *
+     * @throws {Error} If model loading fails or components are not available
+     */
+    async loadModel() {
+        // Check if already loaded
+        if (this._isLoaded && this.textModel) {
+            return;
+        }
+        try {
+            this.logModelLoading('Loading CLIP model');
+            // Use the validated CLIPTextModelWithProjection approach instead of feature-extraction pipeline
+            const { AutoTokenizer, CLIPTextModelWithProjection, CLIPVisionModelWithProjection } = await import('@huggingface/transformers');
+            this.logModelLoading('Loading CLIP tokenizer and text model components');
+            // Load tokenizer and text model separately (validated approach from task 1.1)
+            if (!this.textModel) {
+                // Import config for cache path
+                const { config } = await import('../core/config.js');
+                // Load tokenizer
+                this.logModelLoading('Loading CLIP tokenizer...');
+                this.tokenizer = await AutoTokenizer.from_pretrained(this.modelName, {
+                    cache_dir: config.model_cache_path,
+                    local_files_only: false,
+                    progress_callback: (progress) => {
+                        if (progress.status === 'downloading') {
+                            this.logModelLoading(`Downloading tokenizer: ${Math.round(progress.progress || 0)}%`);
+                        }
+                    }
+                });
+                // Load text model using CLIPTextModelWithProjection
+                this.logModelLoading('Loading CLIP text model...');
+                this.textModel = await CLIPTextModelWithProjection.from_pretrained(this.modelName, {
+                    cache_dir: config.model_cache_path,
+                    local_files_only: false,
+                    dtype: 'fp32',
+                    progress_callback: (progress) => {
+                        if (progress.status === 'downloading') {
+                            this.logModelLoading(`Downloading text model: ${Math.round(progress.progress || 0)}%`);
+                        }
+                    }
+                });
+                // Load vision model using CLIPVisionModelWithProjection for image embedding
+                this.logModelLoading('Loading CLIP vision model...');
+                this.imageModel = await CLIPVisionModelWithProjection.from_pretrained(this.modelName, {
+                    cache_dir: config.model_cache_path,
+                    local_files_only: false,
+                    dtype: 'fp32',
+                    progress_callback: (progress) => {
+                        if (progress.status === 'downloading') {
+                            this.logModelLoading(`Downloading vision model: ${Math.round(progress.progress || 0)}%`);
+                        }
+                    }
+                });
+            }
+            // Register the text model with resource manager if not already registered
+            if (!this.textModelResourceId) {
+                this.textModelResourceId = this.resourceManager.registerModel(this.textModel, this.modelName, 'clip-text');
+            }
+            // Register the image model with resource manager if not already registered
+            if (!this.imageModelResourceId && this.imageModel) {
+                this.imageModelResourceId = this.resourceManager.registerModel(this.imageModel, this.modelName, 'clip-vision');
+            }
+            // Verify models are actually loaded
+            if (this.textModel && this.imageModel) {
+                this._isLoaded = true;
+                this.logModelLoading('CLIP text and vision models loaded successfully');
+            }
+            else {
+                const missingModels = [];
+                if (!this.textModel)
+                    missingModels.push('text model');
+                if (!this.imageModel)
+                    missingModels.push('vision model');
+                throw new Error(`CLIP model loading failed - ${missingModels.join(' and ')} ${missingModels.length === 1 ? 'is' : 'are'} null`);
+            }
+        }
+        catch (error) {
+            // Reset state on failure
+            this._isLoaded = false;
+            this.textModel = null;
+            throw error;
+        }
+    }
+    /**
+     * Clean up model resources with comprehensive disposal
+     *
+     * Properly disposes of all CLIP model components:
+     * - Tokenizer resources
+     * - Text model resources
+     * - Vision model resources
+     *
+     * Uses the resource manager for coordinated cleanup and forces garbage
+     * collection to free memory from CLIP models which can be memory intensive.
+     *
+     * This method is safe to call multiple times and will not throw errors
+     * during cleanup - errors are logged but don't prevent cleanup completion.
+     */
+    async cleanup() {
+        let cleanupErrors = [];
+        try {
+            // Clean up tokenizer resources
+            if (this.tokenizer) {
+                try {
+                    // Use resource manager for proper cleanup
+                    if (this.tokenizerResourceId) {
+                        await this.resourceManager.cleanupResource(this.tokenizerResourceId);
+                        this.tokenizerResourceId = undefined;
+                    }
+                    // Clear tokenizer reference
+                    this.tokenizer = null;
+                    this.logModelLoading('CLIP tokenizer disposed');
+                }
+                catch (error) {
+                    const errorMsg = `Failed to dispose CLIP tokenizer: ${error instanceof Error ? error.message : 'Unknown error'}`;
+                    cleanupErrors.push(errorMsg);
+                    console.warn(errorMsg);
+                    // Force clear reference even if disposal failed
+                    this.tokenizer = null;
+                }
+            }
+            // Clean up text model resources
+            if (this.textModel) {
+                try {
+                    // Use resource manager for proper cleanup
+                    if (this.textModelResourceId) {
+                        await this.resourceManager.cleanupResource(this.textModelResourceId);
+                        this.textModelResourceId = undefined;
+                    }
+                    // Clear model reference
+                    this.textModel = null;
+                    this.logModelLoading('CLIP text model disposed');
+                }
+                catch (error) {
+                    const errorMsg = `Failed to dispose CLIP text model: ${error instanceof Error ? error.message : 'Unknown error'}`;
+                    cleanupErrors.push(errorMsg);
+                    console.warn(errorMsg);
+                    // Force clear reference even if disposal failed
+                    this.textModel = null;
+                }
+            }
+            // Clean up image model resources (when implemented)
+            if (this.imageModel) {
+                try {
+                    // Use resource manager for proper cleanup
+                    if (this.imageModelResourceId) {
+                        await this.resourceManager.cleanupResource(this.imageModelResourceId);
+                        this.imageModelResourceId = undefined;
+                    }
+                    // Clear model reference
+                    this.imageModel = null;
+                    this.logModelLoading('CLIP image model disposed');
+                }
+                catch (error) {
+                    const errorMsg = `Failed to dispose CLIP image model: ${error instanceof Error ? error.message : 'Unknown error'}`;
+                    cleanupErrors.push(errorMsg);
+                    console.warn(errorMsg);
+                    // Force clear reference even if disposal failed
+                    this.imageModel = null;
+                }
+            }
+            // Clear embedder resource registration (don't call resource manager to avoid circular cleanup)
+            if (this.embedderResourceId) {
+                this.embedderResourceId = undefined;
+            }
+        }
+        finally {
+            // Always clear loaded state regardless of cleanup success
+            this._isLoaded = false;
+            // Remove from lazy loading cache to ensure fresh instances
+            try {
+                const { LazyEmbedderLoader } = await import('../core/lazy-dependency-loader.js');
+                LazyEmbedderLoader.removeEmbedderFromCache(this.modelName, 'clip');
+            }
+            catch (error) {
+                console.warn('Failed to remove embedder from cache:', error);
+            }
+            // Force garbage collection for CLIP models (they can be memory intensive)
+            if (global.gc) {
+                global.gc();
+                this.logModelLoading('Forced garbage collection after CLIP model cleanup');
+            }
+            // Log cleanup completion
+            if (cleanupErrors.length === 0) {
+                this.logModelLoading('CLIP model resources cleaned up successfully');
+            }
+            else {
+                this.logModelLoading(`CLIP model cleanup completed with ${cleanupErrors.length} errors`);
+                // Don't throw errors during cleanup - just log them
+            }
+        }
+    }
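
`loadModel()` and `cleanup()` bracket the embedder's lifecycle: components are lazily downloaded and registered on first load, and callers are expected to release them when done. A minimal lifecycle sketch, assuming the `createEmbedder` factory named in the class JSDoc above resolves to a loaded `CLIPEmbedder` (its exact options are not shown in this diff):

```typescript
// Sketch under the assumptions above — not a verbatim API from this diff.
const embedder = await createEmbedder('Xenova/clip-vit-base-patch32');
try {
    const result = await embedder.embedText('a red sports car');
    console.log(result.vector.length); // 512
} finally {
    // cleanup() is safe to call repeatedly; disposal errors are logged, not thrown.
    await embedder.cleanup();
}
```
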
+    // =============================================================================
+    // NORMALIZATION UTILITIES
+    // =============================================================================
+    /**
+     * Apply L2-normalization to an embedding vector
+     *
+     * L2-normalization ensures that all embeddings have unit length (magnitude = 1),
+     * which is essential for CLIP models as they were trained with normalized embeddings.
+     * This normalization makes cosine similarity calculations more reliable and ensures
+     * that vector magnitudes don't affect similarity scores.
+     *
+     * @param embedding - The embedding vector to normalize (modified in-place)
+     * @returns The normalized embedding vector (same reference as input)
+     * @private
+     */
+    normalizeEmbedding(embedding) {
+        // Calculate L2 norm (magnitude)
+        const magnitude = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+        // Avoid division by zero
+        if (magnitude > 0) {
+            // Normalize each component by dividing by magnitude
+            for (let i = 0; i < embedding.length; i++) {
+                embedding[i] /= magnitude;
+            }
+        }
+        return embedding;
+    }
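
Because `normalizeEmbedding` scales every vector to unit length, the cosine similarity cos(a, b) = (a · b) / (‖a‖ ‖b‖) collapses to a plain dot product. The `cosineSimilarity` helper referenced in the class JSDoc is not defined in this file (the package's `utils/vector-math` module is a plausible home, but its signature is not shown in this diff); a hypothetical sketch of what it reduces to for these normalized vectors:

```typescript
// Hypothetical helper, not part of clip-embedder.js. For unit-length vectors
// the dot product and the cosine similarity are the same number.
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
    let dot = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i]; // accumulate a · b
    }
    return dot; // in [-1, 1]; higher means semantically closer
}
```
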
+    // =============================================================================
+    // TEXT EMBEDDING METHODS
+    // =============================================================================
+    /**
+     * Embed text using CLIP text encoder
+     *
+     * Uses CLIPTextModelWithProjection for reliable text-only embedding without
+     * pixel_values errors. Text is tokenized with CLIP's 77 token limit and
+     * automatically truncated if necessary.
+     *
+     * Returns a 512-dimensional L2-normalized embedding vector in the unified CLIP
+     * embedding space, which is directly comparable to image embeddings for cross-modal search.
+     *
+     * @param text - The text to embed (will be trimmed and validated)
+     * @returns EmbeddingResult with 512-dimensional normalized vector and metadata
+     * @throws {Error} If text is empty, model not loaded, or embedding fails
+     *
+     * @example
+     * ```typescript
+     * const result = await embedder.embedText('a red sports car');
+     * console.log(result.vector.length); // 512
+     * console.log(result.contentType); // 'text'
+     * ```
+     */
+    async embedText(text) {
+        // Enhanced input validation and preprocessing
+        if (typeof text !== 'string') {
+            throw new Error('Input must be a string');
+        }
+        const processedText = text.trim();
+        if (processedText.length === 0) {
+            throw new Error('Empty text provided to CLIP embedder');
+        }
+        this.ensureLoaded();
+        // Update resource usage tracking
+        if (this.embedderResourceId) {
+            this.resourceManager.updateResourceUsage(this.embedderResourceId);
+        }
+        if (this.textModelResourceId) {
+            this.resourceManager.updateResourceUsage(this.textModelResourceId);
+        }
+        if (!this.textModel || !this.tokenizer) {
+            throw new Error('CLIP text model or tokenizer not initialized');
+        }
+        try {
+            // Use the validated CLIPTextModelWithProjection approach (no pixel_values errors)
+            // Tokenize text with CLIP's requirements
+            // The tokenizer handles truncation at 77 TOKENS (not characters)
+            const tokens = await this.tokenizer(processedText, {
+                padding: true,
+                truncation: true,
+                max_length: 77, // CLIP's text sequence length limit (77 tokens)
+                return_tensors: 'pt'
+            });
+            // Log token information for debugging (only in development)
+            if (process.env.NODE_ENV === 'development') {
+                const tokenIds = tokens.input_ids?.data || [];
+                const actualTokenCount = Array.from(tokenIds).filter((id) => id !== 0).length;
+                if (actualTokenCount >= 77) {
+                    console.warn(`Text truncated by tokenizer: "${processedText.substring(0, 50)}..." (truncated to 77 tokens)`);
+                }
+            }
+            // Generate text embedding using CLIPTextModelWithProjection
+            const output = await this.textModel(tokens);
+            // Extract embedding from text_embeds (no pixel_values dependency)
+            const embedding = new Float32Array(output.text_embeds.data);
+            // Validate embedding dimensions and values
+            if (embedding.length !== this.dimensions) {
+                throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
+            }
+            // Validate that all values are finite numbers
+            const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
+            if (invalidValues.length > 0) {
+                throw new Error(`CLIP embedding contains ${invalidValues.length} invalid values`);
+            }
+            // Validate embedding quality - should not be all zeros
+            const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
+            if (nonZeroValues.length === 0) {
+                throw new Error('CLIP embedding is all zeros');
+            }
+            // Calculate embedding magnitude before normalization for quality assessment
+            const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+            if (magnitudeBeforeNorm < 1e-6) {
+                throw new Error(`CLIP embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
+            }
+            // Apply L2-normalization (CLIP models are trained with normalized embeddings)
+            this.normalizeEmbedding(embedding);
+            // Verify normalization was successful
+            const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+            if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
+                console.warn(`Warning: Embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
+            }
+            // Log text embedding generation
+            console.log(`[CLIP] Generated text embedding for: "${processedText.substring(0, 30)}${processedText.length > 30 ? '...' : ''}"`);
+            // Generate unique embedding ID
+            const embeddingId = this.generateEmbeddingId(processedText, 'text');
+            return {
+                embedding_id: embeddingId,
+                vector: embedding,
+                contentType: 'text',
+                metadata: {
+                    originalText: text,
+                    processedText: processedText,
+                    textLength: processedText.length,
+                    embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
+                    embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
+                    normalized: true,
+                    modelName: this.modelName,
+                    modelType: this.modelType,
+                    dimensions: this.dimensions
+                }
+            };
+        }
+        catch (error) {
+            throw error;
+        }
+    }
+    // =============================================================================
+    // IMAGE EMBEDDING METHODS
+    // =============================================================================
+    /**
+     * Embed image using CLIP vision encoder
+     *
+     * Uses CLIPVisionModelWithProjection to generate image embeddings in the same
+     * unified embedding space as text embeddings, enabling true cross-modal search.
+     *
+     * Supports both local file paths and URLs. Images are automatically preprocessed:
+     * - Resized to 224x224 pixels (CLIP's expected input size)
+     * - Converted to proper pixel_values format using AutoProcessor
+     * - Normalized for CLIP vision model
+     *
+     * Returns a 512-dimensional L2-normalized embedding vector directly comparable to text embeddings.
+     *
+     * @param imagePath - Local file path or URL to the image
+     * @returns EmbeddingResult with 512-dimensional normalized vector and metadata
+     * @throws {Error} If image not found, unsupported format, or embedding fails
+     *
+     * @example
+     * ```typescript
+     * // Local file
+     * const result = await embedder.embedImage('./car.jpg');
+     *
+     * // URL
+     * const result = await embedder.embedImage('https://example.com/car.jpg');
+     *
+     * console.log(result.vector.length); // 512
+     * console.log(result.contentType); // 'image'
+     * ```
+     *
+     * Supported formats: PNG, JPEG, GIF, BMP, WebP
+     */
+    async embedImage(imagePath) {
+        // Enhanced input validation and preprocessing
+        if (typeof imagePath !== 'string') {
+            throw new Error('Image path must be a string');
+        }
+        const processedPath = imagePath.trim();
+        if (processedPath.length === 0) {
+            throw new Error('Image path cannot be empty');
+        }
+        // Validate that the model supports images
+        if (!this.supportedContentTypes.includes('image')) {
+            throw new Error(`Model '${this.modelName}' does not support image embeddings`);
+        }
+        this.ensureLoaded();
+        // Update resource usage tracking
+        if (this.embedderResourceId) {
+            this.resourceManager.updateResourceUsage(this.embedderResourceId);
+        }
+        if (this.imageModelResourceId) {
+            this.resourceManager.updateResourceUsage(this.imageModelResourceId);
+        }
+        if (!this.imageModel) {
+            throw new Error('CLIP vision model not initialized');
+        }
+        try {
+            // Load and preprocess image using transformers.js utilities
+            const image = await this.loadAndPreprocessImage(processedPath);
+            // Use AutoProcessor to convert image to proper pixel_values format
+            const { AutoProcessor } = await import('@huggingface/transformers');
+            const processor = await AutoProcessor.from_pretrained(this.modelName);
+            const processedInputs = await processor(image);
+            // Generate image embedding using CLIPVisionModelWithProjection
+            // The model expects pixel_values as input
+            const output = await this.imageModel(processedInputs);
+            // Extract embedding from image_embeds output (similar to text_embeds)
+            const embedding = new Float32Array(output.image_embeds.data);
+            // Validate embedding dimensions and values
+            if (embedding.length !== this.dimensions) {
+                throw new Error(`CLIP image embedding dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`);
+            }
+            // Validate that all values are finite numbers
+            const invalidValues = Array.from(embedding).filter(val => !isFinite(val) || isNaN(val));
+            if (invalidValues.length > 0) {
+                throw new Error(`CLIP image embedding contains ${invalidValues.length} invalid values`);
+            }
+            // Validate embedding quality - should not be all zeros
+            const nonZeroValues = Array.from(embedding).filter(val => Math.abs(val) > 1e-8);
+            if (nonZeroValues.length === 0) {
+                throw new Error('CLIP image embedding is all zeros');
+            }
+            // Calculate embedding magnitude before normalization for quality assessment
+            const magnitudeBeforeNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+            if (magnitudeBeforeNorm < 1e-6) {
+                throw new Error(`CLIP image embedding has critically low magnitude: ${magnitudeBeforeNorm.toExponential(3)}`);
+            }
+            // Apply L2-normalization (CLIP models are trained with normalized embeddings)
+            this.normalizeEmbedding(embedding);
+            // Verify normalization was successful
+            const magnitudeAfterNorm = Math.sqrt(Array.from(embedding).reduce((sum, val) => sum + val * val, 0));
+            if (Math.abs(magnitudeAfterNorm - 1.0) > 0.01) {
+                console.warn(`Warning: Image embedding normalization may be imprecise (magnitude: ${magnitudeAfterNorm.toFixed(6)})`);
+            }
+            // Generate unique embedding ID
+            const embeddingId = this.generateEmbeddingId(processedPath, 'image');
+            return {
+                embedding_id: embeddingId,
+                vector: embedding,
+                contentType: 'image',
+                metadata: {
+                    imagePath: processedPath,
+                    embeddingMagnitudeBeforeNorm: magnitudeBeforeNorm,
+                    embeddingMagnitudeAfterNorm: magnitudeAfterNorm,
+                    normalized: true,
+                    modelName: this.modelName,
+                    modelType: this.modelType,
+                    dimensions: this.dimensions
+                }
+            };
+        }
+        catch (error) {
+            if (error instanceof Error) {
+                // Provide more context for common errors
+                if (error.message.includes('ENOENT') || error.message.includes('no such file')) {
+                    throw new Error(`Image file not found: ${processedPath}`);
+                }
+                if (error.message.includes('unsupported format') || error.message.includes('invalid image')) {
+                    throw new Error(`Unsupported image format or corrupted file: ${processedPath}`);
+                }
+            }
+            throw error;
+        }
+    }
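
Because `embedText` and `embedImage` emit unit-length vectors in one shared 512-dimensional space, cross-modal search reduces to embedding the query on one side and ranking vectors from the other by similarity. A sketch using the two methods above and the `cosineSimilarity` helper sketched earlier:

```typescript
// Sketch only — assumes all vectors come from the same CLIPEmbedder instance,
// so text and image embeddings share one unit-length space.
const query = await embedder.embedText('a red sports car');
const images = await Promise.all(
    ['./car.jpg', './boat.jpg'].map(p => embedder.embedImage(p))
);
const ranked = images
    .map(r => ({ path: r.metadata.imagePath, score: cosineSimilarity(query.vector, r.vector) }))
    .sort((a, b) => b.score - a.score);
console.log(ranked[0].path); // the image closest to the text query
```
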
+    // =============================================================================
+    // IMAGE PREPROCESSING UTILITIES
+    // =============================================================================
+    /**
+     * Load and preprocess image for CLIP vision model
+     *
+     * Handles image loading from both local files and URLs with automatic format
+     * detection and preprocessing. Uses Sharp library when available for better
+     * Node.js support, falls back to RawImage for browser compatibility.
+     *
+     * Preprocessing steps:
+     * 1. Load image from path or URL
+     * 2. Resize to 224x224 pixels (CLIP's expected input size)
+     * 3. Convert to RGB format if needed
+     * 4. Return RawImage object for AutoProcessor
+     *
+     * @param imagePath - Local file path or URL to the image
+     * @returns RawImage object ready for AutoProcessor
+     * @throws {Error} If image loading or preprocessing fails
+     * @private
+     */
+    async loadAndPreprocessImage(imagePath) {
+        try {
+            // Import required utilities
+            const { RawImage } = await import('@huggingface/transformers');
+            const path = await import('path');
+            const fs = await import('fs');
+            // Get CLIP model variant info for preprocessing parameters
+            const variant = this.getModelVariant();
+            // Check if this is a URL or local file path
+            const isUrl = imagePath.startsWith('http://') || imagePath.startsWith('https://');
+            if (isUrl) {
+                // Load from URL using RawImage
+                // Temporarily suppress ALL console output to avoid logging base64 data
+                const originalConsoleLog = console.log;
+                const originalConsoleWarn = console.warn;
+                const originalConsoleInfo = console.info;
+                const originalConsoleError = console.error;
+                const originalConsoleDebug = console.debug;
+                try {
+                    // Suppress ALL console output during image loading
+                    console.log = () => { };
+                    console.warn = () => { };
+                    console.info = () => { };
+                    console.error = () => { };
+                    console.debug = () => { };
+                    const image = await RawImage.fromURL(imagePath);
+                    const processedImage = await image.resize(variant.imageSize, variant.imageSize);
+                    return processedImage;
+                }
+                finally {
+                    // Restore ALL console output
+                    console.log = originalConsoleLog;
+                    console.warn = originalConsoleWarn;
+                    console.info = originalConsoleInfo;
+                    console.error = originalConsoleError;
+                    console.debug = originalConsoleDebug;
+                }
+            }
+            // For local files, try Sharp first (if available), then fall back to RawImage
+            // Check if file exists
+            if (!fs.existsSync(imagePath)) {
+                throw new Error(`Image file not found: ${imagePath}`);
+            }
+            const absolutePath = path.resolve(imagePath);
+            // Try to use Sharp for better Node.js support
+            try {
+                const sharpModule = await import('sharp');
+                const sharp = sharpModule.default;
+                sharp.concurrency(2);
+                // Use Sharp to load and get raw pixel data
+                const { data, info } = await sharp(absolutePath)
+                    .resize(variant.imageSize, variant.imageSize, {
+                        fit: 'cover',
+                        position: 'center'
+                    })
+                    .raw()
+                    .toBuffer({ resolveWithObject: true });
+                // Create RawImage directly from pixel data (avoids data URL logging)
+                const { RawImage } = await import('@huggingface/transformers');
+                const image = new RawImage(new Uint8ClampedArray(data), info.width, info.height, info.channels);
+                return image;
+            }
+            catch (sharpError) {
+                // Sharp not available or failed, fall back to RawImage.read()
+                console.warn('Sharp not available, using RawImage fallback:', sharpError instanceof Error ? sharpError.message : 'Unknown error');
+                const image = await RawImage.read(absolutePath);
+                const processedImage = await image.resize(variant.imageSize, variant.imageSize);
+                return processedImage;
+            }
+        }
+        catch (error) {
+            if (error instanceof Error) {
+                // Provide helpful error messages for common issues
+                if (error.message.includes('fetch') || error.message.includes('Failed to load image')) {
+                    throw new Error(`Failed to load image from path: ${imagePath}. Ensure the path is correct and accessible.`);
+                }
+                if (error.message.includes('decode') || error.message.includes('IDAT') || error.message.includes('PNG')) {
+                    throw new Error(`Failed to decode image: ${imagePath}. The file may be corrupted or in an unsupported format. Supported formats: PNG, JPEG, GIF, BMP, WebP.`);
+                }
+                if (error.message.includes('not found') || error.message.includes('ENOENT')) {
+                    throw new Error(`Image file not found: ${imagePath}`);
+                }
+            }
+            throw new Error(`Image preprocessing failed for ${imagePath}: ${error instanceof Error ? error.message : 'Unknown error'}`);
+        }
+    }
+    // =============================================================================
+    // BATCH PROCESSING OPTIMIZATION
+    // =============================================================================
+    /**
+     * Optimized batch processing for CLIP models
+     *
+     * Processes mixed batches of text and image content efficiently using the
+     * BatchProcessingOptimizer for memory management and progress tracking.
+     *
+     * Features:
+     * - Automatic separation of text and image items
+     * - Memory-efficient processing for large batches
+     * - Progress reporting for batches > 20 items
+     * - Garbage collection between batches
+     * - Detailed statistics logging
+     *
+     * @param batch - Array of items with content, contentType, and optional metadata
+     * @returns Array of EmbeddingResult objects in the same order as input
+     * @throws {Error} If batch processing fails
+     * @protected
+     */
+    async processBatch(batch) {
+        this.ensureLoaded();
+        // Separate text and image items
+        const textItems = batch.filter(item => item.contentType === 'text');
+        const imageItems = batch.filter(item => item.contentType === 'image');
+        const results = [];
+        // Process text items with optimization
+        if (textItems.length > 0) {
+            // For small batches, use direct processing
+            if (textItems.length <= 5) {
+                const textResults = await this.processBatchText(textItems);
+                results.push(...textResults);
+            }
+            else {
+                // For larger batches, use BatchProcessingOptimizer
+                try {
+                    const { createTextBatchProcessor } = await import('../core/batch-processing-optimizer.js');
+                    const batchProcessor = createTextBatchProcessor();
+                    // Convert to EmbeddingBatchItem format
+                    // Let tokenizer handle truncation at 77 tokens (not characters)
+                    const batchItems = textItems.map(item => ({
+                        content: item.content.trim(),
+                        contentType: item.contentType,
+                        metadata: item.metadata
+                    }));
+                    // Create embed function that uses this CLIP embedder
+                    const embedFunction = async (item) => {
+                        const result = await this.embedText(item.content);
+                        // Validate dimensions
+                        if (result.vector.length !== this.dimensions) {
+                            throw new Error(`CLIP embedding dimension mismatch: expected ${this.dimensions}, got ${result.vector.length}`);
+                        }
+                        return result;
+                    };
+                    // Process with optimization and progress reporting
+                    const batchResult = await batchProcessor.processBatch(batchItems, embedFunction, (stats) => {
+                        if (stats.totalItems > 20) { // Log for moderate-sized batches
+                            console.log(`CLIP text embedding progress: ${stats.processedItems}/${stats.totalItems} (${Math.round((stats.processedItems / stats.totalItems) * 100)}%)`);
+                        }
+                    });
+                    // Log final statistics for larger batches
+                    if (batchResult.stats.totalItems > 20) {
+                        console.log(`✓ CLIP text embedding complete: ${batchResult.stats.processedItems} processed, ${batchResult.stats.failedItems} failed`);
+                        console.log(`  Processing time: ${Math.round(batchResult.stats.processingTimeMs / 1000)}s, Rate: ${Math.round(batchResult.stats.itemsPerSecond)} items/sec`);
+                        if (batchResult.stats.peakMemoryUsageMB > 100) {
+                            console.log(`  Peak memory usage: ${batchResult.stats.peakMemoryUsageMB}MB`);
+                        }
+                    }
+                    results.push(...batchResult.results);
+                }
+                catch (error) {
+                    console.error('Text batch processing failed:', error);
+                    throw error;
+                }
+            }
+        }
+        // Process image items with memory-efficient optimization (placeholder for future implementation)
+        if (imageItems.length > 0) {
+            console.warn(`Processing ${imageItems.length} image items - using placeholder implementation`);
+            // Future implementation will use createImageBatchProcessor() for memory-efficient image processing
+            try {
+                const { createImageBatchProcessor } = await import('../core/batch-processing-optimizer.js');
+                const imageBatchProcessor = createImageBatchProcessor();
+                // Convert to EmbeddingBatchItem format
+                const imageBatchItems = imageItems.map(item => ({
+                    content: item.content,
+                    contentType: item.contentType,
+                    metadata: item.metadata
+                }));
+                // Create placeholder embed function for images
+                const imageEmbedFunction = async (item) => {
+                    // TODO: Replace with actual image embedding when implemented
+                    console.warn(`Placeholder: Would embed image ${item.content}`);
+                    // Return placeholder result
+                    const zeroVector = new Float32Array(this.dimensions).fill(0);
+                    return {
+                        embedding_id: `image_placeholder_${Date.now()}_${Math.random()}`,
+                        vector: zeroVector,
+                        contentType: 'image'
+                    };
+                };
+                // Process with memory-efficient image batch processor
+                const imageBatchResult = await imageBatchProcessor.processBatch(imageBatchItems, imageEmbedFunction, (stats) => {
+                    console.log(`Image processing progress: ${stats.processedItems}/${stats.totalItems} (${Math.round((stats.processedItems / stats.totalItems) * 100)}%)`);
+                    console.log(`  Memory usage: ${stats.memoryUsageMB}MB (peak: ${stats.peakMemoryUsageMB}MB)`);
+                });
+                console.log(`✓ Image processing complete: ${imageBatchResult.stats.processedItems} processed`);
+                console.log(`  Memory efficiency: Peak usage ${imageBatchResult.stats.peakMemoryUsageMB}MB`);
+                results.push(...imageBatchResult.results);
+            }
+            catch (error) {
+                console.error('Image batch processing failed:', error);
+                throw error;
+            }
+        }
+        return results;
+    }
|
|
765
|
+
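For context, the processBatch contract used above takes an items array, an async per-item embed function, and an optional progress callback. A minimal caller-side sketch follows; the createBatchProcessor factory name, the import subpath, and the stand-in embed function are assumptions for illustration, since only processBatch and the batchResult.results / batchResult.stats fields appear in this diff:

    // ESM sketch; assumes a text-side counterpart to the createImageBatchProcessor
    // factory shown above, exported from the same batch-processing-optimizer module.
    const { createBatchProcessor } = await import('rag-lite-ts/dist/esm/core/batch-processing-optimizer.js');
    const processor = createBatchProcessor(); // hypothetical factory name
    const items = [{ content: 'hello world', contentType: 'text', metadata: {} }];
    // Stand-in embed function with the result shape used throughout this file
    const embedFn = async (item) => ({ embedding_id: 'demo', vector: new Float32Array(512), contentType: 'text' });
    const batchResult = await processor.processBatch(items, embedFn, (stats) => {
        console.log(`progress: ${stats.processedItems}/${stats.totalItems}`);
    });
    console.log(`${batchResult.stats.processedItems} embedded at ${Math.round(batchResult.stats.itemsPerSecond)} items/sec`);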
    /**
     * Process batch of text items using CLIPTextModelWithProjection
     *
     * Efficiently processes multiple text items by tokenizing all texts first,
     * then generating embeddings sequentially. This approach balances memory
     * usage with processing speed.
     *
     * @param textItems - Array of text items to process
     * @returns Array of EmbeddingResult objects
     * @throws {Error} If batch processing fails or dimension mismatch occurs
     * @private
     */
    async processBatchText(textItems) {
        // Prepare texts for batch processing
        // Let tokenizer handle truncation at 77 tokens (not characters)
        const texts = textItems.map(item => item.content.trim());
        // Tokenize all texts in batch
        const tokensBatch = await Promise.all(texts.map(text => this.tokenizer(text, {
            padding: true,
            truncation: true,
            max_length: 77, // CLIP's text sequence length limit
            return_tensors: 'pt'
        })));
        // Process each tokenized text through the CLIP text model
        const results = [];
        for (let i = 0; i < textItems.length; i++) {
            const item = textItems[i];
            const tokens = tokensBatch[i];
            // Generate embedding using CLIPTextModelWithProjection
            const output = await this.textModel(tokens);
            // Extract embedding from text_embeds (no pixel_values dependency)
            const embedding = new Float32Array(output.text_embeds.data);
            // Validate dimensions
            if (embedding.length !== this.dimensions) {
                throw new Error(`CLIP embedding dimension mismatch for item ${i}: expected ${this.dimensions}, got ${embedding.length}`);
            }
            // Apply L2-normalization (CLIP models are trained with normalized embeddings)
            this.normalizeEmbedding(embedding);
            const embeddingId = this.generateEmbeddingId(item.content, 'text');
            results.push({
                embedding_id: embeddingId,
                vector: embedding,
                contentType: 'text'
            });
        }
        return results;
    }
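The normalizeEmbedding call above is what lets downstream search treat cosine similarity as a plain dot product. Its body falls outside the lines shown in this diff; a typical in-place L2 normalization, as a sketch rather than the package's exact implementation, looks like:

    // In-place L2 normalization sketch: scale the vector to unit length.
    function normalizeEmbedding(vector) {
        let sumOfSquares = 0;
        for (let i = 0; i < vector.length; i++) {
            sumOfSquares += vector[i] * vector[i];
        }
        const norm = Math.sqrt(sumOfSquares);
        if (norm > 0) { // guard against the zero vector
            for (let i = 0; i < vector.length; i++) {
                vector[i] /= norm;
            }
        }
    }
    normalizeEmbedding(new Float32Array([3, 4])); // becomes [0.6, 0.8]: norm was 5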
    // =============================================================================
    // UTILITY METHODS
    // =============================================================================
    /**
     * Get comprehensive model information including CLIP-specific capabilities
     *
     * Extends base model info with CLIP-specific capabilities including multimodal
     * support, zero-shot classification, and cross-modal retrieval features.
     *
     * @returns Object with model information and capabilities
     */
    getModelInfo() {
        const baseInfo = super.getModelInfo();
        return {
            ...baseInfo,
            capabilities: {
                ...baseInfo.capabilities,
                // CLIP-specific capabilities
                supportsMultimodal: true,
                supportsZeroShotClassification: true,
                supportsImageTextSimilarity: true, // Fully implemented
                supportsTextImageRetrieval: true, // Fully implemented
                recommendedUseCase: 'multimodal similarity and zero-shot classification',
                imageEmbeddingStatus: 'implemented' // Image embedding is fully functional
            }
        };
    }
    /**
     * Check if the model is suitable for a specific task
     *
     * CLIP models excel at similarity, classification, retrieval, and multimodal
     * tasks due to their unified embedding space and zero-shot capabilities.
     *
     * @param task - The task type to check
     * @returns true if CLIP is suitable for the task, false otherwise
     */
    isSuitableForTask(task) {
        const supportedTasks = ['similarity', 'classification', 'retrieval', 'multimodal'];
        return supportedTasks.includes(task);
    }
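isSuitableForTask gives callers a cheap capability check before routing work to this embedder. An illustrative guard (embedder stands in for an instance of this class, whose construction lies outside this excerpt):

    // Only 'similarity', 'classification', 'retrieval', and 'multimodal' pass the check.
    if (embedder.isSuitableForTask('multimodal')) {
        // safe to mix text and image items in one batch
    }
    console.log(embedder.isSuitableForTask('retrieval'));  // true
    console.log(embedder.isSuitableForTask('generation')); // false: not in the supported list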
    /**
     * Get information about multimodal capabilities
     *
     * Returns detailed information about what content types are supported and
     * what features are planned for future implementation.
     *
     * @returns Object describing multimodal support status
     */
    getMultimodalCapabilities() {
        return {
            textSupport: true,
            imageSupport: true, // Now implemented
            videoSupport: false,
            audioSupport: false,
            plannedFeatures: [
                'Zero-shot image classification',
                'Advanced image preprocessing options',
                'Batch image processing optimization',
                'Video frame extraction and embedding'
            ]
        };
    }
    // =============================================================================
    // CLIP-SPECIFIC METHODS
    // =============================================================================
    /**
     * Get CLIP model variant information
     *
     * Extracts architecture details from the model name to provide variant-specific
     * configuration parameters like patch size, image size, and text length limits.
     *
     * @returns Object with architecture details
     */
    getModelVariant() {
        // Extract information from model name
        const modelName = this.modelName.toLowerCase();
        if (modelName.includes('patch32')) {
            return {
                architecture: 'ViT-B/32',
                patchSize: 32,
                imageSize: 224,
                textMaxLength: 77
            };
        }
        else if (modelName.includes('patch16')) {
            return {
                architecture: 'ViT-B/16',
                patchSize: 16,
                imageSize: 224,
                textMaxLength: 77
            };
        }
        else {
            // Default to patch32 if unclear
            return {
                architecture: 'ViT-B/32',
                patchSize: 32,
                imageSize: 224,
                textMaxLength: 77
            };
        }
    }
    /**
     * Check if text length is within CLIP's token limit
     *
     * Estimates token count based on character length (rough approximation of
     * ~4 characters per token for English text). CLIP has a hard limit of 77 tokens.
     *
     * @param text - Text to validate
     * @returns true if text is within token limit, false otherwise
     */
    isTextLengthValid(text) {
        const variant = this.getModelVariant();
        // Rough estimation: ~4 characters per token for English text
        const estimatedTokens = Math.ceil(text.length / 4);
        return estimatedTokens <= variant.textMaxLength;
    }
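The heuristic is deliberately coarse; a quick worked example shows where it draws the line (both supported variants report textMaxLength 77):

    // 'a photo of a cat' is 16 chars: ceil(16 / 4) = 4 estimated tokens, well under 77
    embedder.isTextLengthValid('a photo of a cat'); // true
    // A 320-character string: ceil(320 / 4) = 80 estimated tokens, over the 77 limit
    embedder.isTextLengthValid('x'.repeat(320));    // false
    // Note: the tokenizer in processBatchText still truncates at 77 real tokens,
    // so this check is advisory rather than a hard gate.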
    /**
     * Get performance characteristics for this CLIP variant
     *
     * Provides guidance on speed, accuracy, memory usage, and recommended batch
     * sizes based on the CLIP model variant (patch32 vs patch16).
     *
     * @returns Object with performance characteristics
     */
    getPerformanceInfo() {
        const variant = this.getModelVariant();
        if (variant.patchSize === 32) {
            return {
                speed: 'fast',
                accuracy: 'good',
                memoryUsage: 'medium',
                recommendedBatchSize: 8
            };
        }
        else if (variant.patchSize === 16) {
            return {
                speed: 'medium',
                accuracy: 'better',
                memoryUsage: 'high',
                recommendedBatchSize: 4
            };
        }
        else {
            return {
                speed: 'medium',
                accuracy: 'good',
                memoryUsage: 'medium',
                recommendedBatchSize: 6
            };
        }
    }
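getModelVariant and getPerformanceInfo are meant to be used together when sizing batches. An illustrative pairing (again, embedder stands in for an instance of this class):

    const variant = embedder.getModelVariant(); // e.g. { architecture: 'ViT-B/16', patchSize: 16, imageSize: 224, textMaxLength: 77 }
    const perf = embedder.getPerformanceInfo(); // for patch16: { speed: 'medium', accuracy: 'better', memoryUsage: 'high', recommendedBatchSize: 4 }
    console.log(`${variant.architecture}: batching ${perf.recommendedBatchSize} items at a time`);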
    /**
     * Check if all CLIP model components are loaded
     *
     * Verifies that tokenizer, text model, and vision model are all loaded and
     * ready for use. All three components must be available for the embedder
     * to be considered fully loaded.
     *
     * @returns true if all components are loaded, false otherwise
     */
    isLoaded() {
        return this._isLoaded && this.tokenizer !== null && this.textModel !== null && this.imageModel !== null;
    }
    /**
     * Validate that this is a supported CLIP model
     *
     * Checks the model name against the list of supported CLIP models. Currently
     * supports Xenova/clip-vit-base-patch32 and Xenova/clip-vit-base-patch16.
     *
     * @throws {Error} If model is not in the supported list
     * @private
     */
    validateCLIPModel() {
        const supportedModels = [
            'Xenova/clip-vit-base-patch32',
            'Xenova/clip-vit-base-patch16'
        ];
        if (!supportedModels.includes(this.modelName)) {
            throw new Error(`Unsupported CLIP model: ${this.modelName}. ` +
                `Supported models: ${supportedModels.join(', ')}`);
        }
    }
}
//# sourceMappingURL=clip-embedder.js.map
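A closing note on validateCLIPModel: any model id outside the two Xenova checkpoints fails fast with an actionable message. The method is private, so presumably it runs during construction or model loading; the expected failure mode, reconstructed from the message template above, is:

    // With this.modelName === 'openai/clip-vit-base-patch32' (not in the supported list):
    // Error: Unsupported CLIP model: openai/clip-vit-base-patch32. Supported models:
    // Xenova/clip-vit-base-patch32, Xenova/clip-vit-base-patch16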