sie-server 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sie_server-0.3.2 → sie_server-0.3.3}/Dockerfile.cpu +8 -4
- {sie_server-0.3.2 → sie_server-0.3.3}/Dockerfile.cuda11 +4 -1
- {sie_server-0.3.2 → sie_server-0.3.3}/Dockerfile.cuda12 +4 -1
- {sie_server-0.3.2 → sie_server-0.3.3}/PKG-INFO +1 -1
- {sie_server-0.3.2 → sie_server-0.3.3}/bundles/default.yaml +3 -2
- {sie_server-0.3.2 → sie_server-0.3.3}/bundles/transformers5.yaml +4 -1
- {sie_server-0.3.2 → sie_server-0.3.3}/models/nvidia__llama-embed-nemotron-8b.yaml +11 -2
- sie_server-0.3.3/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +43 -0
- sie_server-0.3.3/models/tomoroai__tomoro-colqwen3-embed-4b.yaml +42 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/openapi.json +19 -2
- {sie_server-0.3.2 → sie_server-0.3.3}/pyproject.toml +1 -1
- sie_server-0.3.3/src/sie_server/adapters/colqwen3/__init__.py +337 -0
- sie_server-0.3.3/src/sie_server/adapters/errors.py +2 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliclass/__init__.py +98 -29
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nemo_colembed/__init__.py +175 -65
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/sglang/__init__.py +7 -2
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/extract.py +3 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/helpers.py +12 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/options.py +15 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/cli.py +13 -1
- sie_server-0.3.3/src/sie_server/core/hf_env.py +37 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/load_errors.py +32 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/model_loader.py +225 -42
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/registry.py +19 -3
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/main.py +11 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/metrics.py +20 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/openapi.py +17 -2
- sie_server-0.3.3/src/sie_server/types/overflow_policy.py +5 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/responses.py +1 -0
- sie_server-0.3.3/tests/adapters/test_gliclass_overflow_policy.py +102 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_runtime_options.py +34 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_visual_document.py +151 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_extract_integration.py +4 -3
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_adaptive_batching.py +3 -3
- sie_server-0.3.3/tests/core/test_model_load_timeout.py +372 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/.gitignore +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/CONTRIBUTING.md +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/LICENSE +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/README.md +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/bundles/sglang.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-m3.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-reranker-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-reranker-large.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/GritLM__GritLM-7B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/NeuML__gliner-bert-tiny.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/answerdotai__ModernBERT-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/colbert-ir__colbertv2.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/docling.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/fastino__gliner2-base-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/google__embeddinggemma-300m.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-224.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-384.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/google__siglip2-base-patch16-224.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-base-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-large-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-small-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/jackboyla__glirel-large-v0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/jinaai__jina-colbert-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/microsoft__Florence-2-base-ft.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/microsoft__Florence-2-base.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/microsoft__Florence-2-large.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/naver__splade-v3.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/numind__NuNER_Zero-span.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/numind__NuNER_Zero.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/nvidia__NV-Embed-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/openai__clip-vit-base-patch32.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/openai__clip-vit-large-patch14.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/rasyosef__splade-mini.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_large-v2.1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_medium-v2.1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_multi-v2.1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_small-v2.1.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/vidore__colpali-v1.3-hf.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/models/zai-org__GLM-OCR.yaml +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_base_adapter.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_flash_base.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_spec.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_types.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_utils.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/base.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/clip/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colbert/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colpali/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/docling/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/donut/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/florence2/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliner/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliner2/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/glirel/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/owlv2/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/paddleocr_vl/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/sentence_transformer/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/siglip/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/encode.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/health.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/metrics.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/models.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/openai_compat.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/openapi.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/root.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/score.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/serialization.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/validation.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/ws.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/app/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/app/app_factory.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/app/app_state_config.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/config/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/config/engine.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/config/model.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/adaptive_batching.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/batcher.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/deps.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/disk_cache.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/encode_pipeline.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/extract_cost.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/hot_reload.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/inference.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/inference_output.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/loader.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/logging.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/memory.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/oom.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/postprocessor.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/postprocessor_registry.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/prepared.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/base.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/image.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/text.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/vision.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor_registry.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/readiness.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/shutdown.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/timing.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/tokenizer.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/watcher.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/base.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/encode.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/extract.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/score.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/model_worker.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/oom_recovery.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/types.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/nats_pull_loop.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/nats_subscriber.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/gpu.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/prometheus.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/telemetry.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/tracing.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/static/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/static/index.html +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/inputs.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/outputs.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/requests.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_base.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_bge_m3.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_bge_m3_flash.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_clip.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_colbert.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_docling.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_docling_smoke.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_donut.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_factory_integration.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_flash_base.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_florence2.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_glirel.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_glm_ocr.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_grounding_dino.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_gte_sparse.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_lighton_ocr.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_lora.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_lora_integration.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_paddleocr_vl.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_sentence_transformer.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_sglang.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_siglip.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_sparse_aggregation.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_stablebridge_integration.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_stablebridge_pruner.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_dtype.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_endpoint.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_json_schema.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_timing.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_validation.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_extract.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_extract_oom.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_health.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_models.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_openai_compat.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_score.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_version_header.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_ws.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/app/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/app/test_app_factory.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/config/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/config/test_bundle_coverage.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/config/test_config.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/conftest.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_batcher.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_disk_cache.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_hot_reload.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_idle_evict.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_inference.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_loader.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_logging.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_memory.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_oom_detection.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_postprocessor.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_postprocessor_registry.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_prepared.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_preprocessor.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_preprocessor_registry.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_quantization.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_readiness.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_async.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_core.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_deps.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_failed_state.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_memory.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_multi_model.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_shutdown.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_timing.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_watcher.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_backpressure.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_core.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_extract.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_lora.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_options.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_score.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/worker/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/worker/test_oom_recovery.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/test_metrics.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/test_telemetry.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/test_tracing.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_all_models.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_docker_integration.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_nats_pull_loop.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_nats_pull_loop_batching.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_openapi_export.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_sdk_integration.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_server_smoke.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_sparse_integration.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/type_defs/__init__.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/type_defs/test_inputs.py +0 -0
- {sie_server-0.3.2 → sie_server-0.3.3}/tests/type_defs/test_types.py +0 -0
|
@@ -18,14 +18,15 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
|
|
18
18
|
UV_LINK_MODE=copy \
|
|
19
19
|
PIP_DISABLE_PIP_VERSION_CHECK=1
|
|
20
20
|
|
|
21
|
-
# build-essential + git are builder-only; they do NOT leak into the runtime stage.
|
|
21
|
+
# build-essential + git + zlib1g-dev are builder-only; they do NOT leak into the runtime stage.
|
|
22
22
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
23
23
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
24
24
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
25
25
|
build-essential \
|
|
26
26
|
ca-certificates \
|
|
27
27
|
curl \
|
|
28
|
-
git
|
|
28
|
+
git \
|
|
29
|
+
zlib1g-dev
|
|
29
30
|
|
|
30
31
|
ARG UV_VERSION=0.9.28
|
|
31
32
|
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
|
|
@@ -154,13 +155,16 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
|
|
154
155
|
OMP_NUM_THREADS=4 \
|
|
155
156
|
MKL_NUM_THREADS=4
|
|
156
157
|
|
|
157
|
-
# Only the shared libs torch + pillow actually dlopen at runtime.
|
|
158
|
+
# Only the shared libs torch + pillow + rtree actually dlopen at runtime.
|
|
159
|
+
# libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
|
|
160
|
+
# rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
|
|
158
161
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
159
162
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
160
163
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
161
164
|
libgomp1 \
|
|
162
165
|
libjpeg62-turbo \
|
|
163
|
-
libpng16-16
|
|
166
|
+
libpng16-16 \
|
|
167
|
+
libspatialindex-c6
|
|
164
168
|
|
|
165
169
|
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
166
170
|
|
|
@@ -152,6 +152,8 @@ FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
|
|
|
152
152
|
|
|
153
153
|
ENV DEBIAN_FRONTEND=noninteractive
|
|
154
154
|
|
|
155
|
+
# libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
|
|
156
|
+
# rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
|
|
155
157
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
156
158
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
157
159
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
@@ -159,7 +161,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
|
159
161
|
gcc \
|
|
160
162
|
libc6-dev \
|
|
161
163
|
libgomp1 \
|
|
162
|
-
libnuma1
|
|
164
|
+
libnuma1 \
|
|
165
|
+
libspatialindex-c6
|
|
163
166
|
|
|
164
167
|
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
165
168
|
|
|
@@ -173,6 +173,8 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|
|
173
173
|
# libnuma1: required by sgl_kernel (SGLang bundle); import fails with a
|
|
174
174
|
# misleading SM-arch error without it.
|
|
175
175
|
# libgomp1: torch OpenMP runtime.
|
|
176
|
+
# libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
|
|
177
|
+
# rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
|
|
176
178
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
177
179
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
178
180
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
@@ -180,7 +182,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
|
180
182
|
gcc \
|
|
181
183
|
libc6-dev \
|
|
182
184
|
libgomp1 \
|
|
183
|
-
libnuma1
|
|
185
|
+
libnuma1 \
|
|
186
|
+
libspatialindex-c6
|
|
184
187
|
|
|
185
188
|
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
186
189
|
|
|
@@ -12,6 +12,7 @@ adapters:
|
|
|
12
12
|
- sie_server.adapters.colbert_rotary_flash
|
|
13
13
|
- sie_server.adapters.colpali
|
|
14
14
|
- sie_server.adapters.colqwen2
|
|
15
|
+
- sie_server.adapters.colqwen3
|
|
15
16
|
- sie_server.adapters.grounding_dino
|
|
16
17
|
- sie_server.adapters.gte_sparse_flash
|
|
17
18
|
- sie_server.adapters.jina_flash_cross_encoder
|
|
@@ -44,7 +45,7 @@ adapters:
|
|
|
44
45
|
- sie_server.adapters.paddleocr_vl
|
|
45
46
|
deps:
|
|
46
47
|
# Most flash adapters; sentence_transformer needs >=4.57
|
|
47
|
-
transformers: '>=4.57'
|
|
48
|
+
transformers: '>=4.57,<5'
|
|
48
49
|
# Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
|
|
49
50
|
torch: '>=2.9,<2.10'
|
|
50
51
|
# bge_m3, bge_m3_flash, nemo_colembed, nomic_flash, xlm_roberta_flash
|
|
@@ -56,7 +57,7 @@ deps:
|
|
|
56
57
|
# bge_m3_flag
|
|
57
58
|
FlagEmbedding: '>=1.3'
|
|
58
59
|
# grounding_dino
|
|
59
|
-
pillow: ''
|
|
60
|
+
pillow: '>=11,<12'
|
|
60
61
|
# grounding_dino
|
|
61
62
|
requests: ''
|
|
62
63
|
# gliner, gliner_bi
|
|
@@ -13,5 +13,8 @@ deps:
|
|
|
13
13
|
transformers: '>=5.0'
|
|
14
14
|
# Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
|
|
15
15
|
torch: '>=2.9,<2.10'
|
|
16
|
-
huggingface-hub:
|
|
16
|
+
# No huggingface-hub override: transformers 5.x's own metadata
|
|
17
|
+
# pins it to >=1.3,<2.0. An override here was previously
|
|
18
|
+
# unsatisfiable against that pin and broke `uv run` resolution.
|
|
19
|
+
# Let transformers drive the version.
|
|
17
20
|
pillow: ''
|
|
@@ -18,10 +18,19 @@ profiles:
|
|
|
18
18
|
default:
|
|
19
19
|
max_batch_tokens: 8192
|
|
20
20
|
compute_precision: bfloat16
|
|
21
|
-
|
|
21
|
+
# Was sie_server.adapters.sglang:SGLangEmbeddingAdapter, but
|
|
22
|
+
# SGLang's generic transformers fallback hits
|
|
23
|
+
# `assert get_embedding is False` for this Llama-based arch
|
|
24
|
+
# (no model-specific embedding implementation registered). Route
|
|
25
|
+
# through PyTorchEmbeddingAdapter like NV-Embed-v2 (also Llama-
|
|
26
|
+
# based 7B+ embedder); the heavy lane has 96 GiB so non-paged
|
|
27
|
+
# attention is fine. May be reconciled with a colleague's
|
|
28
|
+
# in-flight SGLang/arch fix later.
|
|
29
|
+
adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
|
|
22
30
|
adapter_options:
|
|
23
31
|
loadtime:
|
|
24
|
-
|
|
32
|
+
trust_remote_code: true
|
|
33
|
+
attn_implementation: eager
|
|
25
34
|
runtime:
|
|
26
35
|
pooling: last_token
|
|
27
36
|
normalize: true
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
sie_id: nvidia/nemotron-colembed-vl-4b-v2
|
|
2
|
+
hf_id: nvidia/nemotron-colembed-vl-4b-v2
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: true
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode:
|
|
10
|
+
dense: null
|
|
11
|
+
sparse: null
|
|
12
|
+
multivector:
|
|
13
|
+
dim: 2560
|
|
14
|
+
score: null
|
|
15
|
+
extract: null
|
|
16
|
+
max_sequence_length: 8192
|
|
17
|
+
profiles:
|
|
18
|
+
default:
|
|
19
|
+
max_batch_tokens: 4096
|
|
20
|
+
compute_precision: bfloat16
|
|
21
|
+
adapter_path: sie_server.adapters.nemo_colembed:NemoColEmbedAdapter
|
|
22
|
+
adapter_options:
|
|
23
|
+
loadtime:
|
|
24
|
+
muvera_config:
|
|
25
|
+
num_repetitions: 40
|
|
26
|
+
num_simhash_projections: 6
|
|
27
|
+
projection_dim: null
|
|
28
|
+
final_projection_dim: 10240
|
|
29
|
+
token_dim: 2560
|
|
30
|
+
runtime:
|
|
31
|
+
normalize: true
|
|
32
|
+
batch_size: 4
|
|
33
|
+
muvera:
|
|
34
|
+
extends: default
|
|
35
|
+
adapter_options:
|
|
36
|
+
runtime:
|
|
37
|
+
normalize: true
|
|
38
|
+
batch_size: 4
|
|
39
|
+
output_types:
|
|
40
|
+
- dense
|
|
41
|
+
output_similarity:
|
|
42
|
+
dense: dot
|
|
43
|
+
muvera: {}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
sie_id: TomoroAI/tomoro-colqwen3-embed-4b
|
|
2
|
+
hf_id: TomoroAI/tomoro-colqwen3-embed-4b
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: true
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode:
|
|
10
|
+
dense: null
|
|
11
|
+
sparse: null
|
|
12
|
+
multivector:
|
|
13
|
+
dim: 320
|
|
14
|
+
score: null
|
|
15
|
+
extract: null
|
|
16
|
+
max_sequence_length: 8192
|
|
17
|
+
profiles:
|
|
18
|
+
default:
|
|
19
|
+
max_batch_tokens: 4096
|
|
20
|
+
compute_precision: bfloat16
|
|
21
|
+
adapter_path: sie_server.adapters.colqwen3:ColQwen3Adapter
|
|
22
|
+
adapter_options:
|
|
23
|
+
loadtime:
|
|
24
|
+
muvera_config:
|
|
25
|
+
num_repetitions: 40
|
|
26
|
+
num_simhash_projections: 6
|
|
27
|
+
projection_dim: null
|
|
28
|
+
final_projection_dim: 10240
|
|
29
|
+
token_dim: 320
|
|
30
|
+
trust_remote_code: true
|
|
31
|
+
runtime:
|
|
32
|
+
normalize: true
|
|
33
|
+
muvera:
|
|
34
|
+
extends: default
|
|
35
|
+
adapter_options:
|
|
36
|
+
runtime:
|
|
37
|
+
normalize: true
|
|
38
|
+
output_types:
|
|
39
|
+
- dense
|
|
40
|
+
output_similarity:
|
|
41
|
+
dense: dot
|
|
42
|
+
muvera: {}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "SIE Server",
|
|
5
5
|
"description": "Search Inference Engine - GPU inference server for search workloads",
|
|
6
|
-
"version": "0.3.2"
|
|
6
|
+
"version": "0.3.3"
|
|
7
7
|
},
|
|
8
8
|
"paths": {
|
|
9
9
|
"/": {
|
|
@@ -1740,7 +1740,7 @@
|
|
|
1740
1740
|
}
|
|
1741
1741
|
],
|
|
1742
1742
|
"default": null,
|
|
1743
|
-
"description": "Adapter-specific options",
|
|
1743
|
+
"description": "Adapter-specific options. Recognized sub-keys include 'overflow_policy' (one of 'default', 'truncate_text', 'error'; default 'default') controlling how inputs exceeding the model's max_sequence_length are handled.",
|
|
1744
1744
|
"title": "Options"
|
|
1745
1745
|
}
|
|
1746
1746
|
},
|
|
@@ -1763,6 +1763,23 @@
|
|
|
1763
1763
|
"location"
|
|
1764
1764
|
]
|
|
1765
1765
|
}
|
|
1766
|
+
},
|
|
1767
|
+
{
|
|
1768
|
+
"items": [
|
|
1769
|
+
{
|
|
1770
|
+
"text": "Apple Inc. was founded by Steve Jobs in Cupertino, California."
|
|
1771
|
+
}
|
|
1772
|
+
],
|
|
1773
|
+
"params": {
|
|
1774
|
+
"labels": [
|
|
1775
|
+
"person",
|
|
1776
|
+
"organization",
|
|
1777
|
+
"location"
|
|
1778
|
+
],
|
|
1779
|
+
"options": {
|
|
1780
|
+
"overflow_policy": "truncate_text"
|
|
1781
|
+
}
|
|
1782
|
+
}
|
|
1766
1783
|
}
|
|
1767
1784
|
],
|
|
1768
1785
|
"properties": {
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import torch
|
|
10
|
+
from torch.nn import functional as F
|
|
11
|
+
|
|
12
|
+
from sie_server.adapters._base_adapter import BaseAdapter
|
|
13
|
+
from sie_server.adapters._spec import AdapterSpec
|
|
14
|
+
from sie_server.adapters._types import ComputePrecision
|
|
15
|
+
from sie_server.core.inference_output import EncodeOutput
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from PIL import Image as PILImage
|
|
19
|
+
|
|
20
|
+
from sie_server.types.inputs import Item
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
_ERR_NO_INPUT = "ColQwen3Adapter requires either text or images input"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ColQwen3Adapter(BaseAdapter):
    """Adapter for ColQwen3-style visual document retrieval models.

    ColQwen3 encodes document page images into multi-vector representations
    (320-dim per token) for late interaction retrieval. Built on Qwen3-VL,
    with a custom projection layer wrapper that exposes ``out.embeddings``.

    Target model: ``TomoroAI/tomoro-colqwen3-embed-4b`` (4B params).

    Loaded via ``AutoModel`` + ``AutoProcessor`` with ``trust_remote_code``
    because the model ships its own ``ColQwen3`` / ``ColQwen3Processor``
    classes (not in native transformers).
    """

    spec = AdapterSpec(
        inputs=("text", "image"),
        outputs=("multivector", "score"),
        multivector_dim=320,
        unload_fields=("_model", "_processor"),
        default_preprocessor="image",
    )

    def __init__(
        self,
        model_name_or_path: str | Path,
        *,
        normalize: bool = True,
        compute_precision: ComputePrecision = "bfloat16",
        trust_remote_code: bool = True,
        max_seq_length: int | None = None,
        muvera_config: dict[str, Any] | None = None,
        token_dim: int = 320,
        max_num_visual_tokens: int = 1280,
    ) -> None:
        """Initialize the adapter.

        Nothing is loaded here; call :meth:`load` to materialize the model.

        Args:
            model_name_or_path: HuggingFace model ID or local path.
            normalize: Whether to L2-normalize embeddings (the model's wrapper
                already normalizes; kept for interface parity).
            compute_precision: Compute precision for inference.
            trust_remote_code: Required for ColQwen3 (custom processor + model classes).
            max_seq_length: Ignored — ColQwen3 uses dynamic sequence length.
                Accepted only so shared adapter configs can be passed through.
            muvera_config: Optional MUVERA configuration. It is not stored or
                used by this adapter; it is accepted for config compatibility.
            token_dim: Per-token embedding dimension (320 for ColQwen3). Used
                until :meth:`load` discovers the real value from the model.
            max_num_visual_tokens: Cap on visual tokens per image (passed to processor).
        """
        self._model_name_or_path = str(model_name_or_path)
        self._normalize = normalize
        self._compute_precision = compute_precision
        self._trust_remote_code = trust_remote_code
        self._max_num_visual_tokens = max_num_visual_tokens

        self._model: Any = None
        self._processor: Any = None
        self._device: str | None = None
        self._multivector_dim: int = token_dim

    def load(self, device: str) -> None:
        """Load processor + model onto the specified device.

        Args:
            device: Target device string (e.g. ``"cuda:0"`` or ``"cpu"``).
        """
        from transformers import AutoModel, AutoProcessor

        # _resolve_dtype() reads self._device, so it must be set first.
        self._device = device

        dtype = self._resolve_dtype()
        attn_impl = self._resolve_attn_implementation(device)

        logger.info(
            "Loading ColQwen3 model %s on device=%s with dtype=%s, attn=%s",
            self._model_name_or_path,
            device,
            dtype,
            attn_impl,
        )

        self._processor = AutoProcessor.from_pretrained(
            self._model_name_or_path,
            trust_remote_code=self._trust_remote_code,
            max_num_visual_tokens=self._max_num_visual_tokens,
        )

        load_kwargs: dict[str, Any] = {
            "trust_remote_code": self._trust_remote_code,
            "device_map": device,
            "dtype": dtype,
        }
        # Off CUDA no implementation is forced; the model picks its default.
        if attn_impl is not None:
            load_kwargs["attn_implementation"] = attn_impl

        self._model = AutoModel.from_pretrained(
            self._model_name_or_path,
            **load_kwargs,
        ).eval()

        # Discover token dim from the projection layer when present.
        proj = getattr(self._model, "embedding_proj_layer", None)
        out_features = getattr(proj, "out_features", None)
        if isinstance(out_features, int) and out_features > 0:
            self._multivector_dim = out_features

    def _resolve_dtype(self) -> torch.dtype:
        """Return the torch dtype to load the model with.

        Non-CUDA devices always use float32; on CUDA the configured compute
        precision is used, falling back to bfloat16 for unknown values.
        """
        if not self._device or not str(self._device).startswith("cuda"):
            return torch.float32
        dtype_map = {
            "float16": torch.float16,
            "bfloat16": torch.bfloat16,
            "float32": torch.float32,
        }
        return dtype_map.get(self._compute_precision, torch.bfloat16)

    def _resolve_attn_implementation(self, device: str) -> str | None:
        """Pick the attention implementation for ``device``.

        Returns:
            ``None`` off CUDA, ``"flash_attention_2"`` when ``flash_attn`` is
            importable, otherwise ``"sdpa"``.
        """
        if not device.startswith("cuda"):
            return None
        try:
            import flash_attn  # ty: ignore[unresolved-import]

            return "flash_attention_2"
        except ImportError:
            logger.info("flash_attn not available, using sdpa attention")
            return "sdpa"

    # ------------------------------------------------------------------
    # Encode
    # ------------------------------------------------------------------

    def encode(
        self,
        items: list[Item],
        output_types: list[str],
        *,
        instruction: str | None = None,
        is_query: bool = False,
        prepared_items: Any = None,
        options: dict[str, Any] | None = None,
    ) -> EncodeOutput:
        """Encode items into per-token multi-vectors.

        Args:
            items: Items to encode. Queries must carry ``text``; documents
                carry ``images`` and/or ``text`` (images win when both exist).
            output_types: Requested outputs; only ``"multivector"`` is supported.
            instruction: Unused by this adapter.
            is_query: When true every item is encoded from its text.
            prepared_items: Unused by this adapter.
            options: Unused by this adapter.

        Returns:
            An ``EncodeOutput`` with one multivector per input item, in input order.

        Raises:
            ValueError: On unsupported output types or an item with no usable input.
        """
        self._check_loaded()
        self._validate_output_types(output_types)

        if is_query:
            multivector_list = self._encode_queries(items)
        else:
            multivector_list = self._encode_documents(items)

        return EncodeOutput(
            multivector=multivector_list,
            batch_size=len(items),
            is_query=is_query,
            multivector_token_dim=self._multivector_dim,
        )

    def _encode_queries(self, items: list[Item]) -> list[np.ndarray]:
        """Encode each query item from its text, one forward pass per item."""
        multivectors: list[np.ndarray] = []
        for item in items:
            if item.text is None:
                raise ValueError(_ERR_NO_INPUT)
            multivectors.append(self._encode_text(item.text))
        return multivectors

    def _encode_documents(self, items: list[Item]) -> list[np.ndarray]:
        """Encode document items, batching all images into one forward pass."""
        # Preallocate by index so output order matches input order regardless of
        # text/image mix, and so multi-image items collapse to one multivector.
        results: list[np.ndarray | None] = [None] * len(items)
        all_images: list[PILImage.Image] = []
        image_slots: list[tuple[int, int]] = []  # (item_idx, image_count)
        for idx, item in enumerate(items):
            has_images = item.images is not None and len(item.images) > 0
            if has_images:
                images = self._load_images(item)
                all_images.extend(images)
                image_slots.append((idx, len(images)))
            elif item.text is not None:
                results[idx] = self._encode_text(item.text)
            else:
                raise ValueError(_ERR_NO_INPUT)

        if all_images:
            per_image_mvs = self._encode_images(all_images)
            # Guard the slicing below: a short batch would silently misassign
            # embeddings to the wrong items.
            if len(per_image_mvs) != len(all_images):
                msg = (
                    f"ColQwen3Adapter expected {len(all_images)} image embeddings, "
                    f"got {len(per_image_mvs)}"
                )
                raise RuntimeError(msg)
            cursor = 0
            for idx, count in image_slots:
                segment = per_image_mvs[cursor : cursor + count]
                cursor += count
                # Multi-image items are concatenated along the token axis.
                results[idx] = segment[0] if count == 1 else np.concatenate(segment, axis=0)

        multivector_list = [mv for mv in results if mv is not None]
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(multivector_list) != len(items):
            msg = (
                f"ColQwen3Adapter produced {len(multivector_list)} multivectors "
                f"for {len(items)} items"
            )
            raise RuntimeError(msg)
        return multivector_list

    # ------------------------------------------------------------------
    # Image encoding
    # ------------------------------------------------------------------

    def _load_images(self, item: Any) -> list[PILImage.Image]:
        """Decode every image of ``item`` from its ``"data"`` bytes as RGB."""
        from PIL import Image

        pil_images: list[PILImage.Image] = []
        for img_input in item.images or []:
            pil_img = Image.open(io.BytesIO(img_input["data"]))
            if pil_img.mode != "RGB":
                pil_img = pil_img.convert("RGB")
            pil_images.append(pil_img)
        return pil_images

    def _encode_images(self, images: list[PILImage.Image]) -> list[np.ndarray]:
        """Encode a batch of images and return per-image multi-vectors."""
        return self._embed(images=images)

    # ------------------------------------------------------------------
    # Text encoding
    # ------------------------------------------------------------------

    def _encode_text(self, text: str) -> np.ndarray:
        """Encode a single text query."""
        return self._embed(text=[text])[0]

    def _embed(self, **processor_inputs: Any) -> list[np.ndarray]:
        """Run processor + model and return one float32 array per batch row.

        Shared by the image and text paths, which differ only in the keyword
        handed to the processor (``images=...`` or ``text=...``).

        Raises:
            RuntimeError: If the model or processor has not been loaded.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self._model is None or self._processor is None:
            raise RuntimeError("ColQwen3Adapter model is not loaded")

        features = self._processor(
            **processor_inputs,
            return_tensors="pt",
            padding="longest",
        )
        # Only tensor-like values can be moved; anything else is dropped.
        inputs = {k: v.to(self._device) for k, v in features.items() if hasattr(v, "to")}

        with torch.inference_mode():
            outputs = self._model(**inputs)

        # ColQwen3 returns a ModelOutput-like object with ``.embeddings``
        # of shape (batch, seq, token_dim). The wrapper already L2-normalizes
        # and applies attention-masking; our ``self._normalize`` is a no-op
        # safety belt for downstream parity.
        embeddings = outputs.embeddings
        if self._normalize:
            embeddings = F.normalize(embeddings, p=2, dim=-1)

        results: list[np.ndarray] = [embeddings[i].float().cpu().numpy() for i in range(embeddings.shape[0])]

        # Free GPU memory between batches to prevent OOM on subsequent calls
        # (L4 22GB GPUs are tight for VLM models).
        del outputs, embeddings, inputs, features
        if self._device and self._device.startswith("cuda"):
            torch.cuda.empty_cache()

        return results

    # ------------------------------------------------------------------
    # Scoring
    # ------------------------------------------------------------------

    def score(
        self,
        query: Any,
        items: list[Any],
        *,
        instruction: str | None = None,
        options: dict[str, Any] | None = None,
    ) -> list[float]:
        """Score documents against a text query using MaxSim.

        For each document, every query token takes its maximum dot product
        over the document tokens, and those maxima are summed.

        Args:
            query: Query item; encoded with ``is_query=True`` (text required).
            items: Document items to score against the query.
            instruction: Unused by this adapter.
            options: Unused by this adapter.

        Returns:
            One score per document, in input order.

        Raises:
            RuntimeError: If encoding yields no multivector output.
        """
        self._check_loaded()

        query_output = self.encode([query], output_types=["multivector"], is_query=True)
        if query_output.multivector is None:
            raise RuntimeError("Failed to encode query: no multivector output")
        query_vecs = query_output.multivector[0]

        doc_output = self.encode(items, output_types=["multivector"], is_query=False)
        if doc_output.multivector is None:
            raise RuntimeError("Failed to encode documents: no multivector output")

        scores: list[float] = []
        query_tensor = torch.from_numpy(query_vecs).to(self._device)
        for doc_vecs in doc_output.multivector:
            doc_tensor = torch.from_numpy(doc_vecs).to(self._device)
            sim = torch.matmul(query_tensor, doc_tensor.T)
            scores.append(sim.max(dim=-1).values.sum().item())
        return scores

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _validate_output_types(self, output_types: list[str]) -> None:
        """Raise ``ValueError`` if anything other than ``multivector`` is requested."""
        unsupported = set(output_types) - {"multivector"}
        if unsupported:
            msg = f"Unsupported output types: {unsupported}. ColQwen3Adapter only supports 'multivector'."
            raise ValueError(msg)

    def get_preprocessor(self) -> Any | None:
        """Return ``None``: this adapter does its own preprocessing."""
        # ColQwen3 uses a custom processor that handles both text and images
        # internally via the ColQwen3Processor; the generic ImagePreprocessor
        # does not match the (text-only / image-only) call pattern.
        return None
|