sie-server 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sie_server-0.3.0 → sie_server-0.3.1}/PKG-INFO +2 -1
- {sie_server-0.3.0 → sie_server-0.3.1}/bundles/default.yaml +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/BAAI__bge-m3.yaml +1 -1
- sie_server-0.3.1/models/Marqo__marqo-ecommerce-embeddings-B.yaml +28 -0
- sie_server-0.3.1/models/google__embeddinggemma-300m.yaml +49 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/openapi.json +64 -1
- {sie_server-0.3.0 → sie_server-0.3.1}/pyproject.toml +3 -1
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/bge_m3/__init__.py +6 -2
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/bge_m3_flag/__init__.py +6 -2
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/bge_m3_flash/__init__.py +5 -2
- sie_server-0.3.1/src/sie_server/adapters/bge_m3_score_mixin.py +283 -0
- sie_server-0.3.1/src/sie_server/adapters/siglip/__init__.py +507 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/encode.py +8 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/extract.py +8 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/helpers.py +87 -14
- sie_server-0.3.1/src/sie_server/api/models.py +198 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/openai_compat.py +8 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/score.py +8 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/ws.py +7 -1
- sie_server-0.3.1/src/sie_server/core/load_errors.py +196 -0
- sie_server-0.3.1/src/sie_server/core/preprocessor/image.py +260 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/registry.py +153 -2
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/types/responses.py +1 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_bge_m3.py +2 -2
- sie_server-0.3.1/tests/adapters/test_bge_m3_flash.py +507 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_encode_dtype.py +4 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_encode_endpoint.py +44 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_encode_json_schema.py +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_encode_timing.py +4 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_encode_validation.py +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_extract.py +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_extract_oom.py +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_models.py +87 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_openai_compat.py +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_score.py +2 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_preprocessor.py +126 -0
- sie_server-0.3.1/tests/core/test_registry_failed_state.py +217 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_all_models.py +26 -0
- sie_server-0.3.0/models/google__embeddinggemma-300m.yaml +0 -29
- sie_server-0.3.0/src/sie_server/adapters/siglip/__init__.py +0 -316
- sie_server-0.3.0/src/sie_server/api/models.py +0 -112
- sie_server-0.3.0/src/sie_server/core/preprocessor/image.py +0 -129
- {sie_server-0.3.0 → sie_server-0.3.1}/.gitignore +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/CONTRIBUTING.md +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/Dockerfile.cpu +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/Dockerfile.cuda11 +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/Dockerfile.cuda12 +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/LICENSE +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/README.md +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/bundles/sglang.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/bundles/transformers5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/BAAI__bge-reranker-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/BAAI__bge-reranker-large.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/GritLM__GritLM-7B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/NeuML__gliner-bert-tiny.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/answerdotai__ModernBERT-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/colbert-ir__colbertv2.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/docling.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/fastino__gliner2-base-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/google__siglip-so400m-patch14-224.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/google__siglip-so400m-patch14-384.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/google__siglip2-base-patch16-224.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/intfloat__e5-base-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/intfloat__e5-large-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/intfloat__e5-small-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/intfloat__multilingual-e5-large.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/jackboyla__glirel-large-v0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/jinaai__jina-colbert-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/microsoft__Florence-2-base-ft.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/microsoft__Florence-2-base.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/microsoft__Florence-2-large.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/naver__splade-v3.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/numind__NuNER_Zero-span.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/numind__NuNER_Zero.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/nvidia__NV-Embed-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/openai__clip-vit-base-patch32.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/openai__clip-vit-large-patch14.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/rasyosef__splade-mini.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/urchade__gliner_large-v2.1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/urchade__gliner_medium-v2.1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/urchade__gliner_multi-v2.1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/urchade__gliner_small-v2.1.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/vidore__colpali-v1.3-hf.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/models/zai-org__GLM-OCR.yaml +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/_base_adapter.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/_flash_base.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/_spec.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/_types.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/_utils.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/base.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/bert_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/clip/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/colbert/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/colpali/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/docling/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/donut/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/florence2/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/gliclass/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/gliner/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/gliner2/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/glirel/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/modernbert_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/owlv2/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/paddleocr_vl/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/sentence_transformer/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/sglang/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/health.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/metrics.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/openapi.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/options.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/root.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/serialization.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/api/validation.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/app/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/app/app_factory.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/app/app_state_config.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/cli.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/config/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/config/engine.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/config/model.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/adaptive_batching.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/batcher.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/deps.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/disk_cache.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/encode_pipeline.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/extract_cost.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/hot_reload.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/inference.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/inference_output.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/loader.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/logging.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/memory.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/model_loader.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/oom.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/postprocessor.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/postprocessor_registry.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/prepared.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/preprocessor/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/preprocessor/base.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/preprocessor/text.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/preprocessor/vision.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/preprocessor_registry.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/readiness.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/shutdown.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/timing.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/tokenizer.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/watcher.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/handlers/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/handlers/base.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/handlers/encode.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/handlers/extract.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/handlers/score.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/model_worker.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/oom_recovery.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/core/worker/types.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/main.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/nats_pull_loop.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/nats_subscriber.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/observability/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/observability/gpu.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/observability/metrics.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/observability/prometheus.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/observability/telemetry.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/observability/tracing.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/static/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/static/index.html +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/types/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/types/inputs.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/types/openapi.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/types/outputs.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/src/sie_server/types/requests.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_base.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_clip.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_colbert.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_docling.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_docling_smoke.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_donut.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_factory_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_flash_base.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_florence2.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_glirel.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_glm_ocr.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_grounding_dino.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_gte_sparse.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_lighton_ocr.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_lora.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_lora_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_paddleocr_vl.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_runtime_options.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_sentence_transformer.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_sglang.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_siglip.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_sparse_aggregation.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_stablebridge_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_stablebridge_pruner.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/adapters/test_visual_document.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_extract_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_health.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_version_header.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/api/test_ws.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/app/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/app/test_app_factory.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/config/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/config/test_bundle_coverage.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/config/test_config.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/conftest.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_adaptive_batching.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_batcher.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_disk_cache.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_hot_reload.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_idle_evict.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_inference.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_loader.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_logging.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_memory.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_oom_detection.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_postprocessor.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_postprocessor_registry.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_prepared.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_preprocessor_registry.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_quantization.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_readiness.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_registry_async.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_registry_core.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_registry_deps.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_registry_memory.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_registry_multi_model.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_shutdown.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_timing.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_watcher.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_worker_backpressure.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_worker_core.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_worker_extract.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_worker_lora.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_worker_options.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/test_worker_score.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/worker/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/core/worker/test_oom_recovery.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/observability/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/observability/test_metrics.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/observability/test_telemetry.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/observability/test_tracing.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_docker_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_nats_pull_loop.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_nats_pull_loop_batching.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_openapi_export.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_sdk_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_server_smoke.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/test_sparse_integration.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/type_defs/__init__.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/type_defs/test_inputs.py +0 -0
- {sie_server-0.3.0 → sie_server-0.3.1}/tests/type_defs/test_types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sie-server
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Search Inference Engine - GPU inference server for search workloads
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -19,6 +19,7 @@ Requires-Dist: msgpack<2,>=1.1
|
|
|
19
19
|
Requires-Dist: msgspec>=0.20.0
|
|
20
20
|
Requires-Dist: nats-py<3,>=2.9
|
|
21
21
|
Requires-Dist: numpy<3,>=2
|
|
22
|
+
Requires-Dist: open-clip-torch>=2.24
|
|
22
23
|
Requires-Dist: opentelemetry-api<2,>=1.28
|
|
23
24
|
Requires-Dist: opentelemetry-exporter-otlp<2,>=1.28
|
|
24
25
|
Requires-Dist: opentelemetry-instrumentation-fastapi<1,>=0.49b0
|
|
@@ -71,6 +71,8 @@ deps:
|
|
|
71
71
|
loguru: '>=0.7,<1'
|
|
72
72
|
# donut, florence2
|
|
73
73
|
timm: '>=0.9.0,<1.0'
|
|
74
|
+
# siglip (Marqo/marqo-ecommerce-embeddings-B uses open_clip native loader)
|
|
75
|
+
open-clip-torch: '>=2.24'
|
|
74
76
|
# docling — composite-document parser (PDF/DOCX/HTML)
|
|
75
77
|
docling: '>=2,<3'
|
|
76
78
|
# Flash Attention 2 — CUDA only, prebuilt wheel
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
sie_id: Marqo/marqo-ecommerce-embeddings-B
|
|
2
|
+
hf_id: Marqo/marqo-ecommerce-embeddings-B
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: true
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode:
|
|
10
|
+
dense:
|
|
11
|
+
dim: 768
|
|
12
|
+
sparse: null
|
|
13
|
+
multivector: null
|
|
14
|
+
score: null
|
|
15
|
+
extract: null
|
|
16
|
+
max_sequence_length: 64
|
|
17
|
+
profiles:
|
|
18
|
+
default:
|
|
19
|
+
max_batch_tokens: 16384
|
|
20
|
+
compute_precision: float16
|
|
21
|
+
adapter_path: sie_server.adapters.siglip:SiglipAdapter
|
|
22
|
+
adapter_options:
|
|
23
|
+
loadtime:
|
|
24
|
+
backend: open_clip
|
|
25
|
+
open_clip_model_id: hf-hub:Marqo/marqo-ecommerce-embeddings-B
|
|
26
|
+
dense_dim: 768
|
|
27
|
+
runtime:
|
|
28
|
+
normalize: true
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# NOTE: ``google/embeddinggemma-300m`` is a *gated* HuggingFace repo. The
|
|
2
|
+
# server requires ``HF_TOKEN`` (with the model license accepted on the
|
|
3
|
+
# HF account) to load this model. Without it, the registry records a
|
|
4
|
+
# terminal ``GATED`` failure and the API returns ``MODEL_LOAD_FAILED``
|
|
5
|
+
# (502, no Retry-After) so the SDK does not loop.
|
|
6
|
+
#
|
|
7
|
+
# Architecture support: needs ``transformers>=4.56`` for
|
|
8
|
+
# ``Gemma3TextModel``. Older versions raise an unsupported-model error
|
|
9
|
+
# which the registry classifies as ``DEPENDENCY``.
|
|
10
|
+
sie_id: google/embeddinggemma-300m
|
|
11
|
+
hf_id: google/embeddinggemma-300m
|
|
12
|
+
# Track the default branch. Note: ``main`` is mutable on the Hub, so
|
|
13
|
+
# this does NOT guarantee bit-for-bit reproducibility — it merely names
|
|
14
|
+
# the branch we expect HuggingFace to resolve. For a true pin, replace
|
|
15
|
+
# this with an immutable commit SHA after verifying the new revision
|
|
16
|
+
# against ``test_google_embeddinggemma_300m_dense``.
|
|
17
|
+
hf_revision: main
|
|
18
|
+
inputs:
|
|
19
|
+
text: true
|
|
20
|
+
image: false
|
|
21
|
+
audio: false
|
|
22
|
+
video: false
|
|
23
|
+
tasks:
|
|
24
|
+
encode:
|
|
25
|
+
dense:
|
|
26
|
+
dim: 768
|
|
27
|
+
sparse: null
|
|
28
|
+
multivector: null
|
|
29
|
+
score: null
|
|
30
|
+
extract: null
|
|
31
|
+
max_sequence_length: 2048
|
|
32
|
+
profiles:
|
|
33
|
+
default:
|
|
34
|
+
max_batch_tokens: 16384
|
|
35
|
+
# bfloat16 on CUDA matches the captured reference embedding in
|
|
36
|
+
# ``test_all_models.py``. On CPU the adapter falls back to fp32
|
|
37
|
+
# automatically (see pytorch_embedding adapter); the loaded model
|
|
38
|
+
# still works, but numerical-equivalence tests should be gated on
|
|
39
|
+
# CUDA availability if drift becomes an issue.
|
|
40
|
+
compute_precision: bfloat16
|
|
41
|
+
adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
|
|
42
|
+
adapter_options:
|
|
43
|
+
loadtime:
|
|
44
|
+
attn_implementation: sdpa
|
|
45
|
+
runtime:
|
|
46
|
+
pooling: mean
|
|
47
|
+
normalize: true
|
|
48
|
+
query_template: 'task: search result | query: {text}'
|
|
49
|
+
doc_template: 'title: none | text: {text}'
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "SIE Server",
|
|
5
5
|
"description": "Search Inference Engine - GPU inference server for search workloads",
|
|
6
|
-
"version": "0.3.0"
|
|
6
|
+
"version": "0.3.1"
|
|
7
7
|
},
|
|
8
8
|
"paths": {
|
|
9
9
|
"/": {
|
|
@@ -134,6 +134,9 @@
|
|
|
134
134
|
"404": {
|
|
135
135
|
"description": "Model not found"
|
|
136
136
|
},
|
|
137
|
+
"502": {
|
|
138
|
+
"description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
|
|
139
|
+
},
|
|
137
140
|
"503": {
|
|
138
141
|
"description": "Model not loaded or service unavailable"
|
|
139
142
|
},
|
|
@@ -234,6 +237,9 @@
|
|
|
234
237
|
"404": {
|
|
235
238
|
"description": "Model not found"
|
|
236
239
|
},
|
|
240
|
+
"502": {
|
|
241
|
+
"description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
|
|
242
|
+
},
|
|
237
243
|
"503": {
|
|
238
244
|
"description": "Model not loaded or service unavailable"
|
|
239
245
|
},
|
|
@@ -334,6 +340,9 @@
|
|
|
334
340
|
"404": {
|
|
335
341
|
"description": "Model not found"
|
|
336
342
|
},
|
|
343
|
+
"502": {
|
|
344
|
+
"description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
|
|
345
|
+
},
|
|
337
346
|
"503": {
|
|
338
347
|
"description": "Model not loaded or service unavailable"
|
|
339
348
|
},
|
|
@@ -504,6 +513,9 @@
|
|
|
504
513
|
"404": {
|
|
505
514
|
"description": "Model not found"
|
|
506
515
|
},
|
|
516
|
+
"502": {
|
|
517
|
+
"description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
|
|
518
|
+
},
|
|
507
519
|
"503": {
|
|
508
520
|
"description": "Service unavailable"
|
|
509
521
|
},
|
|
@@ -847,6 +859,28 @@
|
|
|
847
859
|
"type": "boolean",
|
|
848
860
|
"title": "Loaded"
|
|
849
861
|
},
|
|
862
|
+
"state": {
|
|
863
|
+
"type": "string",
|
|
864
|
+
"enum": [
|
|
865
|
+
"available",
|
|
866
|
+
"loading",
|
|
867
|
+
"loaded",
|
|
868
|
+
"unloading",
|
|
869
|
+
"failed"
|
|
870
|
+
],
|
|
871
|
+
"title": "State",
|
|
872
|
+
"default": "available"
|
|
873
|
+
},
|
|
874
|
+
"last_error": {
|
|
875
|
+
"anyOf": [
|
|
876
|
+
{
|
|
877
|
+
"$ref": "#/components/schemas/ModelLoadError"
|
|
878
|
+
},
|
|
879
|
+
{
|
|
880
|
+
"type": "null"
|
|
881
|
+
}
|
|
882
|
+
]
|
|
883
|
+
},
|
|
850
884
|
"max_sequence_length": {
|
|
851
885
|
"anyOf": [
|
|
852
886
|
{
|
|
@@ -878,6 +912,35 @@
|
|
|
878
912
|
"title": "ModelInfo",
|
|
879
913
|
"description": "Information about a model."
|
|
880
914
|
},
|
|
915
|
+
"ModelLoadError": {
|
|
916
|
+
"properties": {
|
|
917
|
+
"code": {
|
|
918
|
+
"type": "string",
|
|
919
|
+
"title": "Code"
|
|
920
|
+
},
|
|
921
|
+
"message": {
|
|
922
|
+
"type": "string",
|
|
923
|
+
"title": "Message"
|
|
924
|
+
},
|
|
925
|
+
"attempts": {
|
|
926
|
+
"type": "integer",
|
|
927
|
+
"title": "Attempts"
|
|
928
|
+
},
|
|
929
|
+
"permanent": {
|
|
930
|
+
"type": "boolean",
|
|
931
|
+
"title": "Permanent"
|
|
932
|
+
}
|
|
933
|
+
},
|
|
934
|
+
"type": "object",
|
|
935
|
+
"required": [
|
|
936
|
+
"code",
|
|
937
|
+
"message",
|
|
938
|
+
"attempts",
|
|
939
|
+
"permanent"
|
|
940
|
+
],
|
|
941
|
+
"title": "ModelLoadError",
|
|
942
|
+
"description": "Diagnostic detail for a recorded load failure.\n\nSurfaced in :class:`ModelInfo` when the registry has a sticky\nfailure for the model. Attributes mirror the server-side\n:class:`sie_server.core.load_errors.LoadFailure`."
|
|
943
|
+
},
|
|
881
944
|
"ModelsListResponse": {
|
|
882
945
|
"properties": {
|
|
883
946
|
"models": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sie-server"
|
|
3
|
-
version = "0.3.0"
|
|
3
|
+
version = "0.3.1"
|
|
4
4
|
description = "Search Inference Engine - GPU inference server for search workloads"
|
|
5
5
|
requires-python = ">=3.12,<3.13"
|
|
6
6
|
license = { text = "Apache-2.0" }
|
|
@@ -29,6 +29,8 @@ dependencies = [
|
|
|
29
29
|
# Docling — composite-document parser (PDF/DOCX/HTML) for extract()
|
|
30
30
|
"docling>=2,<3",
|
|
31
31
|
"loguru>=0.7,<1",
|
|
32
|
+
# SigLIP (Marqo/marqo-ecommerce-embeddings-B native open_clip loader)
|
|
33
|
+
"open-clip-torch>=2.24",
|
|
32
34
|
# Image processing
|
|
33
35
|
"pillow>=11,<12",
|
|
34
36
|
"numpy>=2,<3",
|
|
@@ -26,6 +26,7 @@ from torch.nn import functional
|
|
|
26
26
|
from sie_server.adapters._base_adapter import BaseAdapter
|
|
27
27
|
from sie_server.adapters._spec import AdapterSpec
|
|
28
28
|
from sie_server.adapters._types import ERR_NOT_LOADED, ERR_REQUIRES_TEXT, ComputePrecision
|
|
29
|
+
from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
|
|
29
30
|
from sie_server.core.inference_output import EncodeOutput, SparseVector
|
|
30
31
|
from sie_server.types.inputs import Item
|
|
31
32
|
|
|
@@ -35,16 +36,19 @@ if TYPE_CHECKING:
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
class BGEM3Adapter(BaseAdapter):
|
|
39
|
+
class BGEM3Adapter(BGEM3ScoreMixin, BaseAdapter):
|
|
39
40
|
"""Adapter for BAAI/bge-m3 model.
|
|
40
41
|
|
|
41
42
|
This adapter uses direct PyTorch inference with Flash Attention 2
|
|
42
43
|
for optimal performance (dense, sparse, and multi-vector outputs).
|
|
44
|
+
|
|
45
|
+
Scoring (`/v1/score`) is supported via :class:`BGEM3ScoreMixin`, which
|
|
46
|
+
composes scores from the encoder outputs (dense / sparse / multivector).
|
|
43
47
|
"""
|
|
44
48
|
|
|
45
49
|
spec = AdapterSpec(
|
|
46
50
|
inputs=("text",),
|
|
47
|
-
outputs=("dense", "sparse", "multivector"),
|
|
51
|
+
outputs=("dense", "sparse", "multivector", "score"),
|
|
48
52
|
dense_dim=1024,
|
|
49
53
|
sparse_dim=250002,
|
|
50
54
|
multivector_dim=1024,
|
|
@@ -23,6 +23,7 @@ import torch
|
|
|
23
23
|
from sie_server.adapters._base_adapter import BaseAdapter
|
|
24
24
|
from sie_server.adapters._spec import AdapterSpec
|
|
25
25
|
from sie_server.adapters._types import ERR_NOT_LOADED, ERR_REQUIRES_TEXT, ComputePrecision
|
|
26
|
+
from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
|
|
26
27
|
from sie_server.core.inference_output import EncodeOutput, SparseVector
|
|
27
28
|
|
|
28
29
|
if TYPE_CHECKING:
|
|
@@ -35,16 +36,19 @@ if TYPE_CHECKING:
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
class BGEM3FlagAdapter(BaseAdapter):
|
|
39
|
+
class BGEM3FlagAdapter(BGEM3ScoreMixin, BaseAdapter):
|
|
39
40
|
"""Adapter for BAAI/bge-m3 using FlagEmbedding library.
|
|
40
41
|
|
|
41
42
|
This adapter uses the FlagEmbedding library's BGEM3FlagModel.
|
|
42
43
|
For better performance, use BGEM3Adapter which uses Flash Attention 2.
|
|
44
|
+
|
|
45
|
+
Scoring (`/v1/score`) is supported via :class:`BGEM3ScoreMixin`, which
|
|
46
|
+
composes scores from the encoder outputs (dense / sparse / multivector).
|
|
43
47
|
"""
|
|
44
48
|
|
|
45
49
|
spec = AdapterSpec(
|
|
46
50
|
inputs=("text",),
|
|
47
|
-
outputs=("dense", "sparse", "multivector"),
|
|
51
|
+
outputs=("dense", "sparse", "multivector", "score"),
|
|
48
52
|
dense_dim=1024,
|
|
49
53
|
sparse_dim=250002,
|
|
50
54
|
multivector_dim=1024,
|
|
@@ -14,6 +14,7 @@ from sie_server.adapters._flash_base import FlashBaseAdapter
|
|
|
14
14
|
from sie_server.adapters._spec import AdapterSpec
|
|
15
15
|
from sie_server.adapters._types import ERR_NOT_LOADED, ComputePrecision
|
|
16
16
|
from sie_server.adapters._utils import validate_output_types
|
|
17
|
+
from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
|
|
17
18
|
from sie_server.adapters.peft_lora_mixin import PEFTLoRAMixin
|
|
18
19
|
from sie_server.core.inference_output import EncodeOutput, SparseVector
|
|
19
20
|
from sie_server.types.inputs import Item
|
|
@@ -26,7 +27,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
27
|
_ERR_CPU_NOT_SUPPORTED = "BGEM3FlashAdapter requires CUDA. Use bge_m3 adapter for CPU."
|
|
27
28
|
|
|
28
29
|
|
|
29
|
-
class BGEM3FlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
|
|
30
|
+
class BGEM3FlashAdapter(BGEM3ScoreMixin, PEFTLoRAMixin, FlashBaseAdapter):
|
|
30
31
|
"""BGE-M3 adapter using Flash Attention 2 with variable-length sequences.
|
|
31
32
|
|
|
32
33
|
This adapter eliminates padding waste by packing sequences and using
|
|
@@ -41,7 +42,7 @@ class BGEM3FlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
|
|
|
41
42
|
|
|
42
43
|
spec = AdapterSpec(
|
|
43
44
|
inputs=("text",),
|
|
44
|
-
outputs=("dense", "sparse", "multivector"),
|
|
45
|
+
outputs=("dense", "sparse", "multivector", "score"),
|
|
45
46
|
dense_dim=1024,
|
|
46
47
|
sparse_dim=250002,
|
|
47
48
|
multivector_dim=1024,
|
|
@@ -232,6 +233,8 @@ class BGEM3FlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
|
|
|
232
233
|
|
|
233
234
|
return self._to_inference_output(results, output_types, len(items), is_query)
|
|
234
235
|
|
|
236
|
+
# score() and score_pairs() are provided by BGEM3ScoreMixin.
|
|
237
|
+
|
|
235
238
|
def _build_position_ids(self, cu_seqlens: torch.Tensor, num_seqs: int) -> torch.Tensor:
|
|
236
239
|
"""Build XLMRoberta-style position IDs for packed sequences.
|
|
237
240
|
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from sie_server.core.inference_output import ScoreOutput
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from sie_server.core.inference_output import EncodeOutput
|
|
11
|
+
from sie_server.types.inputs import Item
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BGEM3ScoreMixin:
|
|
15
|
+
"""Adds BGE-M3 score()/score_pairs() to any adapter implementing encode().
|
|
16
|
+
|
|
17
|
+
BGE-M3 supports four scoring modes composable from any of its encode
|
|
18
|
+
outputs (dense / sparse / multivector). This mixin implements the modes
|
|
19
|
+
once and is consumed by every BGE-M3 adapter (``bge_m3_flash``,
|
|
20
|
+
``bge_m3``, ``bge_m3_flag``) so they all expose the same ``/v1/score``
|
|
21
|
+
semantics.
|
|
22
|
+
|
|
23
|
+
Modes:
|
|
24
|
+
- ``dense`` : cosine similarity between CLS-pooled, L2-normalized vectors.
|
|
25
|
+
- ``sparse`` : ``Σ q_w * d_w`` over shared token ids (BGE-M3 paper /
|
|
26
|
+
FlagEmbedding's ``compute_lexical_matching_score``).
|
|
27
|
+
- ``colbert``: ColBERT-style MaxSim over the multi-vector projection,
|
|
28
|
+
normalized by query length (matches FlagEmbedding's ``colbert_score``).
|
|
29
|
+
- ``hybrid`` : weighted sum (default ``{dense: 0.4, sparse: 0.2, colbert: 0.4}``,
|
|
30
|
+
override via ``options["score_weights"]``).
|
|
31
|
+
|
|
32
|
+
Runtime-side companion to ``"score"`` being declared in
|
|
33
|
+
:class:`AdapterSpec.outputs`; the class-level validator at
|
|
34
|
+
:mod:`sie_server.adapters._base_adapter` only checks that ``score`` /
|
|
35
|
+
``score_pairs`` are overridden on the class — both come from this mixin.
|
|
36
|
+
|
|
37
|
+
Subclasses must provide ``encode()`` and ``_check_loaded()`` (the latter
|
|
38
|
+
is supplied by the standard adapter base classes). The TYPE_CHECKING
|
|
39
|
+
stubs below let the type checker resolve ``self.encode`` / ``self._check_loaded``
|
|
40
|
+
inside this module without affecting Python's runtime MRO — they only
|
|
41
|
+
exist for the type checker, never as live attributes.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
if TYPE_CHECKING:
|
|
45
|
+
|
|
46
|
+
def encode(
|
|
47
|
+
self,
|
|
48
|
+
items: list[Item],
|
|
49
|
+
output_types: list[str],
|
|
50
|
+
*,
|
|
51
|
+
instruction: str | None = ...,
|
|
52
|
+
is_query: bool = ...,
|
|
53
|
+
prepared_items: Any = ...,
|
|
54
|
+
options: dict[str, Any] | None = ...,
|
|
55
|
+
) -> EncodeOutput: ...
|
|
56
|
+
|
|
57
|
+
def _check_loaded(self) -> None: ...
|
|
58
|
+
|
|
59
|
+
# Default hybrid weights from the BGE-M3 paper (Chen et al., 2024).
|
|
60
|
+
_DEFAULT_HYBRID_WEIGHTS: ClassVar[dict[str, float]] = {"dense": 0.4, "sparse": 0.2, "colbert": 0.4}
|
|
61
|
+
_VALID_SCORE_MODES: ClassVar[frozenset[str]] = frozenset({"dense", "sparse", "colbert", "hybrid"})
|
|
62
|
+
_MODE_TO_OUTPUT: ClassVar[dict[str, str]] = {
|
|
63
|
+
"dense": "dense",
|
|
64
|
+
"sparse": "sparse",
|
|
65
|
+
"colbert": "multivector",
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
# ------------------------------------------------------------------ public
|
|
69
|
+
|
|
70
|
+
def score(
|
|
71
|
+
self,
|
|
72
|
+
query: Item,
|
|
73
|
+
items: list[Item],
|
|
74
|
+
*,
|
|
75
|
+
instruction: str | None = None,
|
|
76
|
+
options: dict[str, Any] | None = None,
|
|
77
|
+
) -> list[float]:
|
|
78
|
+
"""Score items against a query using bi-encoder similarity.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
query: Query item.
|
|
82
|
+
items: Document items to score.
|
|
83
|
+
instruction: Optional instruction prepended to query and items.
|
|
84
|
+
options: Runtime options. Recognized keys:
|
|
85
|
+
``score_mode``: one of
|
|
86
|
+
``"dense" | "sparse" | "colbert" | "hybrid"`` (default ``"dense"``).
|
|
87
|
+
``score_weights``: mapping
|
|
88
|
+
``{dense, sparse, colbert} -> float`` for hybrid mode.
|
|
89
|
+
|
|
90
|
+
Returns:
|
|
91
|
+
List of scores parallel to ``items``.
|
|
92
|
+
"""
|
|
93
|
+
self._check_loaded()
|
|
94
|
+
if not items:
|
|
95
|
+
return []
|
|
96
|
+
|
|
97
|
+
score_mode, weights = self._resolve_score_mode(options)
|
|
98
|
+
output_types = self._output_types_for_mode(score_mode, weights)
|
|
99
|
+
|
|
100
|
+
query_out = self.encode(
|
|
101
|
+
[query],
|
|
102
|
+
output_types=output_types,
|
|
103
|
+
instruction=instruction,
|
|
104
|
+
is_query=True,
|
|
105
|
+
options=options,
|
|
106
|
+
)
|
|
107
|
+
items_out = self.encode(
|
|
108
|
+
items,
|
|
109
|
+
output_types=output_types,
|
|
110
|
+
instruction=instruction,
|
|
111
|
+
is_query=False,
|
|
112
|
+
options=options,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
return [self._compute_pair_score(query_out, 0, items_out, i, score_mode, weights) for i in range(len(items))]
|
|
116
|
+
|
|
117
|
+
def score_pairs(
|
|
118
|
+
self,
|
|
119
|
+
queries: list[Item],
|
|
120
|
+
docs: list[Item],
|
|
121
|
+
*,
|
|
122
|
+
instruction: str | None = None,
|
|
123
|
+
options: dict[str, Any] | None = None,
|
|
124
|
+
) -> ScoreOutput:
|
|
125
|
+
"""Score parallel (query, doc) pairs in a single batched encode."""
|
|
126
|
+
self._check_loaded()
|
|
127
|
+
if len(queries) != len(docs):
|
|
128
|
+
msg = f"score_pairs requires equal-length queries and docs, got {len(queries)} queries and {len(docs)} docs"
|
|
129
|
+
raise ValueError(msg)
|
|
130
|
+
if not queries:
|
|
131
|
+
return ScoreOutput(scores=np.empty(0, dtype=np.float32))
|
|
132
|
+
|
|
133
|
+
score_mode, weights = self._resolve_score_mode(options)
|
|
134
|
+
output_types = self._output_types_for_mode(score_mode, weights)
|
|
135
|
+
|
|
136
|
+
queries_out = self.encode(
|
|
137
|
+
queries,
|
|
138
|
+
output_types=output_types,
|
|
139
|
+
instruction=instruction,
|
|
140
|
+
is_query=True,
|
|
141
|
+
options=options,
|
|
142
|
+
)
|
|
143
|
+
docs_out = self.encode(
|
|
144
|
+
docs,
|
|
145
|
+
output_types=output_types,
|
|
146
|
+
instruction=instruction,
|
|
147
|
+
is_query=False,
|
|
148
|
+
options=options,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
scores = np.asarray(
|
|
152
|
+
[self._compute_pair_score(queries_out, i, docs_out, i, score_mode, weights) for i in range(len(queries))],
|
|
153
|
+
dtype=np.float32,
|
|
154
|
+
)
|
|
155
|
+
return ScoreOutput(scores=scores)
|
|
156
|
+
|
|
157
|
+
# ------------------------------------------------------------ option resolve
|
|
158
|
+
|
|
159
|
+
def _resolve_score_mode(self, options: dict[str, Any] | None) -> tuple[str, dict[str, float]]:
|
|
160
|
+
"""Validate and resolve ``score_mode`` and ``score_weights`` from options."""
|
|
161
|
+
opts = options or {}
|
|
162
|
+
score_mode = opts.get("score_mode", "dense")
|
|
163
|
+
# Validate type before membership (frozenset.__contains__ would raise
|
|
164
|
+
# TypeError on unhashable inputs like list/dict, leaking a 500).
|
|
165
|
+
if not isinstance(score_mode, str) or score_mode not in self._VALID_SCORE_MODES:
|
|
166
|
+
msg = f"Invalid score_mode '{score_mode}'. Expected one of {sorted(self._VALID_SCORE_MODES)}."
|
|
167
|
+
raise ValueError(msg)
|
|
168
|
+
|
|
169
|
+
weights = dict(self._DEFAULT_HYBRID_WEIGHTS)
|
|
170
|
+
override = opts.get("score_weights")
|
|
171
|
+
if override is not None:
|
|
172
|
+
if not isinstance(override, dict):
|
|
173
|
+
msg = "score_weights must be a mapping of {dense, sparse, colbert} -> float"
|
|
174
|
+
raise ValueError(msg)
|
|
175
|
+
unknown = set(override) - set(self._DEFAULT_HYBRID_WEIGHTS)
|
|
176
|
+
if unknown:
|
|
177
|
+
msg = f"Unknown score_weights keys: {sorted(unknown)}. Allowed: dense, sparse, colbert"
|
|
178
|
+
raise ValueError(msg)
|
|
179
|
+
for key, value in override.items():
|
|
180
|
+
# bool is a subclass of int — reject it explicitly to avoid silently
|
|
181
|
+
# treating True/False as 1.0/0.0 weights.
|
|
182
|
+
if isinstance(value, bool) or not isinstance(value, (int, float)) or value < 0:
|
|
183
|
+
msg = f"score_weights['{key}'] must be a non-negative number, got {value!r}"
|
|
184
|
+
raise ValueError(msg)
|
|
185
|
+
weights[key] = float(value)
|
|
186
|
+
|
|
187
|
+
if score_mode == "hybrid":
|
|
188
|
+
total = sum(weights.values())
|
|
189
|
+
if total <= 0:
|
|
190
|
+
msg = "score_weights for hybrid mode must contain at least one positive weight"
|
|
191
|
+
raise ValueError(msg)
|
|
192
|
+
|
|
193
|
+
return score_mode, weights
|
|
194
|
+
|
|
195
|
+
def _output_types_for_mode(self, score_mode: str, weights: dict[str, float]) -> list[str]:
|
|
196
|
+
"""Resolve which encode outputs are needed for the requested score mode."""
|
|
197
|
+
if score_mode == "hybrid":
|
|
198
|
+
# Skip outputs whose weight is zero to save compute.
|
|
199
|
+
return [self._MODE_TO_OUTPUT[mode] for mode in ("dense", "sparse", "colbert") if weights.get(mode, 0.0) > 0]
|
|
200
|
+
return [self._MODE_TO_OUTPUT[score_mode]]
|
|
201
|
+
|
|
202
|
+
# ------------------------------------------------------------------ similarity
|
|
203
|
+
|
|
204
|
+
def _compute_pair_score(
|
|
205
|
+
self,
|
|
206
|
+
q_out: EncodeOutput,
|
|
207
|
+
q_idx: int,
|
|
208
|
+
d_out: EncodeOutput,
|
|
209
|
+
d_idx: int,
|
|
210
|
+
score_mode: str,
|
|
211
|
+
weights: dict[str, float],
|
|
212
|
+
) -> float:
|
|
213
|
+
"""Compute a single (query, doc) score under the resolved mode."""
|
|
214
|
+
if score_mode == "dense":
|
|
215
|
+
return self._dense_sim(q_out, q_idx, d_out, d_idx)
|
|
216
|
+
if score_mode == "sparse":
|
|
217
|
+
return self._sparse_sim(q_out, q_idx, d_out, d_idx)
|
|
218
|
+
if score_mode == "colbert":
|
|
219
|
+
return self._colbert_sim(q_out, q_idx, d_out, d_idx)
|
|
220
|
+
# hybrid
|
|
221
|
+
score = 0.0
|
|
222
|
+
if weights.get("dense", 0.0) > 0:
|
|
223
|
+
score += weights["dense"] * self._dense_sim(q_out, q_idx, d_out, d_idx)
|
|
224
|
+
if weights.get("sparse", 0.0) > 0:
|
|
225
|
+
score += weights["sparse"] * self._sparse_sim(q_out, q_idx, d_out, d_idx)
|
|
226
|
+
if weights.get("colbert", 0.0) > 0:
|
|
227
|
+
score += weights["colbert"] * self._colbert_sim(q_out, q_idx, d_out, d_idx)
|
|
228
|
+
return float(score)
|
|
229
|
+
|
|
230
|
+
@staticmethod
|
|
231
|
+
def _dense_sim(q_out: EncodeOutput, q_idx: int, d_out: EncodeOutput, d_idx: int) -> float:
|
|
232
|
+
"""Cosine similarity between dense vectors (normalized inside encode)."""
|
|
233
|
+
if q_out.dense is None or d_out.dense is None:
|
|
234
|
+
msg = "Dense vectors required for dense scoring but missing from encode output"
|
|
235
|
+
raise RuntimeError(msg)
|
|
236
|
+
q = q_out.dense[q_idx]
|
|
237
|
+
d = d_out.dense[d_idx]
|
|
238
|
+
# Defensive normalization in case caller disabled normalize at runtime.
|
|
239
|
+
q_norm = float(np.linalg.norm(q))
|
|
240
|
+
d_norm = float(np.linalg.norm(d))
|
|
241
|
+
if q_norm == 0.0 or d_norm == 0.0:
|
|
242
|
+
return 0.0
|
|
243
|
+
return float(np.dot(q, d) / (q_norm * d_norm))
|
|
244
|
+
|
|
245
|
+
@staticmethod
|
|
246
|
+
def _sparse_sim(q_out: EncodeOutput, q_idx: int, d_out: EncodeOutput, d_idx: int) -> float:
|
|
247
|
+
"""BGE-M3 lexical-match score: sum of q_w * d_w over shared token ids."""
|
|
248
|
+
if q_out.sparse is None or d_out.sparse is None:
|
|
249
|
+
msg = "Sparse vectors required for sparse scoring but missing from encode output"
|
|
250
|
+
raise RuntimeError(msg)
|
|
251
|
+
q_vec = q_out.sparse[q_idx]
|
|
252
|
+
d_vec = d_out.sparse[d_idx]
|
|
253
|
+
if len(q_vec.indices) == 0 or len(d_vec.indices) == 0:
|
|
254
|
+
return 0.0
|
|
255
|
+
d_lookup = dict(zip(d_vec.indices.tolist(), d_vec.values.tolist(), strict=True))
|
|
256
|
+
total = 0.0
|
|
257
|
+
for tid, q_w in zip(q_vec.indices.tolist(), q_vec.values.tolist(), strict=True):
|
|
258
|
+
d_w = d_lookup.get(tid)
|
|
259
|
+
if d_w is not None:
|
|
260
|
+
total += float(q_w) * float(d_w)
|
|
261
|
+
return float(total)
|
|
262
|
+
|
|
263
|
+
@staticmethod
|
|
264
|
+
def _colbert_sim(q_out: EncodeOutput, q_idx: int, d_out: EncodeOutput, d_idx: int) -> float:
|
|
265
|
+
"""ColBERT MaxSim: sum over query tokens of max-dot against doc tokens, normalized by query length.
|
|
266
|
+
|
|
267
|
+
Matches FlagEmbedding's ``BGEM3FlagModel.colbert_score`` exactly.
|
|
268
|
+
"""
|
|
269
|
+
if q_out.multivector is None or d_out.multivector is None:
|
|
270
|
+
msg = "Multivector outputs required for colbert scoring but missing from encode output"
|
|
271
|
+
raise RuntimeError(msg)
|
|
272
|
+
q_mv = q_out.multivector[q_idx]
|
|
273
|
+
d_mv = d_out.multivector[d_idx]
|
|
274
|
+
if q_mv.size == 0 or d_mv.size == 0:
|
|
275
|
+
return 0.0
|
|
276
|
+
# Defensive normalization (multivector is normalized inside encode by default).
|
|
277
|
+
q_norms = np.linalg.norm(q_mv, axis=-1, keepdims=True)
|
|
278
|
+
d_norms = np.linalg.norm(d_mv, axis=-1, keepdims=True)
|
|
279
|
+
q_normed = np.divide(q_mv, q_norms, out=np.zeros_like(q_mv), where=q_norms > 0)
|
|
280
|
+
d_normed = np.divide(d_mv, d_norms, out=np.zeros_like(d_mv), where=d_norms > 0)
|
|
281
|
+
sim = q_normed @ d_normed.T # [q_len, d_len]
|
|
282
|
+
max_per_query_token = sim.max(axis=-1)
|
|
283
|
+
return float(max_per_query_token.sum() / q_mv.shape[0])
|