sie-server 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sie_server-0.3.1 → sie_server-0.3.2}/.gitignore +3 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/Dockerfile.cpu +4 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/Dockerfile.cuda11 +4 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/Dockerfile.cuda12 +4 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/PKG-INFO +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/answerdotai__answerai-colbert-small-v1.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/colbert-ir__colbertv2.0.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/docling.yaml +8 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/jinaai__jina-colbert-v2.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/lightonai__GTE-ModernColBERT-v1.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/lightonai__Reason-ModernColBERT.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/openapi.json +1 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/pyproject.toml +5 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_base_adapter.py +49 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_utils.py +79 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/base.py +14 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash/__init__.py +1 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/docling/__init__.py +72 -21
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/modernbert_flash/__init__.py +1 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/paddleocr_vl/__init__.py +2 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/sentence_transformer/__init__.py +5 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/app/app_factory.py +65 -7
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/cli.py +14 -3
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/model_loader.py +20 -3
- sie_server-0.3.2/tests/adapters/test_docling.py +356 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_paddleocr_vl.py +45 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_sentence_transformer.py +2 -1
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/app/test_app_factory.py +65 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/conftest.py +5 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_all_models.py +39 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_docker_integration.py +3 -3
- sie_server-0.3.1/tests/adapters/test_docling.py +0 -194
- {sie_server-0.3.1 → sie_server-0.3.2}/CONTRIBUTING.md +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/LICENSE +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/README.md +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/bundles/default.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/bundles/sglang.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/bundles/transformers5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-m3.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-reranker-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-reranker-large.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/GritLM__GritLM-7B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/NeuML__gliner-bert-tiny.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/answerdotai__ModernBERT-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/fastino__gliner2-base-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/google__embeddinggemma-300m.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/google__siglip-so400m-patch14-224.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/google__siglip-so400m-patch14-384.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/google__siglip2-base-patch16-224.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-base-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-large-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-small-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__multilingual-e5-large.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/jackboyla__glirel-large-v0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/microsoft__Florence-2-base-ft.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/microsoft__Florence-2-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/microsoft__Florence-2-large.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/naver__splade-v3.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/numind__NuNER_Zero-span.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/numind__NuNER_Zero.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/nvidia__NV-Embed-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/openai__clip-vit-base-patch32.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/openai__clip-vit-large-patch14.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/rasyosef__splade-mini.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_large-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_medium-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_multi-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_small-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/vidore__colpali-v1.3-hf.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/models/zai-org__GLM-OCR.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_flash_base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_spec.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_types.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/clip/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colbert/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colpali/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/donut/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/florence2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliclass/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliner/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliner2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/glirel/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/owlv2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/sglang/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/siglip/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/encode.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/health.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/helpers.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/metrics.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/models.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/openai_compat.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/openapi.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/options.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/root.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/serialization.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/validation.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/ws.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/app/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/app/app_state_config.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/config/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/config/engine.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/config/model.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/adaptive_batching.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/batcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/deps.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/disk_cache.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/encode_pipeline.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/extract_cost.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/hot_reload.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/inference.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/inference_output.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/load_errors.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/loader.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/logging.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/memory.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/oom.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/postprocessor.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/postprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/prepared.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/image.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/text.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/vision.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/readiness.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/shutdown.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/timing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/tokenizer.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/watcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/encode.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/model_worker.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/oom_recovery.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/types.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/main.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/nats_pull_loop.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/nats_subscriber.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/gpu.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/metrics.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/prometheus.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/telemetry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/tracing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/static/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/static/index.html +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/inputs.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/openapi.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/outputs.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/requests.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/responses.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_bge_m3.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_bge_m3_flash.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_clip.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_colbert.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_docling_smoke.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_donut.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_factory_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_flash_base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_florence2.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_glirel.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_glm_ocr.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_grounding_dino.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_gte_sparse.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_lighton_ocr.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_lora.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_lora_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_runtime_options.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_sglang.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_siglip.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_sparse_aggregation.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_stablebridge_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_stablebridge_pruner.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_visual_document.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_dtype.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_endpoint.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_json_schema.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_timing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_validation.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_extract_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_extract_oom.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_health.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_models.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_openai_compat.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_version_header.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_ws.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/app/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/config/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/config/test_bundle_coverage.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/config/test_config.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_adaptive_batching.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_batcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_disk_cache.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_hot_reload.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_idle_evict.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_inference.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_loader.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_logging.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_memory.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_oom_detection.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_postprocessor.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_postprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_prepared.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_preprocessor.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_preprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_quantization.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_readiness.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_async.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_core.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_deps.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_failed_state.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_memory.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_multi_model.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_shutdown.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_timing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_watcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_backpressure.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_core.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_lora.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_options.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/worker/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/worker/test_oom_recovery.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/test_metrics.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/test_telemetry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/test_tracing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_nats_pull_loop.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_nats_pull_loop_batching.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_openapi_export.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_sdk_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_server_smoke.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_sparse_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/type_defs/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/type_defs/test_inputs.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.2}/tests/type_defs/test_types.py +0 -0
|
@@ -39,7 +39,10 @@ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
|
39
39
|
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
40
40
|
|
|
41
41
|
# Stub source trees so pip accepts the editable installs during dep resolution.
|
|
42
|
-
|
|
42
|
+
# Also create empty bundles/ and models/ — referenced by force-include in
|
|
43
|
+
# pyproject.toml; hatchling resolves them at editable-metadata time even though
|
|
44
|
+
# real contents only land in the `base` stage.
|
|
45
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
|
|
43
46
|
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
44
47
|
&& touch src/sie_server/__init__.py
|
|
45
48
|
|
|
@@ -41,7 +41,10 @@ WORKDIR /app
|
|
|
41
41
|
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
42
42
|
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
# Stubs for editable metadata generation — bundles/ and models/ are referenced
|
|
45
|
+
# by force-include in pyproject.toml and must exist; real contents are copied
|
|
46
|
+
# in the base stage.
|
|
47
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
|
|
45
48
|
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
46
49
|
&& touch src/sie_server/__init__.py
|
|
47
50
|
|
|
@@ -44,7 +44,10 @@ WORKDIR /app
|
|
|
44
44
|
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
45
45
|
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
# Stubs for editable metadata generation — bundles/ and models/ are referenced
|
|
48
|
+
# by force-include in pyproject.toml and must exist; real contents are copied
|
|
49
|
+
# in the base stage.
|
|
50
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
|
|
48
51
|
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
49
52
|
&& touch src/sie_server/__init__.py
|
|
50
53
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sie-server"
|
|
3
|
-
version = "0.3.1"
|
|
3
|
+
version = "0.3.2"
|
|
4
4
|
description = "Search Inference Engine - GPU inference server for search workloads"
|
|
5
5
|
requires-python = ">=3.12,<3.13"
|
|
6
6
|
license = { text = "Apache-2.0" }
|
|
@@ -80,6 +80,10 @@ build-backend = "hatchling.build"
|
|
|
80
80
|
[tool.hatch.build.targets.wheel]
|
|
81
81
|
packages = ["src/sie_server"]
|
|
82
82
|
|
|
83
|
+
[tool.hatch.build.targets.wheel.force-include]
|
|
84
|
+
"models" = "sie_server/models"
|
|
85
|
+
"bundles" = "sie_server/bundles"
|
|
86
|
+
|
|
83
87
|
[tool.uv.sources]
|
|
84
88
|
# Prebuilt flash-attn wheel for torch 2.9 + cu128 (official wheels only go up to torch 2.8)
|
|
85
89
|
# Platform-specific: Linux x86_64 only. Non-Linux users should not install the flash-attn extra.
|
|
@@ -6,11 +6,15 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
|
6
6
|
|
|
7
7
|
from sie_server.adapters._spec import AdapterSpec
|
|
8
8
|
from sie_server.adapters._types import ERR_NOT_LOADED
|
|
9
|
+
from sie_server.adapters._utils import grouped_score_pairs
|
|
9
10
|
from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
|
|
10
11
|
|
|
11
12
|
if TYPE_CHECKING:
|
|
12
13
|
import torch
|
|
13
14
|
|
|
15
|
+
from sie_server.core.inference_output import ScoreOutput
|
|
16
|
+
from sie_server.types.inputs import Item
|
|
17
|
+
|
|
14
18
|
logger = logging.getLogger(__name__)
|
|
15
19
|
|
|
16
20
|
|
|
@@ -57,7 +61,16 @@ class BaseAdapter(ModelAdapter):
|
|
|
57
61
|
raise TypeError(msg)
|
|
58
62
|
|
|
59
63
|
if "score" in spec.outputs:
|
|
60
|
-
|
|
64
|
+
# BaseAdapter ships a default score_pairs() that delegates to score().
|
|
65
|
+
# Treat that default as "not implemented" for validation purposes:
|
|
66
|
+
# subclasses must override either score() or score_pairs() so the
|
|
67
|
+
# default delegate doesn't bottom out in ModelAdapter.score().
|
|
68
|
+
score_overridden = cls.score is not ModelAdapter.score
|
|
69
|
+
score_pairs_overridden = cls.score_pairs not in (
|
|
70
|
+
ModelAdapter.score_pairs,
|
|
71
|
+
BaseAdapter.score_pairs,
|
|
72
|
+
)
|
|
73
|
+
if not score_overridden and not score_pairs_overridden:
|
|
61
74
|
msg = f"{cls.__name__} declares 'score' in outputs but does not implement score() or score_pairs()"
|
|
62
75
|
raise TypeError(msg)
|
|
63
76
|
|
|
@@ -117,6 +130,41 @@ class BaseAdapter(ModelAdapter):
|
|
|
117
130
|
model_name=getattr(self, "_model_name_or_path", ""),
|
|
118
131
|
)
|
|
119
132
|
|
|
133
|
+
# -- Default batched scoring ---------------------------------------------
|
|
134
|
+
|
|
135
|
+
def score_pairs(
|
|
136
|
+
self,
|
|
137
|
+
queries: list[Item],
|
|
138
|
+
docs: list[Item],
|
|
139
|
+
*,
|
|
140
|
+
instruction: str | None = None,
|
|
141
|
+
options: dict[str, Any] | None = None,
|
|
142
|
+
) -> ScoreOutput:
|
|
143
|
+
"""Default ``score_pairs()`` that batches via per-query grouping.
|
|
144
|
+
|
|
145
|
+
Groups parallel ``(query, doc)`` pairs by ``(text, id, instruction)``
|
|
146
|
+
so each unique query is encoded once and its docs are scored as a
|
|
147
|
+
single ``score()`` call. Subclasses with a more efficient native
|
|
148
|
+
cross-batch path (e.g. cross-encoders that pack queries and docs
|
|
149
|
+
into one transformer pass) should override this.
|
|
150
|
+
|
|
151
|
+
Per-call ``options`` are not supported by this default delegate
|
|
152
|
+
(it dispatches per-query and cannot route options into ``score()``
|
|
153
|
+
without subclass-specific knowledge). If ``options`` is a non-empty
|
|
154
|
+
mapping, this raises ``NotImplementedError`` to surface the
|
|
155
|
+
unsupported configuration; pass ``options=None`` (or ``{}``) or
|
|
156
|
+
override ``score_pairs()`` with an options-aware implementation.
|
|
157
|
+
"""
|
|
158
|
+
if options:
|
|
159
|
+
msg = (
|
|
160
|
+
f"{type(self).__name__}.score_pairs(): per-call options are "
|
|
161
|
+
f"not supported by the default batching path "
|
|
162
|
+
f"(got options={options!r}). Override score_pairs() with an "
|
|
163
|
+
f"options-aware implementation."
|
|
164
|
+
)
|
|
165
|
+
raise NotImplementedError(msg)
|
|
166
|
+
return grouped_score_pairs(self.score, queries, docs, instruction=instruction)
|
|
167
|
+
|
|
120
168
|
# -- Shared helpers ------------------------------------------------------
|
|
121
169
|
|
|
122
170
|
def _check_loaded(self) -> None:
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Protocol
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from sie_server.core.inference_output import ScoreOutput
|
|
4
8
|
|
|
5
9
|
if TYPE_CHECKING:
|
|
6
10
|
import torch
|
|
@@ -8,6 +12,16 @@ if TYPE_CHECKING:
|
|
|
8
12
|
from sie_server.types.inputs import Item
|
|
9
13
|
|
|
10
14
|
|
|
15
|
+
class _ScoreFn(Protocol):
|
|
16
|
+
def __call__(
|
|
17
|
+
self,
|
|
18
|
+
query: Item,
|
|
19
|
+
items: list[Item],
|
|
20
|
+
*,
|
|
21
|
+
instruction: str | None = ...,
|
|
22
|
+
) -> list[float]: ...
|
|
23
|
+
|
|
24
|
+
|
|
11
25
|
# ---------------------------------------------------------------------------
|
|
12
26
|
# RoPE utilities (eliminates 7 identical copies)
|
|
13
27
|
# ---------------------------------------------------------------------------
|
|
@@ -140,3 +154,67 @@ def resolve_embedding_options(
|
|
|
140
154
|
opts.get("query_template", default_query_template),
|
|
141
155
|
opts.get("doc_template", default_doc_template),
|
|
142
156
|
)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# Score-pair grouping (shared by ColBERT-family adapters)
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def grouped_score_pairs(
|
|
165
|
+
score_fn: _ScoreFn,
|
|
166
|
+
queries: list[Item],
|
|
167
|
+
docs: list[Item],
|
|
168
|
+
*,
|
|
169
|
+
instruction: str | None = None,
|
|
170
|
+
) -> ScoreOutput:
|
|
171
|
+
"""Run a per-query ``score()`` callable over parallel (query, doc) pairs.
|
|
172
|
+
|
|
173
|
+
Groups pairs by ``(query.text, query.id, instruction)`` so each unique
|
|
174
|
+
query is encoded once and its docs are scored as one batch. Used by
|
|
175
|
+
ColBERT-family adapters to satisfy the worker's ``score_pairs()``
|
|
176
|
+
contract while reusing the optimized batched ``score()``.
|
|
177
|
+
|
|
178
|
+
Queries with ``text is None`` are not supported and raise ``ValueError``
|
|
179
|
+
(ColBERT scoring requires text). The grouping key is
|
|
180
|
+
``(query.text, query.id or "", instruction or "")`` — two distinct
|
|
181
|
+
``Item`` objects with identical text/id/instruction collapse to one
|
|
182
|
+
encoding pass.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
score_fn: Bound ``adapter.score(query, items, *, instruction=None)``.
|
|
186
|
+
queries: Query items (parallel to docs).
|
|
187
|
+
docs: Document items to score.
|
|
188
|
+
instruction: Optional instruction passed through to ``score_fn``.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
``ScoreOutput`` with one float per pair, in the original input order.
|
|
192
|
+
|
|
193
|
+
Raises:
|
|
194
|
+
ValueError: If ``queries`` and ``docs`` lengths differ, or any query
|
|
195
|
+
lacks text.
|
|
196
|
+
"""
|
|
197
|
+
if len(queries) != len(docs):
|
|
198
|
+
msg = f"queries and docs must be parallel; got {len(queries)} vs {len(docs)}"
|
|
199
|
+
raise ValueError(msg)
|
|
200
|
+
|
|
201
|
+
if not docs:
|
|
202
|
+
return ScoreOutput(scores=np.zeros(0, dtype=np.float32), batch_size=0)
|
|
203
|
+
|
|
204
|
+
groups: dict[tuple[str, str, str], list[int]] = {}
|
|
205
|
+
for i, q in enumerate(queries):
|
|
206
|
+
if q.text is None:
|
|
207
|
+
msg = f"grouped_score_pairs requires queries[{i}].text; got None"
|
|
208
|
+
raise ValueError(msg)
|
|
209
|
+
key = (q.text, q.id or "", instruction or "")
|
|
210
|
+
groups.setdefault(key, []).append(i)
|
|
211
|
+
|
|
212
|
+
scores = np.zeros(len(docs), dtype=np.float32)
|
|
213
|
+
for indices in groups.values():
|
|
214
|
+
q = queries[indices[0]]
|
|
215
|
+
group_docs = [docs[i] for i in indices]
|
|
216
|
+
group_scores = score_fn(q, group_docs, instruction=instruction)
|
|
217
|
+
for idx, s in zip(indices, group_scores, strict=True):
|
|
218
|
+
scores[idx] = float(s)
|
|
219
|
+
|
|
220
|
+
return ScoreOutput(scores=scores, batch_size=len(docs))
|
|
@@ -127,6 +127,20 @@ class ModelAdapter(ABC):
|
|
|
127
127
|
device: Device string (e.g., "cuda:0", "cpu").
|
|
128
128
|
"""
|
|
129
129
|
|
|
130
|
+
def warmup(self) -> None:
|
|
131
|
+
"""Run a warmup forward pass on the loaded model.
|
|
132
|
+
|
|
133
|
+
Called by the model loader after ``load()`` has completed. The default
|
|
134
|
+
implementation is a no-op for adapters that do not need warmup. Adapters
|
|
135
|
+
that compile kernels on first call (e.g. flash-attention) or otherwise
|
|
136
|
+
benefit from a priming pass should override this and run a single
|
|
137
|
+
inference pass against a tiny synthetic input.
|
|
138
|
+
|
|
139
|
+
Splitting this from ``load()`` lets the cold-start instrumentation
|
|
140
|
+
attribute deserialize and warmup time separately.
|
|
141
|
+
"""
|
|
142
|
+
return
|
|
143
|
+
|
|
130
144
|
@abstractmethod
|
|
131
145
|
def unload(self) -> None:
|
|
132
146
|
"""Unload the model and free resources.
|
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import io
|
|
4
4
|
import logging
|
|
5
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
6
5
|
from typing import TYPE_CHECKING, Any, ClassVar
|
|
7
6
|
|
|
8
7
|
from sie_server.adapters._base_adapter import BaseAdapter
|
|
@@ -52,11 +51,17 @@ class DoclingAdapter(BaseAdapter):
|
|
|
52
51
|
OCR is disabled by default for speed and predictability. Pass
|
|
53
52
|
``options={"ocr": True}`` per request to enable it.
|
|
54
53
|
|
|
55
|
-
Concurrency:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
54
|
+
Concurrency: one ``DocumentConverter`` is cached per ``ocr_enabled`` value
|
|
55
|
+
on the adapter instance. ``self._device`` is set once in ``load()`` and is
|
|
56
|
+
stable for the adapter's lifetime, so the effective cache key is
|
|
57
|
+
``(self._device, ocr_enabled)`` and at most two converters ever exist per
|
|
58
|
+
adapter instance. Cross-request serialization is provided by
|
|
59
|
+
``ModelWorker._inference_executor`` (max_workers=1), so the cache itself
|
|
60
|
+
does not need a lock. Items within one batch are processed serially
|
|
61
|
+
(rather than via a per-item thread pool) to sidestep the converter's known
|
|
62
|
+
thread-safety issue (https://github.com/docling-project/docling/issues/115);
|
|
63
|
+
at GPU-bound concurrency the upstream worker is already saturating the
|
|
64
|
+
device, so intra-batch parallelism does not buy real throughput.
|
|
60
65
|
"""
|
|
61
66
|
|
|
62
67
|
spec: ClassVar[AdapterSpec] = AdapterSpec(
|
|
@@ -69,12 +74,13 @@ class DoclingAdapter(BaseAdapter):
|
|
|
69
74
|
self,
|
|
70
75
|
model_name_or_path: str | None = None, # unused; Docling is package-backed
|
|
71
76
|
*,
|
|
72
|
-
compute_precision: str | None = None, # unused;
|
|
77
|
+
compute_precision: str | None = None, # unused; device is threaded via load()
|
|
73
78
|
**kwargs: Any,
|
|
74
79
|
) -> None:
|
|
75
80
|
_ = (model_name_or_path, compute_precision, kwargs)
|
|
76
81
|
self._loaded = False
|
|
77
82
|
self._device: str | None = None
|
|
83
|
+
self._converters: dict[bool, Any] = {}
|
|
78
84
|
|
|
79
85
|
def load(self, device: str) -> None:
|
|
80
86
|
self._device = device
|
|
@@ -82,12 +88,20 @@ class DoclingAdapter(BaseAdapter):
|
|
|
82
88
|
# the first real request doesn't block on a multi-hundred-MB pull.
|
|
83
89
|
# Models cache globally, so subsequent per-task converters are cheap.
|
|
84
90
|
try:
|
|
85
|
-
warm_converter = self._make_converter(ocr_enabled=False)
|
|
91
|
+
warm_converter = self._get_converter(ocr_enabled=False)
|
|
86
92
|
self._convert_bytes(warm_converter, _TINY_PDF_BYTES, format_hint="pdf")
|
|
93
|
+
# Also build the OCR-enabled converter so the first ocr-profile
|
|
94
|
+
# request doesn't pay layout+OCR model-init latency.
|
|
95
|
+
self._get_converter(ocr_enabled=True)
|
|
87
96
|
except Exception:
|
|
88
97
|
logger.exception("Docling pre-warm failed; first real request may be slow")
|
|
89
98
|
self._loaded = True
|
|
90
99
|
|
|
100
|
+
def unload(self) -> None:
|
|
101
|
+
self._converters.clear()
|
|
102
|
+
self._loaded = False
|
|
103
|
+
super().unload()
|
|
104
|
+
|
|
91
105
|
def extract(
|
|
92
106
|
self,
|
|
93
107
|
items: list[Item],
|
|
@@ -113,28 +127,35 @@ class DoclingAdapter(BaseAdapter):
|
|
|
113
127
|
)
|
|
114
128
|
|
|
115
129
|
def _run_extract(self, items: list[Item], *, ocr_enabled: bool) -> list[dict[str, Any]]:
|
|
116
|
-
"""Run extract per-item,
|
|
130
|
+
"""Run extract per-item, serially.
|
|
117
131
|
|
|
118
|
-
|
|
132
|
+
Items are processed one at a time so we can share a single cached
|
|
133
|
+
DocumentConverter (see class docstring). At GPU-bound concurrency the
|
|
134
|
+
worker-level inference executor is already saturating the device, so
|
|
135
|
+
intra-batch parallelism does not buy real throughput.
|
|
119
136
|
"""
|
|
120
|
-
|
|
121
|
-
return [self._extract_one(item, ocr_enabled=ocr_enabled) for item in items]
|
|
122
|
-
|
|
123
|
-
with ThreadPoolExecutor(max_workers=min(len(items), 4)) as pool:
|
|
124
|
-
futures = [pool.submit(self._extract_one, item, ocr_enabled=ocr_enabled) for item in items]
|
|
125
|
-
return [f.result() for f in futures]
|
|
137
|
+
return [self._extract_one(item, ocr_enabled=ocr_enabled) for item in items]
|
|
126
138
|
|
|
127
139
|
def _extract_one(self, item: Item, *, ocr_enabled: bool) -> dict[str, Any]:
|
|
128
140
|
document = item.document
|
|
129
141
|
if not is_document_input(document):
|
|
130
142
|
return {"error": _ERR_REQUIRES_DOCUMENT}
|
|
131
143
|
try:
|
|
132
|
-
converter = self._make_converter(ocr_enabled=ocr_enabled)
|
|
144
|
+
converter = self._get_converter(ocr_enabled=ocr_enabled)
|
|
133
145
|
return self._convert_bytes(converter, document["data"], format_hint=document.get("format"))
|
|
134
146
|
except Exception as e: # noqa: BLE001 - per-item failure must not poison the batch
|
|
135
147
|
logger.warning("Docling extract failed for item id=%s: %s", item.id, e)
|
|
136
148
|
return {"error": str(e)}
|
|
137
149
|
|
|
150
|
+
def _get_converter(self, *, ocr_enabled: bool) -> Any:
|
|
151
|
+
"""Return the cached DocumentConverter for this ocr_enabled value, building lazily on first use."""
|
|
152
|
+
cached = self._converters.get(ocr_enabled)
|
|
153
|
+
if cached is not None:
|
|
154
|
+
return cached
|
|
155
|
+
converter = self._make_converter(ocr_enabled=ocr_enabled)
|
|
156
|
+
self._converters[ocr_enabled] = converter
|
|
157
|
+
return converter
|
|
158
|
+
|
|
138
159
|
def _convert_bytes(self, converter: Any, data: bytes, *, format_hint: str | None) -> dict[str, Any]:
|
|
139
160
|
from docling.datamodel.base_models import DocumentStream # ty: ignore[unresolved-import]
|
|
140
161
|
|
|
@@ -151,15 +172,45 @@ class DoclingAdapter(BaseAdapter):
|
|
|
151
172
|
}
|
|
152
173
|
|
|
153
174
|
def _make_converter(self, *, ocr_enabled: bool) -> Any:
|
|
154
|
-
"""Build a fresh DocumentConverter.
|
|
175
|
+
"""Build a fresh DocumentConverter. Callers should usually go through _get_converter() for caching.
|
|
176
|
+
|
|
177
|
+
Threads self._device through Docling's AcceleratorOptions so layout, table,
|
|
178
|
+
and OCR models actually run on the configured device. Without this, Docling
|
|
179
|
+
silently defaults to CPU regardless of how SIE was launched.
|
|
180
|
+
"""
|
|
155
181
|
from docling.document_converter import DocumentConverter # ty: ignore[unresolved-import]
|
|
156
182
|
|
|
157
|
-
|
|
158
|
-
return DocumentConverter()
|
|
183
|
+
accelerator_options = self._build_accelerator_options()
|
|
159
184
|
|
|
160
185
|
from docling.datamodel.base_models import InputFormat # ty: ignore[unresolved-import]
|
|
161
186
|
from docling.datamodel.pipeline_options import PdfPipelineOptions # ty: ignore[unresolved-import]
|
|
162
187
|
from docling.document_converter import PdfFormatOption # ty: ignore[unresolved-import]
|
|
163
188
|
|
|
164
|
-
|
|
189
|
+
# Pass do_ocr explicitly on both paths. Docling's PdfPipelineOptions defaults
|
|
190
|
+
# do_ocr=True, so an unset default would silently OCR every PDF and make the
|
|
191
|
+
# `ocr` profile a no-op vs. the default profile.
|
|
192
|
+
pdf_kwargs: dict[str, Any] = {"do_ocr": ocr_enabled}
|
|
193
|
+
if accelerator_options is not None:
|
|
194
|
+
pdf_kwargs["accelerator_options"] = accelerator_options
|
|
195
|
+
pdf_opts = PdfPipelineOptions(**pdf_kwargs)
|
|
165
196
|
return DocumentConverter(format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_opts)})
|
|
197
|
+
|
|
198
|
+
def _build_accelerator_options(self) -> Any:
|
|
199
|
+
"""Translate self._device into a Docling AcceleratorOptions, or None."""
|
|
200
|
+
if not self._device:
|
|
201
|
+
return None
|
|
202
|
+
from docling.datamodel.accelerator_options import AcceleratorOptions # ty: ignore[unresolved-import]
|
|
203
|
+
|
|
204
|
+
try:
|
|
205
|
+
return AcceleratorOptions(device=str(self._device))
|
|
206
|
+
except Exception as e: # noqa: BLE001 - pydantic validation; fall back to auto
|
|
207
|
+
logger.warning(
|
|
208
|
+
"Docling: invalid device %r, falling back to 'auto' (%s)",
|
|
209
|
+
self._device,
|
|
210
|
+
e,
|
|
211
|
+
)
|
|
212
|
+
try:
|
|
213
|
+
return AcceleratorOptions(device="auto")
|
|
214
|
+
except Exception:
|
|
215
|
+
logger.exception("Docling: failed to build AcceleratorOptions even with 'auto'")
|
|
216
|
+
return None
|
|
@@ -293,6 +293,7 @@ class PaddleOCRVLAdapter(BaseAdapter):
|
|
|
293
293
|
max_new_tokens=max_new_tokens,
|
|
294
294
|
do_sample=False,
|
|
295
295
|
num_beams=num_beams,
|
|
296
|
+
use_cache=True,
|
|
296
297
|
)
|
|
297
298
|
|
|
298
299
|
generated_ids = output_ids[0, prompt_len:]
|
|
@@ -344,6 +345,7 @@ class PaddleOCRVLAdapter(BaseAdapter):
|
|
|
344
345
|
max_new_tokens=max_new_tokens,
|
|
345
346
|
do_sample=False,
|
|
346
347
|
num_beams=num_beams,
|
|
348
|
+
use_cache=True,
|
|
347
349
|
)
|
|
348
350
|
|
|
349
351
|
generated_ids = output_ids[0, prompt_len:]
|
{sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/sentence_transformer/__init__.py
RENAMED
|
@@ -82,13 +82,17 @@ class SentenceTransformerDenseAdapter(BaseAdapter):
|
|
|
82
82
|
trust_remote_code=self._trust_remote_code,
|
|
83
83
|
config_kwargs=self._config_kwargs,
|
|
84
84
|
)
|
|
85
|
-
_ = self._model.encode(["warmup"], convert_to_numpy=True, show_progress_bar=False)
|
|
86
85
|
|
|
87
86
|
if self._max_seq_length is not None:
|
|
88
87
|
self._model.max_seq_length = self._max_seq_length
|
|
89
88
|
|
|
90
89
|
self._dense_dim = self._model.get_embedding_dimension()
|
|
91
90
|
|
|
91
|
+
def warmup(self) -> None:
|
|
92
|
+
if self._model is None:
|
|
93
|
+
return
|
|
94
|
+
_ = self._model.encode(["warmup"], convert_to_numpy=True, show_progress_bar=False)
|
|
95
|
+
|
|
92
96
|
def encode(
|
|
93
97
|
self,
|
|
94
98
|
items: list[Item],
|