sie-server 0.3.1__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sie_server-0.3.1 → sie_server-0.3.3}/.gitignore +3 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/Dockerfile.cpu +12 -5
- {sie_server-0.3.1 → sie_server-0.3.3}/Dockerfile.cuda11 +8 -2
- {sie_server-0.3.1 → sie_server-0.3.3}/Dockerfile.cuda12 +8 -2
- {sie_server-0.3.1 → sie_server-0.3.3}/PKG-INFO +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/bundles/default.yaml +3 -2
- {sie_server-0.3.1 → sie_server-0.3.3}/bundles/transformers5.yaml +4 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/answerdotai__answerai-colbert-small-v1.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/colbert-ir__colbertv2.0.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/docling.yaml +8 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/jinaai__jina-colbert-v2.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/lightonai__GTE-ModernColBERT-v1.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/lightonai__Reason-ModernColBERT.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +1 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/models/nvidia__llama-embed-nemotron-8b.yaml +11 -2
- sie_server-0.3.3/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +43 -0
- sie_server-0.3.3/models/tomoroai__tomoro-colqwen3-embed-4b.yaml +42 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/openapi.json +19 -2
- {sie_server-0.3.1 → sie_server-0.3.3}/pyproject.toml +5 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_base_adapter.py +49 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_utils.py +79 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/base.py +14 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash/__init__.py +1 -0
- sie_server-0.3.3/src/sie_server/adapters/colqwen3/__init__.py +337 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/docling/__init__.py +72 -21
- sie_server-0.3.3/src/sie_server/adapters/errors.py +2 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliclass/__init__.py +98 -29
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash/__init__.py +1 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nemo_colembed/__init__.py +175 -65
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/paddleocr_vl/__init__.py +2 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/sentence_transformer/__init__.py +5 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/sglang/__init__.py +7 -2
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/extract.py +3 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/helpers.py +12 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/options.py +15 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/app/app_factory.py +65 -7
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/cli.py +27 -4
- sie_server-0.3.3/src/sie_server/core/hf_env.py +37 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/load_errors.py +32 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/model_loader.py +244 -44
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/registry.py +19 -3
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/main.py +11 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/metrics.py +20 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/openapi.py +17 -2
- sie_server-0.3.3/src/sie_server/types/overflow_policy.py +5 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/responses.py +1 -0
- sie_server-0.3.3/tests/adapters/test_docling.py +356 -0
- sie_server-0.3.3/tests/adapters/test_gliclass_overflow_policy.py +102 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_paddleocr_vl.py +45 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_runtime_options.py +34 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_sentence_transformer.py +2 -1
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_visual_document.py +151 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_extract_integration.py +4 -3
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/app/test_app_factory.py +65 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/conftest.py +5 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_adaptive_batching.py +3 -3
- sie_server-0.3.3/tests/core/test_model_load_timeout.py +372 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_all_models.py +39 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_docker_integration.py +3 -3
- sie_server-0.3.1/tests/adapters/test_docling.py +0 -194
- {sie_server-0.3.1 → sie_server-0.3.3}/CONTRIBUTING.md +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/LICENSE +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/README.md +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/bundles/sglang.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-m3.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-reranker-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-reranker-large.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/GritLM__GritLM-7B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/NeuML__gliner-bert-tiny.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/answerdotai__ModernBERT-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/fastino__gliner2-base-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/google__embeddinggemma-300m.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-224.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-384.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/google__siglip2-base-patch16-224.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-base-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-large-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-small-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/jackboyla__glirel-large-v0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/microsoft__Florence-2-base-ft.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/microsoft__Florence-2-base.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/microsoft__Florence-2-large.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/naver__splade-v3.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/numind__NuNER_Zero-span.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/numind__NuNER_Zero.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/nvidia__NV-Embed-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/openai__clip-vit-base-patch32.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/openai__clip-vit-large-patch14.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/rasyosef__splade-mini.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_large-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_medium-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_multi-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_small-v2.1.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/vidore__colpali-v1.3-hf.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/models/zai-org__GLM-OCR.yaml +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_flash_base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_spec.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_types.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/clip/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colbert/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colpali/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/donut/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/florence2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliner/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliner2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/glirel/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/owlv2/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/siglip/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/encode.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/health.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/metrics.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/models.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/openai_compat.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/openapi.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/root.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/serialization.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/validation.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/ws.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/app/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/app/app_state_config.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/config/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/config/engine.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/config/model.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/adaptive_batching.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/batcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/deps.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/disk_cache.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/encode_pipeline.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/extract_cost.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/hot_reload.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/inference.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/inference_output.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/loader.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/logging.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/memory.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/oom.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/postprocessor.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/postprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/prepared.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/image.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/text.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/vision.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/readiness.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/shutdown.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/timing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/tokenizer.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/watcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/encode.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/model_worker.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/oom_recovery.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/types.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/nats_pull_loop.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/nats_subscriber.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/gpu.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/prometheus.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/telemetry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/tracing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/static/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/static/index.html +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/inputs.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/outputs.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/requests.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_bge_m3.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_bge_m3_flash.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_clip.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_colbert.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_docling_smoke.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_donut.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_factory_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_flash_base.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_florence2.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_glirel.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_glm_ocr.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_grounding_dino.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_gte_sparse.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_lighton_ocr.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_lora.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_lora_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_sglang.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_siglip.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_sparse_aggregation.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_stablebridge_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_stablebridge_pruner.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_dtype.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_endpoint.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_json_schema.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_timing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_validation.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_extract_oom.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_health.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_models.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_openai_compat.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_version_header.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_ws.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/app/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/config/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/config/test_bundle_coverage.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/config/test_config.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_batcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_disk_cache.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_hot_reload.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_idle_evict.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_inference.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_loader.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_logging.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_memory.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_oom_detection.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_postprocessor.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_postprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_prepared.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_preprocessor.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_preprocessor_registry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_quantization.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_readiness.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_async.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_core.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_deps.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_failed_state.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_memory.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_multi_model.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_shutdown.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_timing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_watcher.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_backpressure.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_core.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_extract.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_lora.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_options.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_score.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/worker/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/worker/test_oom_recovery.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/test_metrics.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/test_telemetry.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/test_tracing.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_nats_pull_loop.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_nats_pull_loop_batching.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_openapi_export.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_sdk_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_server_smoke.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_sparse_integration.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/type_defs/__init__.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/type_defs/test_inputs.py +0 -0
- {sie_server-0.3.1 → sie_server-0.3.3}/tests/type_defs/test_types.py +0 -0
|
@@ -18,14 +18,15 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
|
|
18
18
|
UV_LINK_MODE=copy \
|
|
19
19
|
PIP_DISABLE_PIP_VERSION_CHECK=1
|
|
20
20
|
|
|
21
|
-
# build-essential + git are builder-only; they do NOT leak into the runtime stage.
|
|
21
|
+
# build-essential + git + zlib1g-dev are builder-only; they do NOT leak into the runtime stage.
|
|
22
22
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
23
23
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
24
24
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
25
25
|
build-essential \
|
|
26
26
|
ca-certificates \
|
|
27
27
|
curl \
|
|
28
|
-
git
|
|
28
|
+
git \
|
|
29
|
+
zlib1g-dev
|
|
29
30
|
|
|
30
31
|
ARG UV_VERSION=0.9.28
|
|
31
32
|
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
|
|
@@ -39,7 +40,10 @@ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
|
39
40
|
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
40
41
|
|
|
41
42
|
# Stub source trees so pip accepts the editable installs during dep resolution.
|
|
42
|
-
|
|
43
|
+
# Also create empty bundles/ and models/ — referenced by force-include in
|
|
44
|
+
# pyproject.toml; hatchling resolves them at editable-metadata time even though
|
|
45
|
+
# real contents only land in the `base` stage.
|
|
46
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
|
|
43
47
|
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
44
48
|
&& touch src/sie_server/__init__.py
|
|
45
49
|
|
|
@@ -151,13 +155,16 @@ ENV DEBIAN_FRONTEND=noninteractive \
|
|
|
151
155
|
OMP_NUM_THREADS=4 \
|
|
152
156
|
MKL_NUM_THREADS=4
|
|
153
157
|
|
|
154
|
-
# Only the shared libs torch + pillow actually dlopen at runtime.
|
|
158
|
+
# Only the shared libs torch + pillow + rtree actually dlopen at runtime.
|
|
159
|
+
# libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
|
|
160
|
+
# rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
|
|
155
161
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
156
162
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
157
163
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
158
164
|
libgomp1 \
|
|
159
165
|
libjpeg62-turbo \
|
|
160
|
-
libpng16-16
|
|
166
|
+
libpng16-16 \
|
|
167
|
+
libspatialindex-c6
|
|
161
168
|
|
|
162
169
|
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
163
170
|
|
|
@@ -41,7 +41,10 @@ WORKDIR /app
|
|
|
41
41
|
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
42
42
|
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
# Stubs for editable metadata generation — bundles/ and models/ are referenced
|
|
45
|
+
# by force-include in pyproject.toml and must exist; real contents are copied
|
|
46
|
+
# in the base stage.
|
|
47
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
|
|
45
48
|
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
46
49
|
&& touch src/sie_server/__init__.py
|
|
47
50
|
|
|
@@ -149,6 +152,8 @@ FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
|
|
|
149
152
|
|
|
150
153
|
ENV DEBIAN_FRONTEND=noninteractive
|
|
151
154
|
|
|
155
|
+
# libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
|
|
156
|
+
# rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
|
|
152
157
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
153
158
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
154
159
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
@@ -156,7 +161,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
|
156
161
|
gcc \
|
|
157
162
|
libc6-dev \
|
|
158
163
|
libgomp1 \
|
|
159
|
-
libnuma1
|
|
164
|
+
libnuma1 \
|
|
165
|
+
libspatialindex-c6
|
|
160
166
|
|
|
161
167
|
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
162
168
|
|
|
@@ -44,7 +44,10 @@ WORKDIR /app
|
|
|
44
44
|
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
45
45
|
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
# Stubs for editable metadata generation — bundles/ and models/ are referenced
|
|
48
|
+
# by force-include in pyproject.toml and must exist; real contents are copied
|
|
49
|
+
# in the base stage.
|
|
50
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
|
|
48
51
|
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
49
52
|
&& touch src/sie_server/__init__.py
|
|
50
53
|
|
|
@@ -170,6 +173,8 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|
|
170
173
|
# libnuma1: required by sgl_kernel (SGLang bundle); import fails with a
|
|
171
174
|
# misleading SM-arch error without it.
|
|
172
175
|
# libgomp1: torch OpenMP runtime.
|
|
176
|
+
# libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
|
|
177
|
+
# rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
|
|
173
178
|
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
174
179
|
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
175
180
|
apt-get update && apt-get install -y --no-install-recommends \
|
|
@@ -177,7 +182,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
|
177
182
|
gcc \
|
|
178
183
|
libc6-dev \
|
|
179
184
|
libgomp1 \
|
|
180
|
-
libnuma1
|
|
185
|
+
libnuma1 \
|
|
186
|
+
libspatialindex-c6
|
|
181
187
|
|
|
182
188
|
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
183
189
|
|
|
@@ -12,6 +12,7 @@ adapters:
|
|
|
12
12
|
- sie_server.adapters.colbert_rotary_flash
|
|
13
13
|
- sie_server.adapters.colpali
|
|
14
14
|
- sie_server.adapters.colqwen2
|
|
15
|
+
- sie_server.adapters.colqwen3
|
|
15
16
|
- sie_server.adapters.grounding_dino
|
|
16
17
|
- sie_server.adapters.gte_sparse_flash
|
|
17
18
|
- sie_server.adapters.jina_flash_cross_encoder
|
|
@@ -44,7 +45,7 @@ adapters:
|
|
|
44
45
|
- sie_server.adapters.paddleocr_vl
|
|
45
46
|
deps:
|
|
46
47
|
# Most flash adapters; sentence_transformer needs >=4.57
|
|
47
|
-
transformers: '>=4.57'
|
|
48
|
+
transformers: '>=4.57,<5'
|
|
48
49
|
# Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
|
|
49
50
|
torch: '>=2.9,<2.10'
|
|
50
51
|
# bge_m3, bge_m3_flash, nemo_colembed, nomic_flash, xlm_roberta_flash
|
|
@@ -56,7 +57,7 @@ deps:
|
|
|
56
57
|
# bge_m3_flag
|
|
57
58
|
FlagEmbedding: '>=1.3'
|
|
58
59
|
# grounding_dino
|
|
59
|
-
pillow: ''
|
|
60
|
+
pillow: '>=11,<12'
|
|
60
61
|
# grounding_dino
|
|
61
62
|
requests: ''
|
|
62
63
|
# gliner, gliner_bi
|
|
@@ -13,5 +13,8 @@ deps:
|
|
|
13
13
|
transformers: '>=5.0'
|
|
14
14
|
# Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
|
|
15
15
|
torch: '>=2.9,<2.10'
|
|
16
|
-
huggingface-hub:
|
|
16
|
+
# No huggingface-hub override: transformers 5.x's own metadata
|
|
17
|
+
# pins it to >=1.3,<2.0. An override here was previously
|
|
18
|
+
# unsatisfiable against that pin and broke `uv run` resolution.
|
|
19
|
+
# Let transformers drive the version.
|
|
17
20
|
pillow: ''
|
|
@@ -18,10 +18,19 @@ profiles:
|
|
|
18
18
|
default:
|
|
19
19
|
max_batch_tokens: 8192
|
|
20
20
|
compute_precision: bfloat16
|
|
21
|
-
|
|
21
|
+
# Was sie_server.adapters.sglang:SGLangEmbeddingAdapter, but
|
|
22
|
+
# SGLang's generic transformers fallback hits
|
|
23
|
+
# `assert get_embedding is False` for this Llama-based arch
|
|
24
|
+
# (no model-specific embedding implementation registered). Route
|
|
25
|
+
# through PyTorchEmbeddingAdapter like NV-Embed-v2 (also Llama-
|
|
26
|
+
# based 7B+ embedder); the heavy lane has 96 GiB so non-paged
|
|
27
|
+
# attention is fine. May be reconciled with a colleague's
|
|
28
|
+
# in-flight SGLang/arch fix later.
|
|
29
|
+
adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
|
|
22
30
|
adapter_options:
|
|
23
31
|
loadtime:
|
|
24
|
-
|
|
32
|
+
trust_remote_code: true
|
|
33
|
+
attn_implementation: eager
|
|
25
34
|
runtime:
|
|
26
35
|
pooling: last_token
|
|
27
36
|
normalize: true
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
sie_id: nvidia/nemotron-colembed-vl-4b-v2
|
|
2
|
+
hf_id: nvidia/nemotron-colembed-vl-4b-v2
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: true
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode:
|
|
10
|
+
dense: null
|
|
11
|
+
sparse: null
|
|
12
|
+
multivector:
|
|
13
|
+
dim: 2560
|
|
14
|
+
score: null
|
|
15
|
+
extract: null
|
|
16
|
+
max_sequence_length: 8192
|
|
17
|
+
profiles:
|
|
18
|
+
default:
|
|
19
|
+
max_batch_tokens: 4096
|
|
20
|
+
compute_precision: bfloat16
|
|
21
|
+
adapter_path: sie_server.adapters.nemo_colembed:NemoColEmbedAdapter
|
|
22
|
+
adapter_options:
|
|
23
|
+
loadtime:
|
|
24
|
+
muvera_config:
|
|
25
|
+
num_repetitions: 40
|
|
26
|
+
num_simhash_projections: 6
|
|
27
|
+
projection_dim: null
|
|
28
|
+
final_projection_dim: 10240
|
|
29
|
+
token_dim: 2560
|
|
30
|
+
runtime:
|
|
31
|
+
normalize: true
|
|
32
|
+
batch_size: 4
|
|
33
|
+
muvera:
|
|
34
|
+
extends: default
|
|
35
|
+
adapter_options:
|
|
36
|
+
runtime:
|
|
37
|
+
normalize: true
|
|
38
|
+
batch_size: 4
|
|
39
|
+
output_types:
|
|
40
|
+
- dense
|
|
41
|
+
output_similarity:
|
|
42
|
+
dense: dot
|
|
43
|
+
muvera: {}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
sie_id: TomoroAI/tomoro-colqwen3-embed-4b
|
|
2
|
+
hf_id: TomoroAI/tomoro-colqwen3-embed-4b
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: true
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode:
|
|
10
|
+
dense: null
|
|
11
|
+
sparse: null
|
|
12
|
+
multivector:
|
|
13
|
+
dim: 320
|
|
14
|
+
score: null
|
|
15
|
+
extract: null
|
|
16
|
+
max_sequence_length: 8192
|
|
17
|
+
profiles:
|
|
18
|
+
default:
|
|
19
|
+
max_batch_tokens: 4096
|
|
20
|
+
compute_precision: bfloat16
|
|
21
|
+
adapter_path: sie_server.adapters.colqwen3:ColQwen3Adapter
|
|
22
|
+
adapter_options:
|
|
23
|
+
loadtime:
|
|
24
|
+
muvera_config:
|
|
25
|
+
num_repetitions: 40
|
|
26
|
+
num_simhash_projections: 6
|
|
27
|
+
projection_dim: null
|
|
28
|
+
final_projection_dim: 10240
|
|
29
|
+
token_dim: 320
|
|
30
|
+
trust_remote_code: true
|
|
31
|
+
runtime:
|
|
32
|
+
normalize: true
|
|
33
|
+
muvera:
|
|
34
|
+
extends: default
|
|
35
|
+
adapter_options:
|
|
36
|
+
runtime:
|
|
37
|
+
normalize: true
|
|
38
|
+
output_types:
|
|
39
|
+
- dense
|
|
40
|
+
output_similarity:
|
|
41
|
+
dense: dot
|
|
42
|
+
muvera: {}
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
"info": {
|
|
4
4
|
"title": "SIE Server",
|
|
5
5
|
"description": "Search Inference Engine - GPU inference server for search workloads",
|
|
6
|
-
"version": "0.3.
|
|
6
|
+
"version": "0.3.3"
|
|
7
7
|
},
|
|
8
8
|
"paths": {
|
|
9
9
|
"/": {
|
|
@@ -1740,7 +1740,7 @@
|
|
|
1740
1740
|
}
|
|
1741
1741
|
],
|
|
1742
1742
|
"default": null,
|
|
1743
|
-
"description": "Adapter-specific options",
|
|
1743
|
+
"description": "Adapter-specific options. Recognized sub-keys include 'overflow_policy' (one of 'default', 'truncate_text', 'error'; default 'default') controlling how inputs exceeding the model's max_sequence_length are handled.",
|
|
1744
1744
|
"title": "Options"
|
|
1745
1745
|
}
|
|
1746
1746
|
},
|
|
@@ -1763,6 +1763,23 @@
|
|
|
1763
1763
|
"location"
|
|
1764
1764
|
]
|
|
1765
1765
|
}
|
|
1766
|
+
},
|
|
1767
|
+
{
|
|
1768
|
+
"items": [
|
|
1769
|
+
{
|
|
1770
|
+
"text": "Apple Inc. was founded by Steve Jobs in Cupertino, California."
|
|
1771
|
+
}
|
|
1772
|
+
],
|
|
1773
|
+
"params": {
|
|
1774
|
+
"labels": [
|
|
1775
|
+
"person",
|
|
1776
|
+
"organization",
|
|
1777
|
+
"location"
|
|
1778
|
+
],
|
|
1779
|
+
"options": {
|
|
1780
|
+
"overflow_policy": "truncate_text"
|
|
1781
|
+
}
|
|
1782
|
+
}
|
|
1766
1783
|
}
|
|
1767
1784
|
],
|
|
1768
1785
|
"properties": {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sie-server"
|
|
3
|
-
version = "0.3.1"
|
|
3
|
+
version = "0.3.3"
|
|
4
4
|
description = "Search Inference Engine - GPU inference server for search workloads"
|
|
5
5
|
requires-python = ">=3.12,<3.13"
|
|
6
6
|
license = { text = "Apache-2.0" }
|
|
@@ -80,6 +80,10 @@ build-backend = "hatchling.build"
|
|
|
80
80
|
[tool.hatch.build.targets.wheel]
|
|
81
81
|
packages = ["src/sie_server"]
|
|
82
82
|
|
|
83
|
+
[tool.hatch.build.targets.wheel.force-include]
|
|
84
|
+
"models" = "sie_server/models"
|
|
85
|
+
"bundles" = "sie_server/bundles"
|
|
86
|
+
|
|
83
87
|
[tool.uv.sources]
|
|
84
88
|
# Prebuilt flash-attn wheel for torch 2.9 + cu128 (official wheels only go up to torch 2.8)
|
|
85
89
|
# Platform-specific: Linux x86_64 only. Non-Linux users should not install the flash-attn extra.
|
|
@@ -6,11 +6,15 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
|
6
6
|
|
|
7
7
|
from sie_server.adapters._spec import AdapterSpec
|
|
8
8
|
from sie_server.adapters._types import ERR_NOT_LOADED
|
|
9
|
+
from sie_server.adapters._utils import grouped_score_pairs
|
|
9
10
|
from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
|
|
10
11
|
|
|
11
12
|
if TYPE_CHECKING:
|
|
12
13
|
import torch
|
|
13
14
|
|
|
15
|
+
from sie_server.core.inference_output import ScoreOutput
|
|
16
|
+
from sie_server.types.inputs import Item
|
|
17
|
+
|
|
14
18
|
logger = logging.getLogger(__name__)
|
|
15
19
|
|
|
16
20
|
|
|
@@ -57,7 +61,16 @@ class BaseAdapter(ModelAdapter):
|
|
|
57
61
|
raise TypeError(msg)
|
|
58
62
|
|
|
59
63
|
if "score" in spec.outputs:
|
|
60
|
-
|
|
64
|
+
# BaseAdapter ships a default score_pairs() that delegates to score().
|
|
65
|
+
# Treat that default as "not implemented" for validation purposes:
|
|
66
|
+
# subclasses must override either score() or score_pairs() so the
|
|
67
|
+
# default delegate doesn't bottom out in ModelAdapter.score().
|
|
68
|
+
score_overridden = cls.score is not ModelAdapter.score
|
|
69
|
+
score_pairs_overridden = cls.score_pairs not in (
|
|
70
|
+
ModelAdapter.score_pairs,
|
|
71
|
+
BaseAdapter.score_pairs,
|
|
72
|
+
)
|
|
73
|
+
if not score_overridden and not score_pairs_overridden:
|
|
61
74
|
msg = f"{cls.__name__} declares 'score' in outputs but does not implement score() or score_pairs()"
|
|
62
75
|
raise TypeError(msg)
|
|
63
76
|
|
|
@@ -117,6 +130,41 @@ class BaseAdapter(ModelAdapter):
|
|
|
117
130
|
model_name=getattr(self, "_model_name_or_path", ""),
|
|
118
131
|
)
|
|
119
132
|
|
|
133
|
+
# -- Default batched scoring ---------------------------------------------
|
|
134
|
+
|
|
135
|
+
def score_pairs(
|
|
136
|
+
self,
|
|
137
|
+
queries: list[Item],
|
|
138
|
+
docs: list[Item],
|
|
139
|
+
*,
|
|
140
|
+
instruction: str | None = None,
|
|
141
|
+
options: dict[str, Any] | None = None,
|
|
142
|
+
) -> ScoreOutput:
|
|
143
|
+
"""Default ``score_pairs()`` that batches via per-query grouping.
|
|
144
|
+
|
|
145
|
+
Groups parallel ``(query, doc)`` pairs by ``(text, id, instruction)``
|
|
146
|
+
so each unique query is encoded once and its docs are scored as a
|
|
147
|
+
single ``score()`` call. Subclasses with a more efficient native
|
|
148
|
+
cross-batch path (e.g. cross-encoders that pack queries and docs
|
|
149
|
+
into one transformer pass) should override this.
|
|
150
|
+
|
|
151
|
+
Per-call ``options`` are not supported by this default delegate
|
|
152
|
+
(it dispatches per-query and cannot route options into ``score()``
|
|
153
|
+
without subclass-specific knowledge). If ``options`` is a non-empty
|
|
154
|
+
mapping, this raises ``NotImplementedError`` to surface the
|
|
155
|
+
unsupported configuration; pass ``options=None`` (or ``{}``) or
|
|
156
|
+
override ``score_pairs()`` with an options-aware implementation.
|
|
157
|
+
"""
|
|
158
|
+
if options:
|
|
159
|
+
msg = (
|
|
160
|
+
f"{type(self).__name__}.score_pairs(): per-call options are "
|
|
161
|
+
f"not supported by the default batching path "
|
|
162
|
+
f"(got options={options!r}). Override score_pairs() with an "
|
|
163
|
+
f"options-aware implementation."
|
|
164
|
+
)
|
|
165
|
+
raise NotImplementedError(msg)
|
|
166
|
+
return grouped_score_pairs(self.score, queries, docs, instruction=instruction)
|
|
167
|
+
|
|
120
168
|
# -- Shared helpers ------------------------------------------------------
|
|
121
169
|
|
|
122
170
|
def _check_loaded(self) -> None:
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Protocol
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from sie_server.core.inference_output import ScoreOutput
|
|
4
8
|
|
|
5
9
|
if TYPE_CHECKING:
|
|
6
10
|
import torch
|
|
@@ -8,6 +12,16 @@ if TYPE_CHECKING:
|
|
|
8
12
|
from sie_server.types.inputs import Item
|
|
9
13
|
|
|
10
14
|
|
|
15
|
+
class _ScoreFn(Protocol):
|
|
16
|
+
def __call__(
|
|
17
|
+
self,
|
|
18
|
+
query: Item,
|
|
19
|
+
items: list[Item],
|
|
20
|
+
*,
|
|
21
|
+
instruction: str | None = ...,
|
|
22
|
+
) -> list[float]: ...
|
|
23
|
+
|
|
24
|
+
|
|
11
25
|
# ---------------------------------------------------------------------------
|
|
12
26
|
# RoPE utilities (eliminates 7 identical copies)
|
|
13
27
|
# ---------------------------------------------------------------------------
|
|
@@ -140,3 +154,67 @@ def resolve_embedding_options(
|
|
|
140
154
|
opts.get("query_template", default_query_template),
|
|
141
155
|
opts.get("doc_template", default_doc_template),
|
|
142
156
|
)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# Score-pair grouping (shared by ColBERT-family adapters)
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def grouped_score_pairs(
|
|
165
|
+
score_fn: _ScoreFn,
|
|
166
|
+
queries: list[Item],
|
|
167
|
+
docs: list[Item],
|
|
168
|
+
*,
|
|
169
|
+
instruction: str | None = None,
|
|
170
|
+
) -> ScoreOutput:
|
|
171
|
+
"""Run a per-query ``score()`` callable over parallel (query, doc) pairs.
|
|
172
|
+
|
|
173
|
+
Groups pairs by ``(query.text, query.id, instruction)`` so each unique
|
|
174
|
+
query is encoded once and its docs are scored as one batch. Used by
|
|
175
|
+
ColBERT-family adapters to satisfy the worker's ``score_pairs()``
|
|
176
|
+
contract while reusing the optimized batched ``score()``.
|
|
177
|
+
|
|
178
|
+
Queries with ``text is None`` are not supported and raise ``ValueError``
|
|
179
|
+
(ColBERT scoring requires text). The grouping key is
|
|
180
|
+
``(query.text, query.id or "", instruction or "")`` — two distinct
|
|
181
|
+
``Item`` objects with identical text/id/instruction collapse to one
|
|
182
|
+
encoding pass.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
score_fn: Bound ``adapter.score(query, items, *, instruction=None)``.
|
|
186
|
+
queries: Query items (parallel to docs).
|
|
187
|
+
docs: Document items to score.
|
|
188
|
+
instruction: Optional instruction passed through to ``score_fn``.
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
``ScoreOutput`` with one float per pair, in the original input order.
|
|
192
|
+
|
|
193
|
+
Raises:
|
|
194
|
+
ValueError: If ``queries`` and ``docs`` lengths differ, or any query
|
|
195
|
+
lacks text.
|
|
196
|
+
"""
|
|
197
|
+
if len(queries) != len(docs):
|
|
198
|
+
msg = f"queries and docs must be parallel; got {len(queries)} vs {len(docs)}"
|
|
199
|
+
raise ValueError(msg)
|
|
200
|
+
|
|
201
|
+
if not docs:
|
|
202
|
+
return ScoreOutput(scores=np.zeros(0, dtype=np.float32), batch_size=0)
|
|
203
|
+
|
|
204
|
+
groups: dict[tuple[str, str, str], list[int]] = {}
|
|
205
|
+
for i, q in enumerate(queries):
|
|
206
|
+
if q.text is None:
|
|
207
|
+
msg = f"grouped_score_pairs requires queries[{i}].text; got None"
|
|
208
|
+
raise ValueError(msg)
|
|
209
|
+
key = (q.text, q.id or "", instruction or "")
|
|
210
|
+
groups.setdefault(key, []).append(i)
|
|
211
|
+
|
|
212
|
+
scores = np.zeros(len(docs), dtype=np.float32)
|
|
213
|
+
for indices in groups.values():
|
|
214
|
+
q = queries[indices[0]]
|
|
215
|
+
group_docs = [docs[i] for i in indices]
|
|
216
|
+
group_scores = score_fn(q, group_docs, instruction=instruction)
|
|
217
|
+
for idx, s in zip(indices, group_scores, strict=True):
|
|
218
|
+
scores[idx] = float(s)
|
|
219
|
+
|
|
220
|
+
return ScoreOutput(scores=scores, batch_size=len(docs))
|
|
@@ -127,6 +127,20 @@ class ModelAdapter(ABC):
|
|
|
127
127
|
device: Device string (e.g., "cuda:0", "cpu").
|
|
128
128
|
"""
|
|
129
129
|
|
|
130
|
+
def warmup(self) -> None:
|
|
131
|
+
"""Run a warmup forward pass on the loaded model.
|
|
132
|
+
|
|
133
|
+
Called by the model loader after ``load()`` has completed. The default
|
|
134
|
+
implementation is a no-op for adapters that do not need warmup. Adapters
|
|
135
|
+
that compile kernels on first call (e.g. flash-attention) or otherwise
|
|
136
|
+
benefit from a priming pass should override this and run a single
|
|
137
|
+
inference pass against a tiny synthetic input.
|
|
138
|
+
|
|
139
|
+
Splitting this from ``load()`` lets the cold-start instrumentation
|
|
140
|
+
attribute deserialize and warmup time separately.
|
|
141
|
+
"""
|
|
142
|
+
return
|
|
143
|
+
|
|
130
144
|
@abstractmethod
|
|
131
145
|
def unload(self) -> None:
|
|
132
146
|
"""Unload the model and free resources.
|