sie-server 0.6.2__tar.gz → 0.6.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sie_server-0.6.2 → sie_server-0.6.4}/PKG-INFO +1 -1
- sie_server-0.6.4/models/MoritzLaurer__ModernBERT-base-zeroshot-v2.0.yaml +22 -0
- sie_server-0.6.4/models/facebook__bart-large-mnli.yaml +22 -0
- sie_server-0.6.4/models/fastino__gliner2-large-v1.yaml +20 -0
- sie_server-0.6.4/models/google__owlv2-large-patch14-ensemble.yaml +21 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/openapi.json +1 -1
- {sie_server-0.6.2 → sie_server-0.6.4}/pyproject.toml +1 -1
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/_ipc_test_harness.py +46 -1
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_generation_base.py +9 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nemo_colembed/__init__.py +41 -5
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/owlv2/__init__.py +1 -1
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/generation.py +57 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/app/app_state_config.py +1 -1
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/cli.py +8 -2
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/loader.py +2 -2
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/model_loader.py +10 -5
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/registry.py +2 -2
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/streaming.py +423 -9
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_runtime_options.py +4 -1
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sglang_generation.py +108 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_streaming.py +480 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/.gitignore +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/CONTRIBUTING.md +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/Dockerfile.cpu +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/Dockerfile.cuda12 +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/LICENSE +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/README.md +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/bundles/default.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/bundles/sglang-embedding.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/bundles/sglang.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/bundles/transformers5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-m3.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-reranker-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-reranker-large.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/GritLM__GritLM-7B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Marqo__marqo-fashionSigLIP.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/NeuML__gliner-bert-tiny.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-0.6B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-4B-Instruct-2507.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3.5-4B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3.6-27B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Snowflake__snowflake-arctic-embed-l-v2.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/TomoroAI__tomoro-colqwen3-embed-4b.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/answerdotai__ModernBERT-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/colbert-ir__colbertv2.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/defog__sqlcoder-7b-2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/docling.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/fastino__gliner2-base-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/google__embeddinggemma-300m.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/google__siglip-so400m-patch14-224.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/google__siglip-so400m-patch14-384.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/google__siglip2-base-patch16-224.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-guardian-3.0-2b.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-base-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-large-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-small-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__multilingual-e5-large.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/jackboyla__glirel-large-v0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/jinaai__jina-colbert-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/microsoft__Florence-2-base-ft.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/microsoft__Florence-2-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/microsoft__Florence-2-large.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-embed-large-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/naver__splade-v3.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/nomic-ai__modernbert-embed-base.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/numind__NuNER_Zero-span.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/numind__NuNER_Zero.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__NV-Embed-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/openai__clip-vit-base-patch32.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/openai__clip-vit-large-patch14.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opendatalab__MinerU2.5-Pro-2604-1.2B.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/rasyosef__splade-mini.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_large-v2.1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_medium-v2.1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_multi-v2.1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_small-v2.1.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/vidore__colpali-v1.3-hf.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/models/zai-org__GLM-OCR.yaml +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/scripts/generate_tokenize_fixture.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapter_call_loop.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_base_adapter.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_flash_base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_spec.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_types.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_utils.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bert_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/clip/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colbert/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colpali/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colqwen3/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/docling/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/donut/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/errors.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/florence2/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliclass/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliner/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliner2/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/glirel/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/mineru_vl/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/modernbert_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/paddleocr_vl/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sentence_transformer/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/_server.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/embedding.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/siglip/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/encode.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/extract.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/generate.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/health.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/helpers.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/metrics.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/models.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/openai_compat.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/openapi.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/options.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/root.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/score.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/serialization.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/validation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/ws.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/app/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/app/app_factory.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/config/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/config/engine.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/config/model.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/adaptive_batching.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/batcher.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/deps.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/disk_cache.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/encode_pipeline.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/extract_cost.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/gpu_health.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/hf_env.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/hot_reload.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/inference.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/inference_output.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/load_errors.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/logging.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/memory.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/oom.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/pool_isolation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/postprocessor.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/postprocessor_registry.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/prepared.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/image.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/text.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/vision.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor_registry.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/readiness.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/shutdown.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/text_tokens.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/timing.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/tokenizer.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/watcher.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/encode.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/extract.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/score.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/model_worker.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/oom_recovery.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/types.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/health/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/health/nats_publisher.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/health/saturation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/ipc_server.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/ipc_types.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/main.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/gpu.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/metrics.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/prometheus.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/telemetry.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/tracing.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/admission.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/grammar_cache.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/grammar_compile.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/tool_call_grammar.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/tool_call_parser.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/work_class_scheduler.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/queue_executor.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/static/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/static/index.html +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/grammar.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/inputs.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/openapi.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/outputs.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/overflow_policy.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/requests.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/responses.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_bge_m3.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_bge_m3_flash.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_clip.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_colbert.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_docling.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_docling_smoke.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_donut.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_factory_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_flash_base.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_florence2.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_gliclass_overflow_policy.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_glirel.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_glm_ocr.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_grounding_dino.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_gte_sparse.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_lighton_ocr.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_lora.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_lora_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_mineru_vl.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_paddleocr_vl.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_pytorch_embedding_revision.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sentence_transformer.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sglang.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_siglip.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sparse_aggregation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_stablebridge_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_stablebridge_pruner.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_visual_document.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_dtype.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_endpoint.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_json_schema.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_timing.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_validation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_extract.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_extract_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_extract_oom.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_generate.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_health.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_models.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_openai_compat.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_score.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_version_header.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_ws.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/app/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/app/test_app_factory.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_bundle_coverage.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_config.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_model_prewarm_grammars.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_profile_backend_consistency.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/conftest.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_adaptive_batching.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_batcher.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_disk_cache.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_gpu_health.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_hot_reload.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_idle_evict.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_inference.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_loader.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_logging.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_lora_generation_exclusion.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_memory.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_model_load_timeout.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_oom_detection.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_pool_isolation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_postprocessor.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_postprocessor_registry.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_prepared.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_preprocessor.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_preprocessor_registry.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_quantization.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_readiness.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_async.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_core.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_deps.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_failed_state.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_memory.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_multi_model.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_shutdown.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_timing.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_watcher.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_backpressure.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_core.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_extract.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_lora.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_options.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_passthrough.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_score.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/worker/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/worker/test_oom_recovery.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/test_nats_publisher.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/test_saturation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/test_worker_id_consistency.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/integration/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/integration/test_chat_completions.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/integration/test_grammar_generate.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_generation_metrics.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_metrics.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_telemetry.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_trace_propagation.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_tracing.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_grammar_cache.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_grammar_compile.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_grammar_prewarm.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_streaming_admission.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_streaming_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_tool_call_grammar.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_tool_call_parser.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_work_class_scheduler.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_adapter_call_loop.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_all_models.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_docker_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_ipc_server.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_ipc_types_raw_output.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_model_yaml_filenames.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_openapi_export.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_parity_run_batch.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_queue_executor.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_queue_executor_stage1d.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_readiness.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_sdk_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_server_smoke.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_sparse_integration.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_stage1d_byte_identity.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/__init__.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_inputs.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_inputs_json_decode.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_media_bytes.py +0 -0
- {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_types.py +0 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
sie_id: MoritzLaurer/ModernBERT-base-zeroshot-v2.0
|
|
2
|
+
hf_id: MoritzLaurer/ModernBERT-base-zeroshot-v2.0
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: false
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode: null
|
|
10
|
+
score: null
|
|
11
|
+
extract: {}
|
|
12
|
+
max_sequence_length: 512
|
|
13
|
+
profiles:
|
|
14
|
+
default:
|
|
15
|
+
max_batch_tokens: 16384
|
|
16
|
+
compute_precision: null
|
|
17
|
+
adapter_path: sie_server.adapters.nli_classification_flash:NLIClassificationFlashAdapter
|
|
18
|
+
adapter_options:
|
|
19
|
+
loadtime: {}
|
|
20
|
+
runtime:
|
|
21
|
+
hypothesis_template: This text is about {}.
|
|
22
|
+
multi_label: false
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
sie_id: facebook/bart-large-mnli
|
|
2
|
+
hf_id: facebook/bart-large-mnli
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: false
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode: null
|
|
10
|
+
score: null
|
|
11
|
+
extract: {}
|
|
12
|
+
max_sequence_length: 512
|
|
13
|
+
profiles:
|
|
14
|
+
default:
|
|
15
|
+
max_batch_tokens: 16384
|
|
16
|
+
compute_precision: null
|
|
17
|
+
adapter_path: sie_server.adapters.nli_classification_flash:NLIClassificationFlashAdapter
|
|
18
|
+
adapter_options:
|
|
19
|
+
loadtime: {}
|
|
20
|
+
runtime:
|
|
21
|
+
hypothesis_template: This text is about {}.
|
|
22
|
+
multi_label: false
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
sie_id: fastino/gliner2-large-v1
|
|
2
|
+
hf_id: fastino/gliner2-large-v1
|
|
3
|
+
inputs:
|
|
4
|
+
text: true
|
|
5
|
+
image: false
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode: null
|
|
10
|
+
score: null
|
|
11
|
+
extract: {}
|
|
12
|
+
max_sequence_length: 512
|
|
13
|
+
profiles:
|
|
14
|
+
default:
|
|
15
|
+
max_batch_tokens: 16384
|
|
16
|
+
compute_precision: float16
|
|
17
|
+
adapter_path: sie_server.adapters.gliner2:GLiNER2Adapter
|
|
18
|
+
adapter_options:
|
|
19
|
+
loadtime: {}
|
|
20
|
+
runtime: {}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
sie_id: google/owlv2-large-patch14-ensemble
|
|
2
|
+
hf_id: google/owlv2-large-patch14-ensemble
|
|
3
|
+
inputs:
|
|
4
|
+
text: false
|
|
5
|
+
image: true
|
|
6
|
+
audio: false
|
|
7
|
+
video: false
|
|
8
|
+
tasks:
|
|
9
|
+
encode: null
|
|
10
|
+
score: null
|
|
11
|
+
extract: {}
|
|
12
|
+
profiles:
|
|
13
|
+
default:
|
|
14
|
+
max_batch_tokens: 16384
|
|
15
|
+
compute_precision: float16
|
|
16
|
+
adapter_path: sie_server.adapters.owlv2:Owlv2Adapter
|
|
17
|
+
adapter_options:
|
|
18
|
+
loadtime:
|
|
19
|
+
score_threshold: 0.1
|
|
20
|
+
runtime:
|
|
21
|
+
score_threshold: 0.1
|
|
@@ -136,7 +136,7 @@ class _StubExecutor:
|
|
|
136
136
|
|
|
137
137
|
async def process_extract_batch(self, req: ProcessExtractBatchRequest) -> BatchOutcome:
|
|
138
138
|
await self._maybe_sleep()
|
|
139
|
-
return
|
|
139
|
+
return _canned_extract_batch_outcome(req.items)
|
|
140
140
|
|
|
141
141
|
|
|
142
142
|
class _FakeGenerateProcessor:
|
|
@@ -179,6 +179,51 @@ def _canned_batch_outcome(items: list[Any]) -> BatchOutcome:
|
|
|
179
179
|
)
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
def _extract_document_echo(item: Any) -> dict[str, Any]:
|
|
183
|
+
document = item.item.get("document") if isinstance(item.item, dict) else None
|
|
184
|
+
if not isinstance(document, dict):
|
|
185
|
+
return {
|
|
186
|
+
"present": False,
|
|
187
|
+
"data_is_bytes": False,
|
|
188
|
+
"data": b"",
|
|
189
|
+
"data_len": 0,
|
|
190
|
+
"format": None,
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
data = document.get("data")
|
|
194
|
+
data_is_bytes = isinstance(data, bytes | bytearray)
|
|
195
|
+
data_bytes = bytes(data) if data_is_bytes else b""
|
|
196
|
+
return {
|
|
197
|
+
"present": True,
|
|
198
|
+
"data_is_bytes": data_is_bytes,
|
|
199
|
+
"data": data_bytes,
|
|
200
|
+
"data_len": len(data_bytes),
|
|
201
|
+
"format": document.get("format"),
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _canned_extract_batch_outcome(items: list[Any]) -> BatchOutcome:
|
|
206
|
+
outcomes: list[ItemOutcome] = []
|
|
207
|
+
for item in items:
|
|
208
|
+
payload = msgpack.packb(
|
|
209
|
+
{**_CANNED_RESULT, "extract_document": _extract_document_echo(item)},
|
|
210
|
+
use_bin_type=True,
|
|
211
|
+
)
|
|
212
|
+
outcomes.append(
|
|
213
|
+
ItemOutcome(
|
|
214
|
+
work_item_id=item.work_item_id,
|
|
215
|
+
request_id=item.request_id,
|
|
216
|
+
item_index=item.item_index,
|
|
217
|
+
disposition="publish_and_ack",
|
|
218
|
+
result_msgpack=payload,
|
|
219
|
+
inference_ms=0.1,
|
|
220
|
+
tokenization_ms=0.05,
|
|
221
|
+
postprocessing_ms=0.01,
|
|
222
|
+
)
|
|
223
|
+
)
|
|
224
|
+
return BatchOutcome(outcomes=outcomes)
|
|
225
|
+
|
|
226
|
+
|
|
182
227
|
def _canned_batch_outcome_echoing_prepared_tokens(items: list[Any]) -> BatchOutcome:
|
|
183
228
|
"""Like :func:`_canned_batch_outcome` but folds each item's
|
|
184
229
|
``prepared_tokens`` presence / content into the per-item
|
|
@@ -23,6 +23,7 @@ from typing import Any, ClassVar, Literal, cast
|
|
|
23
23
|
|
|
24
24
|
from sie_server.adapters._spec import AdapterSpec
|
|
25
25
|
from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
|
|
26
|
+
from sie_server.types.inputs import ImageInput
|
|
26
27
|
|
|
27
28
|
logger = logging.getLogger(__name__)
|
|
28
29
|
|
|
@@ -247,6 +248,7 @@ class GenerationAdapter(ModelAdapter):
|
|
|
247
248
|
logit_bias: dict[str, float] | None = None,
|
|
248
249
|
logprobs: bool = False,
|
|
249
250
|
top_logprobs: int | None = None,
|
|
251
|
+
images: list[ImageInput] | None = None,
|
|
250
252
|
) -> AsyncIterator[GenerationChunk]:
|
|
251
253
|
"""Stream generation chunks from a prompt.
|
|
252
254
|
|
|
@@ -279,6 +281,13 @@ class GenerationAdapter(ModelAdapter):
|
|
|
279
281
|
with per-token log-probabilities.
|
|
280
282
|
top_logprobs: How many alternates per position; only
|
|
281
283
|
consulted when ``logprobs`` is True.
|
|
284
|
+
images: Optional list of wire-format :class:`ImageInput`
|
|
285
|
+
entries for vision-language models. The ``prompt`` is
|
|
286
|
+
expected to already carry the model's image placeholder
|
|
287
|
+
tokens (rendered by the chat template upstream); the
|
|
288
|
+
adapter forwards the image bytes to the engine. ``None``
|
|
289
|
+
or empty for text-only generation. Text-only adapters may
|
|
290
|
+
ignore this argument.
|
|
282
291
|
|
|
283
292
|
Yields:
|
|
284
293
|
:class:`GenerationChunk` instances. At least one terminal
|
|
@@ -346,6 +346,31 @@ class NemoColEmbedAdapter(BaseAdapter):
|
|
|
346
346
|
num_image_token,
|
|
347
347
|
)
|
|
348
348
|
|
|
349
|
+
def get_preprocessor(self) -> Any:
|
|
350
|
+
"""Register BOTH a text and an image preprocessor for v1 (#1163).
|
|
351
|
+
|
|
352
|
+
v1 documents must take the conformant ``_encode_images_preprocessed`` path,
|
|
353
|
+
which requires an *image* preprocessor to be registered so the encode pipeline
|
|
354
|
+
produces a ``NemoColEmbedPayload`` (with ``pixel_values``) instead of a
|
|
355
|
+
passthrough ``ImagePayload``. Without it every doc batch falls back to the
|
|
356
|
+
model's ``forward_passages`` — which re-tiles each page inline on one thread,
|
|
357
|
+
~3x slower than running the tiling upstream in the preprocessing thread pool.
|
|
358
|
+
|
|
359
|
+
But v1 *queries* (text) still go through ``model.forward_queries`` and rely on
|
|
360
|
+
the batched worker path; registering only the image preprocessor de-registers
|
|
361
|
+
the text one and routes queries to the unbatched direct-call path, which has
|
|
362
|
+
surfaced ``forward_queries`` failures. So we register both: the base
|
|
363
|
+
``CharCountPreprocessor`` (text → worker-batched queries) and the
|
|
364
|
+
``NemoColEmbedPreprocessor`` (image → conformant docs). ``model_loader``
|
|
365
|
+
registers each entry of the returned list by its ``modality``.
|
|
366
|
+
|
|
367
|
+
v2 (Qwen3-VL backbone) builds no ``_processor`` (``None``); it keeps just the
|
|
368
|
+
base text preprocessor and its native ``forward_images`` path (with #1055 fix).
|
|
369
|
+
"""
|
|
370
|
+
if self._processor is None:
|
|
371
|
+
return super().get_preprocessor()
|
|
372
|
+
return [super().get_preprocessor(), self._processor]
|
|
373
|
+
|
|
349
374
|
def encode(
|
|
350
375
|
self,
|
|
351
376
|
items: list[Item],
|
|
@@ -606,14 +631,25 @@ class NemoColEmbedAdapter(BaseAdapter):
|
|
|
606
631
|
if self._normalize:
|
|
607
632
|
embeddings = functional.normalize(embeddings, p=2, dim=-1)
|
|
608
633
|
|
|
609
|
-
# Store results for this sub-batch (move to CPU immediately to free GPU
|
|
634
|
+
# Store results for this sub-batch (move to CPU immediately to free GPU
|
|
635
|
+
# memory). Trim each item's left-padding rows before returning: the batch is
|
|
636
|
+
# left-padded, so padded positions are zeroed by the attention_mask above —
|
|
637
|
+
# but emitting them as zero vectors leaks 0-similarity rows into the late-
|
|
638
|
+
# interaction MaxSim (a 0-floor on every query token's max). Because the
|
|
639
|
+
# batcher pads inconsistently across docs, identical docs then score
|
|
640
|
+
# differently by batch and ranking is corrupted on variable-tile batches
|
|
641
|
+
# (#1163: Vidore3 Hr 0.6532 -> 0.5713). Keep only real tokens, matching the
|
|
642
|
+
# native forward_passages path (_unpack_embeddings drops zero rows likewise).
|
|
610
643
|
for i in range(len(sub_batch_items)):
|
|
611
|
-
|
|
644
|
+
keep = batch["attention_mask"][i].bool()
|
|
645
|
+
emb = embeddings[i][keep].float().cpu().numpy()
|
|
612
646
|
all_embeddings.append(emb)
|
|
613
647
|
|
|
614
|
-
#
|
|
615
|
-
|
|
616
|
-
|
|
648
|
+
# Free this sub-batch's GPU tensors. NOTE: no per-sub-batch
|
|
649
|
+
# torch.cuda.empty_cache() — repeatedly releasing the allocator's cache and
|
|
650
|
+
# re-acquiring ~GB blocks fragments the pool and OOMs at scale on big GPUs
|
|
651
|
+
# (#1163). The sub-batch loop + immediate CPU offload already bound peak VRAM.
|
|
652
|
+
del outputs, embeddings, attention_mask, batch
|
|
617
653
|
|
|
618
654
|
return EncodeOutput(
|
|
619
655
|
multivector=all_embeddings,
|
|
@@ -189,7 +189,7 @@ class Owlv2Adapter(BaseAdapter):
|
|
|
189
189
|
|
|
190
190
|
# Extract options once
|
|
191
191
|
opts = options or {}
|
|
192
|
-
score_threshold = opts.get("score_threshold", self._score_threshold)
|
|
192
|
+
score_threshold = opts.get("score_threshold", opts.get("threshold", self._score_threshold))
|
|
193
193
|
|
|
194
194
|
# Build text queries once (shared across batch)
|
|
195
195
|
# OWL-v2 format: list of prompts per image
|
|
@@ -19,6 +19,7 @@ HTTP connection, which SGLang treats as a cancel signal. A best-effort
|
|
|
19
19
|
from __future__ import annotations
|
|
20
20
|
|
|
21
21
|
import asyncio
|
|
22
|
+
import base64
|
|
22
23
|
import contextlib
|
|
23
24
|
import dataclasses
|
|
24
25
|
import json
|
|
@@ -47,6 +48,7 @@ from sie_server.adapters._types import ERR_NOT_LOADED, ComputePrecision
|
|
|
47
48
|
from sie_server.adapters.sglang import _server
|
|
48
49
|
from sie_server.observability.metrics import GenerationStreamTimer
|
|
49
50
|
from sie_server.types.grammar import GrammarSpec
|
|
51
|
+
from sie_server.types.inputs import ImageInput, media_bytes
|
|
50
52
|
|
|
51
53
|
logger = logging.getLogger(__name__)
|
|
52
54
|
|
|
@@ -91,6 +93,47 @@ def _resolve_read_timeout() -> float | None:
|
|
|
91
93
|
_GENERATE_READ_TIMEOUT_S: float | None = _resolve_read_timeout()
|
|
92
94
|
|
|
93
95
|
|
|
96
|
+
# Format hints we re-embed into the SGLang ``image_data`` MIME type. Anything
|
|
97
|
+
# else falls back to ``jpeg`` (the engine sniffs the real format from bytes).
|
|
98
|
+
_ALLOWED_IMAGE_FORMATS = frozenset({"png", "jpeg", "jpg", "webp", "gif"})
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _encode_image_data(images: list[ImageInput] | None) -> list[str] | None:
|
|
102
|
+
"""Translate wire ``ImageInput`` entries into SGLang ``image_data`` URIs.
|
|
103
|
+
|
|
104
|
+
SGLang's ``/generate`` accepts a top-level ``image_data`` field — a list of
|
|
105
|
+
images, each as a base64 string, an ``http(s)`` URL, or a local file path.
|
|
106
|
+
We emit ``data:image/<fmt>;base64,<...>`` data URIs so the format hint
|
|
107
|
+
travels with the bytes and SGLang's image loader can decode without
|
|
108
|
+
sniffing. Bytes are validated through :func:`media_bytes`, the single
|
|
109
|
+
enforcement point for the wire contract (raises :class:`InvalidMediaError`
|
|
110
|
+
on a non-bytes ``data``, e.g. an un-decoded base64 JSON string).
|
|
111
|
+
|
|
112
|
+
Returns ``None`` when there are no images so the request body stays
|
|
113
|
+
byte-identical to the text-only path — vision plumbing is inert for the
|
|
114
|
+
text-only models that share this adapter.
|
|
115
|
+
"""
|
|
116
|
+
if not images:
|
|
117
|
+
return None
|
|
118
|
+
encoded: list[str] = []
|
|
119
|
+
for image in images:
|
|
120
|
+
raw = media_bytes(image, kind="image")
|
|
121
|
+
fmt = (image.get("format") or "jpeg").strip().lower() or "jpeg"
|
|
122
|
+
# Clamp the client-controlled format hint to a known set before
|
|
123
|
+
# re-embedding it in the data-URI MIME type — an arbitrary subtype
|
|
124
|
+
# would produce a malformed URI for SGLang's loader. The engine
|
|
125
|
+
# sniffs the real format from the bytes regardless, so an unknown
|
|
126
|
+
# hint safely falls back to jpeg.
|
|
127
|
+
if fmt not in _ALLOWED_IMAGE_FORMATS:
|
|
128
|
+
fmt = "jpeg"
|
|
129
|
+
elif fmt == "jpg":
|
|
130
|
+
# ``image/jpg`` is not a registered MIME type; normalise to jpeg.
|
|
131
|
+
fmt = "jpeg"
|
|
132
|
+
b64 = base64.b64encode(raw).decode("ascii")
|
|
133
|
+
encoded.append(f"data:image/{fmt};base64,{b64}")
|
|
134
|
+
return encoded
|
|
135
|
+
|
|
136
|
+
|
|
94
137
|
def _tail_file(path: str, *, max_lines: int = 200) -> str:
|
|
95
138
|
"""Return the final lines from a startup log for diagnostics."""
|
|
96
139
|
try:
|
|
@@ -764,9 +807,17 @@ class SGLangGenerationAdapter(GenerationAdapter):
|
|
|
764
807
|
best_of: int | None = None,
|
|
765
808
|
stream: bool = False,
|
|
766
809
|
lora_path: str | None = None,
|
|
810
|
+
images: list[ImageInput] | None = None,
|
|
767
811
|
) -> AsyncIterator[GenerationChunk]:
|
|
768
812
|
self._check_loaded()
|
|
769
813
|
|
|
814
|
+
# Vision input: encode any images into SGLang's top-level ``image_data``
|
|
815
|
+
# field once, then attach to whichever request body we build below. The
|
|
816
|
+
# ``prompt`` is expected to already carry the model's image placeholder
|
|
817
|
+
# tokens (the chat template renders them worker-side). ``None`` when
|
|
818
|
+
# there are no images, keeping the text-only request body unchanged.
|
|
819
|
+
image_data = _encode_image_data(images)
|
|
820
|
+
|
|
770
821
|
# Guard verdict thresholding only runs on the single-candidate (n=1)
|
|
771
822
|
# path, so reject multi-candidate sampling up front — otherwise a guard
|
|
772
823
|
# request with n>1 / best_of>1 would silently return an UN-thresholded
|
|
@@ -892,6 +943,8 @@ class SGLangGenerationAdapter(GenerationAdapter):
|
|
|
892
943
|
}
|
|
893
944
|
if lora_path:
|
|
894
945
|
sbody["lora_path"] = lora_path
|
|
946
|
+
if image_data:
|
|
947
|
+
sbody["image_data"] = image_data
|
|
895
948
|
if logprobs:
|
|
896
949
|
sbody["return_logprob"] = True
|
|
897
950
|
# Without this SGLang omits the decoded token TEXT from
|
|
@@ -1023,6 +1076,8 @@ class SGLangGenerationAdapter(GenerationAdapter):
|
|
|
1023
1076
|
nbody: dict[str, Any] = {"text": prompt, "sampling_params": sp, "stream": False}
|
|
1024
1077
|
if lora_path:
|
|
1025
1078
|
nbody["lora_path"] = lora_path
|
|
1079
|
+
if image_data:
|
|
1080
|
+
nbody["image_data"] = image_data
|
|
1026
1081
|
if logprobs or rank:
|
|
1027
1082
|
nbody["return_logprob"] = True
|
|
1028
1083
|
# Surface decoded token text (see streaming body below) so the
|
|
@@ -1125,6 +1180,8 @@ class SGLangGenerationAdapter(GenerationAdapter):
|
|
|
1125
1180
|
# verified on L4). Empirically applies the adapter in-batch per request.
|
|
1126
1181
|
if lora_path:
|
|
1127
1182
|
body["lora_path"] = lora_path
|
|
1183
|
+
if image_data:
|
|
1184
|
+
body["image_data"] = image_data
|
|
1128
1185
|
# OpenAI ``logprobs`` → SGLang ``return_logprob`` (top-level body
|
|
1129
1186
|
# flag, not under sampling_params). ``top_logprobs`` →
|
|
1130
1187
|
# ``top_logprobs_num``. SGLang surfaces them under
|
|
@@ -20,7 +20,7 @@ class AppStateConfig:
|
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
22
|
models_dir: Path | str | None = None
|
|
23
|
-
"""Path to models directory (local path, s3://, or
|
|
23
|
+
"""Path to models directory (local path, s3://, gs://, abfs://, or abfss://). If None, registry starts empty."""
|
|
24
24
|
|
|
25
25
|
device: str = "cpu"
|
|
26
26
|
"""Device to load models on (e.g., "cuda:0", "cpu", "mps")."""
|
|
@@ -220,12 +220,18 @@ def serve(
|
|
|
220
220
|
host: str = typer.Option("0.0.0.0", "--host", help="Host to bind to"), # noqa: S104 — intentional bind to all interfaces for server
|
|
221
221
|
device: str = typer.Option("auto", "--device", "-d", help="Device to use (auto, cuda, mps, cpu)"),
|
|
222
222
|
models_dir: str = typer.Option(
|
|
223
|
-
DEFAULT_MODELS_DIR,
|
|
223
|
+
DEFAULT_MODELS_DIR,
|
|
224
|
+
"--models-dir",
|
|
225
|
+
help="Models directory (local path, s3://, gs://, abfs://, or abfss://)",
|
|
224
226
|
),
|
|
225
227
|
bundle: str | None = typer.Option(None, "--bundle", "-b", help="Bundle name to load (from bundles/ dir)"),
|
|
226
228
|
models: str | None = typer.Option(None, "--models", "-m", help="Comma-separated model names to load"),
|
|
227
229
|
local_cache: str | None = typer.Option(None, "--local-cache", help="Local cache directory (default: HF_HOME)"),
|
|
228
|
-
cluster_cache: str | None = typer.Option(
|
|
230
|
+
cluster_cache: str | None = typer.Option(
|
|
231
|
+
None,
|
|
232
|
+
"--cluster-cache",
|
|
233
|
+
help="Cluster cache URL (s3://, gs://, abfs://, or abfss://)",
|
|
234
|
+
),
|
|
229
235
|
hf_fallback: bool = typer.Option(True, "--hf-fallback/--no-hf-fallback", help="Enable HuggingFace Hub fallback"),
|
|
230
236
|
reload: bool = typer.Option(default=False, help="Enable auto-reload for development"),
|
|
231
237
|
tracing: bool = typer.Option(default=False, help="Enable OpenTelemetry tracing (exports to localhost:4317)"),
|
|
@@ -48,7 +48,7 @@ def load_model_configs(models_dir: Path | str) -> dict[str, ModelConfig]:
|
|
|
48
48
|
"""Load all model configs from a directory (local or cloud).
|
|
49
49
|
|
|
50
50
|
Args:
|
|
51
|
-
models_dir: Path to the models directory (local path, s3://, or
|
|
51
|
+
models_dir: Path to the models directory (local path, s3://, gs://, abfs://, or abfss://).
|
|
52
52
|
|
|
53
53
|
Returns:
|
|
54
54
|
Dictionary mapping model names to their ModelConfig objects.
|
|
@@ -141,7 +141,7 @@ def _expand_profile_variants(configs: dict[str, ModelConfig]) -> None:
|
|
|
141
141
|
|
|
142
142
|
|
|
143
143
|
def _load_configs_from_cloud(models_dir: str) -> dict[str, ModelConfig]:
|
|
144
|
-
"""Load model configs from
|
|
144
|
+
"""Load model configs from cloud object storage.
|
|
145
145
|
|
|
146
146
|
Discovers YAML files via LIST operation, downloads them to local cache, and parses them.
|
|
147
147
|
Model configs are flat YAML files (e.g., gs://bucket/models/BAAI__bge-m3.yaml).
|
|
@@ -561,11 +561,16 @@ class ModelLoader:
|
|
|
561
561
|
Returns:
|
|
562
562
|
LoadedModel containing the loaded state.
|
|
563
563
|
"""
|
|
564
|
-
# Get preprocessor from adapter - all adapters implement get_preprocessor()
|
|
565
|
-
preprocessor
|
|
566
|
-
|
|
567
|
-
#
|
|
568
|
-
|
|
564
|
+
# Get preprocessor(s) from adapter - all adapters implement get_preprocessor().
|
|
565
|
+
# Most return a single preprocessor; multi-modal adapters (e.g. NemoColEmbed v1,
|
|
566
|
+
# which needs a text preprocessor for queries AND an image preprocessor for
|
|
567
|
+
# documents) may return a list. Register each by its modality.
|
|
568
|
+
preprocessors = adapter.get_preprocessor()
|
|
569
|
+
if not isinstance(preprocessors, list):
|
|
570
|
+
preprocessors = [preprocessors]
|
|
571
|
+
for preprocessor in preprocessors:
|
|
572
|
+
if preprocessor is None:
|
|
573
|
+
continue
|
|
569
574
|
modality = getattr(preprocessor, "modality", None)
|
|
570
575
|
if modality == "text":
|
|
571
576
|
self._preprocessor_registry._register(name, preprocessor)
|
|
@@ -82,7 +82,7 @@ class ModelRegistry:
|
|
|
82
82
|
"""Initialize the registry.
|
|
83
83
|
|
|
84
84
|
Args:
|
|
85
|
-
models_dir: Path to models directory (local path, s3://, or
|
|
85
|
+
models_dir: Path to models directory (local path, s3://, gs://, abfs://, or abfss://).
|
|
86
86
|
If None, registry starts empty and configs must be added manually.
|
|
87
87
|
memory_config: Configuration for memory management. If None, uses defaults.
|
|
88
88
|
drain_timeout_s: Timeout in seconds to wait for worker drain before unload.
|
|
@@ -1396,7 +1396,7 @@ class ModelRegistry:
|
|
|
1396
1396
|
logger.debug("No models_dir, skipping hot reload")
|
|
1397
1397
|
return
|
|
1398
1398
|
|
|
1399
|
-
# Don't watch cloud URLs (s3://, gs://)
|
|
1399
|
+
# Don't watch cloud URLs (s3://, gs://, abfs(s)://)
|
|
1400
1400
|
if is_cloud_path(self._models_dir):
|
|
1401
1401
|
logger.debug("Cloud models_dir, skipping hot reload (not supported)")
|
|
1402
1402
|
return
|