sie-server 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sie_server-0.2.0 → sie_server-0.3.0}/.gitignore +4 -0
- sie_server-0.3.0/Dockerfile.cpu +198 -0
- sie_server-0.3.0/Dockerfile.cuda11 +211 -0
- sie_server-0.3.0/Dockerfile.cuda12 +242 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/PKG-INFO +2 -1
- sie_server-0.3.0/README.md +90 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/bundles/default.yaml +12 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/bundles/transformers5.yaml +3 -1
- sie_server-0.3.0/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +25 -0
- sie_server-0.3.0/models/Qwen__Qwen3-Reranker-0.6B.yaml +25 -0
- sie_server-0.3.0/models/Qwen__Qwen3-Reranker-4B.yaml +25 -0
- sie_server-0.3.0/models/Qwen__Qwen3-VL-Embedding-2B.yaml +27 -0
- sie_server-0.3.0/models/Qwen__Qwen3-VL-Reranker-2B.yaml +21 -0
- sie_server-0.3.0/models/answerdotai__ModernBERT-base.yaml +33 -0
- sie_server-0.3.0/models/docling.yaml +20 -0
- sie_server-0.3.0/models/fastino__gliner2-base-v1.yaml +20 -0
- sie_server-0.3.0/models/google__siglip2-base-patch16-224.yaml +25 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-base-v1.0.yaml +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-large-v1.0.yaml +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-large-v3.0.yaml +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-small-v1.0.yaml +1 -1
- sie_server-0.3.0/models/knowledgator__gliner-bi-base-v2.0.yaml +22 -0
- sie_server-0.3.0/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +29 -0
- sie_server-0.3.0/models/sugiv__stablebridge-pruner-highlighter.yaml +39 -0
- sie_server-0.3.0/models/zai-org__GLM-OCR.yaml +21 -0
- sie_server-0.3.0/openapi.json +1788 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/pyproject.toml +3 -1
- sie_server-0.3.0/src/sie_server/adapters/_flash_base.py +213 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_spec.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/base.py +3 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bert_flash/__init__.py +12 -4
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +11 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bge_m3/__init__.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bge_m3_flash/__init__.py +5 -5
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/clip/__init__.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colbert/__init__.py +8 -8
- sie_server-0.3.0/src/sie_server/adapters/docling/__init__.py +165 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/gliclass/__init__.py +87 -38
- sie_server-0.3.0/src/sie_server/adapters/gliner2/__init__.py +182 -0
- sie_server-0.3.0/src/sie_server/adapters/gliner_bi/__init__.py +288 -0
- sie_server-0.3.0/src/sie_server/adapters/glm_ocr/__init__.py +393 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/gte_sparse_flash/__init__.py +8 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +25 -3
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/modernbert_flash/__init__.py +8 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +12 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nli_classification_flash/__init__.py +8 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nomic_flash/__init__.py +9 -0
- sie_server-0.3.0/src/sie_server/adapters/paddleocr_vl/__init__.py +374 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/qwen2_flash/__init__.py +8 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +313 -73
- sie_server-0.3.0/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +409 -0
- sie_server-0.3.0/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +386 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/rope_flash/__init__.py +8 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/sentence_transformer/__init__.py +2 -2
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/siglip/__init__.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/splade_flash/__init__.py +8 -0
- sie_server-0.3.0/src/sie_server/adapters/stablebridge_pruner/__init__.py +497 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +15 -7
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/encode.py +13 -3
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/extract.py +12 -13
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/helpers.py +68 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/openai_compat.py +6 -6
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/openapi.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/score.py +9 -2
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/validation.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/ws.py +6 -6
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/app/app_factory.py +5 -2
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/cli.py +52 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/config/engine.py +150 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/config/model.py +9 -2
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/adaptive_batching.py +1 -1
- sie_server-0.3.0/src/sie_server/core/extract_cost.py +29 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/inference_output.py +7 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/loader.py +8 -4
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/memory.py +36 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/model_loader.py +10 -0
- sie_server-0.3.0/src/sie_server/core/oom.py +161 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/prepared.py +43 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/__init__.py +2 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/vision.py +351 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/readiness.py +2 -2
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/registry.py +207 -13
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/extract.py +16 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/model_worker.py +120 -28
- sie_server-0.3.0/src/sie_server/core/worker/oom_recovery.py +429 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/types.py +8 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/nats_pull_loop.py +152 -26
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/metrics.py +124 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/inputs.py +29 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/openapi.py +10 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/responses.py +1 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_base.py +1 -1
- sie_server-0.3.0/tests/adapters/test_docling.py +194 -0
- sie_server-0.3.0/tests/adapters/test_docling_smoke.py +65 -0
- sie_server-0.3.0/tests/adapters/test_flash_base.py +132 -0
- sie_server-0.3.0/tests/adapters/test_glm_ocr.py +91 -0
- sie_server-0.3.0/tests/adapters/test_jina_flash_cross_encoder.py +196 -0
- sie_server-0.3.0/tests/adapters/test_paddleocr_vl.py +255 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_runtime_options.py +237 -2
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_sentence_transformer.py +2 -2
- sie_server-0.3.0/tests/adapters/test_stablebridge_integration.py +160 -0
- sie_server-0.3.0/tests/adapters/test_stablebridge_pruner.py +273 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_endpoint.py +195 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_extract.py +78 -9
- sie_server-0.3.0/tests/api/test_extract_oom.py +186 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_ws.py +3 -3
- sie_server-0.3.0/tests/config/test_bundle_coverage.py +142 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/config/test_config.py +140 -8
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/conftest.py +164 -26
- sie_server-0.3.0/tests/core/test_idle_evict.py +263 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_inference.py +1 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_loader.py +18 -1
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_memory.py +27 -0
- sie_server-0.3.0/tests/core/test_oom_detection.py +144 -0
- sie_server-0.3.0/tests/core/worker/test_oom_recovery.py +495 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/observability/test_metrics.py +12 -12
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_all_models.py +63 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_docker_integration.py +10 -10
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_nats_pull_loop.py +187 -1
- sie_server-0.3.0/tests/test_openapi_export.py +52 -0
- sie_server-0.3.0/tests/type_defs/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/type_defs/test_inputs.py +38 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/type_defs/test_types.py +8 -0
- sie_server-0.2.0/Dockerfile.cpu +0 -190
- sie_server-0.2.0/Dockerfile.cuda11 +0 -168
- sie_server-0.2.0/Dockerfile.cuda12 +0 -168
- sie_server-0.2.0/README.md +0 -31
- sie_server-0.2.0/src/sie_server/adapters/_flash_base.py +0 -110
- {sie_server-0.2.0 → sie_server-0.3.0}/CONTRIBUTING.md +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/LICENSE +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/bundles/sglang.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-m3.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-reranker-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-reranker-large.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/GritLM__GritLM-7B.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/NeuML__gliner-bert-tiny.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/colbert-ir__colbertv2.0.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/google__embeddinggemma-300m.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/google__siglip-so400m-patch14-224.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/google__siglip-so400m-patch14-384.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-base-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-large-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-small-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__multilingual-e5-large.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/jackboyla__glirel-large-v0.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/jinaai__jina-colbert-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/microsoft__Florence-2-base-ft.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/microsoft__Florence-2-base.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/microsoft__Florence-2-large.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/naver__splade-v3.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/numind__NuNER_Zero-span.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/numind__NuNER_Zero.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/nvidia__NV-Embed-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/openai__clip-vit-base-patch32.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/openai__clip-vit-large-patch14.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/rasyosef__splade-mini.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_large-v2.1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_medium-v2.1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_multi-v2.1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_small-v2.1.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/vidore__colpali-v1.3-hf.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_base_adapter.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_types.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_utils.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colpali/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/donut/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/florence2/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/gliner/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/glirel/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/owlv2/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/sglang/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/health.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/metrics.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/models.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/options.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/root.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/serialization.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/app/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/app/app_state_config.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/config/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/batcher.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/deps.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/disk_cache.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/encode_pipeline.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/hot_reload.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/inference.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/logging.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/postprocessor.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/postprocessor_registry.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/base.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/image.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/text.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor_registry.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/shutdown.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/timing.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/tokenizer.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/watcher.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/base.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/encode.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/score.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/main.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/nats_subscriber.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/gpu.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/prometheus.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/telemetry.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/tracing.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/static/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/static/index.html +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/outputs.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/requests.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_bge_m3.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_clip.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_colbert.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_donut.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_factory_integration.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_florence2.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_glirel.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_grounding_dino.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_gte_sparse.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_lighton_ocr.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_lora.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_lora_integration.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_sglang.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_siglip.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_sparse_aggregation.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_visual_document.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_dtype.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_json_schema.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_timing.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_validation.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_extract_integration.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_health.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_models.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_openai_compat.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_score.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_version_header.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/app/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/app/test_app_factory.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/config/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_adaptive_batching.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_batcher.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_disk_cache.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_hot_reload.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_logging.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_postprocessor.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_postprocessor_registry.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_prepared.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_preprocessor.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_preprocessor_registry.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_quantization.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_readiness.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_async.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_core.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_deps.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_memory.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_multi_model.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_shutdown.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_timing.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_watcher.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_backpressure.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_core.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_extract.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_lora.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_options.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_score.py +0 -0
- {sie_server-0.2.0/tests/observability → sie_server-0.3.0/tests/core/worker}/__init__.py +0 -0
- {sie_server-0.2.0/tests/type_defs → sie_server-0.3.0/tests/observability}/__init__.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/observability/test_telemetry.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/observability/test_tracing.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_nats_pull_loop_batching.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_sdk_integration.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_server_smoke.py +0 -0
- {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_sparse_integration.py +0 -0
|
@@ -75,6 +75,9 @@ docs/_build/
|
|
|
75
75
|
.pybuilder/
|
|
76
76
|
target/
|
|
77
77
|
|
|
78
|
+
# Rust
|
|
79
|
+
*.rs.bk
|
|
80
|
+
|
|
78
81
|
# Jupyter Notebook
|
|
79
82
|
.ipynb_checkpoints
|
|
80
83
|
|
|
@@ -247,6 +250,7 @@ Thumbs.db
|
|
|
247
250
|
|
|
248
251
|
# kilocode
|
|
249
252
|
.kilo/
|
|
253
|
+
.kilocode/
|
|
250
254
|
|
|
251
255
|
# Worktree metadata
|
|
252
256
|
.base-branch
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1
|
|
2
|
+
# SIE Server - CPU-only Image (amd64 + arm64)
|
|
3
|
+
# Build from repo root:
|
|
4
|
+
# docker build -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu-default .
|
|
5
|
+
# docker build -f packages/sie_server/Dockerfile.cpu --build-arg BUNDLE=transformers5 -t sie-server:cpu-transformers5 .
|
|
6
|
+
# docker buildx build --platform linux/amd64,linux/arm64 -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu .
|
|
7
|
+
|
|
8
|
+
ARG BUNDLE=default
|
|
9
|
+
|
|
10
|
+
# =============================================================================
|
|
11
|
+
# Stage 1: Dependencies (pyproject.toml only, cached across code changes)
|
|
12
|
+
# =============================================================================
|
|
13
|
+
FROM python:3.12-slim-bookworm AS deps
|
|
14
|
+
|
|
15
|
+
ENV DEBIAN_FRONTEND=noninteractive \
|
|
16
|
+
UV_NO_CACHE=1 \
|
|
17
|
+
UV_COMPILE_BYTECODE=1 \
|
|
18
|
+
UV_LINK_MODE=copy \
|
|
19
|
+
PIP_DISABLE_PIP_VERSION_CHECK=1
|
|
20
|
+
|
|
21
|
+
# build-essential + git are builder-only; they do NOT leak into the runtime stage.
|
|
22
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
23
|
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
24
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
25
|
+
build-essential \
|
|
26
|
+
ca-certificates \
|
|
27
|
+
curl \
|
|
28
|
+
git
|
|
29
|
+
|
|
30
|
+
ARG UV_VERSION=0.9.28
|
|
31
|
+
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
|
|
32
|
+
&& mv /root/.local/bin/uv /bin/uv \
|
|
33
|
+
&& mv /root/.local/bin/uvx /bin/uvx
|
|
34
|
+
|
|
35
|
+
WORKDIR /app
|
|
36
|
+
|
|
37
|
+
# Dependency specs first — this layer caches as long as pyproject.toml is unchanged.
|
|
38
|
+
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
39
|
+
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
40
|
+
|
|
41
|
+
# Stub source trees so pip accepts the editable installs during dep resolution.
|
|
42
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
|
|
43
|
+
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
44
|
+
&& touch src/sie_server/__init__.py
|
|
45
|
+
|
|
46
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
47
|
+
python -m venv .venv \
|
|
48
|
+
&& .venv/bin/pip install --upgrade pip \
|
|
49
|
+
&& .venv/bin/pip install \
|
|
50
|
+
--index-url https://download.pytorch.org/whl/cpu \
|
|
51
|
+
--extra-index-url https://pypi.org/simple \
|
|
52
|
+
-e "/tmp/sie_sdk[storage]" \
|
|
53
|
+
-e ".[gpu-metrics]"
|
|
54
|
+
|
|
55
|
+
# =============================================================================
|
|
56
|
+
# Stage 2: Base - source install + shared-venv finalization (no BUNDLE)
|
|
57
|
+
# =============================================================================
|
|
58
|
+
# Bundle-agnostic: all base-stage layers are shared across bundles of this
|
|
59
|
+
# platform in local BuildKit cache and in content-addressed registry layers.
|
|
60
|
+
FROM deps AS base
|
|
61
|
+
|
|
62
|
+
COPY packages/sie_sdk/src /tmp/sie_sdk/src
|
|
63
|
+
COPY packages/sie_server/src src/
|
|
64
|
+
COPY packages/sie_server/bundles bundles/
|
|
65
|
+
COPY packages/sie_server/models models/
|
|
66
|
+
|
|
67
|
+
# Editable reinstall over the stub stage — keeps ./bundles lookups via CWD
|
|
68
|
+
# working for `sie_server.cli resolve-deps`.
|
|
69
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
70
|
+
.venv/bin/pip install --no-deps \
|
|
71
|
+
-e /tmp/sie_sdk \
|
|
72
|
+
-e .
|
|
73
|
+
|
|
74
|
+
# Sanity-check shared venv imports — catches breakage introduced by the
|
|
75
|
+
# shared-deps resolver.
|
|
76
|
+
RUN .venv/bin/python -c "import torch; print(torch.__version__)"
|
|
77
|
+
|
|
78
|
+
# Register bundle-libs on sys.path. Content is identical across bundles.
|
|
79
|
+
RUN echo "import sys; sys.path.insert(0, '/app/bundle-libs')" \
|
|
80
|
+
> /app/.venv/lib/python3.12/site-packages/_sie_bundle.pth
|
|
81
|
+
|
|
82
|
+
# .venv cleanup + stdlib trim — runs here so /app/.venv reaches its final
|
|
83
|
+
# form BEFORE bundle divergence. Main wins: byte-compiled caches, *.a static
|
|
84
|
+
# libs, tests/, RECORD metadata, and the unused stdlib test/idlelib trees.
|
|
85
|
+
# mtimes are reset to epoch 0 at the end.
|
|
86
|
+
RUN set -eux; \
|
|
87
|
+
cd /app/.venv/lib/python3.12/site-packages; \
|
|
88
|
+
find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
|
|
89
|
+
find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
|
|
90
|
+
find . -type d -name 'tests' -prune -exec rm -rf {} +; \
|
|
91
|
+
rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
|
|
92
|
+
find torch -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
93
|
+
find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
|
|
94
|
+
rm -rf /app/.venv/share/jupyter 2>/dev/null || true; \
|
|
95
|
+
rm -rf /usr/local/lib/python3.12/test /usr/local/lib/python3.12/idlelib 2>/dev/null || true; \
|
|
96
|
+
find /app/.venv -exec touch -h -d @0 {} + 2>/dev/null || true
|
|
97
|
+
|
|
98
|
+
# =============================================================================
|
|
99
|
+
# Stage 3: Builder - bundle-specific deps
|
|
100
|
+
# =============================================================================
|
|
101
|
+
FROM base AS builder
|
|
102
|
+
|
|
103
|
+
ARG BUNDLE
|
|
104
|
+
|
|
105
|
+
RUN .venv/bin/python -m sie_server.cli resolve-deps \
|
|
106
|
+
--bundle "${BUNDLE}" \
|
|
107
|
+
--models-dir models \
|
|
108
|
+
--cpu \
|
|
109
|
+
> /tmp/bundle-requirements.txt \
|
|
110
|
+
&& echo "Bundle ${BUNDLE} requirements:" \
|
|
111
|
+
&& cat /tmp/bundle-requirements.txt
|
|
112
|
+
|
|
113
|
+
# Bundle-specific deps land in a separate site-packages tree so the shared
|
|
114
|
+
# venv layer stays byte-identical across bundles of the same platform.
|
|
115
|
+
# Even if /tmp/bundle-requirements.txt is empty, create /app/bundle-libs so
|
|
116
|
+
# the runtime `COPY --from` never fails.
|
|
117
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
118
|
+
mkdir -p /app/bundle-libs; \
|
|
119
|
+
if [ -s /tmp/bundle-requirements.txt ]; then \
|
|
120
|
+
.venv/bin/pip install \
|
|
121
|
+
--index-url https://download.pytorch.org/whl/cpu \
|
|
122
|
+
--extra-index-url https://pypi.org/simple \
|
|
123
|
+
--target=/app/bundle-libs \
|
|
124
|
+
--no-compile \
|
|
125
|
+
-r /tmp/bundle-requirements.txt; \
|
|
126
|
+
fi
|
|
127
|
+
|
|
128
|
+
# bundle-libs cleanup (symmetric to the .venv cleanup in base).
|
|
129
|
+
RUN set -eux; \
|
|
130
|
+
if [ -d /app/bundle-libs ]; then \
|
|
131
|
+
cd /app/bundle-libs; \
|
|
132
|
+
find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
|
|
133
|
+
find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
|
|
134
|
+
find . -type d -name 'tests' -prune -exec rm -rf {} +; \
|
|
135
|
+
rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
|
|
136
|
+
find torch -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
137
|
+
find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
|
|
138
|
+
fi; \
|
|
139
|
+
find /app/bundle-libs -exec touch -h -d @0 {} + 2>/dev/null || true
|
|
140
|
+
|
|
141
|
+
# =============================================================================
|
|
142
|
+
# Stage 4: Runtime
|
|
143
|
+
# =============================================================================
|
|
144
|
+
FROM python:3.12-slim-bookworm AS runtime
|
|
145
|
+
|
|
146
|
+
ENV DEBIAN_FRONTEND=noninteractive \
|
|
147
|
+
PATH="/app/.venv/bin:$PATH" \
|
|
148
|
+
PYTHONUNBUFFERED=1 \
|
|
149
|
+
PYTHONDONTWRITEBYTECODE=1 \
|
|
150
|
+
HF_HOME=/app/.cache/huggingface \
|
|
151
|
+
OMP_NUM_THREADS=4 \
|
|
152
|
+
MKL_NUM_THREADS=4
|
|
153
|
+
|
|
154
|
+
# Only the shared libs torch + pillow actually dlopen at runtime.
|
|
155
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
156
|
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
157
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
158
|
+
libgomp1 \
|
|
159
|
+
libjpeg62-turbo \
|
|
160
|
+
libpng16-16
|
|
161
|
+
|
|
162
|
+
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
163
|
+
|
|
164
|
+
WORKDIR /app
|
|
165
|
+
|
|
166
|
+
# --link shared COPYs — produce independent layer tars without parent-dir
|
|
167
|
+
# headers carrying a build-time mtime, so resulting layer digests match
|
|
168
|
+
# across bundles of the same platform. Numeric --chown because --link has
|
|
169
|
+
# no /etc/passwd visible (the sie user exists in the runtime FS but --link
|
|
170
|
+
# layers are created in isolation).
|
|
171
|
+
COPY --link --from=base --chown=1000:1000 /app/.venv /app/.venv
|
|
172
|
+
COPY --link --from=base --chown=1000:1000 /app/src /app/src
|
|
173
|
+
COPY --link --from=base --chown=1000:1000 /tmp/sie_sdk/src /tmp/sie_sdk/src
|
|
174
|
+
COPY --link --from=base --chown=1000:1000 /app/models /app/models
|
|
175
|
+
COPY --link --from=base --chown=1000:1000 /app/bundles /app/bundles
|
|
176
|
+
# Bundle-specific extras — last layer so shared layers above stay cached.
|
|
177
|
+
COPY --link --from=builder --chown=1000:1000 /app/bundle-libs /app/bundle-libs
|
|
178
|
+
|
|
179
|
+
RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache
|
|
180
|
+
|
|
181
|
+
# Declare BUNDLE arg only here, where it is first used (LABEL + ENV),
|
|
182
|
+
# so every RUN/COPY layer above is bundle-agnostic in its cache key.
|
|
183
|
+
ARG BUNDLE
|
|
184
|
+
|
|
185
|
+
ENV SIE_BUNDLE="${BUNDLE}"
|
|
186
|
+
|
|
187
|
+
LABEL org.opencontainers.image.title="SIE Server" \
|
|
188
|
+
org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle (CPU)" \
|
|
189
|
+
sie.bundle="${BUNDLE}"
|
|
190
|
+
|
|
191
|
+
USER sie
|
|
192
|
+
EXPOSE 8080
|
|
193
|
+
|
|
194
|
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
|
195
|
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1
|
|
196
|
+
|
|
197
|
+
ENTRYPOINT ["python", "-m", "sie_server.cli"]
|
|
198
|
+
CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models", "--device", "cpu"]
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1
|
|
2
|
+
# SIE Server - CUDA 11.8 Image (for older drivers 470+)
|
|
3
|
+
# Build from repo root:
|
|
4
|
+
# docker build -f packages/sie_server/Dockerfile.cuda11 -t sie-server:cuda11-default .
|
|
5
|
+
# docker build -f packages/sie_server/Dockerfile.cuda11 --build-arg BUNDLE=sglang -t sie-server:cuda11-sglang .
|
|
6
|
+
|
|
7
|
+
ARG BUNDLE=default
|
|
8
|
+
ARG UV_VERSION=0.9.28
|
|
9
|
+
|
|
10
|
+
# =============================================================================
|
|
11
|
+
# Stage 1: uv + standalone Python 3.12 (no deadsnakes PPA)
|
|
12
|
+
# =============================================================================
|
|
13
|
+
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS deps
|
|
14
|
+
|
|
15
|
+
ENV DEBIAN_FRONTEND=noninteractive \
|
|
16
|
+
UV_NO_CACHE=1 \
|
|
17
|
+
UV_COMPILE_BYTECODE=1 \
|
|
18
|
+
UV_LINK_MODE=copy \
|
|
19
|
+
UV_PYTHON_INSTALL_DIR=/opt/python \
|
|
20
|
+
PIP_DISABLE_PIP_VERSION_CHECK=1
|
|
21
|
+
|
|
22
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
23
|
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
24
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
25
|
+
ca-certificates \
|
|
26
|
+
curl \
|
|
27
|
+
git
|
|
28
|
+
|
|
29
|
+
ARG UV_VERSION
|
|
30
|
+
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
|
|
31
|
+
&& mv /root/.local/bin/uv /bin/uv \
|
|
32
|
+
&& mv /root/.local/bin/uvx /bin/uvx
|
|
33
|
+
|
|
34
|
+
RUN uv python install 3.12 \
|
|
35
|
+
&& ln -s "$(uv python find 3.12)" /usr/local/bin/python3.12 \
|
|
36
|
+
&& ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3 \
|
|
37
|
+
&& ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
|
|
38
|
+
|
|
39
|
+
WORKDIR /app
|
|
40
|
+
|
|
41
|
+
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
42
|
+
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
43
|
+
|
|
44
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
|
|
45
|
+
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
46
|
+
&& touch src/sie_server/__init__.py
|
|
47
|
+
|
|
48
|
+
# Note: flash-attn prebuilt wheels are not published for cu118, so SDPA
|
|
49
|
+
# fallback is used at runtime (see SIE_ATTENTION_BACKEND below).
|
|
50
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
51
|
+
python3.12 -m venv .venv \
|
|
52
|
+
&& .venv/bin/pip install --upgrade pip \
|
|
53
|
+
&& .venv/bin/pip install \
|
|
54
|
+
--index-url https://download.pytorch.org/whl/cu118 \
|
|
55
|
+
--extra-index-url https://pypi.org/simple \
|
|
56
|
+
-e "/tmp/sie_sdk[storage]" \
|
|
57
|
+
-e ".[gpu-metrics]"
|
|
58
|
+
|
|
59
|
+
# =============================================================================
|
|
60
|
+
# Stage 2: Base - source install + shared-venv finalization (no BUNDLE)
|
|
61
|
+
# =============================================================================
|
|
62
|
+
# Everything here is bundle-agnostic, so all base-stage layers are shared
|
|
63
|
+
# across bundles of this platform in local BuildKit cache and in registry.
|
|
64
|
+
FROM deps AS base
|
|
65
|
+
|
|
66
|
+
COPY packages/sie_sdk/src /tmp/sie_sdk/src
|
|
67
|
+
COPY packages/sie_server/src src/
|
|
68
|
+
COPY packages/sie_server/bundles bundles/
|
|
69
|
+
COPY packages/sie_server/models models/
|
|
70
|
+
|
|
71
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
72
|
+
.venv/bin/pip install --no-deps \
|
|
73
|
+
-e /tmp/sie_sdk \
|
|
74
|
+
-e .
|
|
75
|
+
|
|
76
|
+
# Sanity-check shared venv imports — catches breakage introduced by the
|
|
77
|
+
# shared-deps resolver.
|
|
78
|
+
RUN .venv/bin/python -c "import torch; print(torch.__version__)"
|
|
79
|
+
|
|
80
|
+
# Register bundle-libs on sys.path. `sys.path.insert(0, ...)` puts
|
|
81
|
+
# bundle-libs at position 0 so bundle-specific versions shadow shared
|
|
82
|
+
# venv versions. Content is identical across bundles.
|
|
83
|
+
RUN echo "import sys; sys.path.insert(0, '/app/bundle-libs')" \
|
|
84
|
+
> /app/.venv/lib/python3.12/site-packages/_sie_bundle.pth
|
|
85
|
+
|
|
86
|
+
# .venv cleanup — runs here so /app/.venv reaches its final form BEFORE
|
|
87
|
+
# bundle divergence. All nvidia-*-cu11 packages kept — torch._C DT_NEEDED
|
|
88
|
+
# links libcufile, libnccl, etc.; removing any breaks `import torch`.
|
|
89
|
+
RUN set -eux; \
|
|
90
|
+
cd /app/.venv/lib/python3.12/site-packages; \
|
|
91
|
+
find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
|
|
92
|
+
find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
|
|
93
|
+
find . -type d -name 'tests' -prune -exec rm -rf {} +; \
|
|
94
|
+
rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
|
|
95
|
+
find torch -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
96
|
+
find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
|
|
97
|
+
find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
98
|
+
rm -rf /app/.venv/share/jupyter 2>/dev/null || true; \
|
|
99
|
+
find /app/.venv -exec touch -h -d @0 {} + 2>/dev/null || true
|
|
100
|
+
|
|
101
|
+
# =============================================================================
|
|
102
|
+
# Stage 3: Builder - bundle-specific deps
|
|
103
|
+
# =============================================================================
|
|
104
|
+
FROM base AS builder
|
|
105
|
+
|
|
106
|
+
ARG BUNDLE
|
|
107
|
+
|
|
108
|
+
RUN .venv/bin/python -m sie_server.cli resolve-deps \
|
|
109
|
+
--bundle "${BUNDLE}" \
|
|
110
|
+
--models-dir models \
|
|
111
|
+
> /tmp/bundle-requirements.txt \
|
|
112
|
+
&& echo "Bundle ${BUNDLE} requirements:" \
|
|
113
|
+
&& cat /tmp/bundle-requirements.txt
|
|
114
|
+
|
|
115
|
+
# Bundle-specific deps land in a separate site-packages tree so the shared
|
|
116
|
+
# venv layer stays byte-identical across bundles of the same platform.
|
|
117
|
+
# The cu11 `default` bundle skips flash-attn (wheel is cu128), so
|
|
118
|
+
# /app/bundle-libs may be empty — create the directory either way so the
|
|
119
|
+
# runtime COPY never fails.
|
|
120
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
121
|
+
mkdir -p /app/bundle-libs; \
|
|
122
|
+
if [ -s /tmp/bundle-requirements.txt ]; then \
|
|
123
|
+
.venv/bin/pip install \
|
|
124
|
+
--index-url https://download.pytorch.org/whl/cu118 \
|
|
125
|
+
--extra-index-url https://pypi.org/simple \
|
|
126
|
+
--target=/app/bundle-libs \
|
|
127
|
+
--no-compile \
|
|
128
|
+
-r /tmp/bundle-requirements.txt; \
|
|
129
|
+
fi
|
|
130
|
+
|
|
131
|
+
# bundle-libs cleanup (symmetric to the .venv cleanup in base).
|
|
132
|
+
RUN set -eux; \
|
|
133
|
+
if [ -d /app/bundle-libs ]; then \
|
|
134
|
+
cd /app/bundle-libs; \
|
|
135
|
+
find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
|
|
136
|
+
find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
|
|
137
|
+
find . -type d -name 'tests' -prune -exec rm -rf {} +; \
|
|
138
|
+
rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
|
|
139
|
+
find torch -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
140
|
+
find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
|
|
141
|
+
find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
142
|
+
fi; \
|
|
143
|
+
find /app/bundle-libs -exec touch -h -d @0 {} + 2>/dev/null || true
|
|
144
|
+
|
|
145
|
+
# =============================================================================
|
|
146
|
+
# Stage 4: Runtime
|
|
147
|
+
# =============================================================================
|
|
148
|
+
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
|
|
149
|
+
|
|
150
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
151
|
+
|
|
152
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
153
|
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
154
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
155
|
+
ca-certificates \
|
|
156
|
+
gcc \
|
|
157
|
+
libc6-dev \
|
|
158
|
+
libgomp1 \
|
|
159
|
+
libnuma1
|
|
160
|
+
|
|
161
|
+
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
162
|
+
|
|
163
|
+
WORKDIR /app
|
|
164
|
+
|
|
165
|
+
COPY --link --from=base /opt/python /opt/python
|
|
166
|
+
RUN set -e; \
|
|
167
|
+
py=$(ls -d /opt/python/cpython-3.12*/bin/python3.12 | head -1); \
|
|
168
|
+
[ -x "$py" ] || { echo "no standalone python found under /opt/python"; exit 1; }; \
|
|
169
|
+
ln -sf "$py" /usr/local/bin/python3.12; \
|
|
170
|
+
ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3; \
|
|
171
|
+
ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
|
|
172
|
+
|
|
173
|
+
# --link shared COPYs — produce independent layer tars without parent-dir
|
|
174
|
+
# headers carrying a build-time mtime, so resulting layer digests match
|
|
175
|
+
# across bundles of the same platform. Numeric --chown because --link has
|
|
176
|
+
# no /etc/passwd visible (the sie user exists in the runtime FS but --link
|
|
177
|
+
# layers are created in isolation).
|
|
178
|
+
COPY --link --from=base --chown=1000:1000 /app/.venv /app/.venv
|
|
179
|
+
COPY --link --from=base --chown=1000:1000 /app/src /app/src
|
|
180
|
+
COPY --link --from=base --chown=1000:1000 /tmp/sie_sdk/src /tmp/sie_sdk/src
|
|
181
|
+
COPY --link --from=base --chown=1000:1000 /app/models /app/models
|
|
182
|
+
COPY --link --from=base --chown=1000:1000 /app/bundles /app/bundles
|
|
183
|
+
COPY --link --from=builder --chown=1000:1000 /app/bundle-libs /app/bundle-libs
|
|
184
|
+
|
|
185
|
+
RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache
|
|
186
|
+
|
|
187
|
+
# Declare BUNDLE arg only here, where it is first used (LABEL + ENV),
|
|
188
|
+
# so every RUN/COPY layer above is bundle-agnostic in its cache key.
|
|
189
|
+
ARG BUNDLE
|
|
190
|
+
|
|
191
|
+
LABEL org.opencontainers.image.title="SIE Server" \
|
|
192
|
+
org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle (CUDA 11.8)" \
|
|
193
|
+
sie.bundle="${BUNDLE}"
|
|
194
|
+
|
|
195
|
+
ENV PATH="/app/.venv/bin:$PATH" \
|
|
196
|
+
PYTHONUNBUFFERED=1 \
|
|
197
|
+
PYTHONDONTWRITEBYTECODE=1 \
|
|
198
|
+
HF_HOME=/app/.cache/huggingface \
|
|
199
|
+
NVIDIA_VISIBLE_DEVICES=all \
|
|
200
|
+
NVIDIA_DRIVER_CAPABILITIES=compute,utility \
|
|
201
|
+
SIE_BUNDLE="${BUNDLE}" \
|
|
202
|
+
SIE_ATTENTION_BACKEND=sdpa
|
|
203
|
+
|
|
204
|
+
USER sie
|
|
205
|
+
EXPOSE 8080
|
|
206
|
+
|
|
207
|
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
|
208
|
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1
|
|
209
|
+
|
|
210
|
+
ENTRYPOINT ["python", "-m", "sie_server.cli"]
|
|
211
|
+
CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models"]
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1
|
|
2
|
+
# SIE Server - CUDA 12.4 Image
|
|
3
|
+
# Build from repo root:
|
|
4
|
+
# docker build -f packages/sie_server/Dockerfile.cuda12 -t sie-server:cuda12-default .
|
|
5
|
+
# docker build -f packages/sie_server/Dockerfile.cuda12 --build-arg BUNDLE=sglang -t sie-server:cuda12-sglang .
|
|
6
|
+
|
|
7
|
+
ARG BUNDLE=default
|
|
8
|
+
ARG UV_VERSION=0.9.28
|
|
9
|
+
|
|
10
|
+
# =============================================================================
|
|
11
|
+
# Stage 1: uv + standalone Python 3.12 (no deadsnakes PPA)
|
|
12
|
+
# =============================================================================
|
|
13
|
+
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 AS deps
|
|
14
|
+
|
|
15
|
+
ENV DEBIAN_FRONTEND=noninteractive \
|
|
16
|
+
UV_NO_CACHE=1 \
|
|
17
|
+
UV_COMPILE_BYTECODE=1 \
|
|
18
|
+
UV_LINK_MODE=copy \
|
|
19
|
+
UV_PYTHON_INSTALL_DIR=/opt/python \
|
|
20
|
+
PIP_DISABLE_PIP_VERSION_CHECK=1
|
|
21
|
+
|
|
22
|
+
# Minimal apt footprint: just enough for curl + git; no deadsnakes PPA.
|
|
23
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
24
|
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
25
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
26
|
+
ca-certificates \
|
|
27
|
+
curl \
|
|
28
|
+
git
|
|
29
|
+
|
|
30
|
+
ARG UV_VERSION
|
|
31
|
+
RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
|
|
32
|
+
&& mv /root/.local/bin/uv /bin/uv \
|
|
33
|
+
&& mv /root/.local/bin/uvx /bin/uvx
|
|
34
|
+
|
|
35
|
+
# Install a standalone Python 3.12 from python-build-standalone (fully relocatable,
|
|
36
|
+
# glibc 2.17+ compat). This replaces the deadsnakes PPA entirely.
|
|
37
|
+
RUN uv python install 3.12 \
|
|
38
|
+
&& ln -s "$(uv python find 3.12)" /usr/local/bin/python3.12 \
|
|
39
|
+
&& ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3 \
|
|
40
|
+
&& ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
|
|
41
|
+
|
|
42
|
+
WORKDIR /app
|
|
43
|
+
|
|
44
|
+
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
|
|
45
|
+
COPY packages/sie_server/pyproject.toml ./pyproject.toml
|
|
46
|
+
|
|
47
|
+
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
|
|
48
|
+
&& touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
|
|
49
|
+
&& touch src/sie_server/__init__.py
|
|
50
|
+
|
|
51
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
52
|
+
python3.12 -m venv .venv \
|
|
53
|
+
&& .venv/bin/pip install --upgrade pip \
|
|
54
|
+
&& .venv/bin/pip install \
|
|
55
|
+
-e "/tmp/sie_sdk[storage]" \
|
|
56
|
+
-e ".[gpu-metrics]"
|
|
57
|
+
|
|
58
|
+
# =============================================================================
|
|
59
|
+
# Stage 2: Base - source install + shared-venv finalization (no BUNDLE)
|
|
60
|
+
# =============================================================================
|
|
61
|
+
# Everything here is bundle-agnostic, so bundle-specific builds of a given
|
|
62
|
+
# platform share every base-stage layer in local BuildKit cache and in
|
|
63
|
+
# content-addressed registry layers.
|
|
64
|
+
FROM deps AS base
|
|
65
|
+
|
|
66
|
+
COPY packages/sie_sdk/src /tmp/sie_sdk/src
|
|
67
|
+
COPY packages/sie_server/src src/
|
|
68
|
+
COPY packages/sie_server/bundles bundles/
|
|
69
|
+
COPY packages/sie_server/models models/
|
|
70
|
+
|
|
71
|
+
# Editable reinstall over the stub stage so runtime path-based lookups
|
|
72
|
+
# (e.g. sie_server.cli resolve-deps reading ./bundles) keep working.
|
|
73
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
74
|
+
.venv/bin/pip install --no-deps \
|
|
75
|
+
-e /tmp/sie_sdk \
|
|
76
|
+
-e .
|
|
77
|
+
|
|
78
|
+
# Sanity-check shared venv imports — catches breakage introduced by the
|
|
79
|
+
# shared-deps resolver. flash-attn/sglang need a real GPU and are
|
|
80
|
+
# intentionally not imported here.
|
|
81
|
+
RUN .venv/bin/python -c "import torch; print(torch.__version__)"
|
|
82
|
+
|
|
83
|
+
# Register bundle-libs on sys.path via a .pth file executed by site.py at
|
|
84
|
+
# interpreter startup. `sys.path.insert(0, ...)` puts bundle-libs at
|
|
85
|
+
# position 0 so bundle-specific versions SHADOW shared venv versions
|
|
86
|
+
# (required for the transformers5 bundle). Content is identical for every
|
|
87
|
+
# bundle, so writing it here keeps the shared venv layer byte-identical.
|
|
88
|
+
RUN echo "import sys; sys.path.insert(0, '/app/bundle-libs')" \
|
|
89
|
+
> /app/.venv/lib/python3.12/site-packages/_sie_bundle.pth
|
|
90
|
+
|
|
91
|
+
# Conservative venv cleanup — only things known to be runtime-safe. Keep
|
|
92
|
+
# torch/include (triton JIT compiles against torch C++ headers for
|
|
93
|
+
# sglang/flash-attn custom ops) and keep gcc/libc6-dev at runtime.
|
|
94
|
+
# Main wins: byte-compiled caches, *.a static libs, tests/, RECORD metadata.
|
|
95
|
+
# Runs here (not in builder) so /app/.venv reaches its final form BEFORE
|
|
96
|
+
# bundle divergence, making the runtime COPY cache-hit across bundles.
|
|
97
|
+
RUN set -eux; \
|
|
98
|
+
cd /app/.venv/lib/python3.12/site-packages; \
|
|
99
|
+
find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
|
|
100
|
+
find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
|
|
101
|
+
find . -type d -name 'tests' -prune -exec rm -rf {} +; \
|
|
102
|
+
rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
|
|
103
|
+
find torch -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
104
|
+
find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
|
|
105
|
+
# All nvidia-*-cu12 packages are kept — torch._C DT_NEEDED links libcufile,
|
|
106
|
+
# libnccl, and similar; removing any breaks `import torch`.
|
|
107
|
+
find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
108
|
+
rm -rf /app/.venv/share/jupyter 2>/dev/null || true; \
|
|
109
|
+
# Normalize mtimes so the cross-stage COPY of /app/.venv produces a
|
|
110
|
+
# byte-identical tar across bundles of the same platform.
|
|
111
|
+
find /app/.venv -exec touch -h -d @0 {} + 2>/dev/null || true
|
|
112
|
+
|
|
113
|
+
# =============================================================================
|
|
114
|
+
# Stage 3: Builder - bundle-specific deps
|
|
115
|
+
# =============================================================================
|
|
116
|
+
FROM base AS builder
|
|
117
|
+
|
|
118
|
+
ARG BUNDLE
|
|
119
|
+
|
|
120
|
+
RUN .venv/bin/python -m sie_server.cli resolve-deps \
|
|
121
|
+
--bundle "${BUNDLE}" \
|
|
122
|
+
--models-dir models \
|
|
123
|
+
> /tmp/bundle-requirements.txt \
|
|
124
|
+
&& echo "Bundle ${BUNDLE} requirements:" \
|
|
125
|
+
&& cat /tmp/bundle-requirements.txt
|
|
126
|
+
|
|
127
|
+
# Bundle-specific deps go into a separate site-packages tree so the shared
|
|
128
|
+
# venv layer stays byte-identical across bundles (default, sglang,
|
|
129
|
+
# transformers5) of the same platform. `.venv/bin/pip` is used as the
|
|
130
|
+
# resolver so shared torch/transformers/nvidia wheels are visible on
|
|
131
|
+
# sys.path and only novel packages land under --target.
|
|
132
|
+
# flash-attn deps carry `sys_platform == 'linux'` markers so they install on
|
|
133
|
+
# Linux (this image) but not on developer machines.
|
|
134
|
+
RUN --mount=type=cache,target=/root/.cache/pip \
|
|
135
|
+
mkdir -p /app/bundle-libs; \
|
|
136
|
+
if [ -s /tmp/bundle-requirements.txt ]; then \
|
|
137
|
+
.venv/bin/pip install \
|
|
138
|
+
--target=/app/bundle-libs \
|
|
139
|
+
--no-compile \
|
|
140
|
+
-r /tmp/bundle-requirements.txt; \
|
|
141
|
+
fi
|
|
142
|
+
|
|
143
|
+
# bundle-libs cleanup (symmetric to the .venv cleanup in base). The .venv
|
|
144
|
+
# tree is already final in the base stage; only the per-bundle tree needs
|
|
145
|
+
# cleanup here.
|
|
146
|
+
RUN set -eux; \
|
|
147
|
+
if [ -d /app/bundle-libs ]; then \
|
|
148
|
+
cd /app/bundle-libs; \
|
|
149
|
+
find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
|
|
150
|
+
find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
|
|
151
|
+
find . -type d -name 'tests' -prune -exec rm -rf {} +; \
|
|
152
|
+
rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
|
|
153
|
+
find torch -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
154
|
+
find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
|
|
155
|
+
find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
|
|
156
|
+
fi; \
|
|
157
|
+
# Normalize mtimes so rebuilds of the same bundle produce identical layer bytes.
|
|
158
|
+
find /app/bundle-libs -exec touch -h -d @0 {} + 2>/dev/null || true
|
|
159
|
+
|
|
160
|
+
# =============================================================================
|
|
161
|
+
# Stage 4: Runtime
|
|
162
|
+
# =============================================================================
|
|
163
|
+
# Use base CUDA image (not devel/runtime) — PyTorch wheels bundle CUDA libs,
|
|
164
|
+
# cuDNN ships inside torch. Saves ~2GB vs `runtime` variant.
|
|
165
|
+
FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS runtime
|
|
166
|
+
|
|
167
|
+
ENV DEBIAN_FRONTEND=noninteractive
|
|
168
|
+
|
|
169
|
+
# gcc + libc6-dev: triton JIT compiles CUDA kernels at first use.
|
|
170
|
+
# libnuma1: required by sgl_kernel (SGLang bundle); import fails with a
|
|
171
|
+
# misleading SM-arch error without it.
|
|
172
|
+
# libgomp1: torch OpenMP runtime.
|
|
173
|
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
|
174
|
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
|
175
|
+
apt-get update && apt-get install -y --no-install-recommends \
|
|
176
|
+
ca-certificates \
|
|
177
|
+
gcc \
|
|
178
|
+
libc6-dev \
|
|
179
|
+
libgomp1 \
|
|
180
|
+
libnuma1
|
|
181
|
+
|
|
182
|
+
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
|
|
183
|
+
|
|
184
|
+
WORKDIR /app
|
|
185
|
+
|
|
186
|
+
# Standalone Python 3.12 tree + symlinks (no deadsnakes). Pulled from `base`
|
|
187
|
+
# since /opt/python is established in the deps stage and never touched later.
|
|
188
|
+
COPY --link --from=base /opt/python /opt/python
|
|
189
|
+
RUN set -e; \
|
|
190
|
+
py=$(ls -d /opt/python/cpython-3.12*/bin/python3.12 | head -1); \
|
|
191
|
+
[ -x "$py" ] || { echo "no standalone python found under /opt/python"; exit 1; }; \
|
|
192
|
+
ln -sf "$py" /usr/local/bin/python3.12; \
|
|
193
|
+
ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3; \
|
|
194
|
+
ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
|
|
195
|
+
|
|
196
|
+
# Shared content — identical across bundles of the same platform. Pulled from
|
|
197
|
+
# `base` so every bundle rebuild hits BuildKit's local cache and the resulting
|
|
198
|
+
# registry layers are content-addressed identically, enabling pull-time dedup.
|
|
199
|
+
# Ordered BEFORE the per-bundle COPY so bundle-libs is the only invalidated
|
|
200
|
+
# layer when only BUNDLE changes.
|
|
201
|
+
# --link on shared COPYs: produces an independent layer tar without parent-dir
|
|
202
|
+
# headers carrying a build-time mtime, so the resulting layer digests match
|
|
203
|
+
# across bundles of the same platform and the registry can dedup them.
|
|
204
|
+
# --chown uses numeric IDs because a --link COPY cannot see /etc/passwd
|
|
205
|
+
# (the sie user is added in the runtime stage filesystem but --link layers
|
|
206
|
+
# are created in isolation from the destination stage state).
|
|
207
|
+
COPY --link --from=base --chown=1000:1000 /app/.venv /app/.venv
|
|
208
|
+
COPY --link --from=base --chown=1000:1000 /app/src /app/src
|
|
209
|
+
COPY --link --from=base --chown=1000:1000 /tmp/sie_sdk/src /tmp/sie_sdk/src
|
|
210
|
+
COPY --link --from=base --chown=1000:1000 /app/models /app/models
|
|
211
|
+
COPY --link --from=base --chown=1000:1000 /app/bundles /app/bundles
|
|
212
|
+
# Bundle-specific extras — last layer so shared layers above stay cached.
|
|
213
|
+
# A `.pth` file in the shared venv puts /app/bundle-libs at sys.path[0] at startup.
|
|
214
|
+
COPY --link --from=builder --chown=1000:1000 /app/bundle-libs /app/bundle-libs
|
|
215
|
+
|
|
216
|
+
RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache
|
|
217
|
+
|
|
218
|
+
# Declare the BUNDLE arg only here, where it is first used (LABEL + ENV),
|
|
219
|
+
# so every RUN/COPY layer above is bundle-agnostic in its cache key and the
|
|
220
|
+
# resulting layers are content-addressed identically across bundles.
|
|
221
|
+
ARG BUNDLE
|
|
222
|
+
|
|
223
|
+
LABEL org.opencontainers.image.title="SIE Server" \
|
|
224
|
+
org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle" \
|
|
225
|
+
sie.bundle="${BUNDLE}"
|
|
226
|
+
|
|
227
|
+
ENV PATH="/app/.venv/bin:$PATH" \
|
|
228
|
+
PYTHONUNBUFFERED=1 \
|
|
229
|
+
PYTHONDONTWRITEBYTECODE=1 \
|
|
230
|
+
HF_HOME=/app/.cache/huggingface \
|
|
231
|
+
NVIDIA_VISIBLE_DEVICES=all \
|
|
232
|
+
NVIDIA_DRIVER_CAPABILITIES=compute,utility \
|
|
233
|
+
SIE_BUNDLE="${BUNDLE}"
|
|
234
|
+
|
|
235
|
+
USER sie
|
|
236
|
+
EXPOSE 8080
|
|
237
|
+
|
|
238
|
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
|
239
|
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1
|
|
240
|
+
|
|
241
|
+
ENTRYPOINT ["python", "-m", "sie_server.cli"]
|
|
242
|
+
CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models"]
|