sie-server 0.1.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sie_server-0.1.10/.gitignore +259 -0
- sie_server-0.1.10/CONTRIBUTING.md +34 -0
- sie_server-0.1.10/Dockerfile.cpu +187 -0
- sie_server-0.1.10/Dockerfile.cuda11 +165 -0
- sie_server-0.1.10/Dockerfile.cuda12 +165 -0
- sie_server-0.1.10/LICENSE +201 -0
- sie_server-0.1.10/PKG-INFO +50 -0
- sie_server-0.1.10/README.md +31 -0
- sie_server-0.1.10/bundles/default.yaml +67 -0
- sie_server-0.1.10/bundles/sglang.yaml +8 -0
- sie_server-0.1.10/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +31 -0
- sie_server-0.1.10/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +31 -0
- sie_server-0.1.10/models/Alibaba-NLP__gte-multilingual-base.yaml +26 -0
- sie_server-0.1.10/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +21 -0
- sie_server-0.1.10/models/BAAI__bge-m3.yaml +76 -0
- sie_server-0.1.10/models/BAAI__bge-reranker-base.yaml +20 -0
- sie_server-0.1.10/models/BAAI__bge-reranker-large.yaml +20 -0
- sie_server-0.1.10/models/BAAI__bge-reranker-v2-m3.yaml +20 -0
- sie_server-0.1.10/models/EmergentMethods__gliner_large_news-v2.1.yaml +19 -0
- sie_server-0.1.10/models/GritLM__GritLM-7B.yaml +38 -0
- sie_server-0.1.10/models/IDEA-Research__grounding-dino-base.yaml +21 -0
- sie_server-0.1.10/models/IDEA-Research__grounding-dino-tiny.yaml +21 -0
- sie_server-0.1.10/models/Ihor__gliner-biomed-large-v1.0.yaml +19 -0
- sie_server-0.1.10/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +31 -0
- sie_server-0.1.10/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +22 -0
- sie_server-0.1.10/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +22 -0
- sie_server-0.1.10/models/NeuML__gliner-bert-tiny.yaml +20 -0
- sie_server-0.1.10/models/NovaSearch__stella_en_1.5B_v5.yaml +36 -0
- sie_server-0.1.10/models/NovaSearch__stella_en_400M_v5.yaml +33 -0
- sie_server-0.1.10/models/Qwen__Qwen3-Embedding-0.6B.yaml +43 -0
- sie_server-0.1.10/models/Qwen__Qwen3-Embedding-4B.yaml +31 -0
- sie_server-0.1.10/models/Salesforce__SFR-Embedding-2_R.yaml +31 -0
- sie_server-0.1.10/models/Salesforce__SFR-Embedding-Mistral.yaml +31 -0
- sie_server-0.1.10/models/answerdotai__answerai-colbert-small-v1.yaml +45 -0
- sie_server-0.1.10/models/colbert-ir__colbertv2.0.yaml +45 -0
- sie_server-0.1.10/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +20 -0
- sie_server-0.1.10/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +20 -0
- sie_server-0.1.10/models/cross-encoder__nli-deberta-v3-base.yaml +22 -0
- sie_server-0.1.10/models/gliner-community__gliner_large-v2.5.yaml +19 -0
- sie_server-0.1.10/models/gliner-community__gliner_medium-v2.5.yaml +19 -0
- sie_server-0.1.10/models/gliner-community__gliner_small-v2.5.yaml +19 -0
- sie_server-0.1.10/models/google__embeddinggemma-300m.yaml +29 -0
- sie_server-0.1.10/models/google__owlv2-base-patch16-ensemble.yaml +21 -0
- sie_server-0.1.10/models/google__siglip-so400m-patch14-224.yaml +25 -0
- sie_server-0.1.10/models/google__siglip-so400m-patch14-384.yaml +25 -0
- sie_server-0.1.10/models/ibm-granite__granite-embedding-30m-sparse.yaml +24 -0
- sie_server-0.1.10/models/intfloat__e5-base-v2.yaml +28 -0
- sie_server-0.1.10/models/intfloat__e5-large-v2.yaml +28 -0
- sie_server-0.1.10/models/intfloat__e5-mistral-7b-instruct.yaml +31 -0
- sie_server-0.1.10/models/intfloat__e5-small-v2.yaml +28 -0
- sie_server-0.1.10/models/intfloat__multilingual-e5-large-instruct.yaml +29 -0
- sie_server-0.1.10/models/intfloat__multilingual-e5-large.yaml +37 -0
- sie_server-0.1.10/models/jackboyla__glirel-large-v0.yaml +20 -0
- sie_server-0.1.10/models/jinaai__jina-colbert-v2.yaml +44 -0
- sie_server-0.1.10/models/jinaai__jina-reranker-v2-base-multilingual.yaml +21 -0
- sie_server-0.1.10/models/knowledgator__gliclass-base-v1.0.yaml +22 -0
- sie_server-0.1.10/models/knowledgator__gliclass-large-v1.0.yaml +22 -0
- sie_server-0.1.10/models/knowledgator__gliclass-large-v3.0.yaml +22 -0
- sie_server-0.1.10/models/knowledgator__gliclass-small-v1.0.yaml +22 -0
- sie_server-0.1.10/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +25 -0
- sie_server-0.1.10/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +25 -0
- sie_server-0.1.10/models/lightonai__GTE-ModernColBERT-v1.yaml +42 -0
- sie_server-0.1.10/models/lightonai__Reason-ModernColBERT.yaml +42 -0
- sie_server-0.1.10/models/microsoft__Florence-2-base-ft.yaml +23 -0
- sie_server-0.1.10/models/microsoft__Florence-2-base.yaml +23 -0
- sie_server-0.1.10/models/microsoft__Florence-2-large.yaml +23 -0
- sie_server-0.1.10/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +46 -0
- sie_server-0.1.10/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +42 -0
- sie_server-0.1.10/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +20 -0
- sie_server-0.1.10/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +20 -0
- sie_server-0.1.10/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +23 -0
- sie_server-0.1.10/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +22 -0
- sie_server-0.1.10/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +22 -0
- sie_server-0.1.10/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +22 -0
- sie_server-0.1.10/models/naver__splade-cocondenser-selfdistil.yaml +24 -0
- sie_server-0.1.10/models/naver__splade-v3.yaml +24 -0
- sie_server-0.1.10/models/nomic-ai__nomic-embed-text-v2-moe.yaml +28 -0
- sie_server-0.1.10/models/numind__NuNER_Zero-span.yaml +19 -0
- sie_server-0.1.10/models/numind__NuNER_Zero.yaml +20 -0
- sie_server-0.1.10/models/nvidia__NV-Embed-v2.yaml +28 -0
- sie_server-0.1.10/models/nvidia__llama-embed-nemotron-8b.yaml +31 -0
- sie_server-0.1.10/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +43 -0
- sie_server-0.1.10/models/openai__clip-vit-base-patch32.yaml +25 -0
- sie_server-0.1.10/models/openai__clip-vit-large-patch14.yaml +25 -0
- sie_server-0.1.10/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +24 -0
- sie_server-0.1.10/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +24 -0
- sie_server-0.1.10/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +24 -0
- sie_server-0.1.10/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +25 -0
- sie_server-0.1.10/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +24 -0
- sie_server-0.1.10/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +24 -0
- sie_server-0.1.10/models/prithivida__Splade_PP_en_v2.yaml +24 -0
- sie_server-0.1.10/models/rasyosef__splade-mini.yaml +24 -0
- sie_server-0.1.10/models/sentence-transformers__all-MiniLM-L6-v2.yaml +27 -0
- sie_server-0.1.10/models/urchade__gliner_large-v2.1.yaml +19 -0
- sie_server-0.1.10/models/urchade__gliner_medium-v2.1.yaml +19 -0
- sie_server-0.1.10/models/urchade__gliner_multi-v2.1.yaml +19 -0
- sie_server-0.1.10/models/urchade__gliner_multi_pii-v1.yaml +19 -0
- sie_server-0.1.10/models/urchade__gliner_small-v2.1.yaml +19 -0
- sie_server-0.1.10/models/vidore__colpali-v1.3-hf.yaml +41 -0
- sie_server-0.1.10/models/vidore__colqwen2.5-v0.2.yaml +41 -0
- sie_server-0.1.10/pyproject.toml +85 -0
- sie_server-0.1.10/src/sie_server/__init__.py +3 -0
- sie_server-0.1.10/src/sie_server/adapters/__init__.py +9 -0
- sie_server-0.1.10/src/sie_server/adapters/_flash_base.py +160 -0
- sie_server-0.1.10/src/sie_server/adapters/_utils.py +146 -0
- sie_server-0.1.10/src/sie_server/adapters/base.py +491 -0
- sie_server-0.1.10/src/sie_server/adapters/bert_flash/__init__.py +477 -0
- sie_server-0.1.10/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +558 -0
- sie_server-0.1.10/src/sie_server/adapters/bge_m3/__init__.py +428 -0
- sie_server-0.1.10/src/sie_server/adapters/bge_m3_flag/__init__.py +305 -0
- sie_server-0.1.10/src/sie_server/adapters/bge_m3_flash/__init__.py +549 -0
- sie_server-0.1.10/src/sie_server/adapters/clip/__init__.py +343 -0
- sie_server-0.1.10/src/sie_server/adapters/colbert/__init__.py +1147 -0
- sie_server-0.1.10/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +594 -0
- sie_server-0.1.10/src/sie_server/adapters/colbert_rotary_flash/__init__.py +772 -0
- sie_server-0.1.10/src/sie_server/adapters/colpali/__init__.py +610 -0
- sie_server-0.1.10/src/sie_server/adapters/colqwen2/__init__.py +489 -0
- sie_server-0.1.10/src/sie_server/adapters/cross_encoder/__init__.py +252 -0
- sie_server-0.1.10/src/sie_server/adapters/donut/__init__.py +588 -0
- sie_server-0.1.10/src/sie_server/adapters/florence2/__init__.py +649 -0
- sie_server-0.1.10/src/sie_server/adapters/gliclass/__init__.py +237 -0
- sie_server-0.1.10/src/sie_server/adapters/gliner/__init__.py +305 -0
- sie_server-0.1.10/src/sie_server/adapters/glirel/__init__.py +276 -0
- sie_server-0.1.10/src/sie_server/adapters/grounding_dino/__init__.py +415 -0
- sie_server-0.1.10/src/sie_server/adapters/gte_sparse_flash/__init__.py +673 -0
- sie_server-0.1.10/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +309 -0
- sie_server-0.1.10/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +476 -0
- sie_server-0.1.10/src/sie_server/adapters/nemo_colembed/__init__.py +556 -0
- sie_server-0.1.10/src/sie_server/adapters/nli_classification/__init__.py +239 -0
- sie_server-0.1.10/src/sie_server/adapters/nli_classification_flash/__init__.py +300 -0
- sie_server-0.1.10/src/sie_server/adapters/nomic_flash/__init__.py +662 -0
- sie_server-0.1.10/src/sie_server/adapters/owlv2/__init__.py +431 -0
- sie_server-0.1.10/src/sie_server/adapters/peft_lora_mixin.py +264 -0
- sie_server-0.1.10/src/sie_server/adapters/pytorch_embedding/__init__.py +430 -0
- sie_server-0.1.10/src/sie_server/adapters/qwen2_flash/__init__.py +632 -0
- sie_server-0.1.10/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +569 -0
- sie_server-0.1.10/src/sie_server/adapters/rope_flash/__init__.py +535 -0
- sie_server-0.1.10/src/sie_server/adapters/sentence_transformer/__init__.py +385 -0
- sie_server-0.1.10/src/sie_server/adapters/sglang/__init__.py +628 -0
- sie_server-0.1.10/src/sie_server/adapters/siglip/__init__.py +348 -0
- sie_server-0.1.10/src/sie_server/adapters/splade_flash/__init__.py +619 -0
- sie_server-0.1.10/src/sie_server/adapters/xlm_roberta_flash/__init__.py +519 -0
- sie_server-0.1.10/src/sie_server/api/__init__.py +1 -0
- sie_server-0.1.10/src/sie_server/api/encode.py +407 -0
- sie_server-0.1.10/src/sie_server/api/extract.py +384 -0
- sie_server-0.1.10/src/sie_server/api/health.py +47 -0
- sie_server-0.1.10/src/sie_server/api/helpers.py +455 -0
- sie_server-0.1.10/src/sie_server/api/metrics.py +30 -0
- sie_server-0.1.10/src/sie_server/api/models.py +112 -0
- sie_server-0.1.10/src/sie_server/api/openai_compat.py +441 -0
- sie_server-0.1.10/src/sie_server/api/openapi.py +79 -0
- sie_server-0.1.10/src/sie_server/api/options.py +51 -0
- sie_server-0.1.10/src/sie_server/api/root.py +17 -0
- sie_server-0.1.10/src/sie_server/api/score.py +281 -0
- sie_server-0.1.10/src/sie_server/api/serialization.py +65 -0
- sie_server-0.1.10/src/sie_server/api/validation.py +60 -0
- sie_server-0.1.10/src/sie_server/api/ws.py +333 -0
- sie_server-0.1.10/src/sie_server/app/__init__.py +0 -0
- sie_server-0.1.10/src/sie_server/app/app_factory.py +292 -0
- sie_server-0.1.10/src/sie_server/app/app_state_config.py +56 -0
- sie_server-0.1.10/src/sie_server/cli.py +252 -0
- sie_server-0.1.10/src/sie_server/config/__init__.py +0 -0
- sie_server-0.1.10/src/sie_server/config/engine.py +192 -0
- sie_server-0.1.10/src/sie_server/config/model.py +295 -0
- sie_server-0.1.10/src/sie_server/core/__init__.py +10 -0
- sie_server-0.1.10/src/sie_server/core/adaptive_batching.py +364 -0
- sie_server-0.1.10/src/sie_server/core/batcher.py +507 -0
- sie_server-0.1.10/src/sie_server/core/deps.py +230 -0
- sie_server-0.1.10/src/sie_server/core/disk_cache.py +339 -0
- sie_server-0.1.10/src/sie_server/core/encode_pipeline.py +120 -0
- sie_server-0.1.10/src/sie_server/core/hot_reload.py +581 -0
- sie_server-0.1.10/src/sie_server/core/inference.py +282 -0
- sie_server-0.1.10/src/sie_server/core/inference_output.py +171 -0
- sie_server-0.1.10/src/sie_server/core/loader.py +424 -0
- sie_server-0.1.10/src/sie_server/core/logging.py +110 -0
- sie_server-0.1.10/src/sie_server/core/memory.py +435 -0
- sie_server-0.1.10/src/sie_server/core/model_loader.py +546 -0
- sie_server-0.1.10/src/sie_server/core/postprocessor.py +568 -0
- sie_server-0.1.10/src/sie_server/core/postprocessor_registry.py +268 -0
- sie_server-0.1.10/src/sie_server/core/prepared.py +306 -0
- sie_server-0.1.10/src/sie_server/core/preprocessor/__init__.py +45 -0
- sie_server-0.1.10/src/sie_server/core/preprocessor/base.py +133 -0
- sie_server-0.1.10/src/sie_server/core/preprocessor/image.py +129 -0
- sie_server-0.1.10/src/sie_server/core/preprocessor/text.py +268 -0
- sie_server-0.1.10/src/sie_server/core/preprocessor/vision.py +946 -0
- sie_server-0.1.10/src/sie_server/core/preprocessor_registry.py +307 -0
- sie_server-0.1.10/src/sie_server/core/readiness.py +52 -0
- sie_server-0.1.10/src/sie_server/core/registry.py +1239 -0
- sie_server-0.1.10/src/sie_server/core/shutdown.py +160 -0
- sie_server-0.1.10/src/sie_server/core/timing.py +133 -0
- sie_server-0.1.10/src/sie_server/core/tokenizer.py +49 -0
- sie_server-0.1.10/src/sie_server/core/watcher.py +391 -0
- sie_server-0.1.10/src/sie_server/core/worker/__init__.py +48 -0
- sie_server-0.1.10/src/sie_server/core/worker/handlers/__init__.py +23 -0
- sie_server-0.1.10/src/sie_server/core/worker/handlers/base.py +125 -0
- sie_server-0.1.10/src/sie_server/core/worker/handlers/encode.py +237 -0
- sie_server-0.1.10/src/sie_server/core/worker/handlers/extract.py +175 -0
- sie_server-0.1.10/src/sie_server/core/worker/handlers/score.py +115 -0
- sie_server-0.1.10/src/sie_server/core/worker/model_worker.py +976 -0
- sie_server-0.1.10/src/sie_server/core/worker/types.py +184 -0
- sie_server-0.1.10/src/sie_server/main.py +30 -0
- sie_server-0.1.10/src/sie_server/nats_pull_loop.py +1423 -0
- sie_server-0.1.10/src/sie_server/nats_subscriber.py +231 -0
- sie_server-0.1.10/src/sie_server/observability/__init__.py +30 -0
- sie_server-0.1.10/src/sie_server/observability/gpu.py +202 -0
- sie_server-0.1.10/src/sie_server/observability/metrics.py +225 -0
- sie_server-0.1.10/src/sie_server/observability/prometheus.py +88 -0
- sie_server-0.1.10/src/sie_server/observability/tracing.py +121 -0
- sie_server-0.1.10/src/sie_server/static/__init__.py +1 -0
- sie_server-0.1.10/src/sie_server/static/index.html +37 -0
- sie_server-0.1.10/src/sie_server/types/__init__.py +47 -0
- sie_server-0.1.10/src/sie_server/types/inputs.py +124 -0
- sie_server-0.1.10/src/sie_server/types/openapi.py +226 -0
- sie_server-0.1.10/src/sie_server/types/outputs.py +93 -0
- sie_server-0.1.10/src/sie_server/types/requests.py +56 -0
- sie_server-0.1.10/src/sie_server/types/responses.py +205 -0
- sie_server-0.1.10/tests/adapters/__init__.py +0 -0
- sie_server-0.1.10/tests/adapters/test_base.py +74 -0
- sie_server-0.1.10/tests/adapters/test_bge_m3.py +222 -0
- sie_server-0.1.10/tests/adapters/test_clip.py +128 -0
- sie_server-0.1.10/tests/adapters/test_colbert.py +331 -0
- sie_server-0.1.10/tests/adapters/test_donut.py +170 -0
- sie_server-0.1.10/tests/adapters/test_factory_integration.py +235 -0
- sie_server-0.1.10/tests/adapters/test_florence2.py +161 -0
- sie_server-0.1.10/tests/adapters/test_glirel.py +45 -0
- sie_server-0.1.10/tests/adapters/test_grounding_dino.py +203 -0
- sie_server-0.1.10/tests/adapters/test_gte_sparse.py +115 -0
- sie_server-0.1.10/tests/adapters/test_lora.py +438 -0
- sie_server-0.1.10/tests/adapters/test_lora_integration.py +293 -0
- sie_server-0.1.10/tests/adapters/test_runtime_options.py +842 -0
- sie_server-0.1.10/tests/adapters/test_sentence_transformer.py +319 -0
- sie_server-0.1.10/tests/adapters/test_sglang.py +553 -0
- sie_server-0.1.10/tests/adapters/test_siglip.py +103 -0
- sie_server-0.1.10/tests/adapters/test_sparse_aggregation.py +271 -0
- sie_server-0.1.10/tests/adapters/test_visual_document.py +249 -0
- sie_server-0.1.10/tests/api/__init__.py +0 -0
- sie_server-0.1.10/tests/api/test_encode_dtype.py +444 -0
- sie_server-0.1.10/tests/api/test_encode_endpoint.py +547 -0
- sie_server-0.1.10/tests/api/test_encode_json_schema.py +320 -0
- sie_server-0.1.10/tests/api/test_encode_timing.py +260 -0
- sie_server-0.1.10/tests/api/test_encode_validation.py +226 -0
- sie_server-0.1.10/tests/api/test_extract.py +663 -0
- sie_server-0.1.10/tests/api/test_extract_integration.py +216 -0
- sie_server-0.1.10/tests/api/test_health.py +45 -0
- sie_server-0.1.10/tests/api/test_models.py +155 -0
- sie_server-0.1.10/tests/api/test_openai_compat.py +276 -0
- sie_server-0.1.10/tests/api/test_score.py +621 -0
- sie_server-0.1.10/tests/api/test_version_header.py +9 -0
- sie_server-0.1.10/tests/api/test_ws.py +293 -0
- sie_server-0.1.10/tests/app/__init__.py +0 -0
- sie_server-0.1.10/tests/app/test_app_factory.py +68 -0
- sie_server-0.1.10/tests/config/__init__.py +0 -0
- sie_server-0.1.10/tests/config/test_config.py +399 -0
- sie_server-0.1.10/tests/conftest.py +490 -0
- sie_server-0.1.10/tests/core/__init__.py +0 -0
- sie_server-0.1.10/tests/core/test_adaptive_batching.py +893 -0
- sie_server-0.1.10/tests/core/test_batcher.py +945 -0
- sie_server-0.1.10/tests/core/test_disk_cache.py +516 -0
- sie_server-0.1.10/tests/core/test_hot_reload.py +490 -0
- sie_server-0.1.10/tests/core/test_inference.py +141 -0
- sie_server-0.1.10/tests/core/test_loader.py +513 -0
- sie_server-0.1.10/tests/core/test_logging.py +150 -0
- sie_server-0.1.10/tests/core/test_memory.py +402 -0
- sie_server-0.1.10/tests/core/test_postprocessor.py +388 -0
- sie_server-0.1.10/tests/core/test_postprocessor_registry.py +186 -0
- sie_server-0.1.10/tests/core/test_prepared.py +194 -0
- sie_server-0.1.10/tests/core/test_preprocessor.py +549 -0
- sie_server-0.1.10/tests/core/test_preprocessor_registry.py +257 -0
- sie_server-0.1.10/tests/core/test_quantization.py +250 -0
- sie_server-0.1.10/tests/core/test_readiness.py +45 -0
- sie_server-0.1.10/tests/core/test_registry_async.py +353 -0
- sie_server-0.1.10/tests/core/test_registry_core.py +285 -0
- sie_server-0.1.10/tests/core/test_registry_deps.py +33 -0
- sie_server-0.1.10/tests/core/test_registry_memory.py +246 -0
- sie_server-0.1.10/tests/core/test_registry_multi_model.py +204 -0
- sie_server-0.1.10/tests/core/test_shutdown.py +196 -0
- sie_server-0.1.10/tests/core/test_timing.py +108 -0
- sie_server-0.1.10/tests/core/test_watcher.py +346 -0
- sie_server-0.1.10/tests/core/test_worker_backpressure.py +131 -0
- sie_server-0.1.10/tests/core/test_worker_core.py +449 -0
- sie_server-0.1.10/tests/core/test_worker_extract.py +224 -0
- sie_server-0.1.10/tests/core/test_worker_lora.py +200 -0
- sie_server-0.1.10/tests/core/test_worker_options.py +300 -0
- sie_server-0.1.10/tests/core/test_worker_score.py +223 -0
- sie_server-0.1.10/tests/observability/__init__.py +0 -0
- sie_server-0.1.10/tests/observability/test_metrics.py +194 -0
- sie_server-0.1.10/tests/observability/test_tracing.py +137 -0
- sie_server-0.1.10/tests/test_all_models.py +862 -0
- sie_server-0.1.10/tests/test_docker_integration.py +153 -0
- sie_server-0.1.10/tests/test_nats_pull_loop.py +418 -0
- sie_server-0.1.10/tests/test_nats_pull_loop_batching.py +1116 -0
- sie_server-0.1.10/tests/test_sdk_integration.py +210 -0
- sie_server-0.1.10/tests/test_server_smoke.py +8 -0
- sie_server-0.1.10/tests/test_sparse_integration.py +187 -0
- sie_server-0.1.10/tests/type_defs/__init__.py +0 -0
- sie_server-0.1.10/tests/type_defs/test_inputs.py +137 -0
- sie_server-0.1.10/tests/type_defs/test_types.py +207 -0
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.venv
|
|
140
|
+
env/
|
|
141
|
+
venv/
|
|
142
|
+
ENV/
|
|
143
|
+
env.bak/
|
|
144
|
+
venv.bak/
|
|
145
|
+
|
|
146
|
+
# Spyder project settings
|
|
147
|
+
.spyderproject
|
|
148
|
+
.spyproject
|
|
149
|
+
|
|
150
|
+
# Rope project settings
|
|
151
|
+
.ropeproject
|
|
152
|
+
|
|
153
|
+
# mkdocs documentation
|
|
154
|
+
/site
|
|
155
|
+
|
|
156
|
+
# Pyre type checker
|
|
157
|
+
.pyre/
|
|
158
|
+
|
|
159
|
+
# pytype static type analyzer
|
|
160
|
+
.pytype/
|
|
161
|
+
|
|
162
|
+
# Cython debug symbols
|
|
163
|
+
cython_debug/
|
|
164
|
+
|
|
165
|
+
# PyCharm
|
|
166
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
167
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
168
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
169
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
170
|
+
#.idea/
|
|
171
|
+
|
|
172
|
+
# Abstra
|
|
173
|
+
# Abstra is an AI-powered process automation framework.
|
|
174
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
175
|
+
# Learn more at https://abstra.io/docs
|
|
176
|
+
.abstra/
|
|
177
|
+
|
|
178
|
+
# Visual Studio Code
|
|
179
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
180
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
181
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
182
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
183
|
+
# .vscode/
|
|
184
|
+
|
|
185
|
+
# Ruff stuff:
|
|
186
|
+
.ruff_cache/
|
|
187
|
+
|
|
188
|
+
# PyPI configuration file
|
|
189
|
+
.pypirc
|
|
190
|
+
|
|
191
|
+
# Cursor
|
|
192
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
193
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
194
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
195
|
+
.cursorignore
|
|
196
|
+
.cursorindexingignore
|
|
197
|
+
|
|
198
|
+
# vscode
|
|
199
|
+
.vscode
|
|
200
|
+
|
|
201
|
+
# Marimo
|
|
202
|
+
marimo/_static/
|
|
203
|
+
marimo/_lsp/
|
|
204
|
+
__marimo__/
|
|
205
|
+
|
|
206
|
+
# SIE specific
|
|
207
|
+
# Model weights cache
|
|
208
|
+
.cache/
|
|
209
|
+
*.safetensors
|
|
210
|
+
*.bin
|
|
211
|
+
|
|
212
|
+
# Secrets (never commit)
|
|
213
|
+
*.pem
|
|
214
|
+
*.key
|
|
215
|
+
credentials.json
|
|
216
|
+
*-key.json
|
|
217
|
+
|
|
218
|
+
# Terraform
|
|
219
|
+
# Local .terraform directories (cached providers/modules)
|
|
220
|
+
**/.terraform/*
|
|
221
|
+
.terraform.lock*
|
|
222
|
+
# State files (contain sensitive data)
|
|
223
|
+
*.tfstate
|
|
224
|
+
*.tfstate.*
|
|
225
|
+
# Crash log files
|
|
226
|
+
crash.log
|
|
227
|
+
crash.*.log
|
|
228
|
+
# Override files (local developer overrides)
|
|
229
|
+
override.tf
|
|
230
|
+
override.tf.json
|
|
231
|
+
*_override.tf
|
|
232
|
+
*_override.tf.json
|
|
233
|
+
# tfvars files may contain secrets
|
|
234
|
+
*.tfvars
|
|
235
|
+
*.tfvars.json
|
|
236
|
+
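# NOTE: the `.terraform.lock*` pattern above also ignores .terraform.lock.hcl; remove that pattern if the lock file should be kept for reproducibility (provider versions)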
|
|
237
|
+
|
|
238
|
+
# Node.js
|
|
239
|
+
node_modules/
|
|
240
|
+
|
|
241
|
+
# OS
|
|
242
|
+
.DS_Store
|
|
243
|
+
Thumbs.db
|
|
244
|
+
|
|
245
|
+
# VIM
|
|
246
|
+
*.swp
|
|
247
|
+
|
|
248
|
+
# kilocode
|
|
249
|
+
.kilo/
|
|
250
|
+
|
|
251
|
+
# Worktree metadata
|
|
252
|
+
.base-branch
|
|
253
|
+
|
|
254
|
+
# Temporary files
|
|
255
|
+
tmp/
|
|
256
|
+
.tmp/
|
|
257
|
+
.local/
|
|
258
|
+
|
|
259
|
+
.requirements-modal.txt
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Contributing to SIE Server
|
|
2
|
+
|
|
3
|
+
## Development Setup
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
# Clone the repo
|
|
7
|
+
git clone https://github.com/superlinked/sie.git
|
|
8
|
+
cd sie
|
|
9
|
+
|
|
10
|
+
# Install mise and dependencies
|
|
11
|
+
mise trust && mise install
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Running Tests
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
mise run test packages/sie_server
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Code Style
|
|
21
|
+
|
|
22
|
+
- Format with `mise run lint -f`
|
|
23
|
+
- Type check with `mise run typecheck`
|
|
24
|
+
|
|
25
|
+
## Pull Requests
|
|
26
|
+
|
|
27
|
+
1. Fork the repo
|
|
28
|
+
2. Create a feature branch
|
|
29
|
+
3. Make changes with tests
|
|
30
|
+
4. Submit a PR
|
|
31
|
+
|
|
32
|
+
## License
|
|
33
|
+
|
|
34
|
+
By contributing, you agree that your contributions will be licensed under Apache 2.0.
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1.6
|
|
2
|
+
# SIE Server - CPU-only Image (amd64 + arm64)
|
|
3
|
+
# Build from repo root:
|
|
4
|
+
# docker build -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu-default .
|
|
5
|
+
# docker build -f packages/sie_server/Dockerfile.cpu --build-arg BUNDLE=sglang -t sie-server:cpu-sglang .
|
|
6
|
+
# docker buildx build --platform linux/amd64,linux/arm64 -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu .
|
|
7
|
+
#
|
|
8
|
+
# The RUN --mount cache mounts in this file require BuildKit; enable it explicitly where it is not the default:
|
|
9
|
+
# DOCKER_BUILDKIT=1 docker build -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu-default .
|
|
10
|
+
#
|
|
11
|
+
# Layer caching strategy:
|
|
12
|
+
# - Stage 1 (deps): Install all pip dependencies from pyproject.toml (cached unless deps change)
|
|
13
|
+
# - Stage 2 (base): Copy source code and redo the editable install (quick, uses cached deps)
|
|
14
|
+
# This ensures code-only changes skip the slow Stage 1 pip install; only Stage 2 and the stages built on it are rebuilt.
|
|
15
|
+
|
|
16
|
+
ARG BUNDLE=default
|
|
17
|
+
|
|
18
|
+
# =============================================================================
|
|
19
|
+
# Stage 1: Dependencies (changes rarely - cached for code-only changes)
|
|
20
|
+
# =============================================================================
|
|
21
|
+
FROM ubuntu:22.04 AS deps

# Keep apt/debconf from prompting during the package installs in this stage.
ENV DEBIAN_FRONTEND=noninteractive

# Toolchain layer: enable the deadsnakes PPA, then install Python 3.12
# (interpreter, venv module, headers) together with the native build tools.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Bring the uv and uvx binaries in from the upstream uv image.
# NOTE(review): the 0.5 tag looks like a release series rather than one exact
# release - confirm whether a tighter pin is wanted.
COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /uvx /bin/

WORKDIR /app

# uv settings: no cache, compile bytecode on install, copy files instead of linking.
ENV UV_NO_CACHE=1
ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy

# Dependency manifests only. Source code is left out on purpose so that the
# layers of this stage stay cached when only code changes.
COPY packages/sie_server/pyproject.toml /app/pyproject.toml
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml

# Stub packages: empty __init__.py files give the editable installs below
# something to point at before the real source is copied in.
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk /app/src/sie_server \
    && touch \
        /tmp/sie_sdk/src/sie_sdk/__init__.py \
        /app/src/sie_server/__init__.py

# Build the virtual environment and install every dependency of both packages.
# The PyTorch CPU wheel index is primary; PyPI is the extra index.
# This layer is reused for as long as the two pyproject.toml files are unchanged.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3.12 -m venv /app/.venv \
    && /app/.venv/bin/pip install --upgrade pip \
    && /app/.venv/bin/pip install \
        --index-url https://download.pytorch.org/whl/cpu \
        --extra-index-url https://pypi.org/simple \
        -e "/tmp/sie_sdk[storage]" \
        -e ".[gpu-metrics]"
|
|
71
|
+
|
|
72
|
+
# =============================================================================
|
|
73
|
+
# Stage 2: Source code (changes often - quick rebuild)
|
|
74
|
+
# =============================================================================
|
|
75
|
+
FROM deps AS base

# Overlay the real package sources on top of the stubs created in the deps
# stage. These are the layers that get rebuilt when code changes.
COPY packages/sie_server/src /app/src/
COPY packages/sie_sdk/src /tmp/sie_sdk/src

# Refresh both editable installs against the real source. --no-deps skips
# dependency resolution because everything was installed in the deps stage.
RUN --mount=type=cache,target=/root/.cache/pip \
    /app/.venv/bin/pip install --no-deps -e /tmp/sie_sdk -e .
|
|
86
|
+
|
|
87
|
+
# =============================================================================
|
|
88
|
+
# Stage 3: Builder - bundle-specific deps
|
|
89
|
+
# =============================================================================
|
|
90
|
+
FROM base AS builder

# Re-declare the global build arg so that it is visible inside this stage.
ARG BUNDLE

# Bundle definitions and model configs are the inputs to resolve-deps.
COPY packages/sie_server/models /app/models/
COPY packages/sie_server/bundles /app/bundles/

# Write the selected bundle's requirements to a file and echo them into the
# build log. --cpu excludes CUDA-only dependencies (flash-attn, xformers).
RUN .venv/bin/python -m sie_server.cli resolve-deps \
        --bundle "${BUNDLE}" \
        --models-dir models \
        --cpu \
        > /tmp/bundle-requirements.txt \
    && echo "Bundle ${BUNDLE} requirements:" \
    && cat /tmp/bundle-requirements.txt

# Install the bundle requirements; an empty requirements file is a no-op.
RUN --mount=type=cache,target=/root/.cache/pip \
    [ ! -s /tmp/bundle-requirements.txt ] \
    || .venv/bin/pip install \
        --index-url https://download.pytorch.org/whl/cpu \
        --extra-index-url https://pypi.org/simple \
        -r /tmp/bundle-requirements.txt

# Best-effort slimming of the venv before the runtime stage copies it:
# bytecode caches, bundled test directories and Jupyter share data are removed.
# Each step ignores its own failure, so this layer never fails the build.
RUN find .venv -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true; \
    find .venv -type f -name "*.pyc" -delete 2>/dev/null || true; \
    find .venv -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true; \
    find .venv -type d -name "test" -exec rm -rf {} + 2>/dev/null || true; \
    rm -rf .venv/share/jupyter 2>/dev/null || true
|
|
122
|
+
|
|
123
|
+
# =============================================================================
|
|
124
|
+
# Stage 4: Runtime
|
|
125
|
+
# =============================================================================
|
|
126
|
+
FROM ubuntu:22.04 AS runtime

# Build-time only: silences apt/debconf prompts for the RUN below without
# persisting DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install the Python 3.12 runtime from the deadsnakes PPA (no compilers or headers).
# NOTE(review): libjpeg8 and libpng16-16 are presumably needed by image-handling
# wheels in the venv - confirm.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
        software-properties-common \
        gnupg \
        ca-certificates \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-venv \
        libjpeg8 \
        libpng16-16 \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user with fixed UID/GID 1000
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie

WORKDIR /app

# Copy virtual environment from builder (with correct ownership to avoid chown layer)
COPY --from=builder --chown=sie:sie /app/.venv /app/.venv

# Copy sie-server source: the venv holds an editable (pip install -e) install
# of the project rooted at /app, so the source must exist at the same path.
COPY --from=builder --chown=sie:sie /app/src /app/src

# Copy sie-sdk source (installed as editable from /tmp/sie_sdk)
COPY --from=builder --chown=sie:sie /tmp/sie_sdk/src /tmp/sie_sdk/src

# Copy model configs and bundles (needed for runtime model discovery)
COPY --from=builder --chown=sie:sie /app/models /app/models
COPY --from=builder --chown=sie:sie /app/bundles /app/bundles

# Create writable directories for runtime (evals output and the Hugging Face cache)
RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache

# Re-declare the global build arg here, as late as possible, so that the
# layers above do not depend on the selected bundle.
ARG BUNDLE

# Labels
LABEL org.opencontainers.image.title="SIE Server" \
      org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle (CPU)" \
      sie.bundle="${BUNDLE}"

# Runtime environment: venv first on PATH, unbuffered output, no bytecode
# writes, Hugging Face cache under /app, fixed OpenMP/MKL thread counts.
ENV PATH="/app/.venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/app/.cache/huggingface \
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    SIE_BUNDLE="${BUNDLE}"

USER sie
EXPOSE 8080

# Probe /healthz with the standard library so that no extra HTTP client is needed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1

# ENTRYPOINT is the CLI module; CMD supplies default arguments that can be
# overridden at `docker run`.
ENTRYPOINT ["python", "-m", "sie_server.cli"]
CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models", "--device", "cpu"]
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# syntax=docker/dockerfile:1.6
|
|
2
|
+
# SIE Server - CUDA 11.8 Image (for older drivers 470+)
|
|
3
|
+
# Build from repo root:
|
|
4
|
+
# docker build -f packages/sie_server/Dockerfile.cuda11 -t sie-server:cuda11-default .
|
|
5
|
+
# docker build -f packages/sie_server/Dockerfile.cuda11 --build-arg BUNDLE=sglang -t sie-server:cuda11-sglang .
|
|
6
|
+
#
|
|
7
|
+
# The RUN --mount cache mounts in this file require BuildKit; enable it explicitly where it is not the default:
|
|
8
|
+
# DOCKER_BUILDKIT=1 docker build -f packages/sie_server/Dockerfile.cuda11 -t sie-server:cuda11-default .
|
|
9
|
+
|
|
10
|
+
ARG BUNDLE=default
|
|
11
|
+
|
|
12
|
+
# =============================================================================
|
|
13
|
+
# Stage 1: Base deps (shared across bundles)
|
|
14
|
+
# =============================================================================
|
|
15
|
+
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS base

# Keep apt/debconf from prompting during the package installs in this stage.
ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3.12 (from the deadsnakes PPA) and build tools
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
        software-properties-common \
        gnupg \
        ca-certificates \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-venv \
        python3.12-dev \
        curl \
        git \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Bring the uv and uvx binaries in from the upstream uv image.
COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /uvx /bin/

WORKDIR /app

# UV environment variables for optimal builds
ENV UV_NO_CACHE=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

# Copy ONLY the dependency manifests first, mirroring Dockerfile.cpu, so that
# the slow cu118 dependency install below stays cached across code-only changes.
COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
COPY packages/sie_server/pyproject.toml ./

# Create minimal src directories so pip install -e works for dep resolution
RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
    && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
    && touch src/sie_server/__init__.py

# Create venv and install dependencies with pip (simpler, no workspace issues).
# This layer is cached as long as the pyproject.toml files don't change.
# Note: flash-attn prebuilt wheels not available for cu118, uses SDPA fallback
RUN --mount=type=cache,target=/root/.cache/pip \
    python3.12 -m venv .venv \
    && .venv/bin/pip install --upgrade pip \
    && .venv/bin/pip install \
        --index-url https://download.pytorch.org/whl/cu118 \
        --extra-index-url https://pypi.org/simple \
        -e "/tmp/sie_sdk[storage]" \
        -e ".[gpu-metrics]"

# Now copy the real sources (bundles/models are still delayed to the builder
# stage); only the layers from here on rebuild when code changes.
COPY packages/sie_sdk /tmp/sie_sdk
COPY packages/sie_server/src src/

# Re-install the editable packages against the real source (quick: --no-deps,
# all dependencies are already in the venv).
RUN --mount=type=cache,target=/root/.cache/pip \
    .venv/bin/pip install --no-deps \
        -e /tmp/sie_sdk \
        -e .
|
|
62
|
+
|
|
63
|
+
# =============================================================================
|
|
64
|
+
# Stage 2: Builder - bundle-specific deps
|
|
65
|
+
# =============================================================================
|
|
66
|
+
FROM base AS builder

# Re-declare the global build arg so that it is visible inside this stage.
ARG BUNDLE

# Bundle definitions and model configs are the inputs to resolve-deps.
COPY packages/sie_server/models /app/models/
COPY packages/sie_server/bundles /app/bundles/

# Write the selected bundle's requirements to a file and echo them into the
# build log.
RUN .venv/bin/python -m sie_server.cli resolve-deps \
        --bundle "${BUNDLE}" \
        --models-dir models \
        > /tmp/bundle-requirements.txt \
    && echo "Bundle ${BUNDLE} requirements:" \
    && cat /tmp/bundle-requirements.txt

# Install the bundle requirements from the cu118 wheel index (PyPI as the
# extra index); an empty requirements file is a no-op.
RUN --mount=type=cache,target=/root/.cache/pip \
    [ ! -s /tmp/bundle-requirements.txt ] \
    || .venv/bin/pip install \
        --index-url https://download.pytorch.org/whl/cu118 \
        --extra-index-url https://pypi.org/simple \
        -r /tmp/bundle-requirements.txt

# Best-effort slimming of the venv before the runtime stage copies it:
# bytecode caches, bundled test directories and Jupyter share data are removed.
# Each step ignores its own failure, so this layer never fails the build.
RUN find .venv -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true; \
    find .venv -type f -name "*.pyc" -delete 2>/dev/null || true; \
    find .venv -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true; \
    find .venv -type d -name "test" -exec rm -rf {} + 2>/dev/null || true; \
    rm -rf .venv/share/jupyter 2>/dev/null || true
|
|
96
|
+
|
|
97
|
+
# =============================================================================
|
|
98
|
+
# Stage 3: Runtime
|
|
99
|
+
# =============================================================================
|
|
100
|
+
# Use base image - PyTorch bundles CUDA libs
|
|
101
|
+
FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime

# Build-time only: silences apt/debconf prompts for the RUN below without
# persisting DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.12 runtime + gcc (needed for triton kernel compilation)
# libnuma1 is required by sgl_kernel (SGLang bundle) - without it, imports fail with misleading SM arch errors
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends \
        software-properties-common \
        gnupg \
        ca-certificates \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-venv \
        gcc \
        libc6-dev \
        libnuma1 \
    && rm -rf /var/lib/apt/lists/*

# Create non-root user with fixed UID/GID 1000
RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie

WORKDIR /app

# Copy virtual environment from builder (with correct ownership to avoid chown layer)
COPY --from=builder --chown=sie:sie /app/.venv /app/.venv

# Copy sie-server source: the venv holds an editable (pip install -e) install
# of the project rooted at /app, so the source must exist at the same path.
COPY --from=builder --chown=sie:sie /app/src /app/src

# Copy sie-sdk source (installed as editable from /tmp/sie_sdk)
COPY --from=builder --chown=sie:sie /tmp/sie_sdk/src /tmp/sie_sdk/src

# Copy model configs and bundles (needed for runtime model discovery)
COPY --from=builder --chown=sie:sie /app/models /app/models
COPY --from=builder --chown=sie:sie /app/bundles /app/bundles

# Create writable directories for runtime (evals output and the Hugging Face cache)
RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache

# Re-declare the global build arg here, as late as possible, so that the
# layers above do not depend on the selected bundle.
ARG BUNDLE

# Labels
LABEL org.opencontainers.image.title="SIE Server" \
      org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle (CUDA 11.8)" \
      sie.bundle="${BUNDLE}"

# Runtime environment: venv first on PATH, unbuffered output, no bytecode
# writes, Hugging Face cache under /app, NVIDIA container runtime settings,
# and the SDPA attention backend.
ENV PATH="/app/.venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/app/.cache/huggingface \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    SIE_BUNDLE="${BUNDLE}" \
    SIE_ATTENTION_BACKEND=sdpa

USER sie
EXPOSE 8080

# Probe /healthz with the standard library so that no extra HTTP client is needed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1

# ENTRYPOINT is the CLI module; CMD supplies default arguments that can be
# overridden at `docker run`.
ENTRYPOINT ["python", "-m", "sie_server.cli"]
CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models"]
|