sie-server 0.2.0__tar.gz → 0.3.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (356)
  1. {sie_server-0.2.0 → sie_server-0.3.0}/.gitignore +4 -0
  2. sie_server-0.3.0/Dockerfile.cpu +198 -0
  3. sie_server-0.3.0/Dockerfile.cuda11 +211 -0
  4. sie_server-0.3.0/Dockerfile.cuda12 +242 -0
  5. {sie_server-0.2.0 → sie_server-0.3.0}/PKG-INFO +2 -1
  6. sie_server-0.3.0/README.md +90 -0
  7. {sie_server-0.2.0 → sie_server-0.3.0}/bundles/default.yaml +12 -1
  8. {sie_server-0.2.0 → sie_server-0.3.0}/bundles/transformers5.yaml +3 -1
  9. sie_server-0.3.0/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +25 -0
  10. sie_server-0.3.0/models/Qwen__Qwen3-Reranker-0.6B.yaml +25 -0
  11. sie_server-0.3.0/models/Qwen__Qwen3-Reranker-4B.yaml +25 -0
  12. sie_server-0.3.0/models/Qwen__Qwen3-VL-Embedding-2B.yaml +27 -0
  13. sie_server-0.3.0/models/Qwen__Qwen3-VL-Reranker-2B.yaml +21 -0
  14. sie_server-0.3.0/models/answerdotai__ModernBERT-base.yaml +33 -0
  15. sie_server-0.3.0/models/docling.yaml +20 -0
  16. sie_server-0.3.0/models/fastino__gliner2-base-v1.yaml +20 -0
  17. sie_server-0.3.0/models/google__siglip2-base-patch16-224.yaml +25 -0
  18. {sie_server-0.2.0 → sie_server-0.3.0}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +1 -1
  19. {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-base-v1.0.yaml +1 -1
  20. {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-large-v1.0.yaml +1 -1
  21. {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-large-v3.0.yaml +1 -1
  22. {sie_server-0.2.0 → sie_server-0.3.0}/models/knowledgator__gliclass-small-v1.0.yaml +1 -1
  23. sie_server-0.3.0/models/knowledgator__gliner-bi-base-v2.0.yaml +22 -0
  24. sie_server-0.3.0/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +29 -0
  25. sie_server-0.3.0/models/sugiv__stablebridge-pruner-highlighter.yaml +39 -0
  26. sie_server-0.3.0/models/zai-org__GLM-OCR.yaml +21 -0
  27. sie_server-0.3.0/openapi.json +1788 -0
  28. {sie_server-0.2.0 → sie_server-0.3.0}/pyproject.toml +3 -1
  29. sie_server-0.3.0/src/sie_server/adapters/_flash_base.py +213 -0
  30. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_spec.py +1 -1
  31. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/base.py +3 -1
  32. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bert_flash/__init__.py +12 -4
  33. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +11 -1
  34. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bge_m3/__init__.py +1 -1
  35. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bge_m3_flash/__init__.py +5 -5
  36. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/clip/__init__.py +1 -1
  37. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colbert/__init__.py +8 -8
  38. sie_server-0.3.0/src/sie_server/adapters/docling/__init__.py +165 -0
  39. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/gliclass/__init__.py +87 -38
  40. sie_server-0.3.0/src/sie_server/adapters/gliner2/__init__.py +182 -0
  41. sie_server-0.3.0/src/sie_server/adapters/gliner_bi/__init__.py +288 -0
  42. sie_server-0.3.0/src/sie_server/adapters/glm_ocr/__init__.py +393 -0
  43. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/gte_sparse_flash/__init__.py +8 -0
  44. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +25 -3
  45. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/modernbert_flash/__init__.py +8 -0
  46. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +12 -1
  47. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nli_classification_flash/__init__.py +8 -0
  48. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nomic_flash/__init__.py +9 -0
  49. sie_server-0.3.0/src/sie_server/adapters/paddleocr_vl/__init__.py +374 -0
  50. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/qwen2_flash/__init__.py +8 -0
  51. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +313 -73
  52. sie_server-0.3.0/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +409 -0
  53. sie_server-0.3.0/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +386 -0
  54. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/rope_flash/__init__.py +8 -0
  55. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/sentence_transformer/__init__.py +2 -2
  56. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/siglip/__init__.py +1 -1
  57. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/splade_flash/__init__.py +8 -0
  58. sie_server-0.3.0/src/sie_server/adapters/stablebridge_pruner/__init__.py +497 -0
  59. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +15 -7
  60. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/encode.py +13 -3
  61. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/extract.py +12 -13
  62. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/helpers.py +68 -1
  63. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/openai_compat.py +6 -6
  64. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/openapi.py +1 -1
  65. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/score.py +9 -2
  66. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/validation.py +1 -1
  67. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/ws.py +6 -6
  68. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/app/app_factory.py +5 -2
  69. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/cli.py +52 -1
  70. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/config/engine.py +150 -1
  71. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/config/model.py +9 -2
  72. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/adaptive_batching.py +1 -1
  73. sie_server-0.3.0/src/sie_server/core/extract_cost.py +29 -0
  74. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/inference_output.py +7 -0
  75. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/loader.py +8 -4
  76. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/memory.py +36 -0
  77. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/model_loader.py +10 -0
  78. sie_server-0.3.0/src/sie_server/core/oom.py +161 -0
  79. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/prepared.py +43 -0
  80. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/__init__.py +2 -0
  81. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/vision.py +351 -0
  82. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/readiness.py +2 -2
  83. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/registry.py +207 -13
  84. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/extract.py +16 -0
  85. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/model_worker.py +120 -28
  86. sie_server-0.3.0/src/sie_server/core/worker/oom_recovery.py +429 -0
  87. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/types.py +8 -0
  88. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/nats_pull_loop.py +152 -26
  89. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/metrics.py +124 -0
  90. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/inputs.py +29 -0
  91. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/openapi.py +10 -0
  92. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/responses.py +1 -0
  93. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_base.py +1 -1
  94. sie_server-0.3.0/tests/adapters/test_docling.py +194 -0
  95. sie_server-0.3.0/tests/adapters/test_docling_smoke.py +65 -0
  96. sie_server-0.3.0/tests/adapters/test_flash_base.py +132 -0
  97. sie_server-0.3.0/tests/adapters/test_glm_ocr.py +91 -0
  98. sie_server-0.3.0/tests/adapters/test_jina_flash_cross_encoder.py +196 -0
  99. sie_server-0.3.0/tests/adapters/test_paddleocr_vl.py +255 -0
  100. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_runtime_options.py +237 -2
  101. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_sentence_transformer.py +2 -2
  102. sie_server-0.3.0/tests/adapters/test_stablebridge_integration.py +160 -0
  103. sie_server-0.3.0/tests/adapters/test_stablebridge_pruner.py +273 -0
  104. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_endpoint.py +195 -1
  105. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_extract.py +78 -9
  106. sie_server-0.3.0/tests/api/test_extract_oom.py +186 -0
  107. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_ws.py +3 -3
  108. sie_server-0.3.0/tests/config/test_bundle_coverage.py +142 -0
  109. {sie_server-0.2.0 → sie_server-0.3.0}/tests/config/test_config.py +140 -8
  110. {sie_server-0.2.0 → sie_server-0.3.0}/tests/conftest.py +164 -26
  111. sie_server-0.3.0/tests/core/test_idle_evict.py +263 -0
  112. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_inference.py +1 -1
  113. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_loader.py +18 -1
  114. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_memory.py +27 -0
  115. sie_server-0.3.0/tests/core/test_oom_detection.py +144 -0
  116. sie_server-0.3.0/tests/core/worker/test_oom_recovery.py +495 -0
  117. {sie_server-0.2.0 → sie_server-0.3.0}/tests/observability/test_metrics.py +12 -12
  118. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_all_models.py +63 -0
  119. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_docker_integration.py +10 -10
  120. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_nats_pull_loop.py +187 -1
  121. sie_server-0.3.0/tests/test_openapi_export.py +52 -0
  122. sie_server-0.3.0/tests/type_defs/__init__.py +0 -0
  123. {sie_server-0.2.0 → sie_server-0.3.0}/tests/type_defs/test_inputs.py +38 -0
  124. {sie_server-0.2.0 → sie_server-0.3.0}/tests/type_defs/test_types.py +8 -0
  125. sie_server-0.2.0/Dockerfile.cpu +0 -190
  126. sie_server-0.2.0/Dockerfile.cuda11 +0 -168
  127. sie_server-0.2.0/Dockerfile.cuda12 +0 -168
  128. sie_server-0.2.0/README.md +0 -31
  129. sie_server-0.2.0/src/sie_server/adapters/_flash_base.py +0 -110
  130. {sie_server-0.2.0 → sie_server-0.3.0}/CONTRIBUTING.md +0 -0
  131. {sie_server-0.2.0 → sie_server-0.3.0}/LICENSE +0 -0
  132. {sie_server-0.2.0 → sie_server-0.3.0}/bundles/sglang.yaml +0 -0
  133. {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  134. {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  135. {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  136. {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  137. {sie_server-0.2.0 → sie_server-0.3.0}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  138. {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-m3.yaml +0 -0
  139. {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-reranker-base.yaml +0 -0
  140. {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-reranker-large.yaml +0 -0
  141. {sie_server-0.2.0 → sie_server-0.3.0}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  142. {sie_server-0.2.0 → sie_server-0.3.0}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  143. {sie_server-0.2.0 → sie_server-0.3.0}/models/GritLM__GritLM-7B.yaml +0 -0
  144. {sie_server-0.2.0 → sie_server-0.3.0}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  145. {sie_server-0.2.0 → sie_server-0.3.0}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  146. {sie_server-0.2.0 → sie_server-0.3.0}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  147. {sie_server-0.2.0 → sie_server-0.3.0}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  148. {sie_server-0.2.0 → sie_server-0.3.0}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  149. {sie_server-0.2.0 → sie_server-0.3.0}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  150. {sie_server-0.2.0 → sie_server-0.3.0}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  151. {sie_server-0.2.0 → sie_server-0.3.0}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  152. {sie_server-0.2.0 → sie_server-0.3.0}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  153. {sie_server-0.2.0 → sie_server-0.3.0}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  154. {sie_server-0.2.0 → sie_server-0.3.0}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  155. {sie_server-0.2.0 → sie_server-0.3.0}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  156. {sie_server-0.2.0 → sie_server-0.3.0}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  157. {sie_server-0.2.0 → sie_server-0.3.0}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  158. {sie_server-0.2.0 → sie_server-0.3.0}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
  159. {sie_server-0.2.0 → sie_server-0.3.0}/models/colbert-ir__colbertv2.0.yaml +0 -0
  160. {sie_server-0.2.0 → sie_server-0.3.0}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  161. {sie_server-0.2.0 → sie_server-0.3.0}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  162. {sie_server-0.2.0 → sie_server-0.3.0}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  163. {sie_server-0.2.0 → sie_server-0.3.0}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  164. {sie_server-0.2.0 → sie_server-0.3.0}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  165. {sie_server-0.2.0 → sie_server-0.3.0}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  166. {sie_server-0.2.0 → sie_server-0.3.0}/models/google__embeddinggemma-300m.yaml +0 -0
  167. {sie_server-0.2.0 → sie_server-0.3.0}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  168. {sie_server-0.2.0 → sie_server-0.3.0}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  169. {sie_server-0.2.0 → sie_server-0.3.0}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  170. {sie_server-0.2.0 → sie_server-0.3.0}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  171. {sie_server-0.2.0 → sie_server-0.3.0}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  172. {sie_server-0.2.0 → sie_server-0.3.0}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  173. {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-base-v2.yaml +0 -0
  174. {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-large-v2.yaml +0 -0
  175. {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  176. {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__e5-small-v2.yaml +0 -0
  177. {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  178. {sie_server-0.2.0 → sie_server-0.3.0}/models/intfloat__multilingual-e5-large.yaml +0 -0
  179. {sie_server-0.2.0 → sie_server-0.3.0}/models/jackboyla__glirel-large-v0.yaml +0 -0
  180. {sie_server-0.2.0 → sie_server-0.3.0}/models/jinaai__jina-colbert-v2.yaml +0 -0
  181. {sie_server-0.2.0 → sie_server-0.3.0}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  182. {sie_server-0.2.0 → sie_server-0.3.0}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  183. {sie_server-0.2.0 → sie_server-0.3.0}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
  184. {sie_server-0.2.0 → sie_server-0.3.0}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  185. {sie_server-0.2.0 → sie_server-0.3.0}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
  186. {sie_server-0.2.0 → sie_server-0.3.0}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  187. {sie_server-0.2.0 → sie_server-0.3.0}/models/microsoft__Florence-2-base.yaml +0 -0
  188. {sie_server-0.2.0 → sie_server-0.3.0}/models/microsoft__Florence-2-large.yaml +0 -0
  189. {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
  190. {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
  191. {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  192. {sie_server-0.2.0 → sie_server-0.3.0}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  193. {sie_server-0.2.0 → sie_server-0.3.0}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  194. {sie_server-0.2.0 → sie_server-0.3.0}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  195. {sie_server-0.2.0 → sie_server-0.3.0}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  196. {sie_server-0.2.0 → sie_server-0.3.0}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  197. {sie_server-0.2.0 → sie_server-0.3.0}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  198. {sie_server-0.2.0 → sie_server-0.3.0}/models/naver__splade-v3.yaml +0 -0
  199. {sie_server-0.2.0 → sie_server-0.3.0}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  200. {sie_server-0.2.0 → sie_server-0.3.0}/models/numind__NuNER_Zero-span.yaml +0 -0
  201. {sie_server-0.2.0 → sie_server-0.3.0}/models/numind__NuNER_Zero.yaml +0 -0
  202. {sie_server-0.2.0 → sie_server-0.3.0}/models/nvidia__NV-Embed-v2.yaml +0 -0
  203. {sie_server-0.2.0 → sie_server-0.3.0}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
  204. {sie_server-0.2.0 → sie_server-0.3.0}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  205. {sie_server-0.2.0 → sie_server-0.3.0}/models/openai__clip-vit-base-patch32.yaml +0 -0
  206. {sie_server-0.2.0 → sie_server-0.3.0}/models/openai__clip-vit-large-patch14.yaml +0 -0
  207. {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  208. {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  209. {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  210. {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  211. {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  212. {sie_server-0.2.0 → sie_server-0.3.0}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  213. {sie_server-0.2.0 → sie_server-0.3.0}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  214. {sie_server-0.2.0 → sie_server-0.3.0}/models/rasyosef__splade-mini.yaml +0 -0
  215. {sie_server-0.2.0 → sie_server-0.3.0}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  216. {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_large-v2.1.yaml +0 -0
  217. {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  218. {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  219. {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  220. {sie_server-0.2.0 → sie_server-0.3.0}/models/urchade__gliner_small-v2.1.yaml +0 -0
  221. {sie_server-0.2.0 → sie_server-0.3.0}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  222. {sie_server-0.2.0 → sie_server-0.3.0}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  223. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/__init__.py +0 -0
  224. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/__init__.py +0 -0
  225. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_base_adapter.py +0 -0
  226. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_types.py +0 -0
  227. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/_utils.py +0 -0
  228. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
  229. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  230. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  231. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colpali/__init__.py +0 -0
  232. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  233. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  234. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/donut/__init__.py +0 -0
  235. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/florence2/__init__.py +0 -0
  236. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/gliner/__init__.py +0 -0
  237. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/glirel/__init__.py +0 -0
  238. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  239. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  240. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
  241. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  242. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/owlv2/__init__.py +0 -0
  243. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  244. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  245. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/adapters/sglang/__init__.py +0 -0
  246. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/__init__.py +0 -0
  247. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/health.py +0 -0
  248. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/metrics.py +0 -0
  249. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/models.py +0 -0
  250. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/options.py +0 -0
  251. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/root.py +0 -0
  252. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/api/serialization.py +0 -0
  253. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/app/__init__.py +0 -0
  254. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/app/app_state_config.py +0 -0
  255. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/config/__init__.py +0 -0
  256. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/__init__.py +0 -0
  257. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/batcher.py +0 -0
  258. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/deps.py +0 -0
  259. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/disk_cache.py +0 -0
  260. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/encode_pipeline.py +0 -0
  261. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/hot_reload.py +0 -0
  262. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/inference.py +0 -0
  263. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/logging.py +0 -0
  264. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/postprocessor.py +0 -0
  265. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/postprocessor_registry.py +0 -0
  266. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/base.py +0 -0
  267. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/image.py +0 -0
  268. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor/text.py +0 -0
  269. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/preprocessor_registry.py +0 -0
  270. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/shutdown.py +0 -0
  271. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/timing.py +0 -0
  272. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/tokenizer.py +0 -0
  273. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/watcher.py +0 -0
  274. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/__init__.py +0 -0
  275. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  276. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/base.py +0 -0
  277. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/encode.py +0 -0
  278. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/core/worker/handlers/score.py +0 -0
  279. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/main.py +0 -0
  280. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/nats_subscriber.py +0 -0
  281. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/__init__.py +0 -0
  282. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/gpu.py +0 -0
  283. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/prometheus.py +0 -0
  284. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/telemetry.py +0 -0
  285. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/observability/tracing.py +0 -0
  286. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/static/__init__.py +0 -0
  287. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/static/index.html +0 -0
  288. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/__init__.py +0 -0
  289. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/outputs.py +0 -0
  290. {sie_server-0.2.0 → sie_server-0.3.0}/src/sie_server/types/requests.py +0 -0
  291. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/__init__.py +0 -0
  292. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_bge_m3.py +0 -0
  293. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_clip.py +0 -0
  294. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_colbert.py +0 -0
  295. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_donut.py +0 -0
  296. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_factory_integration.py +0 -0
  297. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_florence2.py +0 -0
  298. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_glirel.py +0 -0
  299. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_grounding_dino.py +0 -0
  300. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_gte_sparse.py +0 -0
  301. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_lighton_ocr.py +0 -0
  302. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_lora.py +0 -0
  303. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_lora_integration.py +0 -0
  304. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_sglang.py +0 -0
  305. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_siglip.py +0 -0
  306. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_sparse_aggregation.py +0 -0
  307. {sie_server-0.2.0 → sie_server-0.3.0}/tests/adapters/test_visual_document.py +0 -0
  308. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/__init__.py +0 -0
  309. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_dtype.py +0 -0
  310. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_json_schema.py +0 -0
  311. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_timing.py +0 -0
  312. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_encode_validation.py +0 -0
  313. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_extract_integration.py +0 -0
  314. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_health.py +0 -0
  315. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_models.py +0 -0
  316. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_openai_compat.py +0 -0
  317. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_score.py +0 -0
  318. {sie_server-0.2.0 → sie_server-0.3.0}/tests/api/test_version_header.py +0 -0
  319. {sie_server-0.2.0 → sie_server-0.3.0}/tests/app/__init__.py +0 -0
  320. {sie_server-0.2.0 → sie_server-0.3.0}/tests/app/test_app_factory.py +0 -0
  321. {sie_server-0.2.0 → sie_server-0.3.0}/tests/config/__init__.py +0 -0
  322. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/__init__.py +0 -0
  323. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_adaptive_batching.py +0 -0
  324. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_batcher.py +0 -0
  325. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_disk_cache.py +0 -0
  326. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_hot_reload.py +0 -0
  327. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_logging.py +0 -0
  328. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_postprocessor.py +0 -0
  329. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_postprocessor_registry.py +0 -0
  330. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_prepared.py +0 -0
  331. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_preprocessor.py +0 -0
  332. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_preprocessor_registry.py +0 -0
  333. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_quantization.py +0 -0
  334. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_readiness.py +0 -0
  335. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_async.py +0 -0
  336. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_core.py +0 -0
  337. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_deps.py +0 -0
  338. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_memory.py +0 -0
  339. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_registry_multi_model.py +0 -0
  340. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_shutdown.py +0 -0
  341. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_timing.py +0 -0
  342. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_watcher.py +0 -0
  343. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_backpressure.py +0 -0
  344. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_core.py +0 -0
  345. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_extract.py +0 -0
  346. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_lora.py +0 -0
  347. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_options.py +0 -0
  348. {sie_server-0.2.0 → sie_server-0.3.0}/tests/core/test_worker_score.py +0 -0
  349. {sie_server-0.2.0/tests/observability → sie_server-0.3.0/tests/core/worker}/__init__.py +0 -0
  350. {sie_server-0.2.0/tests/type_defs → sie_server-0.3.0/tests/observability}/__init__.py +0 -0
  351. {sie_server-0.2.0 → sie_server-0.3.0}/tests/observability/test_telemetry.py +0 -0
  352. {sie_server-0.2.0 → sie_server-0.3.0}/tests/observability/test_tracing.py +0 -0
  353. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_nats_pull_loop_batching.py +0 -0
  354. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_sdk_integration.py +0 -0
  355. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_server_smoke.py +0 -0
  356. {sie_server-0.2.0 → sie_server-0.3.0}/tests/test_sparse_integration.py +0 -0
@@ -75,6 +75,9 @@ docs/_build/
75
75
  .pybuilder/
76
76
  target/
77
77
 
78
+ # Rust
79
+ *.rs.bk
80
+
78
81
  # Jupyter Notebook
79
82
  .ipynb_checkpoints
80
83
 
@@ -247,6 +250,7 @@ Thumbs.db
247
250
 
248
251
  # kilocode
249
252
  .kilo/
253
+ .kilocode/
250
254
 
251
255
  # Worktree metadata
252
256
  .base-branch
@@ -0,0 +1,198 @@
1
+ # syntax=docker/dockerfile:1
2
+ # SIE Server - CPU-only Image (amd64 + arm64)
3
+ # Build from repo root:
4
+ # docker build -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu-default .
5
+ # docker build -f packages/sie_server/Dockerfile.cpu --build-arg BUNDLE=transformers5 -t sie-server:cpu-transformers5 .
6
+ # docker buildx build --platform linux/amd64,linux/arm64 -f packages/sie_server/Dockerfile.cpu -t sie-server:cpu .
7
+
8
+ ARG BUNDLE=default
9
+
10
+ # =============================================================================
11
+ # Stage 1: Dependencies (pyproject.toml only, cached across code changes)
12
+ # =============================================================================
13
+ FROM python:3.12-slim-bookworm AS deps
14
+
15
+ ENV DEBIAN_FRONTEND=noninteractive \
16
+ UV_NO_CACHE=1 \
17
+ UV_COMPILE_BYTECODE=1 \
18
+ UV_LINK_MODE=copy \
19
+ PIP_DISABLE_PIP_VERSION_CHECK=1
20
+
21
+ # build-essential + git are builder-only; they do NOT leak into the runtime stage.
22
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
23
+ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
24
+ apt-get update && apt-get install -y --no-install-recommends \
25
+ build-essential \
26
+ ca-certificates \
27
+ curl \
28
+ git
29
+
30
+ ARG UV_VERSION=0.9.28
31
+ RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
32
+ && mv /root/.local/bin/uv /bin/uv \
33
+ && mv /root/.local/bin/uvx /bin/uvx
34
+
35
+ WORKDIR /app
36
+
37
+ # Dependency specs first — this layer caches as long as pyproject.toml is unchanged.
38
+ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
39
+ COPY packages/sie_server/pyproject.toml ./pyproject.toml
40
+
41
+ # Stub source trees so pip accepts the editable installs during dep resolution.
42
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
43
+ && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
44
+ && touch src/sie_server/__init__.py
45
+
46
+ RUN --mount=type=cache,target=/root/.cache/pip \
47
+ python -m venv .venv \
48
+ && .venv/bin/pip install --upgrade pip \
49
+ && .venv/bin/pip install \
50
+ --index-url https://download.pytorch.org/whl/cpu \
51
+ --extra-index-url https://pypi.org/simple \
52
+ -e "/tmp/sie_sdk[storage]" \
53
+ -e ".[gpu-metrics]"
54
+
55
+ # =============================================================================
56
+ # Stage 2: Base - source install + shared-venv finalization (no BUNDLE)
57
+ # =============================================================================
58
+ # Bundle-agnostic: all base-stage layers are shared across bundles of this
59
+ # platform in local BuildKit cache and in content-addressed registry layers.
60
+ FROM deps AS base
61
+
62
+ COPY packages/sie_sdk/src /tmp/sie_sdk/src
63
+ COPY packages/sie_server/src src/
64
+ COPY packages/sie_server/bundles bundles/
65
+ COPY packages/sie_server/models models/
66
+
67
+ # Editable reinstall over the stub stage — keeps ./bundles lookups via CWD
68
+ # working for `sie_server.cli resolve-deps`.
69
+ RUN --mount=type=cache,target=/root/.cache/pip \
70
+ .venv/bin/pip install --no-deps \
71
+ -e /tmp/sie_sdk \
72
+ -e .
73
+
74
+ # Sanity-check shared venv imports — catches breakage introduced by the
75
+ # shared-deps resolver.
76
+ RUN .venv/bin/python -c "import torch; print(torch.__version__)"
77
+
78
+ # Register bundle-libs on sys.path. Content is identical across bundles.
79
+ RUN echo "import sys; sys.path.insert(0, '/app/bundle-libs')" \
80
+ > /app/.venv/lib/python3.12/site-packages/_sie_bundle.pth
81
+
82
+ # .venv cleanup + stdlib trim — runs here so /app/.venv reaches its final
83
+ # form BEFORE bundle divergence. Removes bytecode caches, tests/ dirs, torch
84
+ # *.a static libs and dist-info RECORD files. torch/include is not removed
85
+ # and shared objects are not stripped.
86
+ RUN set -eux; \
87
+ cd /app/.venv/lib/python3.12/site-packages; \
88
+ find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
89
+ find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
90
+ find . -type d -name 'tests' -prune -exec rm -rf {} +; \
91
+ rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
92
+ find torch -type f -name '*.a' -delete 2>/dev/null || true; \
93
+ find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
94
+ rm -rf /app/.venv/share/jupyter 2>/dev/null || true; \
95
+ rm -rf /usr/local/lib/python3.12/test /usr/local/lib/python3.12/idlelib 2>/dev/null || true; \
96
+ find /app/.venv -exec touch -h -d @0 {} + 2>/dev/null || true
97
+
98
+ # =============================================================================
99
+ # Stage 3: Builder - bundle-specific deps
100
+ # =============================================================================
101
+ FROM base AS builder
102
+
103
+ ARG BUNDLE
104
+
105
+ RUN .venv/bin/python -m sie_server.cli resolve-deps \
106
+ --bundle "${BUNDLE}" \
107
+ --models-dir models \
108
+ --cpu \
109
+ > /tmp/bundle-requirements.txt \
110
+ && echo "Bundle ${BUNDLE} requirements:" \
111
+ && cat /tmp/bundle-requirements.txt
112
+
113
+ # Bundle-specific deps land in a separate site-packages tree so the shared
114
+ # venv layer stays byte-identical across bundles of the same platform.
115
+ # Even if /tmp/bundle-requirements.txt is empty, create /app/bundle-libs so
116
+ # the runtime `COPY --from` never fails.
117
+ RUN --mount=type=cache,target=/root/.cache/pip \
118
+ mkdir -p /app/bundle-libs; \
119
+ if [ -s /tmp/bundle-requirements.txt ]; then \
120
+ .venv/bin/pip install \
121
+ --index-url https://download.pytorch.org/whl/cpu \
122
+ --extra-index-url https://pypi.org/simple \
123
+ --target=/app/bundle-libs \
124
+ --no-compile \
125
+ -r /tmp/bundle-requirements.txt; \
126
+ fi
127
+
128
+ # bundle-libs cleanup (symmetric to the .venv cleanup in base).
129
+ RUN set -eux; \
130
+ if [ -d /app/bundle-libs ]; then \
131
+ cd /app/bundle-libs; \
132
+ find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
133
+ find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
134
+ find . -type d -name 'tests' -prune -exec rm -rf {} +; \
135
+ rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
136
+ find torch -type f -name '*.a' -delete 2>/dev/null || true; \
137
+ find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
138
+ fi; \
139
+ find /app/bundle-libs -exec touch -h -d @0 {} + 2>/dev/null || true
140
+
141
+ # =============================================================================
142
+ # Stage 4: Runtime
143
+ # =============================================================================
144
+ FROM python:3.12-slim-bookworm AS runtime
145
+
146
+ ENV DEBIAN_FRONTEND=noninteractive \
147
+ PATH="/app/.venv/bin:$PATH" \
148
+ PYTHONUNBUFFERED=1 \
149
+ PYTHONDONTWRITEBYTECODE=1 \
150
+ HF_HOME=/app/.cache/huggingface \
151
+ OMP_NUM_THREADS=4 \
152
+ MKL_NUM_THREADS=4
153
+
154
+ # Only the shared libs torch + pillow actually dlopen at runtime.
155
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
156
+ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
157
+ apt-get update && apt-get install -y --no-install-recommends \
158
+ libgomp1 \
159
+ libjpeg62-turbo \
160
+ libpng16-16
161
+
162
+ RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
163
+
164
+ WORKDIR /app
165
+
166
+ # --link shared COPYs — produce independent layer tars without parent-dir
167
+ # headers carrying a build-time mtime, so resulting layer digests match
168
+ # across bundles of the same platform. Numeric --chown because --link has
169
+ # no /etc/passwd visible (the sie user exists in the runtime FS but --link
170
+ # layers are created in isolation).
171
+ COPY --link --from=base --chown=1000:1000 /app/.venv /app/.venv
172
+ COPY --link --from=base --chown=1000:1000 /app/src /app/src
173
+ COPY --link --from=base --chown=1000:1000 /tmp/sie_sdk/src /tmp/sie_sdk/src
174
+ COPY --link --from=base --chown=1000:1000 /app/models /app/models
175
+ COPY --link --from=base --chown=1000:1000 /app/bundles /app/bundles
176
+ # Bundle-specific extras — last layer so shared layers above stay cached.
177
+ COPY --link --from=builder --chown=1000:1000 /app/bundle-libs /app/bundle-libs
178
+
179
+ RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache
180
+
181
+ # Declare BUNDLE arg only here, where it is first used (LABEL + ENV),
182
+ # so every RUN/COPY layer above is bundle-agnostic in its cache key.
183
+ ARG BUNDLE
184
+
185
+ ENV SIE_BUNDLE="${BUNDLE}"
186
+
187
+ LABEL org.opencontainers.image.title="SIE Server" \
188
+ org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle (CPU)" \
189
+ sie.bundle="${BUNDLE}"
190
+
191
+ USER sie
192
+ EXPOSE 8080
193
+
194
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
195
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1
196
+
197
+ ENTRYPOINT ["python", "-m", "sie_server.cli"]
198
+ CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models", "--device", "cpu"]
@@ -0,0 +1,211 @@
1
+ # syntax=docker/dockerfile:1
2
+ # SIE Server - CUDA 11.8 Image (for older drivers 470+)
3
+ # Build from repo root:
4
+ # docker build -f packages/sie_server/Dockerfile.cuda11 -t sie-server:cuda11-default .
5
+ # docker build -f packages/sie_server/Dockerfile.cuda11 --build-arg BUNDLE=sglang -t sie-server:cuda11-sglang .
6
+
7
+ ARG BUNDLE=default
8
+ ARG UV_VERSION=0.9.28
9
+
10
+ # =============================================================================
11
+ # Stage 1: uv + standalone Python 3.12 (no deadsnakes PPA)
12
+ # =============================================================================
13
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS deps
14
+
15
+ ENV DEBIAN_FRONTEND=noninteractive \
16
+ UV_NO_CACHE=1 \
17
+ UV_COMPILE_BYTECODE=1 \
18
+ UV_LINK_MODE=copy \
19
+ UV_PYTHON_INSTALL_DIR=/opt/python \
20
+ PIP_DISABLE_PIP_VERSION_CHECK=1
21
+
22
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
23
+ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
24
+ apt-get update && apt-get install -y --no-install-recommends \
25
+ ca-certificates \
26
+ curl \
27
+ git
28
+
29
+ ARG UV_VERSION
30
+ RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
31
+ && mv /root/.local/bin/uv /bin/uv \
32
+ && mv /root/.local/bin/uvx /bin/uvx
33
+
34
+ RUN uv python install 3.12 \
35
+ && ln -s "$(uv python find 3.12)" /usr/local/bin/python3.12 \
36
+ && ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3 \
37
+ && ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
38
+
39
+ WORKDIR /app
40
+
41
+ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
42
+ COPY packages/sie_server/pyproject.toml ./pyproject.toml
43
+
44
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
45
+ && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
46
+ && touch src/sie_server/__init__.py
47
+
48
+ # Note: flash-attn prebuilt wheels are not published for cu118, so SDPA
49
+ # fallback is used at runtime (see SIE_ATTENTION_BACKEND below).
50
+ RUN --mount=type=cache,target=/root/.cache/pip \
51
+ python3.12 -m venv .venv \
52
+ && .venv/bin/pip install --upgrade pip \
53
+ && .venv/bin/pip install \
54
+ --index-url https://download.pytorch.org/whl/cu118 \
55
+ --extra-index-url https://pypi.org/simple \
56
+ -e "/tmp/sie_sdk[storage]" \
57
+ -e ".[gpu-metrics]"
58
+
59
+ # =============================================================================
60
+ # Stage 2: Base - source install + shared-venv finalization (no BUNDLE)
61
+ # =============================================================================
62
+ # Everything here is bundle-agnostic, so all base-stage layers are shared
63
+ # across bundles of this platform in local BuildKit cache and in registry.
64
+ FROM deps AS base
65
+
66
+ COPY packages/sie_sdk/src /tmp/sie_sdk/src
67
+ COPY packages/sie_server/src src/
68
+ COPY packages/sie_server/bundles bundles/
69
+ COPY packages/sie_server/models models/
70
+
71
+ RUN --mount=type=cache,target=/root/.cache/pip \
72
+ .venv/bin/pip install --no-deps \
73
+ -e /tmp/sie_sdk \
74
+ -e .
75
+
76
+ # Sanity-check shared venv imports — catches breakage introduced by the
77
+ # shared-deps resolver.
78
+ RUN .venv/bin/python -c "import torch; print(torch.__version__)"
79
+
80
+ # Register bundle-libs on sys.path. `sys.path.insert(0, ...)` puts
81
+ # bundle-libs at position 0 so bundle-specific versions shadow shared
82
+ # venv versions. Content is identical across bundles.
83
+ RUN echo "import sys; sys.path.insert(0, '/app/bundle-libs')" \
84
+ > /app/.venv/lib/python3.12/site-packages/_sie_bundle.pth
85
+
86
+ # .venv cleanup — runs here so /app/.venv reaches its final form BEFORE
87
+ # bundle divergence. All nvidia-*-cu11 packages kept — torch._C DT_NEEDED
88
+ # links libcufile, libnccl, etc.; removing any breaks `import torch`.
89
+ RUN set -eux; \
90
+ cd /app/.venv/lib/python3.12/site-packages; \
91
+ find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
92
+ find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
93
+ find . -type d -name 'tests' -prune -exec rm -rf {} +; \
94
+ rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
95
+ find torch -type f -name '*.a' -delete 2>/dev/null || true; \
96
+ find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
97
+ find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
98
+ rm -rf /app/.venv/share/jupyter 2>/dev/null || true; \
99
+ find /app/.venv -exec touch -h -d @0 {} + 2>/dev/null || true
100
+
101
+ # =============================================================================
102
+ # Stage 3: Builder - bundle-specific deps
103
+ # =============================================================================
104
+ FROM base AS builder
105
+
106
+ ARG BUNDLE
107
+
108
+ RUN .venv/bin/python -m sie_server.cli resolve-deps \
109
+ --bundle "${BUNDLE}" \
110
+ --models-dir models \
111
+ > /tmp/bundle-requirements.txt \
112
+ && echo "Bundle ${BUNDLE} requirements:" \
113
+ && cat /tmp/bundle-requirements.txt
114
+
115
+ # Bundle-specific deps land in a separate site-packages tree so the shared
116
+ # venv layer stays byte-identical across bundles of the same platform.
117
+ # The cu11 `default` bundle skips flash-attn (wheel is cu128), so
118
+ # /app/bundle-libs may be empty — create the directory either way so the
119
+ # runtime COPY never fails.
120
+ RUN --mount=type=cache,target=/root/.cache/pip \
121
+ mkdir -p /app/bundle-libs; \
122
+ if [ -s /tmp/bundle-requirements.txt ]; then \
123
+ .venv/bin/pip install \
124
+ --index-url https://download.pytorch.org/whl/cu118 \
125
+ --extra-index-url https://pypi.org/simple \
126
+ --target=/app/bundle-libs \
127
+ --no-compile \
128
+ -r /tmp/bundle-requirements.txt; \
129
+ fi
130
+
131
+ # bundle-libs cleanup (symmetric to the .venv cleanup in base).
132
+ RUN set -eux; \
133
+ if [ -d /app/bundle-libs ]; then \
134
+ cd /app/bundle-libs; \
135
+ find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
136
+ find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
137
+ find . -type d -name 'tests' -prune -exec rm -rf {} +; \
138
+ rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
139
+ find torch -type f -name '*.a' -delete 2>/dev/null || true; \
140
+ find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
141
+ find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
142
+ fi; \
143
+ find /app/bundle-libs -exec touch -h -d @0 {} + 2>/dev/null || true
144
+
145
+ # =============================================================================
146
+ # Stage 4: Runtime
147
+ # =============================================================================
148
+ FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
149
+
150
+ ENV DEBIAN_FRONTEND=noninteractive
151
+
152
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
153
+ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
154
+ apt-get update && apt-get install -y --no-install-recommends \
155
+ ca-certificates \
156
+ gcc \
157
+ libc6-dev \
158
+ libgomp1 \
159
+ libnuma1
160
+
161
+ RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
162
+
163
+ WORKDIR /app
164
+
165
+ COPY --link --from=base /opt/python /opt/python
166
+ RUN set -e; \
167
+ py=$(ls -d /opt/python/cpython-3.12*/bin/python3.12 | head -1); \
168
+ [ -x "$py" ] || { echo "no standalone python found under /opt/python"; exit 1; }; \
169
+ ln -sf "$py" /usr/local/bin/python3.12; \
170
+ ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3; \
171
+ ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
172
+
173
+ # --link shared COPYs — produce independent layer tars without parent-dir
174
+ # headers carrying a build-time mtime, so resulting layer digests match
175
+ # across bundles of the same platform. Numeric --chown because --link has
176
+ # no /etc/passwd visible (the sie user exists in the runtime FS but --link
177
+ # layers are created in isolation).
178
+ COPY --link --from=base --chown=1000:1000 /app/.venv /app/.venv
179
+ COPY --link --from=base --chown=1000:1000 /app/src /app/src
180
+ COPY --link --from=base --chown=1000:1000 /tmp/sie_sdk/src /tmp/sie_sdk/src
181
+ COPY --link --from=base --chown=1000:1000 /app/models /app/models
182
+ COPY --link --from=base --chown=1000:1000 /app/bundles /app/bundles
183
+ COPY --link --from=builder --chown=1000:1000 /app/bundle-libs /app/bundle-libs
184
+
185
+ RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache
186
+
187
+ # Declare BUNDLE arg only here, where it is first used (LABEL + ENV),
188
+ # so every RUN/COPY layer above is bundle-agnostic in its cache key.
189
+ ARG BUNDLE
190
+
191
+ LABEL org.opencontainers.image.title="SIE Server" \
192
+ org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle (CUDA 11.8)" \
193
+ sie.bundle="${BUNDLE}"
194
+
195
+ ENV PATH="/app/.venv/bin:$PATH" \
196
+ PYTHONUNBUFFERED=1 \
197
+ PYTHONDONTWRITEBYTECODE=1 \
198
+ HF_HOME=/app/.cache/huggingface \
199
+ NVIDIA_VISIBLE_DEVICES=all \
200
+ NVIDIA_DRIVER_CAPABILITIES=compute,utility \
201
+ SIE_BUNDLE="${BUNDLE}" \
202
+ SIE_ATTENTION_BACKEND=sdpa
203
+
204
+ USER sie
205
+ EXPOSE 8080
206
+
207
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
208
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1
209
+
210
+ ENTRYPOINT ["python", "-m", "sie_server.cli"]
211
+ CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models"]
@@ -0,0 +1,242 @@
1
+ # syntax=docker/dockerfile:1
2
+ # SIE Server - CUDA 12.4 Image
3
+ # Build from repo root:
4
+ # docker build -f packages/sie_server/Dockerfile.cuda12 -t sie-server:cuda12-default .
5
+ # docker build -f packages/sie_server/Dockerfile.cuda12 --build-arg BUNDLE=sglang -t sie-server:cuda12-sglang .
6
+
7
+ ARG BUNDLE=default
8
+ ARG UV_VERSION=0.9.28
9
+
10
+ # =============================================================================
11
+ # Stage 1: uv + standalone Python 3.12 (no deadsnakes PPA)
12
+ # =============================================================================
13
+ FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04 AS deps
14
+
15
+ ENV DEBIAN_FRONTEND=noninteractive \
16
+ UV_NO_CACHE=1 \
17
+ UV_COMPILE_BYTECODE=1 \
18
+ UV_LINK_MODE=copy \
19
+ UV_PYTHON_INSTALL_DIR=/opt/python \
20
+ PIP_DISABLE_PIP_VERSION_CHECK=1
21
+
22
+ # Minimal apt footprint: just enough for curl + git; no deadsnakes PPA.
23
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
24
+ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
25
+ apt-get update && apt-get install -y --no-install-recommends \
26
+ ca-certificates \
27
+ curl \
28
+ git
29
+
30
+ ARG UV_VERSION
31
+ RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
32
+ && mv /root/.local/bin/uv /bin/uv \
33
+ && mv /root/.local/bin/uvx /bin/uvx
34
+
35
+ # Install a standalone Python 3.12 from python-build-standalone (fully relocatable,
36
+ # glibc 2.17+ compat). This replaces the deadsnakes PPA entirely.
37
+ RUN uv python install 3.12 \
38
+ && ln -s "$(uv python find 3.12)" /usr/local/bin/python3.12 \
39
+ && ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3 \
40
+ && ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
41
+
42
+ WORKDIR /app
43
+
44
+ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
45
+ COPY packages/sie_server/pyproject.toml ./pyproject.toml
46
+
47
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
48
+ && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
49
+ && touch src/sie_server/__init__.py
50
+
51
+ RUN --mount=type=cache,target=/root/.cache/pip \
52
+ python3.12 -m venv .venv \
53
+ && .venv/bin/pip install --upgrade pip \
54
+ && .venv/bin/pip install \
55
+ -e "/tmp/sie_sdk[storage]" \
56
+ -e ".[gpu-metrics]"
57
+
58
+ # =============================================================================
59
+ # Stage 2: Base - source install + shared-venv finalization (no BUNDLE)
60
+ # =============================================================================
61
+ # Everything here is bundle-agnostic, so bundle-specific builds of a given
62
+ # platform share every base-stage layer in local BuildKit cache and in
63
+ # content-addressed registry layers.
64
+ FROM deps AS base
65
+
66
+ COPY packages/sie_sdk/src /tmp/sie_sdk/src
67
+ COPY packages/sie_server/src src/
68
+ COPY packages/sie_server/bundles bundles/
69
+ COPY packages/sie_server/models models/
70
+
71
+ # Editable reinstall over the stub stage so runtime path-based lookups
72
+ # (e.g. sie_server.cli resolve-deps reading ./bundles) keep working.
73
+ RUN --mount=type=cache,target=/root/.cache/pip \
74
+ .venv/bin/pip install --no-deps \
75
+ -e /tmp/sie_sdk \
76
+ -e .
77
+
78
+ # Sanity-check shared venv imports — catches breakage introduced by the
79
+ # shared-deps resolver. flash-attn/sglang need a real GPU and are
80
+ # intentionally not imported here.
81
+ RUN .venv/bin/python -c "import torch; print(torch.__version__)"
82
+
83
+ # Register bundle-libs on sys.path via a .pth file executed by site.py at
84
+ # interpreter startup. `sys.path.insert(0, ...)` puts bundle-libs at
85
+ # position 0 so bundle-specific versions SHADOW shared venv versions
86
+ # (required for the transformers5 bundle). Content is identical for every
87
+ # bundle, so writing it here keeps the shared venv layer byte-identical.
88
+ RUN echo "import sys; sys.path.insert(0, '/app/bundle-libs')" \
89
+ > /app/.venv/lib/python3.12/site-packages/_sie_bundle.pth
90
+
91
+ # Conservative venv cleanup — only things known to be runtime-safe. Keep
92
+ # torch/include (triton JIT compiles against torch C++ headers for
93
+ # sglang/flash-attn custom ops) and keep gcc/libc6-dev at runtime.
94
+ # Main wins: byte-compiled caches, *.a static libs, tests/, RECORD metadata.
95
+ # Runs here (not in builder) so /app/.venv reaches its final form BEFORE
96
+ # bundle divergence, making the runtime COPY cache-hit across bundles.
97
+ RUN set -eux; \
98
+ cd /app/.venv/lib/python3.12/site-packages; \
99
+ find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
100
+ find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
101
+ find . -type d -name 'tests' -prune -exec rm -rf {} +; \
102
+ rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
103
+ find torch -type f -name '*.a' -delete 2>/dev/null || true; \
104
+ find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
105
+ # All nvidia-*-cu12 packages are kept — torch._C DT_NEEDED links libcufile,
106
+ # libnccl, and similar; removing any breaks `import torch`.
107
+ find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
108
+ rm -rf /app/.venv/share/jupyter 2>/dev/null || true; \
109
+ # Normalize mtimes so the cross-stage COPY of /app/.venv produces a
110
+ # byte-identical tar across bundles of the same platform.
111
+ find /app/.venv -exec touch -h -d @0 {} + 2>/dev/null || true
112
+
113
+ # =============================================================================
114
+ # Stage 3: Builder - bundle-specific deps
115
+ # =============================================================================
116
+ FROM base AS builder
117
+
118
+ ARG BUNDLE
119
+
120
+ RUN .venv/bin/python -m sie_server.cli resolve-deps \
121
+ --bundle "${BUNDLE}" \
122
+ --models-dir models \
123
+ > /tmp/bundle-requirements.txt \
124
+ && echo "Bundle ${BUNDLE} requirements:" \
125
+ && cat /tmp/bundle-requirements.txt
126
+
127
+ # Bundle-specific deps go into a separate site-packages tree so the shared
128
+ # venv layer stays byte-identical across bundles (default, sglang,
129
+ # transformers5) of the same platform. `.venv/bin/pip` is used as the
130
+ # resolver with the intent that only novel packages land under --target.
131
+ # NOTE(review): pip's --target implies --ignore-installed, so shared deps may be re-installed here — confirm.
132
+ # flash-attn deps carry `sys_platform == 'linux'` markers so they install on
133
+ # Linux (this image) but not on developer machines.
134
+ RUN --mount=type=cache,target=/root/.cache/pip \
135
+ mkdir -p /app/bundle-libs; \
136
+ if [ -s /tmp/bundle-requirements.txt ]; then \
137
+ .venv/bin/pip install \
138
+ --target=/app/bundle-libs \
139
+ --no-compile \
140
+ -r /tmp/bundle-requirements.txt; \
141
+ fi
142
+
143
+ # bundle-libs cleanup (symmetric to the .venv cleanup in base). The .venv
144
+ # tree is already final in the base stage; only the per-bundle tree needs
145
+ # cleanup here.
146
+ RUN set -eux; \
147
+ if [ -d /app/bundle-libs ]; then \
148
+ cd /app/bundle-libs; \
149
+ find . -type d -name '__pycache__' -prune -exec rm -rf {} +; \
150
+ find . -type f \( -name '*.pyc' -o -name '*.pyo' \) -delete; \
151
+ find . -type d -name 'tests' -prune -exec rm -rf {} +; \
152
+ rm -rf torch/test torch/utils/tensorboard torch/utils/bottleneck 2>/dev/null || true; \
153
+ find torch -type f -name '*.a' -delete 2>/dev/null || true; \
154
+ find . -type f -name 'RECORD' -path '*dist-info*' -delete; \
155
+ find nvidia -type f -name '*.a' -delete 2>/dev/null || true; \
156
+ fi; \
157
+ # Normalize mtimes so rebuilds of the same bundle produce identical layer bytes.
158
+ find /app/bundle-libs -exec touch -h -d @0 {} + 2>/dev/null || true
159
+
160
+ # =============================================================================
161
+ # Stage 4: Runtime
162
+ # =============================================================================
163
+ # Use base CUDA image (not devel/runtime) — PyTorch wheels bundle CUDA libs,
164
+ # cuDNN ships inside torch. Saves ~2GB vs `runtime` variant.
165
+ FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS runtime
166
+
167
+ ENV DEBIAN_FRONTEND=noninteractive
168
+
169
+ # gcc + libc6-dev: triton JIT compiles CUDA kernels at first use.
170
+ # libnuma1: required by sgl_kernel (SGLang bundle); import fails with a
171
+ # misleading SM-arch error without it.
172
+ # libgomp1: torch OpenMP runtime.
173
+ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
174
+ --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
175
+ apt-get update && apt-get install -y --no-install-recommends \
176
+ ca-certificates \
177
+ gcc \
178
+ libc6-dev \
179
+ libgomp1 \
180
+ libnuma1
181
+
182
+ RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
183
+
184
+ WORKDIR /app
185
+
186
+ # Standalone Python 3.12 tree + symlinks (no deadsnakes). Pulled from `base`
187
+ # since /opt/python is established in the deps stage and never touched later.
188
+ COPY --link --from=base /opt/python /opt/python
189
+ RUN set -e; \
190
+ py=$(ls -d /opt/python/cpython-3.12*/bin/python3.12 | head -1); \
191
+ [ -x "$py" ] || { echo "no standalone python found under /opt/python"; exit 1; }; \
192
+ ln -sf "$py" /usr/local/bin/python3.12; \
193
+ ln -sf /usr/local/bin/python3.12 /usr/local/bin/python3; \
194
+ ln -sf /usr/local/bin/python3.12 /usr/local/bin/python
195
+
196
+ # Shared content — identical across bundles of the same platform. Pulled from
197
+ # `base` so every bundle rebuild hits BuildKit's local cache and the resulting
198
+ # registry layers are content-addressed identically, enabling pull-time dedup.
199
+ # Ordered BEFORE the per-bundle COPY so bundle-libs is the only invalidating
200
+ # layer when only BUNDLE changes.
201
+ # --link on shared COPYs: produces an independent layer tar without parent-dir
202
+ # headers carrying a build-time mtime, so the resulting layer digests match
203
+ # across bundles of the same platform and the registry can dedup them.
204
+ # --chown uses numeric IDs because --link doesn't have /etc/passwd visible
205
+ # (the sie user is added in the runtime stage filesystem but --link layers
206
+ # are created in isolation from the destination stage state).
207
+ COPY --link --from=base --chown=1000:1000 /app/.venv /app/.venv
208
+ COPY --link --from=base --chown=1000:1000 /app/src /app/src
209
+ COPY --link --from=base --chown=1000:1000 /tmp/sie_sdk/src /tmp/sie_sdk/src
210
+ COPY --link --from=base --chown=1000:1000 /app/models /app/models
211
+ COPY --link --from=base --chown=1000:1000 /app/bundles /app/bundles
212
+ # Bundle-specific extras — last layer so shared layers above stay cached.
213
+ # A `.pth` file in the shared venv puts /app/bundle-libs at sys.path[0] at startup.
214
+ COPY --link --from=builder --chown=1000:1000 /app/bundle-libs /app/bundle-libs
215
+
216
+ RUN mkdir -p /app/evals /app/.cache/huggingface && chown -R sie:sie /app/evals /app/.cache
217
+
218
+ # Declare the BUNDLE arg only here, where it is first used (LABEL + ENV),
219
+ # so every RUN/COPY layer above is bundle-agnostic in its cache key and the
220
+ # resulting layers are content-addressed identically across bundles.
221
+ ARG BUNDLE
222
+
223
+ LABEL org.opencontainers.image.title="SIE Server" \
224
+ org.opencontainers.image.description="Search Inference Engine - ${BUNDLE} bundle" \
225
+ sie.bundle="${BUNDLE}"
226
+
227
+ ENV PATH="/app/.venv/bin:$PATH" \
228
+ PYTHONUNBUFFERED=1 \
229
+ PYTHONDONTWRITEBYTECODE=1 \
230
+ HF_HOME=/app/.cache/huggingface \
231
+ NVIDIA_VISIBLE_DEVICES=all \
232
+ NVIDIA_DRIVER_CAPABILITIES=compute,utility \
233
+ SIE_BUNDLE="${BUNDLE}"
234
+
235
+ USER sie
236
+ EXPOSE 8080
237
+
238
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
239
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1
240
+
241
+ ENTRYPOINT ["python", "-m", "sie_server.cli"]
242
+ CMD ["serve", "--host", "0.0.0.0", "--port", "8080", "--models-dir", "/app/models"]