sie-server 0.3.2__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (364) hide show
  1. {sie_server-0.3.2 → sie_server-0.3.3}/Dockerfile.cpu +8 -4
  2. {sie_server-0.3.2 → sie_server-0.3.3}/Dockerfile.cuda11 +4 -1
  3. {sie_server-0.3.2 → sie_server-0.3.3}/Dockerfile.cuda12 +4 -1
  4. {sie_server-0.3.2 → sie_server-0.3.3}/PKG-INFO +1 -1
  5. {sie_server-0.3.2 → sie_server-0.3.3}/bundles/default.yaml +3 -2
  6. {sie_server-0.3.2 → sie_server-0.3.3}/bundles/transformers5.yaml +4 -1
  7. {sie_server-0.3.2 → sie_server-0.3.3}/models/nvidia__llama-embed-nemotron-8b.yaml +11 -2
  8. sie_server-0.3.3/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +43 -0
  9. sie_server-0.3.3/models/tomoroai__tomoro-colqwen3-embed-4b.yaml +42 -0
  10. {sie_server-0.3.2 → sie_server-0.3.3}/openapi.json +19 -2
  11. {sie_server-0.3.2 → sie_server-0.3.3}/pyproject.toml +1 -1
  12. sie_server-0.3.3/src/sie_server/adapters/colqwen3/__init__.py +337 -0
  13. sie_server-0.3.3/src/sie_server/adapters/errors.py +2 -0
  14. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliclass/__init__.py +98 -29
  15. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nemo_colembed/__init__.py +175 -65
  16. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/sglang/__init__.py +7 -2
  17. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/extract.py +3 -0
  18. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/helpers.py +12 -0
  19. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/options.py +15 -0
  20. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/cli.py +13 -1
  21. sie_server-0.3.3/src/sie_server/core/hf_env.py +37 -0
  22. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/load_errors.py +32 -0
  23. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/model_loader.py +225 -42
  24. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/registry.py +19 -3
  25. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/main.py +11 -0
  26. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/metrics.py +20 -0
  27. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/openapi.py +17 -2
  28. sie_server-0.3.3/src/sie_server/types/overflow_policy.py +5 -0
  29. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/responses.py +1 -0
  30. sie_server-0.3.3/tests/adapters/test_gliclass_overflow_policy.py +102 -0
  31. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_runtime_options.py +34 -0
  32. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_visual_document.py +151 -0
  33. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_extract_integration.py +4 -3
  34. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_adaptive_batching.py +3 -3
  35. sie_server-0.3.3/tests/core/test_model_load_timeout.py +372 -0
  36. {sie_server-0.3.2 → sie_server-0.3.3}/.gitignore +0 -0
  37. {sie_server-0.3.2 → sie_server-0.3.3}/CONTRIBUTING.md +0 -0
  38. {sie_server-0.3.2 → sie_server-0.3.3}/LICENSE +0 -0
  39. {sie_server-0.3.2 → sie_server-0.3.3}/README.md +0 -0
  40. {sie_server-0.3.2 → sie_server-0.3.3}/bundles/sglang.yaml +0 -0
  41. {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  42. {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  43. {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  44. {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  45. {sie_server-0.3.2 → sie_server-0.3.3}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  46. {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-m3.yaml +0 -0
  47. {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-reranker-base.yaml +0 -0
  48. {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-reranker-large.yaml +0 -0
  49. {sie_server-0.3.2 → sie_server-0.3.3}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  50. {sie_server-0.3.2 → sie_server-0.3.3}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  51. {sie_server-0.3.2 → sie_server-0.3.3}/models/GritLM__GritLM-7B.yaml +0 -0
  52. {sie_server-0.3.2 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  53. {sie_server-0.3.2 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  54. {sie_server-0.3.2 → sie_server-0.3.3}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  55. {sie_server-0.3.2 → sie_server-0.3.3}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  56. {sie_server-0.3.2 → sie_server-0.3.3}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
  57. {sie_server-0.3.2 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  58. {sie_server-0.3.2 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  59. {sie_server-0.3.2 → sie_server-0.3.3}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  60. {sie_server-0.3.2 → sie_server-0.3.3}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  61. {sie_server-0.3.2 → sie_server-0.3.3}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  62. {sie_server-0.3.2 → sie_server-0.3.3}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
  63. {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  64. {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  65. {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
  66. {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
  67. {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
  68. {sie_server-0.3.2 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
  69. {sie_server-0.3.2 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  70. {sie_server-0.3.2 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  71. {sie_server-0.3.2 → sie_server-0.3.3}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  72. {sie_server-0.3.2 → sie_server-0.3.3}/models/answerdotai__ModernBERT-base.yaml +0 -0
  73. {sie_server-0.3.2 → sie_server-0.3.3}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
  74. {sie_server-0.3.2 → sie_server-0.3.3}/models/colbert-ir__colbertv2.0.yaml +0 -0
  75. {sie_server-0.3.2 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  76. {sie_server-0.3.2 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  77. {sie_server-0.3.2 → sie_server-0.3.3}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  78. {sie_server-0.3.2 → sie_server-0.3.3}/models/docling.yaml +0 -0
  79. {sie_server-0.3.2 → sie_server-0.3.3}/models/fastino__gliner2-base-v1.yaml +0 -0
  80. {sie_server-0.3.2 → sie_server-0.3.3}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  81. {sie_server-0.3.2 → sie_server-0.3.3}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  82. {sie_server-0.3.2 → sie_server-0.3.3}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  83. {sie_server-0.3.2 → sie_server-0.3.3}/models/google__embeddinggemma-300m.yaml +0 -0
  84. {sie_server-0.3.2 → sie_server-0.3.3}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  85. {sie_server-0.3.2 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  86. {sie_server-0.3.2 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  87. {sie_server-0.3.2 → sie_server-0.3.3}/models/google__siglip2-base-patch16-224.yaml +0 -0
  88. {sie_server-0.3.2 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  89. {sie_server-0.3.2 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  90. {sie_server-0.3.2 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  91. {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-base-v2.yaml +0 -0
  92. {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-large-v2.yaml +0 -0
  93. {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  94. {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__e5-small-v2.yaml +0 -0
  95. {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  96. {sie_server-0.3.2 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large.yaml +0 -0
  97. {sie_server-0.3.2 → sie_server-0.3.3}/models/jackboyla__glirel-large-v0.yaml +0 -0
  98. {sie_server-0.3.2 → sie_server-0.3.3}/models/jinaai__jina-colbert-v2.yaml +0 -0
  99. {sie_server-0.3.2 → sie_server-0.3.3}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
  100. {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
  101. {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
  102. {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
  103. {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
  104. {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
  105. {sie_server-0.3.2 → sie_server-0.3.3}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
  106. {sie_server-0.3.2 → sie_server-0.3.3}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  107. {sie_server-0.3.2 → sie_server-0.3.3}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  108. {sie_server-0.3.2 → sie_server-0.3.3}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
  109. {sie_server-0.3.2 → sie_server-0.3.3}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  110. {sie_server-0.3.2 → sie_server-0.3.3}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
  111. {sie_server-0.3.2 → sie_server-0.3.3}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  112. {sie_server-0.3.2 → sie_server-0.3.3}/models/microsoft__Florence-2-base.yaml +0 -0
  113. {sie_server-0.3.2 → sie_server-0.3.3}/models/microsoft__Florence-2-large.yaml +0 -0
  114. {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
  115. {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
  116. {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  117. {sie_server-0.3.2 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  118. {sie_server-0.3.2 → sie_server-0.3.3}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  119. {sie_server-0.3.2 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  120. {sie_server-0.3.2 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  121. {sie_server-0.3.2 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  122. {sie_server-0.3.2 → sie_server-0.3.3}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  123. {sie_server-0.3.2 → sie_server-0.3.3}/models/naver__splade-v3.yaml +0 -0
  124. {sie_server-0.3.2 → sie_server-0.3.3}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  125. {sie_server-0.3.2 → sie_server-0.3.3}/models/numind__NuNER_Zero-span.yaml +0 -0
  126. {sie_server-0.3.2 → sie_server-0.3.3}/models/numind__NuNER_Zero.yaml +0 -0
  127. {sie_server-0.3.2 → sie_server-0.3.3}/models/nvidia__NV-Embed-v2.yaml +0 -0
  128. {sie_server-0.3.2 → sie_server-0.3.3}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  129. {sie_server-0.3.2 → sie_server-0.3.3}/models/openai__clip-vit-base-patch32.yaml +0 -0
  130. {sie_server-0.3.2 → sie_server-0.3.3}/models/openai__clip-vit-large-patch14.yaml +0 -0
  131. {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  132. {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  133. {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  134. {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  135. {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  136. {sie_server-0.3.2 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  137. {sie_server-0.3.2 → sie_server-0.3.3}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  138. {sie_server-0.3.2 → sie_server-0.3.3}/models/rasyosef__splade-mini.yaml +0 -0
  139. {sie_server-0.3.2 → sie_server-0.3.3}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  140. {sie_server-0.3.2 → sie_server-0.3.3}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
  141. {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_large-v2.1.yaml +0 -0
  142. {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  143. {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  144. {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  145. {sie_server-0.3.2 → sie_server-0.3.3}/models/urchade__gliner_small-v2.1.yaml +0 -0
  146. {sie_server-0.3.2 → sie_server-0.3.3}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  147. {sie_server-0.3.2 → sie_server-0.3.3}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  148. {sie_server-0.3.2 → sie_server-0.3.3}/models/zai-org__GLM-OCR.yaml +0 -0
  149. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/__init__.py +0 -0
  150. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/__init__.py +0 -0
  151. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_base_adapter.py +0 -0
  152. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_flash_base.py +0 -0
  153. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_spec.py +0 -0
  154. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_types.py +0 -0
  155. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/_utils.py +0 -0
  156. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/base.py +0 -0
  157. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash/__init__.py +0 -0
  158. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
  159. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
  160. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
  161. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
  162. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
  163. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/clip/__init__.py +0 -0
  164. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colbert/__init__.py +0 -0
  165. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  166. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  167. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colpali/__init__.py +0 -0
  168. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  169. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  170. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/docling/__init__.py +0 -0
  171. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/donut/__init__.py +0 -0
  172. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/florence2/__init__.py +0 -0
  173. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliner/__init__.py +0 -0
  174. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliner2/__init__.py +0 -0
  175. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
  176. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/glirel/__init__.py +0 -0
  177. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
  178. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  179. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
  180. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
  181. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  182. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash/__init__.py +0 -0
  183. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
  184. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  185. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
  186. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
  187. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/owlv2/__init__.py +0 -0
  188. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/paddleocr_vl/__init__.py +0 -0
  189. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  190. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  191. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
  192. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
  193. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
  194. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
  195. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
  196. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/sentence_transformer/__init__.py +0 -0
  197. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/siglip/__init__.py +0 -0
  198. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
  199. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
  200. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
  201. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/__init__.py +0 -0
  202. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/encode.py +0 -0
  203. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/health.py +0 -0
  204. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/metrics.py +0 -0
  205. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/models.py +0 -0
  206. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/openai_compat.py +0 -0
  207. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/openapi.py +0 -0
  208. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/root.py +0 -0
  209. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/score.py +0 -0
  210. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/serialization.py +0 -0
  211. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/validation.py +0 -0
  212. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/api/ws.py +0 -0
  213. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/app/__init__.py +0 -0
  214. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/app/app_factory.py +0 -0
  215. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/app/app_state_config.py +0 -0
  216. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/config/__init__.py +0 -0
  217. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/config/engine.py +0 -0
  218. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/config/model.py +0 -0
  219. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/__init__.py +0 -0
  220. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/adaptive_batching.py +0 -0
  221. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/batcher.py +0 -0
  222. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/deps.py +0 -0
  223. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/disk_cache.py +0 -0
  224. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/encode_pipeline.py +0 -0
  225. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/extract_cost.py +0 -0
  226. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/hot_reload.py +0 -0
  227. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/inference.py +0 -0
  228. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/inference_output.py +0 -0
  229. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/loader.py +0 -0
  230. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/logging.py +0 -0
  231. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/memory.py +0 -0
  232. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/oom.py +0 -0
  233. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/postprocessor.py +0 -0
  234. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/postprocessor_registry.py +0 -0
  235. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/prepared.py +0 -0
  236. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/__init__.py +0 -0
  237. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/base.py +0 -0
  238. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/image.py +0 -0
  239. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/text.py +0 -0
  240. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor/vision.py +0 -0
  241. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/preprocessor_registry.py +0 -0
  242. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/readiness.py +0 -0
  243. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/shutdown.py +0 -0
  244. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/timing.py +0 -0
  245. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/tokenizer.py +0 -0
  246. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/watcher.py +0 -0
  247. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/__init__.py +0 -0
  248. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  249. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/base.py +0 -0
  250. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/encode.py +0 -0
  251. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/extract.py +0 -0
  252. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/score.py +0 -0
  253. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/model_worker.py +0 -0
  254. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/oom_recovery.py +0 -0
  255. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/core/worker/types.py +0 -0
  256. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/nats_pull_loop.py +0 -0
  257. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/nats_subscriber.py +0 -0
  258. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/__init__.py +0 -0
  259. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/gpu.py +0 -0
  260. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/prometheus.py +0 -0
  261. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/telemetry.py +0 -0
  262. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/observability/tracing.py +0 -0
  263. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/static/__init__.py +0 -0
  264. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/static/index.html +0 -0
  265. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/__init__.py +0 -0
  266. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/inputs.py +0 -0
  267. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/outputs.py +0 -0
  268. {sie_server-0.3.2 → sie_server-0.3.3}/src/sie_server/types/requests.py +0 -0
  269. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/__init__.py +0 -0
  270. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_base.py +0 -0
  271. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_bge_m3.py +0 -0
  272. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_bge_m3_flash.py +0 -0
  273. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_clip.py +0 -0
  274. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_colbert.py +0 -0
  275. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_docling.py +0 -0
  276. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_docling_smoke.py +0 -0
  277. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_donut.py +0 -0
  278. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_factory_integration.py +0 -0
  279. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_flash_base.py +0 -0
  280. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_florence2.py +0 -0
  281. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_glirel.py +0 -0
  282. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_glm_ocr.py +0 -0
  283. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_grounding_dino.py +0 -0
  284. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_gte_sparse.py +0 -0
  285. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
  286. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_lighton_ocr.py +0 -0
  287. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_lora.py +0 -0
  288. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_lora_integration.py +0 -0
  289. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_paddleocr_vl.py +0 -0
  290. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_sentence_transformer.py +0 -0
  291. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_sglang.py +0 -0
  292. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_siglip.py +0 -0
  293. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_sparse_aggregation.py +0 -0
  294. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_stablebridge_integration.py +0 -0
  295. {sie_server-0.3.2 → sie_server-0.3.3}/tests/adapters/test_stablebridge_pruner.py +0 -0
  296. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/__init__.py +0 -0
  297. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_dtype.py +0 -0
  298. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_endpoint.py +0 -0
  299. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_json_schema.py +0 -0
  300. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_timing.py +0 -0
  301. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_encode_validation.py +0 -0
  302. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_extract.py +0 -0
  303. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_extract_oom.py +0 -0
  304. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_health.py +0 -0
  305. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_models.py +0 -0
  306. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_openai_compat.py +0 -0
  307. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_score.py +0 -0
  308. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_version_header.py +0 -0
  309. {sie_server-0.3.2 → sie_server-0.3.3}/tests/api/test_ws.py +0 -0
  310. {sie_server-0.3.2 → sie_server-0.3.3}/tests/app/__init__.py +0 -0
  311. {sie_server-0.3.2 → sie_server-0.3.3}/tests/app/test_app_factory.py +0 -0
  312. {sie_server-0.3.2 → sie_server-0.3.3}/tests/config/__init__.py +0 -0
  313. {sie_server-0.3.2 → sie_server-0.3.3}/tests/config/test_bundle_coverage.py +0 -0
  314. {sie_server-0.3.2 → sie_server-0.3.3}/tests/config/test_config.py +0 -0
  315. {sie_server-0.3.2 → sie_server-0.3.3}/tests/conftest.py +0 -0
  316. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/__init__.py +0 -0
  317. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_batcher.py +0 -0
  318. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_disk_cache.py +0 -0
  319. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_hot_reload.py +0 -0
  320. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_idle_evict.py +0 -0
  321. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_inference.py +0 -0
  322. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_loader.py +0 -0
  323. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_logging.py +0 -0
  324. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_memory.py +0 -0
  325. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_oom_detection.py +0 -0
  326. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_postprocessor.py +0 -0
  327. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_postprocessor_registry.py +0 -0
  328. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_prepared.py +0 -0
  329. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_preprocessor.py +0 -0
  330. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_preprocessor_registry.py +0 -0
  331. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_quantization.py +0 -0
  332. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_readiness.py +0 -0
  333. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_async.py +0 -0
  334. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_core.py +0 -0
  335. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_deps.py +0 -0
  336. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_failed_state.py +0 -0
  337. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_memory.py +0 -0
  338. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_registry_multi_model.py +0 -0
  339. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_shutdown.py +0 -0
  340. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_timing.py +0 -0
  341. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_watcher.py +0 -0
  342. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_backpressure.py +0 -0
  343. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_core.py +0 -0
  344. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_extract.py +0 -0
  345. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_lora.py +0 -0
  346. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_options.py +0 -0
  347. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/test_worker_score.py +0 -0
  348. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/worker/__init__.py +0 -0
  349. {sie_server-0.3.2 → sie_server-0.3.3}/tests/core/worker/test_oom_recovery.py +0 -0
  350. {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/__init__.py +0 -0
  351. {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/test_metrics.py +0 -0
  352. {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/test_telemetry.py +0 -0
  353. {sie_server-0.3.2 → sie_server-0.3.3}/tests/observability/test_tracing.py +0 -0
  354. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_all_models.py +0 -0
  355. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_docker_integration.py +0 -0
  356. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_nats_pull_loop.py +0 -0
  357. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_nats_pull_loop_batching.py +0 -0
  358. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_openapi_export.py +0 -0
  359. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_sdk_integration.py +0 -0
  360. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_server_smoke.py +0 -0
  361. {sie_server-0.3.2 → sie_server-0.3.3}/tests/test_sparse_integration.py +0 -0
  362. {sie_server-0.3.2 → sie_server-0.3.3}/tests/type_defs/__init__.py +0 -0
  363. {sie_server-0.3.2 → sie_server-0.3.3}/tests/type_defs/test_inputs.py +0 -0
  364. {sie_server-0.3.2 → sie_server-0.3.3}/tests/type_defs/test_types.py +0 -0
@@ -18,14 +18,15 @@ ENV DEBIAN_FRONTEND=noninteractive \
18
18
  UV_LINK_MODE=copy \
19
19
  PIP_DISABLE_PIP_VERSION_CHECK=1
20
20
 
21
- # build-essential + git are builder-only; they do NOT leak into the runtime stage.
21
+ # build-essential + git + zlib1g-dev are builder-only; they do NOT leak into the runtime stage.
22
22
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
23
23
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
24
24
  apt-get update && apt-get install -y --no-install-recommends \
25
25
  build-essential \
26
26
  ca-certificates \
27
27
  curl \
28
- git
28
+ git \
29
+ zlib1g-dev
29
30
 
30
31
  ARG UV_VERSION=0.9.28
31
32
  RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
@@ -154,13 +155,16 @@ ENV DEBIAN_FRONTEND=noninteractive \
154
155
  OMP_NUM_THREADS=4 \
155
156
  MKL_NUM_THREADS=4
156
157
 
157
- # Only the shared libs torch + pillow actually dlopen at runtime.
158
+ # Only the shared libs torch + pillow + rtree actually dlopen at runtime.
159
+ # libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
160
+ # rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
158
161
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
159
162
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
160
163
  apt-get update && apt-get install -y --no-install-recommends \
161
164
  libgomp1 \
162
165
  libjpeg62-turbo \
163
- libpng16-16
166
+ libpng16-16 \
167
+ libspatialindex-c6
164
168
 
165
169
  RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
166
170
 
@@ -152,6 +152,8 @@ FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
152
152
 
153
153
  ENV DEBIAN_FRONTEND=noninteractive
154
154
 
155
+ # libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
156
+ # rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
155
157
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
156
158
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
157
159
  apt-get update && apt-get install -y --no-install-recommends \
@@ -159,7 +161,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
159
161
  gcc \
160
162
  libc6-dev \
161
163
  libgomp1 \
162
- libnuma1
164
+ libnuma1 \
165
+ libspatialindex-c6
163
166
 
164
167
  RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
165
168
 
@@ -173,6 +173,8 @@ ENV DEBIAN_FRONTEND=noninteractive
173
173
  # libnuma1: required by sgl_kernel (SGLang bundle); import fails with a
174
174
  # misleading SM-arch error without it.
175
175
  # libgomp1: torch OpenMP runtime.
176
+ # libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
177
+ # rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
176
178
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
177
179
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
178
180
  apt-get update && apt-get install -y --no-install-recommends \
@@ -180,7 +182,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
180
182
  gcc \
181
183
  libc6-dev \
182
184
  libgomp1 \
183
- libnuma1
185
+ libnuma1 \
186
+ libspatialindex-c6
184
187
 
185
188
  RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
186
189
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-server
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: Search Inference Engine - GPU inference server for search workloads
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -12,6 +12,7 @@ adapters:
12
12
  - sie_server.adapters.colbert_rotary_flash
13
13
  - sie_server.adapters.colpali
14
14
  - sie_server.adapters.colqwen2
15
+ - sie_server.adapters.colqwen3
15
16
  - sie_server.adapters.grounding_dino
16
17
  - sie_server.adapters.gte_sparse_flash
17
18
  - sie_server.adapters.jina_flash_cross_encoder
@@ -44,7 +45,7 @@ adapters:
44
45
  - sie_server.adapters.paddleocr_vl
45
46
  deps:
46
47
  # Most flash adapters; sentence_transformer needs >=4.57
47
- transformers: '>=4.57'
48
+ transformers: '>=4.57,<5'
48
49
  # Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
49
50
  torch: '>=2.9,<2.10'
50
51
  # bge_m3, bge_m3_flash, nemo_colembed, nomic_flash, xlm_roberta_flash
@@ -56,7 +57,7 @@ deps:
56
57
  # bge_m3_flag
57
58
  FlagEmbedding: '>=1.3'
58
59
  # grounding_dino
59
- pillow: ''
60
+ pillow: '>=11,<12'
60
61
  # grounding_dino
61
62
  requests: ''
62
63
  # gliner, gliner_bi
@@ -13,5 +13,8 @@ deps:
13
13
  transformers: '>=5.0'
14
14
  # Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
15
15
  torch: '>=2.9,<2.10'
16
- huggingface-hub: '>=0.20'
16
+ # No huggingface-hub override: transformers 5.x's own metadata
17
+ # pins it to >=1.3,<2.0. An override here was previously
18
+ # unsatisfiable against that pin and broke `uv run` resolution.
19
+ # Let transformers drive the version.
17
20
  pillow: ''
@@ -18,10 +18,19 @@ profiles:
18
18
  default:
19
19
  max_batch_tokens: 8192
20
20
  compute_precision: bfloat16
21
- adapter_path: sie_server.adapters.sglang:SGLangEmbeddingAdapter
21
+ # Was sie_server.adapters.sglang:SGLangEmbeddingAdapter, but
22
+ # SGLang's generic transformers fallback hits
23
+ # `assert get_embedding is False` for this Llama-based arch
24
+ # (no model-specific embedding implementation registered). Route
25
+ # through PyTorchEmbeddingAdapter like NV-Embed-v2 (also Llama-
26
+ # based 7B+ embedder); the heavy lane has 96 GiB so non-paged
27
+ # attention is fine. May be reconciled with a colleague's
28
+ # in-flight SGLang/arch fix later.
29
+ adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
22
30
  adapter_options:
23
31
  loadtime:
24
- mem_fraction_static: 0.85
32
+ trust_remote_code: true
33
+ attn_implementation: eager
25
34
  runtime:
26
35
  pooling: last_token
27
36
  normalize: true
@@ -0,0 +1,43 @@
1
+ sie_id: nvidia/nemotron-colembed-vl-4b-v2
2
+ hf_id: nvidia/nemotron-colembed-vl-4b-v2
3
+ inputs:
4
+ text: true
5
+ image: true
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode:
10
+ dense: null
11
+ sparse: null
12
+ multivector:
13
+ dim: 2560
14
+ score: null
15
+ extract: null
16
+ max_sequence_length: 8192
17
+ profiles:
18
+ default:
19
+ max_batch_tokens: 4096
20
+ compute_precision: bfloat16
21
+ adapter_path: sie_server.adapters.nemo_colembed:NemoColEmbedAdapter
22
+ adapter_options:
23
+ loadtime:
24
+ muvera_config:
25
+ num_repetitions: 40
26
+ num_simhash_projections: 6
27
+ projection_dim: null
28
+ final_projection_dim: 10240
29
+ token_dim: 2560
30
+ runtime:
31
+ normalize: true
32
+ batch_size: 4
33
+ muvera:
34
+ extends: default
35
+ adapter_options:
36
+ runtime:
37
+ normalize: true
38
+ batch_size: 4
39
+ output_types:
40
+ - dense
41
+ output_similarity:
42
+ dense: dot
43
+ muvera: {}
@@ -0,0 +1,42 @@
1
+ sie_id: TomoroAI/tomoro-colqwen3-embed-4b
2
+ hf_id: TomoroAI/tomoro-colqwen3-embed-4b
3
+ inputs:
4
+ text: true
5
+ image: true
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode:
10
+ dense: null
11
+ sparse: null
12
+ multivector:
13
+ dim: 320
14
+ score: null
15
+ extract: null
16
+ max_sequence_length: 8192
17
+ profiles:
18
+ default:
19
+ max_batch_tokens: 4096
20
+ compute_precision: bfloat16
21
+ adapter_path: sie_server.adapters.colqwen3:ColQwen3Adapter
22
+ adapter_options:
23
+ loadtime:
24
+ muvera_config:
25
+ num_repetitions: 40
26
+ num_simhash_projections: 6
27
+ projection_dim: null
28
+ final_projection_dim: 10240
29
+ token_dim: 320
30
+ trust_remote_code: true
31
+ runtime:
32
+ normalize: true
33
+ muvera:
34
+ extends: default
35
+ adapter_options:
36
+ runtime:
37
+ normalize: true
38
+ output_types:
39
+ - dense
40
+ output_similarity:
41
+ dense: dot
42
+ muvera: {}
@@ -3,7 +3,7 @@
3
3
  "info": {
4
4
  "title": "SIE Server",
5
5
  "description": "Search Inference Engine - GPU inference server for search workloads",
6
- "version": "0.3.2"
6
+ "version": "0.3.3"
7
7
  },
8
8
  "paths": {
9
9
  "/": {
@@ -1740,7 +1740,7 @@
1740
1740
  }
1741
1741
  ],
1742
1742
  "default": null,
1743
- "description": "Adapter-specific options",
1743
+ "description": "Adapter-specific options. Recognized sub-keys include 'overflow_policy' (one of 'default', 'truncate_text', 'error'; default 'default') controlling how inputs exceeding the model's max_sequence_length are handled.",
1744
1744
  "title": "Options"
1745
1745
  }
1746
1746
  },
@@ -1763,6 +1763,23 @@
1763
1763
  "location"
1764
1764
  ]
1765
1765
  }
1766
+ },
1767
+ {
1768
+ "items": [
1769
+ {
1770
+ "text": "Apple Inc. was founded by Steve Jobs in Cupertino, California."
1771
+ }
1772
+ ],
1773
+ "params": {
1774
+ "labels": [
1775
+ "person",
1776
+ "organization",
1777
+ "location"
1778
+ ],
1779
+ "options": {
1780
+ "overflow_policy": "truncate_text"
1781
+ }
1782
+ }
1766
1783
  }
1767
1784
  ],
1768
1785
  "properties": {
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-server"
3
- version = "0.3.2"
3
+ version = "0.3.3"
4
4
  description = "Search Inference Engine - GPU inference server for search workloads"
5
5
  requires-python = ">=3.12,<3.13"
6
6
  license = { text = "Apache-2.0" }
@@ -0,0 +1,337 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ import numpy as np
9
+ import torch
10
+ from torch.nn import functional as F
11
+
12
+ from sie_server.adapters._base_adapter import BaseAdapter
13
+ from sie_server.adapters._spec import AdapterSpec
14
+ from sie_server.adapters._types import ComputePrecision
15
+ from sie_server.core.inference_output import EncodeOutput
16
+
17
+ if TYPE_CHECKING:
18
+ from PIL import Image as PILImage
19
+
20
+ from sie_server.types.inputs import Item
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ _ERR_NO_INPUT = "ColQwen3Adapter requires either text or images input"
25
+
26
+
27
class ColQwen3Adapter(BaseAdapter):
    """Adapter for ColQwen3-style visual document retrieval models.

    ColQwen3 encodes document page images into multi-vector representations
    (320-dim per token) for late interaction retrieval. Built on Qwen3-VL,
    with a custom projection layer wrapper that exposes ``out.embeddings``.

    Target model: ``TomoroAI/tomoro-colqwen3-embed-4b`` (4B params).

    Loaded via ``AutoModel`` + ``AutoProcessor`` with ``trust_remote_code``
    because the model ships its own ``ColQwen3`` / ``ColQwen3Processor``
    classes (not in native transformers).
    """

    spec = AdapterSpec(
        inputs=("text", "image"),
        outputs=("multivector", "score"),
        multivector_dim=320,
        unload_fields=("_model", "_processor"),
        default_preprocessor="image",
    )

    def __init__(
        self,
        model_name_or_path: str | Path,
        *,
        normalize: bool = True,
        compute_precision: ComputePrecision = "bfloat16",
        trust_remote_code: bool = True,
        max_seq_length: int | None = None,
        muvera_config: dict[str, Any] | None = None,
        token_dim: int = 320,
        max_num_visual_tokens: int = 1280,
    ) -> None:
        """Initialize the adapter without loading any weights.

        Args:
            model_name_or_path: HuggingFace model ID or local path.
            normalize: Whether to L2-normalize embeddings (the model's wrapper
                already normalizes; kept for interface parity).
            compute_precision: Compute precision for inference.
            trust_remote_code: Required for ColQwen3 (custom processor + model classes).
            max_seq_length: Ignored — ColQwen3 uses dynamic sequence length.
                Accepted so shared adapter configuration can be passed through.
            muvera_config: Optional MUVERA configuration. Not read by this
                adapter; presumably consumed by the MUVERA postprocessor —
                confirm against the postprocessor wiring.
            token_dim: Per-token embedding dimension (320 for ColQwen3). Used
                as the initial value until ``load`` can read the real size
                from the model's projection layer.
            max_num_visual_tokens: Cap on visual tokens per image (passed to processor).
        """
        self._model_name_or_path = str(model_name_or_path)
        self._normalize = normalize
        self._compute_precision = compute_precision
        self._trust_remote_code = trust_remote_code
        self._max_num_visual_tokens = max_num_visual_tokens

        self._model: Any = None
        self._processor: Any = None
        self._device: str | None = None
        self._multivector_dim: int = token_dim

    def load(self, device: str) -> None:
        """Load processor + model onto the specified device.

        Args:
            device: Target device string (e.g. ``"cuda:0"`` or ``"cpu"``).
        """
        from transformers import AutoModel, AutoProcessor

        # Must be set before _resolve_dtype(), which reads self._device.
        self._device = device

        dtype = self._resolve_dtype()
        attn_impl = self._resolve_attn_implementation(device)

        logger.info(
            "Loading ColQwen3 model %s on device=%s with dtype=%s, attn=%s",
            self._model_name_or_path,
            device,
            dtype,
            attn_impl,
        )

        self._processor = AutoProcessor.from_pretrained(
            self._model_name_or_path,
            trust_remote_code=self._trust_remote_code,
            max_num_visual_tokens=self._max_num_visual_tokens,
        )

        load_kwargs: dict[str, Any] = {
            "trust_remote_code": self._trust_remote_code,
            "device_map": device,
            "dtype": dtype,
        }
        # Only override the attention backend when one was selected; on
        # non-CUDA devices the model's own default is left untouched.
        if attn_impl is not None:
            load_kwargs["attn_implementation"] = attn_impl

        self._model = AutoModel.from_pretrained(
            self._model_name_or_path,
            **load_kwargs,
        ).eval()

        # Discover token dim from the projection layer when present.
        proj = getattr(self._model, "embedding_proj_layer", None)
        out_features = getattr(proj, "out_features", None)
        if isinstance(out_features, int) and out_features > 0:
            self._multivector_dim = out_features

    def _resolve_dtype(self) -> torch.dtype:
        """Return the torch dtype to load the model with.

        Non-CUDA devices always use float32. On CUDA the configured compute
        precision is honored, with bfloat16 as the fallback for unknown values.
        """
        if not self._device or not str(self._device).startswith("cuda"):
            return torch.float32
        dtype_map = {
            "float16": torch.float16,
            "bfloat16": torch.bfloat16,
            "float32": torch.float32,
        }
        return dtype_map.get(self._compute_precision, torch.bfloat16)

    def _resolve_attn_implementation(self, device: str) -> str | None:
        """Pick the attention backend for ``device``.

        Returns:
            ``None`` on non-CUDA devices, ``"flash_attention_2"`` when the
            ``flash_attn`` package imports successfully, otherwise ``"sdpa"``.
        """
        if not device.startswith("cuda"):
            return None
        try:
            # A real import (not just a spec lookup) so that a present but
            # unloadable build also falls back to sdpa.
            import flash_attn  # ty: ignore[unresolved-import]

            return "flash_attention_2"
        except ImportError:
            logger.info("flash_attn not available, using sdpa attention")
            return "sdpa"

    # ------------------------------------------------------------------
    # Encode
    # ------------------------------------------------------------------

    def encode(
        self,
        items: list[Item],
        output_types: list[str],
        *,
        instruction: str | None = None,
        is_query: bool = False,
        prepared_items: Any = None,
        options: dict[str, Any] | None = None,
    ) -> EncodeOutput:
        """Encode items into per-token multi-vectors.

        Queries (``is_query=True``) must be text. Documents may be images or
        text; an item with several images yields a single multivector made of
        the per-image multivectors concatenated along the token axis.

        All items are validated before any forward pass runs, so a batch
        containing an invalid item fails without spending GPU work on the
        valid items that precede it.

        Args:
            items: Items to encode; output order matches input order.
            output_types: Requested outputs; only ``"multivector"`` is supported.
            instruction: Accepted for interface compatibility; unused here.
            is_query: Whether the items are queries (text-only path).
            prepared_items: Accepted for interface compatibility; unused here.
            options: Accepted for interface compatibility; unused here.

        Returns:
            An ``EncodeOutput`` carrying one multivector per input item.

        Raises:
            ValueError: If an unsupported output type is requested, a query
                item has no text, or a document item has neither images nor text.
            RuntimeError: If the number of produced multivectors does not
                match the number of items.
        """
        self._check_loaded()
        self._validate_output_types(output_types)

        if is_query:
            # Validate the whole batch first, then encode.
            texts: list[str] = []
            for item in items:
                if item.text is None:
                    raise ValueError(_ERR_NO_INPUT)
                texts.append(item.text)
            multivector_list = [self._encode_text(text) for text in texts]
        else:
            multivector_list = self._encode_documents(items)

        return EncodeOutput(
            multivector=multivector_list,
            batch_size=len(items),
            is_query=is_query,
            multivector_token_dim=self._multivector_dim,
        )

    def _encode_documents(self, items: list[Item]) -> list[np.ndarray]:
        """Encode document items (image and/or text) preserving input order."""
        # Preallocate by index so output order matches input order regardless of
        # text/image mix, and so multi-image items collapse to one multivector.
        results: list[np.ndarray | None] = [None] * len(items)
        all_images: list[PILImage.Image] = []
        image_slots: list[tuple[int, int]] = []  # (item_idx, image_count)
        text_slots: list[tuple[int, str]] = []  # (item_idx, text)

        # Pass 1 (CPU only): classify every item and decode its images, so
        # invalid input is rejected before any model forward pass.
        for idx, item in enumerate(items):
            has_images = item.images is not None and len(item.images) > 0
            if has_images:
                images = self._load_images(item)
                all_images.extend(images)
                image_slots.append((idx, len(images)))
            elif item.text is not None:
                text_slots.append((idx, item.text))
            else:
                raise ValueError(_ERR_NO_INPUT)

        # Pass 2: run the model.
        for idx, text in text_slots:
            results[idx] = self._encode_text(text)

        if all_images:
            per_image_mvs = self._encode_images(all_images)
            cursor = 0
            for idx, count in image_slots:
                segment = per_image_mvs[cursor : cursor + count]
                cursor += count
                results[idx] = segment[0] if count == 1 else np.concatenate(segment, axis=0)

        multivector_list = [mv for mv in results if mv is not None]
        # Explicit check rather than ``assert``: assertions are stripped under
        # ``python -O``, and a silent mismatch here would misalign outputs
        # with their input items.
        if len(multivector_list) != len(items):
            msg = (
                "ColQwen3Adapter produced "
                f"{len(multivector_list)} multivectors for {len(items)} items"
            )
            raise RuntimeError(msg)
        return multivector_list

    # ------------------------------------------------------------------
    # Image encoding
    # ------------------------------------------------------------------

    def _load_images(self, item: Any) -> list[PILImage.Image]:
        """Decode every image of ``item`` into an RGB PIL image.

        Each entry of ``item.images`` is indexed with ``["data"]`` and the
        value is wrapped in ``io.BytesIO``, i.e. it is expected to be the
        encoded image bytes.
        """
        from PIL import Image

        pil_images: list[PILImage.Image] = []
        for img_input in item.images or []:
            pil_img = Image.open(io.BytesIO(img_input["data"]))
            if pil_img.mode != "RGB":
                pil_img = pil_img.convert("RGB")
            pil_images.append(pil_img)
        return pil_images

    def _encode_images(self, images: list[PILImage.Image]) -> list[np.ndarray]:
        """Encode a batch of images and return per-image multi-vectors."""
        return self._forward_embeddings(images=images)

    # ------------------------------------------------------------------
    # Text encoding
    # ------------------------------------------------------------------

    def _encode_text(self, text: str) -> np.ndarray:
        """Encode a single text query."""
        return self._forward_embeddings(text=[text])[0]

    def _forward_embeddings(self, **processor_kwargs: Any) -> list[np.ndarray]:
        """Run processor + model once and return one float32 array per batch row.

        Args:
            **processor_kwargs: Content arguments for the processor, either
                ``images=[...]`` or ``text=[...]``.

        Returns:
            One NumPy array per batch row taken from ``outputs.embeddings``.
        """
        # These asserts only narrow types after ``_check_loaded``; without
        # them an unloaded adapter would still fail on the call below.
        assert self._model is not None
        assert self._processor is not None

        encoded = self._processor(
            return_tensors="pt",
            padding="longest",
            **processor_kwargs,
        )

        batch = outputs = embeddings = None
        try:
            batch = {k: v.to(self._device) for k, v in encoded.items() if hasattr(v, "to")}

            with torch.inference_mode():
                outputs = self._model(**batch)

            # ColQwen3 returns a ModelOutput-like object with ``.embeddings``
            # of shape (batch, seq, token_dim). The wrapper already L2-normalizes
            # and applies attention-masking; our ``self._normalize`` is a no-op
            # safety belt for downstream parity.
            embeddings = outputs.embeddings
            if self._normalize:
                embeddings = F.normalize(embeddings, p=2, dim=-1)

            return [embeddings[i].float().cpu().numpy() for i in range(embeddings.shape[0])]
        finally:
            # Free GPU memory between batches to prevent OOM on subsequent calls
            # (L4 22GB GPUs are tight for VLM models). Done in ``finally`` so
            # that a failed forward pass also drops its tensor references
            # instead of keeping them alive through the traceback frame.
            del batch, outputs, embeddings, encoded
            if self._device and self._device.startswith("cuda"):
                torch.cuda.empty_cache()

    # ------------------------------------------------------------------
    # Scoring
    # ------------------------------------------------------------------

    def score(
        self,
        query: Any,
        items: list[Any],
        *,
        instruction: str | None = None,
        options: dict[str, Any] | None = None,
    ) -> list[float]:
        """Score documents against a text query using MaxSim.

        For each document, every query token takes its maximum dot-product
        similarity over the document tokens, and those maxima are summed.

        Args:
            query: Query item; encoded through the text-only query path.
            items: Document items to score.
            instruction: Accepted for interface compatibility; unused here.
            options: Accepted for interface compatibility; unused here.

        Returns:
            One score per document, in input order.

        Raises:
            RuntimeError: If encoding yields no multivector output.
        """
        self._check_loaded()

        query_output = self.encode([query], output_types=["multivector"], is_query=True)
        if query_output.multivector is None:
            raise RuntimeError("Failed to encode query: no multivector output")
        query_vecs = query_output.multivector[0]

        doc_output = self.encode(items, output_types=["multivector"], is_query=False)
        if doc_output.multivector is None:
            raise RuntimeError("Failed to encode documents: no multivector output")

        scores: list[float] = []
        query_tensor = torch.from_numpy(query_vecs).to(self._device)
        for doc_vecs in doc_output.multivector:
            doc_tensor = torch.from_numpy(doc_vecs).to(self._device)
            # (query_tokens, doc_tokens) similarity matrix.
            sim = torch.matmul(query_tensor, doc_tensor.T)
            scores.append(sim.max(dim=-1).values.sum().item())
        return scores

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _validate_output_types(self, output_types: list[str]) -> None:
        """Raise ``ValueError`` if anything other than ``"multivector"`` is requested."""
        unsupported = set(output_types) - {"multivector"}
        if unsupported:
            msg = f"Unsupported output types: {unsupported}. ColQwen3Adapter only supports 'multivector'."
            raise ValueError(msg)

    def get_preprocessor(self) -> Any | None:
        """Return ``None``: this adapter does its own preprocessing."""
        # ColQwen3 uses a custom processor that handles both text and images
        # internally via the ColQwen3Processor; the generic ImagePreprocessor
        # does not match the (text-only / image-only) call pattern.
        return None
@@ -0,0 +1,2 @@
1
class InputTooLongError(ValueError):
    """Signal that an input is too long to be processed.

    Derives from ``ValueError`` so handlers that already catch
    ``ValueError`` keep working while callers that care can catch this
    type specifically.

    NOTE(review): presumably raised when an item exceeds the model's
    ``max_sequence_length`` and the ``overflow_policy`` option is
    ``"error"`` — confirm against the raising adapters.
    """