sie-server 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (365) hide show
  1. {sie_server-0.3.1 → sie_server-0.3.3}/.gitignore +3 -0
  2. {sie_server-0.3.1 → sie_server-0.3.3}/Dockerfile.cpu +12 -5
  3. {sie_server-0.3.1 → sie_server-0.3.3}/Dockerfile.cuda11 +8 -2
  4. {sie_server-0.3.1 → sie_server-0.3.3}/Dockerfile.cuda12 +8 -2
  5. {sie_server-0.3.1 → sie_server-0.3.3}/PKG-INFO +1 -1
  6. {sie_server-0.3.1 → sie_server-0.3.3}/bundles/default.yaml +3 -2
  7. {sie_server-0.3.1 → sie_server-0.3.3}/bundles/transformers5.yaml +4 -1
  8. {sie_server-0.3.1 → sie_server-0.3.3}/models/answerdotai__answerai-colbert-small-v1.yaml +1 -1
  9. {sie_server-0.3.1 → sie_server-0.3.3}/models/colbert-ir__colbertv2.0.yaml +1 -1
  10. {sie_server-0.3.1 → sie_server-0.3.3}/models/docling.yaml +8 -0
  11. {sie_server-0.3.1 → sie_server-0.3.3}/models/jinaai__jina-colbert-v2.yaml +1 -1
  12. {sie_server-0.3.1 → sie_server-0.3.3}/models/lightonai__GTE-ModernColBERT-v1.yaml +1 -1
  13. {sie_server-0.3.1 → sie_server-0.3.3}/models/lightonai__Reason-ModernColBERT.yaml +1 -1
  14. {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +1 -1
  15. {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +1 -1
  16. {sie_server-0.3.1 → sie_server-0.3.3}/models/nvidia__llama-embed-nemotron-8b.yaml +11 -2
  17. sie_server-0.3.3/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +43 -0
  18. sie_server-0.3.3/models/tomoroai__tomoro-colqwen3-embed-4b.yaml +42 -0
  19. {sie_server-0.3.1 → sie_server-0.3.3}/openapi.json +19 -2
  20. {sie_server-0.3.1 → sie_server-0.3.3}/pyproject.toml +5 -1
  21. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_base_adapter.py +49 -1
  22. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_utils.py +79 -1
  23. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/base.py +14 -0
  24. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash/__init__.py +1 -0
  25. sie_server-0.3.3/src/sie_server/adapters/colqwen3/__init__.py +337 -0
  26. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/docling/__init__.py +72 -21
  27. sie_server-0.3.3/src/sie_server/adapters/errors.py +2 -0
  28. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliclass/__init__.py +98 -29
  29. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash/__init__.py +1 -0
  30. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nemo_colembed/__init__.py +175 -65
  31. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/paddleocr_vl/__init__.py +2 -0
  32. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/sentence_transformer/__init__.py +5 -1
  33. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/sglang/__init__.py +7 -2
  34. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/extract.py +3 -0
  35. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/helpers.py +12 -0
  36. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/options.py +15 -0
  37. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/app/app_factory.py +65 -7
  38. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/cli.py +27 -4
  39. sie_server-0.3.3/src/sie_server/core/hf_env.py +37 -0
  40. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/load_errors.py +32 -0
  41. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/model_loader.py +244 -44
  42. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/registry.py +19 -3
  43. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/main.py +11 -0
  44. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/metrics.py +20 -0
  45. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/openapi.py +17 -2
  46. sie_server-0.3.3/src/sie_server/types/overflow_policy.py +5 -0
  47. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/responses.py +1 -0
  48. sie_server-0.3.3/tests/adapters/test_docling.py +356 -0
  49. sie_server-0.3.3/tests/adapters/test_gliclass_overflow_policy.py +102 -0
  50. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_paddleocr_vl.py +45 -0
  51. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_runtime_options.py +34 -0
  52. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_sentence_transformer.py +2 -1
  53. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_visual_document.py +151 -0
  54. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_extract_integration.py +4 -3
  55. {sie_server-0.3.1 → sie_server-0.3.3}/tests/app/test_app_factory.py +65 -0
  56. {sie_server-0.3.1 → sie_server-0.3.3}/tests/conftest.py +5 -0
  57. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_adaptive_batching.py +3 -3
  58. sie_server-0.3.3/tests/core/test_model_load_timeout.py +372 -0
  59. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_all_models.py +39 -0
  60. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_docker_integration.py +3 -3
  61. sie_server-0.3.1/tests/adapters/test_docling.py +0 -194
  62. {sie_server-0.3.1 → sie_server-0.3.3}/CONTRIBUTING.md +0 -0
  63. {sie_server-0.3.1 → sie_server-0.3.3}/LICENSE +0 -0
  64. {sie_server-0.3.1 → sie_server-0.3.3}/README.md +0 -0
  65. {sie_server-0.3.1 → sie_server-0.3.3}/bundles/sglang.yaml +0 -0
  66. {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  67. {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  68. {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  69. {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  70. {sie_server-0.3.1 → sie_server-0.3.3}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  71. {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-m3.yaml +0 -0
  72. {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-reranker-base.yaml +0 -0
  73. {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-reranker-large.yaml +0 -0
  74. {sie_server-0.3.1 → sie_server-0.3.3}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  75. {sie_server-0.3.1 → sie_server-0.3.3}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  76. {sie_server-0.3.1 → sie_server-0.3.3}/models/GritLM__GritLM-7B.yaml +0 -0
  77. {sie_server-0.3.1 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  78. {sie_server-0.3.1 → sie_server-0.3.3}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  79. {sie_server-0.3.1 → sie_server-0.3.3}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  80. {sie_server-0.3.1 → sie_server-0.3.3}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  81. {sie_server-0.3.1 → sie_server-0.3.3}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
  82. {sie_server-0.3.1 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  83. {sie_server-0.3.1 → sie_server-0.3.3}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  84. {sie_server-0.3.1 → sie_server-0.3.3}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  85. {sie_server-0.3.1 → sie_server-0.3.3}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  86. {sie_server-0.3.1 → sie_server-0.3.3}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  87. {sie_server-0.3.1 → sie_server-0.3.3}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
  88. {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  89. {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  90. {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
  91. {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
  92. {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
  93. {sie_server-0.3.1 → sie_server-0.3.3}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
  94. {sie_server-0.3.1 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  95. {sie_server-0.3.1 → sie_server-0.3.3}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  96. {sie_server-0.3.1 → sie_server-0.3.3}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  97. {sie_server-0.3.1 → sie_server-0.3.3}/models/answerdotai__ModernBERT-base.yaml +0 -0
  98. {sie_server-0.3.1 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  99. {sie_server-0.3.1 → sie_server-0.3.3}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  100. {sie_server-0.3.1 → sie_server-0.3.3}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  101. {sie_server-0.3.1 → sie_server-0.3.3}/models/fastino__gliner2-base-v1.yaml +0 -0
  102. {sie_server-0.3.1 → sie_server-0.3.3}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  103. {sie_server-0.3.1 → sie_server-0.3.3}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  104. {sie_server-0.3.1 → sie_server-0.3.3}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  105. {sie_server-0.3.1 → sie_server-0.3.3}/models/google__embeddinggemma-300m.yaml +0 -0
  106. {sie_server-0.3.1 → sie_server-0.3.3}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  107. {sie_server-0.3.1 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  108. {sie_server-0.3.1 → sie_server-0.3.3}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  109. {sie_server-0.3.1 → sie_server-0.3.3}/models/google__siglip2-base-patch16-224.yaml +0 -0
  110. {sie_server-0.3.1 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  111. {sie_server-0.3.1 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  112. {sie_server-0.3.1 → sie_server-0.3.3}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  113. {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-base-v2.yaml +0 -0
  114. {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-large-v2.yaml +0 -0
  115. {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  116. {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__e5-small-v2.yaml +0 -0
  117. {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  118. {sie_server-0.3.1 → sie_server-0.3.3}/models/intfloat__multilingual-e5-large.yaml +0 -0
  119. {sie_server-0.3.1 → sie_server-0.3.3}/models/jackboyla__glirel-large-v0.yaml +0 -0
  120. {sie_server-0.3.1 → sie_server-0.3.3}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
  121. {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
  122. {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
  123. {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
  124. {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
  125. {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
  126. {sie_server-0.3.1 → sie_server-0.3.3}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
  127. {sie_server-0.3.1 → sie_server-0.3.3}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  128. {sie_server-0.3.1 → sie_server-0.3.3}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  129. {sie_server-0.3.1 → sie_server-0.3.3}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  130. {sie_server-0.3.1 → sie_server-0.3.3}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  131. {sie_server-0.3.1 → sie_server-0.3.3}/models/microsoft__Florence-2-base.yaml +0 -0
  132. {sie_server-0.3.1 → sie_server-0.3.3}/models/microsoft__Florence-2-large.yaml +0 -0
  133. {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  134. {sie_server-0.3.1 → sie_server-0.3.3}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  135. {sie_server-0.3.1 → sie_server-0.3.3}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  136. {sie_server-0.3.1 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  137. {sie_server-0.3.1 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  138. {sie_server-0.3.1 → sie_server-0.3.3}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  139. {sie_server-0.3.1 → sie_server-0.3.3}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  140. {sie_server-0.3.1 → sie_server-0.3.3}/models/naver__splade-v3.yaml +0 -0
  141. {sie_server-0.3.1 → sie_server-0.3.3}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  142. {sie_server-0.3.1 → sie_server-0.3.3}/models/numind__NuNER_Zero-span.yaml +0 -0
  143. {sie_server-0.3.1 → sie_server-0.3.3}/models/numind__NuNER_Zero.yaml +0 -0
  144. {sie_server-0.3.1 → sie_server-0.3.3}/models/nvidia__NV-Embed-v2.yaml +0 -0
  145. {sie_server-0.3.1 → sie_server-0.3.3}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  146. {sie_server-0.3.1 → sie_server-0.3.3}/models/openai__clip-vit-base-patch32.yaml +0 -0
  147. {sie_server-0.3.1 → sie_server-0.3.3}/models/openai__clip-vit-large-patch14.yaml +0 -0
  148. {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  149. {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  150. {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  151. {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  152. {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  153. {sie_server-0.3.1 → sie_server-0.3.3}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  154. {sie_server-0.3.1 → sie_server-0.3.3}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  155. {sie_server-0.3.1 → sie_server-0.3.3}/models/rasyosef__splade-mini.yaml +0 -0
  156. {sie_server-0.3.1 → sie_server-0.3.3}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  157. {sie_server-0.3.1 → sie_server-0.3.3}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
  158. {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_large-v2.1.yaml +0 -0
  159. {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  160. {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  161. {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  162. {sie_server-0.3.1 → sie_server-0.3.3}/models/urchade__gliner_small-v2.1.yaml +0 -0
  163. {sie_server-0.3.1 → sie_server-0.3.3}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  164. {sie_server-0.3.1 → sie_server-0.3.3}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  165. {sie_server-0.3.1 → sie_server-0.3.3}/models/zai-org__GLM-OCR.yaml +0 -0
  166. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/__init__.py +0 -0
  167. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/__init__.py +0 -0
  168. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_flash_base.py +0 -0
  169. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_spec.py +0 -0
  170. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/_types.py +0 -0
  171. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
  172. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
  173. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
  174. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
  175. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
  176. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/clip/__init__.py +0 -0
  177. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colbert/__init__.py +0 -0
  178. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  179. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  180. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colpali/__init__.py +0 -0
  181. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  182. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  183. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/donut/__init__.py +0 -0
  184. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/florence2/__init__.py +0 -0
  185. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliner/__init__.py +0 -0
  186. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliner2/__init__.py +0 -0
  187. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
  188. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/glirel/__init__.py +0 -0
  189. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
  190. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  191. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
  192. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
  193. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  194. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
  195. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  196. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
  197. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
  198. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/owlv2/__init__.py +0 -0
  199. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  200. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  201. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
  202. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
  203. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
  204. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
  205. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
  206. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/siglip/__init__.py +0 -0
  207. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
  208. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
  209. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
  210. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/__init__.py +0 -0
  211. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/encode.py +0 -0
  212. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/health.py +0 -0
  213. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/metrics.py +0 -0
  214. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/models.py +0 -0
  215. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/openai_compat.py +0 -0
  216. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/openapi.py +0 -0
  217. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/root.py +0 -0
  218. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/score.py +0 -0
  219. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/serialization.py +0 -0
  220. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/validation.py +0 -0
  221. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/api/ws.py +0 -0
  222. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/app/__init__.py +0 -0
  223. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/app/app_state_config.py +0 -0
  224. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/config/__init__.py +0 -0
  225. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/config/engine.py +0 -0
  226. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/config/model.py +0 -0
  227. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/__init__.py +0 -0
  228. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/adaptive_batching.py +0 -0
  229. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/batcher.py +0 -0
  230. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/deps.py +0 -0
  231. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/disk_cache.py +0 -0
  232. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/encode_pipeline.py +0 -0
  233. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/extract_cost.py +0 -0
  234. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/hot_reload.py +0 -0
  235. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/inference.py +0 -0
  236. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/inference_output.py +0 -0
  237. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/loader.py +0 -0
  238. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/logging.py +0 -0
  239. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/memory.py +0 -0
  240. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/oom.py +0 -0
  241. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/postprocessor.py +0 -0
  242. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/postprocessor_registry.py +0 -0
  243. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/prepared.py +0 -0
  244. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/__init__.py +0 -0
  245. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/base.py +0 -0
  246. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/image.py +0 -0
  247. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/text.py +0 -0
  248. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor/vision.py +0 -0
  249. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/preprocessor_registry.py +0 -0
  250. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/readiness.py +0 -0
  251. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/shutdown.py +0 -0
  252. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/timing.py +0 -0
  253. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/tokenizer.py +0 -0
  254. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/watcher.py +0 -0
  255. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/__init__.py +0 -0
  256. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  257. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/base.py +0 -0
  258. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/encode.py +0 -0
  259. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/extract.py +0 -0
  260. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/handlers/score.py +0 -0
  261. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/model_worker.py +0 -0
  262. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/oom_recovery.py +0 -0
  263. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/core/worker/types.py +0 -0
  264. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/nats_pull_loop.py +0 -0
  265. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/nats_subscriber.py +0 -0
  266. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/__init__.py +0 -0
  267. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/gpu.py +0 -0
  268. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/prometheus.py +0 -0
  269. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/telemetry.py +0 -0
  270. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/observability/tracing.py +0 -0
  271. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/static/__init__.py +0 -0
  272. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/static/index.html +0 -0
  273. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/__init__.py +0 -0
  274. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/inputs.py +0 -0
  275. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/outputs.py +0 -0
  276. {sie_server-0.3.1 → sie_server-0.3.3}/src/sie_server/types/requests.py +0 -0
  277. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/__init__.py +0 -0
  278. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_base.py +0 -0
  279. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_bge_m3.py +0 -0
  280. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_bge_m3_flash.py +0 -0
  281. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_clip.py +0 -0
  282. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_colbert.py +0 -0
  283. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_docling_smoke.py +0 -0
  284. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_donut.py +0 -0
  285. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_factory_integration.py +0 -0
  286. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_flash_base.py +0 -0
  287. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_florence2.py +0 -0
  288. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_glirel.py +0 -0
  289. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_glm_ocr.py +0 -0
  290. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_grounding_dino.py +0 -0
  291. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_gte_sparse.py +0 -0
  292. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
  293. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_lighton_ocr.py +0 -0
  294. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_lora.py +0 -0
  295. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_lora_integration.py +0 -0
  296. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_sglang.py +0 -0
  297. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_siglip.py +0 -0
  298. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_sparse_aggregation.py +0 -0
  299. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_stablebridge_integration.py +0 -0
  300. {sie_server-0.3.1 → sie_server-0.3.3}/tests/adapters/test_stablebridge_pruner.py +0 -0
  301. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/__init__.py +0 -0
  302. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_dtype.py +0 -0
  303. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_endpoint.py +0 -0
  304. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_json_schema.py +0 -0
  305. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_timing.py +0 -0
  306. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_encode_validation.py +0 -0
  307. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_extract.py +0 -0
  308. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_extract_oom.py +0 -0
  309. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_health.py +0 -0
  310. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_models.py +0 -0
  311. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_openai_compat.py +0 -0
  312. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_score.py +0 -0
  313. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_version_header.py +0 -0
  314. {sie_server-0.3.1 → sie_server-0.3.3}/tests/api/test_ws.py +0 -0
  315. {sie_server-0.3.1 → sie_server-0.3.3}/tests/app/__init__.py +0 -0
  316. {sie_server-0.3.1 → sie_server-0.3.3}/tests/config/__init__.py +0 -0
  317. {sie_server-0.3.1 → sie_server-0.3.3}/tests/config/test_bundle_coverage.py +0 -0
  318. {sie_server-0.3.1 → sie_server-0.3.3}/tests/config/test_config.py +0 -0
  319. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/__init__.py +0 -0
  320. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_batcher.py +0 -0
  321. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_disk_cache.py +0 -0
  322. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_hot_reload.py +0 -0
  323. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_idle_evict.py +0 -0
  324. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_inference.py +0 -0
  325. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_loader.py +0 -0
  326. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_logging.py +0 -0
  327. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_memory.py +0 -0
  328. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_oom_detection.py +0 -0
  329. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_postprocessor.py +0 -0
  330. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_postprocessor_registry.py +0 -0
  331. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_prepared.py +0 -0
  332. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_preprocessor.py +0 -0
  333. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_preprocessor_registry.py +0 -0
  334. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_quantization.py +0 -0
  335. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_readiness.py +0 -0
  336. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_async.py +0 -0
  337. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_core.py +0 -0
  338. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_deps.py +0 -0
  339. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_failed_state.py +0 -0
  340. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_memory.py +0 -0
  341. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_registry_multi_model.py +0 -0
  342. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_shutdown.py +0 -0
  343. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_timing.py +0 -0
  344. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_watcher.py +0 -0
  345. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_backpressure.py +0 -0
  346. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_core.py +0 -0
  347. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_extract.py +0 -0
  348. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_lora.py +0 -0
  349. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_options.py +0 -0
  350. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/test_worker_score.py +0 -0
  351. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/worker/__init__.py +0 -0
  352. {sie_server-0.3.1 → sie_server-0.3.3}/tests/core/worker/test_oom_recovery.py +0 -0
  353. {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/__init__.py +0 -0
  354. {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/test_metrics.py +0 -0
  355. {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/test_telemetry.py +0 -0
  356. {sie_server-0.3.1 → sie_server-0.3.3}/tests/observability/test_tracing.py +0 -0
  357. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_nats_pull_loop.py +0 -0
  358. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_nats_pull_loop_batching.py +0 -0
  359. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_openapi_export.py +0 -0
  360. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_sdk_integration.py +0 -0
  361. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_server_smoke.py +0 -0
  362. {sie_server-0.3.1 → sie_server-0.3.3}/tests/test_sparse_integration.py +0 -0
  363. {sie_server-0.3.1 → sie_server-0.3.3}/tests/type_defs/__init__.py +0 -0
  364. {sie_server-0.3.1 → sie_server-0.3.3}/tests/type_defs/test_inputs.py +0 -0
  365. {sie_server-0.3.1 → sie_server-0.3.3}/tests/type_defs/test_types.py +0 -0
@@ -16,6 +16,9 @@ eggs/
16
16
  .eggs/
17
17
  lib/
18
18
  lib64/
19
+ # JS/TS projects under tools/ legitimately use a `lib/` directory.
20
+ !tools/*/lib/
21
+ !tools/*/lib/**
19
22
  parts/
20
23
  sdist/
21
24
  var/
@@ -18,14 +18,15 @@ ENV DEBIAN_FRONTEND=noninteractive \
18
18
  UV_LINK_MODE=copy \
19
19
  PIP_DISABLE_PIP_VERSION_CHECK=1
20
20
 
21
- # build-essential + git are builder-only; they do NOT leak into the runtime stage.
21
+ # build-essential + git + zlib1g-dev are builder-only; they do NOT leak into the runtime stage.
22
22
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
23
23
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
24
24
  apt-get update && apt-get install -y --no-install-recommends \
25
25
  build-essential \
26
26
  ca-certificates \
27
27
  curl \
28
- git
28
+ git \
29
+ zlib1g-dev
29
30
 
30
31
  ARG UV_VERSION=0.9.28
31
32
  RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh \
@@ -39,7 +40,10 @@ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
39
40
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
40
41
 
41
42
  # Stub source trees so pip accepts the editable installs during dep resolution.
42
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
43
+ # Also create empty bundles/ and models/ — referenced by force-include in
44
+ # pyproject.toml; hatchling resolves them at editable-metadata time even though
45
+ # real contents only land in the `base` stage.
46
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
43
47
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
44
48
  && touch src/sie_server/__init__.py
45
49
 
@@ -151,13 +155,16 @@ ENV DEBIAN_FRONTEND=noninteractive \
151
155
  OMP_NUM_THREADS=4 \
152
156
  MKL_NUM_THREADS=4
153
157
 
154
- # Only the shared libs torch + pillow actually dlopen at runtime.
158
+ # Only the shared libs torch + pillow + rtree actually dlopen at runtime.
159
+ # libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
160
+ # rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
155
161
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
156
162
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
157
163
  apt-get update && apt-get install -y --no-install-recommends \
158
164
  libgomp1 \
159
165
  libjpeg62-turbo \
160
- libpng16-16
166
+ libpng16-16 \
167
+ libspatialindex-c6
161
168
 
162
169
  RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
163
170
 
@@ -41,7 +41,10 @@ WORKDIR /app
41
41
  COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
42
42
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
43
43
 
44
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
44
+ # Stubs for editable metadata generation — bundles/ and models/ are referenced
45
+ # by force-include in pyproject.toml and must exist; real contents are copied
46
+ # in the base stage.
47
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
45
48
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
46
49
  && touch src/sie_server/__init__.py
47
50
 
@@ -149,6 +152,8 @@ FROM nvidia/cuda:11.8.0-base-ubuntu22.04 AS runtime
149
152
 
150
153
  ENV DEBIAN_FRONTEND=noninteractive
151
154
 
155
+ # libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
156
+ # rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
152
157
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
153
158
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
154
159
  apt-get update && apt-get install -y --no-install-recommends \
@@ -156,7 +161,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
156
161
  gcc \
157
162
  libc6-dev \
158
163
  libgomp1 \
159
- libnuma1
164
+ libnuma1 \
165
+ libspatialindex-c6
160
166
 
161
167
  RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
162
168
 
@@ -44,7 +44,10 @@ WORKDIR /app
44
44
  COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
45
45
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
46
46
 
47
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
47
+ # Stubs for editable metadata generation — bundles/ and models/ are referenced
48
+ # by force-include in pyproject.toml and must exist; real contents are copied
49
+ # in the base stage.
50
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
48
51
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
49
52
  && touch src/sie_server/__init__.py
50
53
 
@@ -170,6 +173,8 @@ ENV DEBIAN_FRONTEND=noninteractive
170
173
  # libnuma1: required by sgl_kernel (SGLang bundle); import fails with a
171
174
  # misleading SM-arch error without it.
172
175
  # libgomp1: torch OpenMP runtime.
176
+ # libspatialindex-c6: rtree (docling dep) dlopens libspatialindex_c.so; the
177
+ # rtree==1.4.1 wheel only bundles the C++ core, not the C wrapper.
173
178
  RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
174
179
  --mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
175
180
  apt-get update && apt-get install -y --no-install-recommends \
@@ -177,7 +182,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
177
182
  gcc \
178
183
  libc6-dev \
179
184
  libgomp1 \
180
- libnuma1
185
+ libnuma1 \
186
+ libspatialindex-c6
181
187
 
182
188
  RUN groupadd -g 1000 sie && useradd -u 1000 -g sie -m sie
183
189
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-server
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: Search Inference Engine - GPU inference server for search workloads
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -12,6 +12,7 @@ adapters:
12
12
  - sie_server.adapters.colbert_rotary_flash
13
13
  - sie_server.adapters.colpali
14
14
  - sie_server.adapters.colqwen2
15
+ - sie_server.adapters.colqwen3
15
16
  - sie_server.adapters.grounding_dino
16
17
  - sie_server.adapters.gte_sparse_flash
17
18
  - sie_server.adapters.jina_flash_cross_encoder
@@ -44,7 +45,7 @@ adapters:
44
45
  - sie_server.adapters.paddleocr_vl
45
46
  deps:
46
47
  # Most flash adapters; sentence_transformer needs >=4.57
47
- transformers: '>=4.57'
48
+ transformers: '>=4.57,<5'
48
49
  # Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
49
50
  torch: '>=2.9,<2.10'
50
51
  # bge_m3, bge_m3_flash, nemo_colembed, nomic_flash, xlm_roberta_flash
@@ -56,7 +57,7 @@ deps:
56
57
  # bge_m3_flag
57
58
  FlagEmbedding: '>=1.3'
58
59
  # grounding_dino
59
- pillow: ''
60
+ pillow: '>=11,<12'
60
61
  # grounding_dino
61
62
  requests: ''
62
63
  # gliner, gliner_bi
@@ -13,5 +13,8 @@ deps:
13
13
  transformers: '>=5.0'
14
14
  # Pin torch — flash-attn's unpinned torch dep can pull in 2.10+ which breaks torchvision
15
15
  torch: '>=2.9,<2.10'
16
- huggingface-hub: '>=0.20'
16
+ # No huggingface-hub override: transformers 5.x's own metadata
17
+ # pins it to >=1.3,<2.0. An override here was previously
18
+ # unsatisfiable against that pin and broke `uv run` resolution.
19
+ # Let transformers drive the version.
17
20
  pillow: ''
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 96
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -18,3 +18,11 @@ profiles:
18
18
  adapter_options:
19
19
  loadtime: {}
20
20
  runtime: {}
21
+ ocr:
22
+ max_batch_tokens: 1
23
+ compute_precision: null
24
+ adapter_path: sie_server.adapters.docling:DoclingAdapter
25
+ adapter_options:
26
+ loadtime: {}
27
+ runtime:
28
+ ocr: true
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 64
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -18,10 +18,19 @@ profiles:
18
18
  default:
19
19
  max_batch_tokens: 8192
20
20
  compute_precision: bfloat16
21
- adapter_path: sie_server.adapters.sglang:SGLangEmbeddingAdapter
21
+ # Was sie_server.adapters.sglang:SGLangEmbeddingAdapter, but
22
+ # SGLang's generic transformers fallback hits
23
+ # `assert get_embedding is False` for this Llama-based arch
24
+ # (no model-specific embedding implementation registered). Route
25
+ # through PyTorchEmbeddingAdapter like NV-Embed-v2 (also Llama-
26
+ # based 7B+ embedder); the heavy lane has 96 GiB so non-paged
27
+ # attention is fine. May be reconciled with a colleague's
28
+ # in-flight SGLang/arch fix later.
29
+ adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
22
30
  adapter_options:
23
31
  loadtime:
24
- mem_fraction_static: 0.85
32
+ trust_remote_code: true
33
+ attn_implementation: eager
25
34
  runtime:
26
35
  pooling: last_token
27
36
  normalize: true
@@ -0,0 +1,43 @@
1
+ sie_id: nvidia/nemotron-colembed-vl-4b-v2
2
+ hf_id: nvidia/nemotron-colembed-vl-4b-v2
3
+ inputs:
4
+ text: true
5
+ image: true
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode:
10
+ dense: null
11
+ sparse: null
12
+ multivector:
13
+ dim: 2560
14
+ score: null
15
+ extract: null
16
+ max_sequence_length: 8192
17
+ profiles:
18
+ default:
19
+ max_batch_tokens: 4096
20
+ compute_precision: bfloat16
21
+ adapter_path: sie_server.adapters.nemo_colembed:NemoColEmbedAdapter
22
+ adapter_options:
23
+ loadtime:
24
+ muvera_config:
25
+ num_repetitions: 40
26
+ num_simhash_projections: 6
27
+ projection_dim: null
28
+ final_projection_dim: 10240
29
+ token_dim: 2560
30
+ runtime:
31
+ normalize: true
32
+ batch_size: 4
33
+ muvera:
34
+ extends: default
35
+ adapter_options:
36
+ runtime:
37
+ normalize: true
38
+ batch_size: 4
39
+ output_types:
40
+ - dense
41
+ output_similarity:
42
+ dense: dot
43
+ muvera: {}
@@ -0,0 +1,42 @@
1
+ sie_id: TomoroAI/tomoro-colqwen3-embed-4b
2
+ hf_id: TomoroAI/tomoro-colqwen3-embed-4b
3
+ inputs:
4
+ text: true
5
+ image: true
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode:
10
+ dense: null
11
+ sparse: null
12
+ multivector:
13
+ dim: 320
14
+ score: null
15
+ extract: null
16
+ max_sequence_length: 8192
17
+ profiles:
18
+ default:
19
+ max_batch_tokens: 4096
20
+ compute_precision: bfloat16
21
+ adapter_path: sie_server.adapters.colqwen3:ColQwen3Adapter
22
+ adapter_options:
23
+ loadtime:
24
+ muvera_config:
25
+ num_repetitions: 40
26
+ num_simhash_projections: 6
27
+ projection_dim: null
28
+ final_projection_dim: 10240
29
+ token_dim: 320
30
+ trust_remote_code: true
31
+ runtime:
32
+ normalize: true
33
+ muvera:
34
+ extends: default
35
+ adapter_options:
36
+ runtime:
37
+ normalize: true
38
+ output_types:
39
+ - dense
40
+ output_similarity:
41
+ dense: dot
42
+ muvera: {}
@@ -3,7 +3,7 @@
3
3
  "info": {
4
4
  "title": "SIE Server",
5
5
  "description": "Search Inference Engine - GPU inference server for search workloads",
6
- "version": "0.3.1"
6
+ "version": "0.3.3"
7
7
  },
8
8
  "paths": {
9
9
  "/": {
@@ -1740,7 +1740,7 @@
1740
1740
  }
1741
1741
  ],
1742
1742
  "default": null,
1743
- "description": "Adapter-specific options",
1743
+ "description": "Adapter-specific options. Recognized sub-keys include 'overflow_policy' (one of 'default', 'truncate_text', 'error'; default 'default') controlling how inputs exceeding the model's max_sequence_length are handled.",
1744
1744
  "title": "Options"
1745
1745
  }
1746
1746
  },
@@ -1763,6 +1763,23 @@
1763
1763
  "location"
1764
1764
  ]
1765
1765
  }
1766
+ },
1767
+ {
1768
+ "items": [
1769
+ {
1770
+ "text": "Apple Inc. was founded by Steve Jobs in Cupertino, California."
1771
+ }
1772
+ ],
1773
+ "params": {
1774
+ "labels": [
1775
+ "person",
1776
+ "organization",
1777
+ "location"
1778
+ ],
1779
+ "options": {
1780
+ "overflow_policy": "truncate_text"
1781
+ }
1782
+ }
1766
1783
  }
1767
1784
  ],
1768
1785
  "properties": {
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-server"
3
- version = "0.3.1"
3
+ version = "0.3.3"
4
4
  description = "Search Inference Engine - GPU inference server for search workloads"
5
5
  requires-python = ">=3.12,<3.13"
6
6
  license = { text = "Apache-2.0" }
@@ -80,6 +80,10 @@ build-backend = "hatchling.build"
80
80
  [tool.hatch.build.targets.wheel]
81
81
  packages = ["src/sie_server"]
82
82
 
83
+ [tool.hatch.build.targets.wheel.force-include]
84
+ "models" = "sie_server/models"
85
+ "bundles" = "sie_server/bundles"
86
+
83
87
  [tool.uv.sources]
84
88
  # Prebuilt flash-attn wheel for torch 2.9 + cu128 (official wheels only go up to torch 2.8)
85
89
  # Platform-specific: Linux x86_64 only. Non-Linux users should not install the flash-attn extra.
@@ -6,11 +6,15 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
6
6
 
7
7
  from sie_server.adapters._spec import AdapterSpec
8
8
  from sie_server.adapters._types import ERR_NOT_LOADED
9
+ from sie_server.adapters._utils import grouped_score_pairs
9
10
  from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  import torch
13
14
 
15
+ from sie_server.core.inference_output import ScoreOutput
16
+ from sie_server.types.inputs import Item
17
+
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
@@ -57,7 +61,16 @@ class BaseAdapter(ModelAdapter):
57
61
  raise TypeError(msg)
58
62
 
59
63
  if "score" in spec.outputs:
60
- if cls.score is ModelAdapter.score and cls.score_pairs is ModelAdapter.score_pairs:
64
+ # BaseAdapter ships a default score_pairs() that delegates to score().
65
+ # Treat that default as "not implemented" for validation purposes:
66
+ # subclasses must override either score() or score_pairs() so the
67
+ # default delegate doesn't bottom out in ModelAdapter.score().
68
+ score_overridden = cls.score is not ModelAdapter.score
69
+ score_pairs_overridden = cls.score_pairs not in (
70
+ ModelAdapter.score_pairs,
71
+ BaseAdapter.score_pairs,
72
+ )
73
+ if not score_overridden and not score_pairs_overridden:
61
74
  msg = f"{cls.__name__} declares 'score' in outputs but does not implement score() or score_pairs()"
62
75
  raise TypeError(msg)
63
76
 
@@ -117,6 +130,41 @@ class BaseAdapter(ModelAdapter):
117
130
  model_name=getattr(self, "_model_name_or_path", ""),
118
131
  )
119
132
 
133
+ # -- Default batched scoring ---------------------------------------------
134
+
135
+ def score_pairs(
136
+ self,
137
+ queries: list[Item],
138
+ docs: list[Item],
139
+ *,
140
+ instruction: str | None = None,
141
+ options: dict[str, Any] | None = None,
142
+ ) -> ScoreOutput:
143
+ """Default ``score_pairs()`` that batches via per-query grouping.
144
+
145
+ Groups parallel ``(query, doc)`` pairs by ``(text, id, instruction)``
146
+ so each unique query is encoded once and its docs are scored as a
147
+ single ``score()`` call. Subclasses with a more efficient native
148
+ cross-batch path (e.g. cross-encoders that pack queries and docs
149
+ into one transformer pass) should override this.
150
+
151
+ Per-call ``options`` are not supported by this default delegate
152
+ (it dispatches per-query and cannot route options into ``score()``
153
+ without subclass-specific knowledge). If ``options`` is a non-empty
154
+ mapping, this raises ``NotImplementedError`` to surface the
155
+ unsupported configuration; pass ``options=None`` (or ``{}``) or
156
+ override ``score_pairs()`` with an options-aware implementation.
157
+ """
158
+ if options:
159
+ msg = (
160
+ f"{type(self).__name__}.score_pairs(): per-call options are "
161
+ f"not supported by the default batching path "
162
+ f"(got options={options!r}). Override score_pairs() with an "
163
+ f"options-aware implementation."
164
+ )
165
+ raise NotImplementedError(msg)
166
+ return grouped_score_pairs(self.score, queries, docs, instruction=instruction)
167
+
120
168
  # -- Shared helpers ------------------------------------------------------
121
169
 
122
170
  def _check_loaded(self) -> None:
@@ -1,6 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any
3
+ from typing import TYPE_CHECKING, Any, Protocol
4
+
5
+ import numpy as np
6
+
7
+ from sie_server.core.inference_output import ScoreOutput
4
8
 
5
9
  if TYPE_CHECKING:
6
10
  import torch
@@ -8,6 +12,16 @@ if TYPE_CHECKING:
8
12
  from sie_server.types.inputs import Item
9
13
 
10
14
 
15
+ class _ScoreFn(Protocol):
16
+ def __call__(
17
+ self,
18
+ query: Item,
19
+ items: list[Item],
20
+ *,
21
+ instruction: str | None = ...,
22
+ ) -> list[float]: ...
23
+
24
+
11
25
  # ---------------------------------------------------------------------------
12
26
  # RoPE utilities (eliminates 7 identical copies)
13
27
  # ---------------------------------------------------------------------------
@@ -140,3 +154,67 @@ def resolve_embedding_options(
140
154
  opts.get("query_template", default_query_template),
141
155
  opts.get("doc_template", default_doc_template),
142
156
  )
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Score-pair grouping (shared by ColBERT-family adapters)
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
+ def grouped_score_pairs(
165
+ score_fn: _ScoreFn,
166
+ queries: list[Item],
167
+ docs: list[Item],
168
+ *,
169
+ instruction: str | None = None,
170
+ ) -> ScoreOutput:
171
+ """Run a per-query ``score()`` callable over parallel (query, doc) pairs.
172
+
173
+ Groups pairs by ``(query.text, query.id, instruction)`` so each unique
174
+ query is encoded once and its docs are scored as one batch. Used by
175
+ ColBERT-family adapters to satisfy the worker's ``score_pairs()``
176
+ contract while reusing the optimized batched ``score()``.
177
+
178
+ Queries with ``text is None`` are not supported and raise ``ValueError``
179
+ (ColBERT scoring requires text). The grouping key is
180
+ ``(query.text, query.id or "", instruction or "")`` — two distinct
181
+ ``Item`` objects with identical text/id/instruction collapse to one
182
+ encoding pass.
183
+
184
+ Args:
185
+ score_fn: Bound ``adapter.score(query, items, *, instruction=None)``.
186
+ queries: Query items (parallel to docs).
187
+ docs: Document items to score.
188
+ instruction: Optional instruction passed through to ``score_fn``.
189
+
190
+ Returns:
191
+ ``ScoreOutput`` with one float per pair, in the original input order.
192
+
193
+ Raises:
194
+ ValueError: If ``queries`` and ``docs`` lengths differ, or any query
195
+ lacks text.
196
+ """
197
+ if len(queries) != len(docs):
198
+ msg = f"queries and docs must be parallel; got {len(queries)} vs {len(docs)}"
199
+ raise ValueError(msg)
200
+
201
+ if not docs:
202
+ return ScoreOutput(scores=np.zeros(0, dtype=np.float32), batch_size=0)
203
+
204
+ groups: dict[tuple[str, str, str], list[int]] = {}
205
+ for i, q in enumerate(queries):
206
+ if q.text is None:
207
+ msg = f"grouped_score_pairs requires queries[{i}].text; got None"
208
+ raise ValueError(msg)
209
+ key = (q.text, q.id or "", instruction or "")
210
+ groups.setdefault(key, []).append(i)
211
+
212
+ scores = np.zeros(len(docs), dtype=np.float32)
213
+ for indices in groups.values():
214
+ q = queries[indices[0]]
215
+ group_docs = [docs[i] for i in indices]
216
+ group_scores = score_fn(q, group_docs, instruction=instruction)
217
+ for idx, s in zip(indices, group_scores, strict=True):
218
+ scores[idx] = float(s)
219
+
220
+ return ScoreOutput(scores=scores, batch_size=len(docs))
@@ -127,6 +127,20 @@ class ModelAdapter(ABC):
127
127
  device: Device string (e.g., "cuda:0", "cpu").
128
128
  """
129
129
 
130
+ def warmup(self) -> None:
131
+ """Run a warmup forward pass on the loaded model.
132
+
133
+ Called by the model loader after ``load()`` has completed. The default
134
+ implementation is a no-op for adapters that do not need warmup. Adapters
135
+ that compile kernels on first call (e.g. flash-attention) or otherwise
136
+ benefit from a priming pass should override this and run a single
137
+ inference pass against a tiny synthetic input.
138
+
139
+ Splitting this from ``load()`` lets the cold-start instrumentation
140
+ attribute deserialize and warmup time separately.
141
+ """
142
+ return
143
+
130
144
  @abstractmethod
131
145
  def unload(self) -> None:
132
146
  """Unload the model and free resources.
@@ -172,6 +172,7 @@ class BertFlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
172
172
  self._max_seq_length,
173
173
  )
174
174
 
175
+ def warmup(self) -> None:
175
176
  # Warmup flash attention kernels
176
177
  logger.info("Warming up CUDA kernels...")
177
178
  warmup_items = [Item(text="warmup")]