sie-server 0.6.2__tar.gz → 0.6.4__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (443)
  1. {sie_server-0.6.2 → sie_server-0.6.4}/PKG-INFO +1 -1
  2. sie_server-0.6.4/models/MoritzLaurer__ModernBERT-base-zeroshot-v2.0.yaml +22 -0
  3. sie_server-0.6.4/models/facebook__bart-large-mnli.yaml +22 -0
  4. sie_server-0.6.4/models/fastino__gliner2-large-v1.yaml +20 -0
  5. sie_server-0.6.4/models/google__owlv2-large-patch14-ensemble.yaml +21 -0
  6. {sie_server-0.6.2 → sie_server-0.6.4}/openapi.json +1 -1
  7. {sie_server-0.6.2 → sie_server-0.6.4}/pyproject.toml +1 -1
  8. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/_ipc_test_harness.py +46 -1
  9. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_generation_base.py +9 -0
  10. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nemo_colembed/__init__.py +41 -5
  11. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/owlv2/__init__.py +1 -1
  12. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/generation.py +57 -0
  13. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/app/app_state_config.py +1 -1
  14. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/cli.py +8 -2
  15. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/loader.py +2 -2
  16. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/model_loader.py +10 -5
  17. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/registry.py +2 -2
  18. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/streaming.py +423 -9
  19. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_runtime_options.py +4 -1
  20. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sglang_generation.py +108 -0
  21. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_streaming.py +480 -0
  22. {sie_server-0.6.2 → sie_server-0.6.4}/.gitignore +0 -0
  23. {sie_server-0.6.2 → sie_server-0.6.4}/CONTRIBUTING.md +0 -0
  24. {sie_server-0.6.2 → sie_server-0.6.4}/Dockerfile.cpu +0 -0
  25. {sie_server-0.6.2 → sie_server-0.6.4}/Dockerfile.cuda12 +0 -0
  26. {sie_server-0.6.2 → sie_server-0.6.4}/LICENSE +0 -0
  27. {sie_server-0.6.2 → sie_server-0.6.4}/README.md +0 -0
  28. {sie_server-0.6.2 → sie_server-0.6.4}/bundles/default.yaml +0 -0
  29. {sie_server-0.6.2 → sie_server-0.6.4}/bundles/sglang-embedding.yaml +0 -0
  30. {sie_server-0.6.2 → sie_server-0.6.4}/bundles/sglang.yaml +0 -0
  31. {sie_server-0.6.2 → sie_server-0.6.4}/bundles/transformers5.yaml +0 -0
  32. {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  33. {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  34. {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  35. {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  36. {sie_server-0.6.2 → sie_server-0.6.4}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  37. {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-m3.yaml +0 -0
  38. {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-reranker-base.yaml +0 -0
  39. {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-reranker-large.yaml +0 -0
  40. {sie_server-0.6.2 → sie_server-0.6.4}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  41. {sie_server-0.6.2 → sie_server-0.6.4}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  42. {sie_server-0.6.2 → sie_server-0.6.4}/models/GritLM__GritLM-7B.yaml +0 -0
  43. {sie_server-0.6.2 → sie_server-0.6.4}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  44. {sie_server-0.6.2 → sie_server-0.6.4}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  45. {sie_server-0.6.2 → sie_server-0.6.4}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  46. {sie_server-0.6.2 → sie_server-0.6.4}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  47. {sie_server-0.6.2 → sie_server-0.6.4}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
  48. {sie_server-0.6.2 → sie_server-0.6.4}/models/Marqo__marqo-fashionSigLIP.yaml +0 -0
  49. {sie_server-0.6.2 → sie_server-0.6.4}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  50. {sie_server-0.6.2 → sie_server-0.6.4}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  51. {sie_server-0.6.2 → sie_server-0.6.4}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  52. {sie_server-0.6.2 → sie_server-0.6.4}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  53. {sie_server-0.6.2 → sie_server-0.6.4}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  54. {sie_server-0.6.2 → sie_server-0.6.4}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
  55. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-0.6B.yaml +0 -0
  56. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-4B-Instruct-2507.yaml +0 -0
  57. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  58. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  59. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
  60. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
  61. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
  62. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
  63. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3.5-4B.yaml +0 -0
  64. {sie_server-0.6.2 → sie_server-0.6.4}/models/Qwen__Qwen3.6-27B.yaml +0 -0
  65. {sie_server-0.6.2 → sie_server-0.6.4}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  66. {sie_server-0.6.2 → sie_server-0.6.4}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  67. {sie_server-0.6.2 → sie_server-0.6.4}/models/Snowflake__snowflake-arctic-embed-l-v2.0.yaml +0 -0
  68. {sie_server-0.6.2 → sie_server-0.6.4}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  69. {sie_server-0.6.2 → sie_server-0.6.4}/models/TomoroAI__tomoro-colqwen3-embed-4b.yaml +0 -0
  70. {sie_server-0.6.2 → sie_server-0.6.4}/models/answerdotai__ModernBERT-base.yaml +0 -0
  71. {sie_server-0.6.2 → sie_server-0.6.4}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
  72. {sie_server-0.6.2 → sie_server-0.6.4}/models/colbert-ir__colbertv2.0.yaml +0 -0
  73. {sie_server-0.6.2 → sie_server-0.6.4}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  74. {sie_server-0.6.2 → sie_server-0.6.4}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  75. {sie_server-0.6.2 → sie_server-0.6.4}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  76. {sie_server-0.6.2 → sie_server-0.6.4}/models/defog__sqlcoder-7b-2.yaml +0 -0
  77. {sie_server-0.6.2 → sie_server-0.6.4}/models/docling.yaml +0 -0
  78. {sie_server-0.6.2 → sie_server-0.6.4}/models/fastino__gliner2-base-v1.yaml +0 -0
  79. {sie_server-0.6.2 → sie_server-0.6.4}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  80. {sie_server-0.6.2 → sie_server-0.6.4}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  81. {sie_server-0.6.2 → sie_server-0.6.4}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  82. {sie_server-0.6.2 → sie_server-0.6.4}/models/google__embeddinggemma-300m.yaml +0 -0
  83. {sie_server-0.6.2 → sie_server-0.6.4}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  84. {sie_server-0.6.2 → sie_server-0.6.4}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  85. {sie_server-0.6.2 → sie_server-0.6.4}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  86. {sie_server-0.6.2 → sie_server-0.6.4}/models/google__siglip2-base-patch16-224.yaml +0 -0
  87. {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  88. {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  89. {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  90. {sie_server-0.6.2 → sie_server-0.6.4}/models/ibm-granite__granite-guardian-3.0-2b.yaml +0 -0
  91. {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-base-v2.yaml +0 -0
  92. {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-large-v2.yaml +0 -0
  93. {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  94. {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__e5-small-v2.yaml +0 -0
  95. {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  96. {sie_server-0.6.2 → sie_server-0.6.4}/models/intfloat__multilingual-e5-large.yaml +0 -0
  97. {sie_server-0.6.2 → sie_server-0.6.4}/models/jackboyla__glirel-large-v0.yaml +0 -0
  98. {sie_server-0.6.2 → sie_server-0.6.4}/models/jinaai__jina-colbert-v2.yaml +0 -0
  99. {sie_server-0.6.2 → sie_server-0.6.4}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
  100. {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
  101. {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
  102. {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
  103. {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
  104. {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
  105. {sie_server-0.6.2 → sie_server-0.6.4}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
  106. {sie_server-0.6.2 → sie_server-0.6.4}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  107. {sie_server-0.6.2 → sie_server-0.6.4}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  108. {sie_server-0.6.2 → sie_server-0.6.4}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
  109. {sie_server-0.6.2 → sie_server-0.6.4}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  110. {sie_server-0.6.2 → sie_server-0.6.4}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
  111. {sie_server-0.6.2 → sie_server-0.6.4}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  112. {sie_server-0.6.2 → sie_server-0.6.4}/models/microsoft__Florence-2-base.yaml +0 -0
  113. {sie_server-0.6.2 → sie_server-0.6.4}/models/microsoft__Florence-2-large.yaml +0 -0
  114. {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
  115. {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
  116. {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-embed-large-v1.yaml +0 -0
  117. {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  118. {sie_server-0.6.2 → sie_server-0.6.4}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  119. {sie_server-0.6.2 → sie_server-0.6.4}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  120. {sie_server-0.6.2 → sie_server-0.6.4}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  121. {sie_server-0.6.2 → sie_server-0.6.4}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  122. {sie_server-0.6.2 → sie_server-0.6.4}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  123. {sie_server-0.6.2 → sie_server-0.6.4}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  124. {sie_server-0.6.2 → sie_server-0.6.4}/models/naver__splade-v3.yaml +0 -0
  125. {sie_server-0.6.2 → sie_server-0.6.4}/models/nomic-ai__modernbert-embed-base.yaml +0 -0
  126. {sie_server-0.6.2 → sie_server-0.6.4}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  127. {sie_server-0.6.2 → sie_server-0.6.4}/models/numind__NuNER_Zero-span.yaml +0 -0
  128. {sie_server-0.6.2 → sie_server-0.6.4}/models/numind__NuNER_Zero.yaml +0 -0
  129. {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__NV-Embed-v2.yaml +0 -0
  130. {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
  131. {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  132. {sie_server-0.6.2 → sie_server-0.6.4}/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +0 -0
  133. {sie_server-0.6.2 → sie_server-0.6.4}/models/openai__clip-vit-base-patch32.yaml +0 -0
  134. {sie_server-0.6.2 → sie_server-0.6.4}/models/openai__clip-vit-large-patch14.yaml +0 -0
  135. {sie_server-0.6.2 → sie_server-0.6.4}/models/opendatalab__MinerU2.5-Pro-2604-1.2B.yaml +0 -0
  136. {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  137. {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  138. {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  139. {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  140. {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  141. {sie_server-0.6.2 → sie_server-0.6.4}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  142. {sie_server-0.6.2 → sie_server-0.6.4}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  143. {sie_server-0.6.2 → sie_server-0.6.4}/models/rasyosef__splade-mini.yaml +0 -0
  144. {sie_server-0.6.2 → sie_server-0.6.4}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  145. {sie_server-0.6.2 → sie_server-0.6.4}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
  146. {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_large-v2.1.yaml +0 -0
  147. {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  148. {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  149. {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  150. {sie_server-0.6.2 → sie_server-0.6.4}/models/urchade__gliner_small-v2.1.yaml +0 -0
  151. {sie_server-0.6.2 → sie_server-0.6.4}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  152. {sie_server-0.6.2 → sie_server-0.6.4}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  153. {sie_server-0.6.2 → sie_server-0.6.4}/models/zai-org__GLM-OCR.yaml +0 -0
  154. {sie_server-0.6.2 → sie_server-0.6.4}/scripts/generate_tokenize_fixture.py +0 -0
  155. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/__init__.py +0 -0
  156. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapter_call_loop.py +0 -0
  157. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/__init__.py +0 -0
  158. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_base_adapter.py +0 -0
  159. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_flash_base.py +0 -0
  160. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_spec.py +0 -0
  161. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_types.py +0 -0
  162. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/_utils.py +0 -0
  163. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/base.py +0 -0
  164. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bert_flash/__init__.py +0 -0
  165. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
  166. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
  167. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
  168. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
  169. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
  170. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/clip/__init__.py +0 -0
  171. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colbert/__init__.py +0 -0
  172. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  173. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  174. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colpali/__init__.py +0 -0
  175. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  176. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/colqwen3/__init__.py +0 -0
  177. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  178. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/docling/__init__.py +0 -0
  179. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/donut/__init__.py +0 -0
  180. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/errors.py +0 -0
  181. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/florence2/__init__.py +0 -0
  182. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliclass/__init__.py +0 -0
  183. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliner/__init__.py +0 -0
  184. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliner2/__init__.py +0 -0
  185. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
  186. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/glirel/__init__.py +0 -0
  187. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
  188. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  189. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
  190. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
  191. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  192. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/mineru_vl/__init__.py +0 -0
  193. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/modernbert_flash/__init__.py +0 -0
  194. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
  195. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  196. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
  197. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
  198. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/paddleocr_vl/__init__.py +0 -0
  199. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  200. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  201. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
  202. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
  203. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
  204. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
  205. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
  206. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sentence_transformer/__init__.py +0 -0
  207. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/__init__.py +0 -0
  208. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/_server.py +0 -0
  209. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/sglang/embedding.py +0 -0
  210. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/siglip/__init__.py +0 -0
  211. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
  212. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
  213. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
  214. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/__init__.py +0 -0
  215. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/encode.py +0 -0
  216. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/extract.py +0 -0
  217. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/generate.py +0 -0
  218. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/health.py +0 -0
  219. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/helpers.py +0 -0
  220. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/metrics.py +0 -0
  221. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/models.py +0 -0
  222. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/openai_compat.py +0 -0
  223. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/openapi.py +0 -0
  224. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/options.py +0 -0
  225. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/root.py +0 -0
  226. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/score.py +0 -0
  227. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/serialization.py +0 -0
  228. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/validation.py +0 -0
  229. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/api/ws.py +0 -0
  230. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/app/__init__.py +0 -0
  231. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/app/app_factory.py +0 -0
  232. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/config/__init__.py +0 -0
  233. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/config/engine.py +0 -0
  234. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/config/model.py +0 -0
  235. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/__init__.py +0 -0
  236. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/adaptive_batching.py +0 -0
  237. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/batcher.py +0 -0
  238. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/deps.py +0 -0
  239. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/disk_cache.py +0 -0
  240. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/encode_pipeline.py +0 -0
  241. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/extract_cost.py +0 -0
  242. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/gpu_health.py +0 -0
  243. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/hf_env.py +0 -0
  244. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/hot_reload.py +0 -0
  245. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/inference.py +0 -0
  246. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/inference_output.py +0 -0
  247. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/load_errors.py +0 -0
  248. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/logging.py +0 -0
  249. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/memory.py +0 -0
  250. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/oom.py +0 -0
  251. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/pool_isolation.py +0 -0
  252. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/postprocessor.py +0 -0
  253. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/postprocessor_registry.py +0 -0
  254. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/prepared.py +0 -0
  255. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/__init__.py +0 -0
  256. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/base.py +0 -0
  257. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/image.py +0 -0
  258. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/text.py +0 -0
  259. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor/vision.py +0 -0
  260. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/preprocessor_registry.py +0 -0
  261. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/readiness.py +0 -0
  262. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/shutdown.py +0 -0
  263. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/text_tokens.py +0 -0
  264. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/timing.py +0 -0
  265. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/tokenizer.py +0 -0
  266. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/watcher.py +0 -0
  267. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/__init__.py +0 -0
  268. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  269. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/base.py +0 -0
  270. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/encode.py +0 -0
  271. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/extract.py +0 -0
  272. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/handlers/score.py +0 -0
  273. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/model_worker.py +0 -0
  274. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/oom_recovery.py +0 -0
  275. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/core/worker/types.py +0 -0
  276. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/health/__init__.py +0 -0
  277. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/health/nats_publisher.py +0 -0
  278. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/health/saturation.py +0 -0
  279. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/ipc_server.py +0 -0
  280. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/ipc_types.py +0 -0
  281. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/main.py +0 -0
  282. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/__init__.py +0 -0
  283. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/gpu.py +0 -0
  284. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/metrics.py +0 -0
  285. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/prometheus.py +0 -0
  286. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/telemetry.py +0 -0
  287. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/observability/tracing.py +0 -0
  288. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/__init__.py +0 -0
  289. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/admission.py +0 -0
  290. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/base.py +0 -0
  291. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/grammar_cache.py +0 -0
  292. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/grammar_compile.py +0 -0
  293. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/tool_call_grammar.py +0 -0
  294. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/tool_call_parser.py +0 -0
  295. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/processors/work_class_scheduler.py +0 -0
  296. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/queue_executor.py +0 -0
  297. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/static/__init__.py +0 -0
  298. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/static/index.html +0 -0
  299. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/__init__.py +0 -0
  300. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/grammar.py +0 -0
  301. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/inputs.py +0 -0
  302. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/openapi.py +0 -0
  303. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/outputs.py +0 -0
  304. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/overflow_policy.py +0 -0
  305. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/requests.py +0 -0
  306. {sie_server-0.6.2 → sie_server-0.6.4}/src/sie_server/types/responses.py +0 -0
  307. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/__init__.py +0 -0
  308. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_base.py +0 -0
  309. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_bge_m3.py +0 -0
  310. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_bge_m3_flash.py +0 -0
  311. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_clip.py +0 -0
  312. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_colbert.py +0 -0
  313. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_docling.py +0 -0
  314. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_docling_smoke.py +0 -0
  315. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_donut.py +0 -0
  316. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_factory_integration.py +0 -0
  317. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_flash_base.py +0 -0
  318. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_florence2.py +0 -0
  319. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_gliclass_overflow_policy.py +0 -0
  320. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_glirel.py +0 -0
  321. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_glm_ocr.py +0 -0
  322. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_grounding_dino.py +0 -0
  323. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_gte_sparse.py +0 -0
  324. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
  325. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_lighton_ocr.py +0 -0
  326. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_lora.py +0 -0
  327. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_lora_integration.py +0 -0
  328. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_mineru_vl.py +0 -0
  329. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_paddleocr_vl.py +0 -0
  330. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_pytorch_embedding_revision.py +0 -0
  331. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sentence_transformer.py +0 -0
  332. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sglang.py +0 -0
  333. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_siglip.py +0 -0
  334. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_sparse_aggregation.py +0 -0
  335. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_stablebridge_integration.py +0 -0
  336. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_stablebridge_pruner.py +0 -0
  337. {sie_server-0.6.2 → sie_server-0.6.4}/tests/adapters/test_visual_document.py +0 -0
  338. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/__init__.py +0 -0
  339. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_dtype.py +0 -0
  340. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_endpoint.py +0 -0
  341. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_json_schema.py +0 -0
  342. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_timing.py +0 -0
  343. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_encode_validation.py +0 -0
  344. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_extract.py +0 -0
  345. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_extract_integration.py +0 -0
  346. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_extract_oom.py +0 -0
  347. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_generate.py +0 -0
  348. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_health.py +0 -0
  349. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_models.py +0 -0
  350. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_openai_compat.py +0 -0
  351. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_score.py +0 -0
  352. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_version_header.py +0 -0
  353. {sie_server-0.6.2 → sie_server-0.6.4}/tests/api/test_ws.py +0 -0
  354. {sie_server-0.6.2 → sie_server-0.6.4}/tests/app/__init__.py +0 -0
  355. {sie_server-0.6.2 → sie_server-0.6.4}/tests/app/test_app_factory.py +0 -0
  356. {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/__init__.py +0 -0
  357. {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_bundle_coverage.py +0 -0
  358. {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_config.py +0 -0
  359. {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_model_prewarm_grammars.py +0 -0
  360. {sie_server-0.6.2 → sie_server-0.6.4}/tests/config/test_profile_backend_consistency.py +0 -0
  361. {sie_server-0.6.2 → sie_server-0.6.4}/tests/conftest.py +0 -0
  362. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/__init__.py +0 -0
  363. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_adaptive_batching.py +0 -0
  364. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_batcher.py +0 -0
  365. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_disk_cache.py +0 -0
  366. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_gpu_health.py +0 -0
  367. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_hot_reload.py +0 -0
  368. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_idle_evict.py +0 -0
  369. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_inference.py +0 -0
  370. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_loader.py +0 -0
  371. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_logging.py +0 -0
  372. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_lora_generation_exclusion.py +0 -0
  373. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_memory.py +0 -0
  374. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_model_load_timeout.py +0 -0
  375. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_oom_detection.py +0 -0
  376. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_pool_isolation.py +0 -0
  377. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_postprocessor.py +0 -0
  378. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_postprocessor_registry.py +0 -0
  379. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_prepared.py +0 -0
  380. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_preprocessor.py +0 -0
  381. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_preprocessor_registry.py +0 -0
  382. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_quantization.py +0 -0
  383. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_readiness.py +0 -0
  384. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_async.py +0 -0
  385. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_core.py +0 -0
  386. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_deps.py +0 -0
  387. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_failed_state.py +0 -0
  388. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_memory.py +0 -0
  389. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_registry_multi_model.py +0 -0
  390. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_shutdown.py +0 -0
  391. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_timing.py +0 -0
  392. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_watcher.py +0 -0
  393. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_backpressure.py +0 -0
  394. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_core.py +0 -0
  395. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_extract.py +0 -0
  396. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_lora.py +0 -0
  397. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_options.py +0 -0
  398. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_passthrough.py +0 -0
  399. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/test_worker_score.py +0 -0
  400. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/worker/__init__.py +0 -0
  401. {sie_server-0.6.2 → sie_server-0.6.4}/tests/core/worker/test_oom_recovery.py +0 -0
  402. {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/__init__.py +0 -0
  403. {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/test_nats_publisher.py +0 -0
  404. {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/test_saturation.py +0 -0
  405. {sie_server-0.6.2 → sie_server-0.6.4}/tests/health/test_worker_id_consistency.py +0 -0
  406. {sie_server-0.6.2 → sie_server-0.6.4}/tests/integration/__init__.py +0 -0
  407. {sie_server-0.6.2 → sie_server-0.6.4}/tests/integration/test_chat_completions.py +0 -0
  408. {sie_server-0.6.2 → sie_server-0.6.4}/tests/integration/test_grammar_generate.py +0 -0
  409. {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/__init__.py +0 -0
  410. {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_generation_metrics.py +0 -0
  411. {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_metrics.py +0 -0
  412. {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_telemetry.py +0 -0
  413. {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_trace_propagation.py +0 -0
  414. {sie_server-0.6.2 → sie_server-0.6.4}/tests/observability/test_tracing.py +0 -0
  415. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/__init__.py +0 -0
  416. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_grammar_cache.py +0 -0
  417. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_grammar_compile.py +0 -0
  418. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_grammar_prewarm.py +0 -0
  419. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_streaming_admission.py +0 -0
  420. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_streaming_integration.py +0 -0
  421. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_tool_call_grammar.py +0 -0
  422. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_tool_call_parser.py +0 -0
  423. {sie_server-0.6.2 → sie_server-0.6.4}/tests/processors/test_work_class_scheduler.py +0 -0
  424. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_adapter_call_loop.py +0 -0
  425. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_all_models.py +0 -0
  426. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_docker_integration.py +0 -0
  427. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_ipc_server.py +0 -0
  428. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_ipc_types_raw_output.py +0 -0
  429. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_model_yaml_filenames.py +0 -0
  430. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_openapi_export.py +0 -0
  431. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_parity_run_batch.py +0 -0
  432. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_queue_executor.py +0 -0
  433. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_queue_executor_stage1d.py +0 -0
  434. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_readiness.py +0 -0
  435. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_sdk_integration.py +0 -0
  436. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_server_smoke.py +0 -0
  437. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_sparse_integration.py +0 -0
  438. {sie_server-0.6.2 → sie_server-0.6.4}/tests/test_stage1d_byte_identity.py +0 -0
  439. {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/__init__.py +0 -0
  440. {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_inputs.py +0 -0
  441. {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_inputs_json_decode.py +0 -0
  442. {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_media_bytes.py +0 -0
  443. {sie_server-0.6.2 → sie_server-0.6.4}/tests/type_defs/test_types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-server
3
- Version: 0.6.2
3
+ Version: 0.6.4
4
4
  Summary: Search Inference Engine - GPU inference server for search workloads
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -0,0 +1,22 @@
1
+ sie_id: MoritzLaurer/ModernBERT-base-zeroshot-v2.0
2
+ hf_id: MoritzLaurer/ModernBERT-base-zeroshot-v2.0
3
+ inputs:
4
+ text: true
5
+ image: false
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode: null
10
+ score: null
11
+ extract: {}
12
+ max_sequence_length: 512
13
+ profiles:
14
+ default:
15
+ max_batch_tokens: 16384
16
+ compute_precision: null
17
+ adapter_path: sie_server.adapters.nli_classification_flash:NLIClassificationFlashAdapter
18
+ adapter_options:
19
+ loadtime: {}
20
+ runtime:
21
+ hypothesis_template: This text is about {}.
22
+ multi_label: false
@@ -0,0 +1,22 @@
1
+ sie_id: facebook/bart-large-mnli
2
+ hf_id: facebook/bart-large-mnli
3
+ inputs:
4
+ text: true
5
+ image: false
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode: null
10
+ score: null
11
+ extract: {}
12
+ max_sequence_length: 512
13
+ profiles:
14
+ default:
15
+ max_batch_tokens: 16384
16
+ compute_precision: null
17
+ adapter_path: sie_server.adapters.nli_classification_flash:NLIClassificationFlashAdapter
18
+ adapter_options:
19
+ loadtime: {}
20
+ runtime:
21
+ hypothesis_template: This text is about {}.
22
+ multi_label: false
@@ -0,0 +1,20 @@
1
+ sie_id: fastino/gliner2-large-v1
2
+ hf_id: fastino/gliner2-large-v1
3
+ inputs:
4
+ text: true
5
+ image: false
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode: null
10
+ score: null
11
+ extract: {}
12
+ max_sequence_length: 512
13
+ profiles:
14
+ default:
15
+ max_batch_tokens: 16384
16
+ compute_precision: float16
17
+ adapter_path: sie_server.adapters.gliner2:GLiNER2Adapter
18
+ adapter_options:
19
+ loadtime: {}
20
+ runtime: {}
@@ -0,0 +1,21 @@
1
+ sie_id: google/owlv2-large-patch14-ensemble
2
+ hf_id: google/owlv2-large-patch14-ensemble
3
+ inputs:
4
+ text: false
5
+ image: true
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode: null
10
+ score: null
11
+ extract: {}
12
+ profiles:
13
+ default:
14
+ max_batch_tokens: 16384
15
+ compute_precision: float16
16
+ adapter_path: sie_server.adapters.owlv2:Owlv2Adapter
17
+ adapter_options:
18
+ loadtime:
19
+ score_threshold: 0.1
20
+ runtime:
21
+ score_threshold: 0.1
@@ -3,7 +3,7 @@
3
3
  "info": {
4
4
  "title": "SIE Server",
5
5
  "description": "Search Inference Engine - GPU inference server for search workloads",
6
- "version": "0.6.2"
6
+ "version": "0.6.4"
7
7
  },
8
8
  "paths": {
9
9
  "/": {
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-server"
3
- version = "0.6.2"
3
+ version = "0.6.4"
4
4
  description = "Search Inference Engine - GPU inference server for search workloads"
5
5
  requires-python = ">=3.12,<3.13"
6
6
  license = { text = "Apache-2.0" }
@@ -136,7 +136,7 @@ class _StubExecutor:
136
136
 
137
137
  async def process_extract_batch(self, req: ProcessExtractBatchRequest) -> BatchOutcome:
138
138
  await self._maybe_sleep()
139
- return _canned_batch_outcome(req.items)
139
+ return _canned_extract_batch_outcome(req.items)
140
140
 
141
141
 
142
142
  class _FakeGenerateProcessor:
@@ -179,6 +179,51 @@ def _canned_batch_outcome(items: list[Any]) -> BatchOutcome:
179
179
  )
180
180
 
181
181
 
182
+ def _extract_document_echo(item: Any) -> dict[str, Any]:
183
+ document = item.item.get("document") if isinstance(item.item, dict) else None
184
+ if not isinstance(document, dict):
185
+ return {
186
+ "present": False,
187
+ "data_is_bytes": False,
188
+ "data": b"",
189
+ "data_len": 0,
190
+ "format": None,
191
+ }
192
+
193
+ data = document.get("data")
194
+ data_is_bytes = isinstance(data, bytes | bytearray)
195
+ data_bytes = bytes(data) if data_is_bytes else b""
196
+ return {
197
+ "present": True,
198
+ "data_is_bytes": data_is_bytes,
199
+ "data": data_bytes,
200
+ "data_len": len(data_bytes),
201
+ "format": document.get("format"),
202
+ }
203
+
204
+
205
+ def _canned_extract_batch_outcome(items: list[Any]) -> BatchOutcome:
206
+ outcomes: list[ItemOutcome] = []
207
+ for item in items:
208
+ payload = msgpack.packb(
209
+ {**_CANNED_RESULT, "extract_document": _extract_document_echo(item)},
210
+ use_bin_type=True,
211
+ )
212
+ outcomes.append(
213
+ ItemOutcome(
214
+ work_item_id=item.work_item_id,
215
+ request_id=item.request_id,
216
+ item_index=item.item_index,
217
+ disposition="publish_and_ack",
218
+ result_msgpack=payload,
219
+ inference_ms=0.1,
220
+ tokenization_ms=0.05,
221
+ postprocessing_ms=0.01,
222
+ )
223
+ )
224
+ return BatchOutcome(outcomes=outcomes)
225
+
226
+
182
227
  def _canned_batch_outcome_echoing_prepared_tokens(items: list[Any]) -> BatchOutcome:
183
228
  """Like :func:`_canned_batch_outcome` but folds each item's
184
229
  ``prepared_tokens`` presence / content into the per-item
@@ -23,6 +23,7 @@ from typing import Any, ClassVar, Literal, cast
23
23
 
24
24
  from sie_server.adapters._spec import AdapterSpec
25
25
  from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
26
+ from sie_server.types.inputs import ImageInput
26
27
 
27
28
  logger = logging.getLogger(__name__)
28
29
 
@@ -247,6 +248,7 @@ class GenerationAdapter(ModelAdapter):
247
248
  logit_bias: dict[str, float] | None = None,
248
249
  logprobs: bool = False,
249
250
  top_logprobs: int | None = None,
251
+ images: list[ImageInput] | None = None,
250
252
  ) -> AsyncIterator[GenerationChunk]:
251
253
  """Stream generation chunks from a prompt.
252
254
 
@@ -279,6 +281,13 @@ class GenerationAdapter(ModelAdapter):
279
281
  with per-token log-probabilities.
280
282
  top_logprobs: How many alternates per position; only
281
283
  consulted when ``logprobs`` is True.
284
+ images: Optional list of wire-format :class:`ImageInput`
285
+ entries for vision-language models. The ``prompt`` is
286
+ expected to already carry the model's image placeholder
287
+ tokens (rendered by the chat template upstream); the
288
+ adapter forwards the image bytes to the engine. ``None``
289
+ or empty for text-only generation. Text-only adapters may
290
+ ignore this argument.
282
291
 
283
292
  Yields:
284
293
  :class:`GenerationChunk` instances. At least one terminal
@@ -346,6 +346,31 @@ class NemoColEmbedAdapter(BaseAdapter):
346
346
  num_image_token,
347
347
  )
348
348
 
349
+ def get_preprocessor(self) -> Any:
350
+ """Register BOTH a text and an image preprocessor for v1 (#1163).
351
+
352
+ v1 documents must take the conformant ``_encode_images_preprocessed`` path,
353
+ which requires an *image* preprocessor to be registered so the encode pipeline
354
+ produces a ``NemoColEmbedPayload`` (with ``pixel_values``) instead of a
355
+ passthrough ``ImagePayload``. Without it every doc batch falls back to the
356
+ model's ``forward_passages`` — which re-tiles each page inline on one thread,
357
+ ~3x slower than running the tiling upstream in the preprocessing thread pool.
358
+
359
+ But v1 *queries* (text) still go through ``model.forward_queries`` and rely on
360
+ the batched worker path; registering only the image preprocessor de-registers
361
+ the text one and routes queries to the unbatched direct-call path, which has
362
+ surfaced ``forward_queries`` failures. So we register both: the base
363
+ ``CharCountPreprocessor`` (text → worker-batched queries) and the
364
+ ``NemoColEmbedPreprocessor`` (image → conformant docs). ``model_loader``
365
+ registers each entry of the returned list by its ``modality``.
366
+
367
+ v2 (Qwen3-VL backbone) builds no ``_processor`` (``None``); it keeps just the
368
+ base text preprocessor and its native ``forward_images`` path (with #1055 fix).
369
+ """
370
+ if self._processor is None:
371
+ return super().get_preprocessor()
372
+ return [super().get_preprocessor(), self._processor]
373
+
349
374
  def encode(
350
375
  self,
351
376
  items: list[Item],
@@ -606,14 +631,25 @@ class NemoColEmbedAdapter(BaseAdapter):
606
631
  if self._normalize:
607
632
  embeddings = functional.normalize(embeddings, p=2, dim=-1)
608
633
 
609
- # Store results for this sub-batch (move to CPU immediately to free GPU memory)
634
+ # Store results for this sub-batch (move to CPU immediately to free GPU
635
+ # memory). Trim each item's left-padding rows before returning: the batch is
636
+ # left-padded, so padded positions are zeroed by the attention_mask above —
637
+ # but emitting them as zero vectors leaks 0-similarity rows into the late-
638
+ # interaction MaxSim (a 0-floor on every query token's max). Because the
639
+ # batcher pads inconsistently across docs, identical docs then score
640
+ # differently by batch and ranking is corrupted on variable-tile batches
641
+ # (#1163: Vidore3 Hr 0.6532 -> 0.5713). Keep only real tokens, matching the
642
+ # native forward_passages path (_unpack_embeddings drops zero rows likewise).
610
643
  for i in range(len(sub_batch_items)):
611
- emb = embeddings[i].float().cpu().numpy()
644
+ keep = batch["attention_mask"][i].bool()
645
+ emb = embeddings[i][keep].float().cpu().numpy()
612
646
  all_embeddings.append(emb)
613
647
 
614
- # Clear GPU memory between sub-batches
615
- del outputs, embeddings, batch
616
- torch.cuda.empty_cache()
648
+ # Free this sub-batch's GPU tensors. NOTE: no per-sub-batch
649
+ # torch.cuda.empty_cache() repeatedly releasing the allocator's cache and
650
+ # re-acquiring ~GB blocks fragments the pool and OOMs at scale on big GPUs
651
+ # (#1163). The sub-batch loop + immediate CPU offload already bound peak VRAM.
652
+ del outputs, embeddings, attention_mask, batch
617
653
 
618
654
  return EncodeOutput(
619
655
  multivector=all_embeddings,
@@ -189,7 +189,7 @@ class Owlv2Adapter(BaseAdapter):
189
189
 
190
190
  # Extract options once
191
191
  opts = options or {}
192
- score_threshold = opts.get("score_threshold", self._score_threshold)
192
+ score_threshold = opts.get("score_threshold", opts.get("threshold", self._score_threshold))
193
193
 
194
194
  # Build text queries once (shared across batch)
195
195
  # OWL-v2 format: list of prompts per image
@@ -19,6 +19,7 @@ HTTP connection, which SGLang treats as a cancel signal. A best-effort
19
19
  from __future__ import annotations
20
20
 
21
21
  import asyncio
22
+ import base64
22
23
  import contextlib
23
24
  import dataclasses
24
25
  import json
@@ -47,6 +48,7 @@ from sie_server.adapters._types import ERR_NOT_LOADED, ComputePrecision
47
48
  from sie_server.adapters.sglang import _server
48
49
  from sie_server.observability.metrics import GenerationStreamTimer
49
50
  from sie_server.types.grammar import GrammarSpec
51
+ from sie_server.types.inputs import ImageInput, media_bytes
50
52
 
51
53
  logger = logging.getLogger(__name__)
52
54
 
@@ -91,6 +93,47 @@ def _resolve_read_timeout() -> float | None:
91
93
  _GENERATE_READ_TIMEOUT_S: float | None = _resolve_read_timeout()
92
94
 
93
95
 
96
+ # Format hints we re-embed into the SGLang ``image_data`` MIME type. Anything
97
+ # else falls back to ``jpeg`` (the engine sniffs the real format from bytes).
98
+ _ALLOWED_IMAGE_FORMATS = frozenset({"png", "jpeg", "jpg", "webp", "gif"})
99
+
100
+
101
+ def _encode_image_data(images: list[ImageInput] | None) -> list[str] | None:
102
+ """Translate wire ``ImageInput`` entries into SGLang ``image_data`` URIs.
103
+
104
+ SGLang's ``/generate`` accepts a top-level ``image_data`` field — a list of
105
+ images, each as a base64 string, an ``http(s)`` URL, or a local file path.
106
+ We emit ``data:image/<fmt>;base64,<...>`` data URIs so the format hint
107
+ travels with the bytes and SGLang's image loader can decode without
108
+ sniffing. Bytes are validated through :func:`media_bytes`, the single
109
+ enforcement point for the wire contract (raises :class:`InvalidMediaError`
110
+ on a non-bytes ``data``, e.g. an un-decoded base64 JSON string).
111
+
112
+ Returns ``None`` when there are no images so the request body stays
113
+ byte-identical to the text-only path — vision plumbing is inert for the
114
+ text-only models that share this adapter.
115
+ """
116
+ if not images:
117
+ return None
118
+ encoded: list[str] = []
119
+ for image in images:
120
+ raw = media_bytes(image, kind="image")
121
+ fmt = (image.get("format") or "jpeg").strip().lower() or "jpeg"
122
+ # Clamp the client-controlled format hint to a known set before
123
+ # re-embedding it in the data-URI MIME type — an arbitrary subtype
124
+ # would produce a malformed URI for SGLang's loader. The engine
125
+ # sniffs the real format from the bytes regardless, so an unknown
126
+ # hint safely falls back to jpeg.
127
+ if fmt not in _ALLOWED_IMAGE_FORMATS:
128
+ fmt = "jpeg"
129
+ elif fmt == "jpg":
130
+ # ``image/jpg`` is not a registered MIME type; normalise to jpeg.
131
+ fmt = "jpeg"
132
+ b64 = base64.b64encode(raw).decode("ascii")
133
+ encoded.append(f"data:image/{fmt};base64,{b64}")
134
+ return encoded
135
+
136
+
94
137
  def _tail_file(path: str, *, max_lines: int = 200) -> str:
95
138
  """Return the final lines from a startup log for diagnostics."""
96
139
  try:
@@ -764,9 +807,17 @@ class SGLangGenerationAdapter(GenerationAdapter):
764
807
  best_of: int | None = None,
765
808
  stream: bool = False,
766
809
  lora_path: str | None = None,
810
+ images: list[ImageInput] | None = None,
767
811
  ) -> AsyncIterator[GenerationChunk]:
768
812
  self._check_loaded()
769
813
 
814
+ # Vision input: encode any images into SGLang's top-level ``image_data``
815
+ # field once, then attach to whichever request body we build below. The
816
+ # ``prompt`` is expected to already carry the model's image placeholder
817
+ # tokens (the chat template renders them worker-side). ``None`` when
818
+ # there are no images, keeping the text-only request body unchanged.
819
+ image_data = _encode_image_data(images)
820
+
770
821
  # Guard verdict thresholding only runs on the single-candidate (n=1)
771
822
  # path, so reject multi-candidate sampling up front — otherwise a guard
772
823
  # request with n>1 / best_of>1 would silently return an UN-thresholded
@@ -892,6 +943,8 @@ class SGLangGenerationAdapter(GenerationAdapter):
892
943
  }
893
944
  if lora_path:
894
945
  sbody["lora_path"] = lora_path
946
+ if image_data:
947
+ sbody["image_data"] = image_data
895
948
  if logprobs:
896
949
  sbody["return_logprob"] = True
897
950
  # Without this SGLang omits the decoded token TEXT from
@@ -1023,6 +1076,8 @@ class SGLangGenerationAdapter(GenerationAdapter):
1023
1076
  nbody: dict[str, Any] = {"text": prompt, "sampling_params": sp, "stream": False}
1024
1077
  if lora_path:
1025
1078
  nbody["lora_path"] = lora_path
1079
+ if image_data:
1080
+ nbody["image_data"] = image_data
1026
1081
  if logprobs or rank:
1027
1082
  nbody["return_logprob"] = True
1028
1083
  # Surface decoded token text (see streaming body below) so the
@@ -1125,6 +1180,8 @@ class SGLangGenerationAdapter(GenerationAdapter):
1125
1180
  # verified on L4). Empirically applies the adapter in-batch per request.
1126
1181
  if lora_path:
1127
1182
  body["lora_path"] = lora_path
1183
+ if image_data:
1184
+ body["image_data"] = image_data
1128
1185
  # OpenAI ``logprobs`` → SGLang ``return_logprob`` (top-level body
1129
1186
  # flag, not under sampling_params). ``top_logprobs`` →
1130
1187
  # ``top_logprobs_num``. SGLang surfaces them under
@@ -20,7 +20,7 @@ class AppStateConfig:
20
20
  """
21
21
 
22
22
  models_dir: Path | str | None = None
23
- """Path to models directory (local path, s3://, or gs://). If None, registry starts empty."""
23
+ """Path to models directory (local path, s3://, gs://, abfs://, or abfss://). If None, registry starts empty."""
24
24
 
25
25
  device: str = "cpu"
26
26
  """Device to load models on (e.g., "cuda:0", "cpu", "mps")."""
@@ -220,12 +220,18 @@ def serve(
220
220
  host: str = typer.Option("0.0.0.0", "--host", help="Host to bind to"), # noqa: S104 — intentional bind to all interfaces for server
221
221
  device: str = typer.Option("auto", "--device", "-d", help="Device to use (auto, cuda, mps, cpu)"),
222
222
  models_dir: str = typer.Option(
223
- DEFAULT_MODELS_DIR, "--models-dir", help="Models directory (local path, s3://, or gs://)"
223
+ DEFAULT_MODELS_DIR,
224
+ "--models-dir",
225
+ help="Models directory (local path, s3://, gs://, abfs://, or abfss://)",
224
226
  ),
225
227
  bundle: str | None = typer.Option(None, "--bundle", "-b", help="Bundle name to load (from bundles/ dir)"),
226
228
  models: str | None = typer.Option(None, "--models", "-m", help="Comma-separated model names to load"),
227
229
  local_cache: str | None = typer.Option(None, "--local-cache", help="Local cache directory (default: HF_HOME)"),
228
- cluster_cache: str | None = typer.Option(None, "--cluster-cache", help="Cluster cache URL (s3:// or gs://)"),
230
+ cluster_cache: str | None = typer.Option(
231
+ None,
232
+ "--cluster-cache",
233
+ help="Cluster cache URL (s3://, gs://, abfs://, or abfss://)",
234
+ ),
229
235
  hf_fallback: bool = typer.Option(True, "--hf-fallback/--no-hf-fallback", help="Enable HuggingFace Hub fallback"),
230
236
  reload: bool = typer.Option(default=False, help="Enable auto-reload for development"),
231
237
  tracing: bool = typer.Option(default=False, help="Enable OpenTelemetry tracing (exports to localhost:4317)"),
@@ -48,7 +48,7 @@ def load_model_configs(models_dir: Path | str) -> dict[str, ModelConfig]:
48
48
  """Load all model configs from a directory (local or cloud).
49
49
 
50
50
  Args:
51
- models_dir: Path to the models directory (local path, s3://, or gs://).
51
+ models_dir: Path to the models directory (local path, s3://, gs://, abfs://, or abfss://).
52
52
 
53
53
  Returns:
54
54
  Dictionary mapping model names to their ModelConfig objects.
@@ -141,7 +141,7 @@ def _expand_profile_variants(configs: dict[str, ModelConfig]) -> None:
141
141
 
142
142
 
143
143
  def _load_configs_from_cloud(models_dir: str) -> dict[str, ModelConfig]:
144
- """Load model configs from S3/GCS.
144
+ """Load model configs from cloud object storage.
145
145
 
146
146
  Discovers YAML files via LIST operation, downloads them to local cache, and parses them.
147
147
  Model configs are flat YAML files (e.g., gs://bucket/models/BAAI__bge-m3.yaml).
@@ -561,11 +561,16 @@ class ModelLoader:
561
561
  Returns:
562
562
  LoadedModel containing the loaded state.
563
563
  """
564
- # Get preprocessor from adapter - all adapters implement get_preprocessor()
565
- preprocessor = adapter.get_preprocessor()
566
-
567
- # Register the preprocessor based on its modality
568
- if preprocessor is not None:
564
+ # Get preprocessor(s) from adapter - all adapters implement get_preprocessor().
565
+ # Most return a single preprocessor; multi-modal adapters (e.g. NemoColEmbed v1,
566
+ # which needs a text preprocessor for queries AND an image preprocessor for
567
+ # documents) may return a list. Register each by its modality.
568
+ preprocessors = adapter.get_preprocessor()
569
+ if not isinstance(preprocessors, list):
570
+ preprocessors = [preprocessors]
571
+ for preprocessor in preprocessors:
572
+ if preprocessor is None:
573
+ continue
569
574
  modality = getattr(preprocessor, "modality", None)
570
575
  if modality == "text":
571
576
  self._preprocessor_registry._register(name, preprocessor)
@@ -82,7 +82,7 @@ class ModelRegistry:
82
82
  """Initialize the registry.
83
83
 
84
84
  Args:
85
- models_dir: Path to models directory (local path, s3://, or gs://).
85
+ models_dir: Path to models directory (local path, s3://, gs://, abfs://, or abfss://).
86
86
  If None, registry starts empty and configs must be added manually.
87
87
  memory_config: Configuration for memory management. If None, uses defaults.
88
88
  drain_timeout_s: Timeout in seconds to wait for worker drain before unload.
@@ -1396,7 +1396,7 @@ class ModelRegistry:
1396
1396
  logger.debug("No models_dir, skipping hot reload")
1397
1397
  return
1398
1398
 
1399
- # Don't watch cloud URLs (s3://, gs://)
1399
+ # Don't watch cloud URLs (s3://, gs://, abfs(s)://)
1400
1400
  if is_cloud_path(self._models_dir):
1401
1401
  logger.debug("Cloud models_dir, skipping hot reload (not supported)")
1402
1402
  return