sie-server 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (436)
  1. {sie_server-0.4.2 → sie_server-0.5.0}/PKG-INFO +1 -1
  2. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-4B-Instruct-2507.yaml +22 -6
  3. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3.5-4B.yaml +5 -0
  4. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3.6-27B.yaml +17 -0
  5. sie_server-0.5.0/models/defog__sqlcoder-7b-2.yaml +70 -0
  6. sie_server-0.5.0/models/ibm-granite__granite-guardian-3.0-2b.yaml +93 -0
  7. {sie_server-0.4.2 → sie_server-0.5.0}/openapi.json +46 -1
  8. {sie_server-0.4.2 → sie_server-0.5.0}/pyproject.toml +1 -1
  9. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/florence2/__init__.py +10 -14
  10. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/sglang/generation.py +228 -8
  11. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/models.py +52 -0
  12. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/config/model.py +11 -0
  13. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/preprocessor/vision.py +80 -4
  14. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_florence2.py +85 -4
  15. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_sglang_generation.py +402 -0
  16. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_models.py +96 -0
  17. {sie_server-0.4.2 → sie_server-0.5.0}/tests/config/test_profile_backend_consistency.py +72 -0
  18. {sie_server-0.4.2 → sie_server-0.5.0}/.gitignore +0 -0
  19. {sie_server-0.4.2 → sie_server-0.5.0}/CONTRIBUTING.md +0 -0
  20. {sie_server-0.4.2 → sie_server-0.5.0}/Dockerfile.cpu +0 -0
  21. {sie_server-0.4.2 → sie_server-0.5.0}/Dockerfile.cuda12 +0 -0
  22. {sie_server-0.4.2 → sie_server-0.5.0}/LICENSE +0 -0
  23. {sie_server-0.4.2 → sie_server-0.5.0}/README.md +0 -0
  24. {sie_server-0.4.2 → sie_server-0.5.0}/bundles/default.yaml +0 -0
  25. {sie_server-0.4.2 → sie_server-0.5.0}/bundles/sglang-embedding.yaml +0 -0
  26. {sie_server-0.4.2 → sie_server-0.5.0}/bundles/sglang.yaml +0 -0
  27. {sie_server-0.4.2 → sie_server-0.5.0}/bundles/transformers5.yaml +0 -0
  28. {sie_server-0.4.2 → sie_server-0.5.0}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  29. {sie_server-0.4.2 → sie_server-0.5.0}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  30. {sie_server-0.4.2 → sie_server-0.5.0}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  31. {sie_server-0.4.2 → sie_server-0.5.0}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  32. {sie_server-0.4.2 → sie_server-0.5.0}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  33. {sie_server-0.4.2 → sie_server-0.5.0}/models/BAAI__bge-m3.yaml +0 -0
  34. {sie_server-0.4.2 → sie_server-0.5.0}/models/BAAI__bge-reranker-base.yaml +0 -0
  35. {sie_server-0.4.2 → sie_server-0.5.0}/models/BAAI__bge-reranker-large.yaml +0 -0
  36. {sie_server-0.4.2 → sie_server-0.5.0}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  37. {sie_server-0.4.2 → sie_server-0.5.0}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  38. {sie_server-0.4.2 → sie_server-0.5.0}/models/GritLM__GritLM-7B.yaml +0 -0
  39. {sie_server-0.4.2 → sie_server-0.5.0}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  40. {sie_server-0.4.2 → sie_server-0.5.0}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  41. {sie_server-0.4.2 → sie_server-0.5.0}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  42. {sie_server-0.4.2 → sie_server-0.5.0}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  43. {sie_server-0.4.2 → sie_server-0.5.0}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
  44. {sie_server-0.4.2 → sie_server-0.5.0}/models/Marqo__marqo-fashionSigLIP.yaml +0 -0
  45. {sie_server-0.4.2 → sie_server-0.5.0}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  46. {sie_server-0.4.2 → sie_server-0.5.0}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  47. {sie_server-0.4.2 → sie_server-0.5.0}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  48. {sie_server-0.4.2 → sie_server-0.5.0}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  49. {sie_server-0.4.2 → sie_server-0.5.0}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  50. {sie_server-0.4.2 → sie_server-0.5.0}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
  51. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-0.6B.yaml +0 -0
  52. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  53. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  54. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
  55. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
  56. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
  57. {sie_server-0.4.2 → sie_server-0.5.0}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
  58. {sie_server-0.4.2 → sie_server-0.5.0}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  59. {sie_server-0.4.2 → sie_server-0.5.0}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  60. {sie_server-0.4.2 → sie_server-0.5.0}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  61. {sie_server-0.4.2 → sie_server-0.5.0}/models/TomoroAI__tomoro-colqwen3-embed-4b.yaml +0 -0
  62. {sie_server-0.4.2 → sie_server-0.5.0}/models/answerdotai__ModernBERT-base.yaml +0 -0
  63. {sie_server-0.4.2 → sie_server-0.5.0}/models/answerdotai__answerai-colbert-small-v1.yaml +0 -0
  64. {sie_server-0.4.2 → sie_server-0.5.0}/models/colbert-ir__colbertv2.0.yaml +0 -0
  65. {sie_server-0.4.2 → sie_server-0.5.0}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  66. {sie_server-0.4.2 → sie_server-0.5.0}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  67. {sie_server-0.4.2 → sie_server-0.5.0}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  68. {sie_server-0.4.2 → sie_server-0.5.0}/models/docling.yaml +0 -0
  69. {sie_server-0.4.2 → sie_server-0.5.0}/models/fastino__gliner2-base-v1.yaml +0 -0
  70. {sie_server-0.4.2 → sie_server-0.5.0}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  71. {sie_server-0.4.2 → sie_server-0.5.0}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  72. {sie_server-0.4.2 → sie_server-0.5.0}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  73. {sie_server-0.4.2 → sie_server-0.5.0}/models/google__embeddinggemma-300m.yaml +0 -0
  74. {sie_server-0.4.2 → sie_server-0.5.0}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  75. {sie_server-0.4.2 → sie_server-0.5.0}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  76. {sie_server-0.4.2 → sie_server-0.5.0}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  77. {sie_server-0.4.2 → sie_server-0.5.0}/models/google__siglip2-base-patch16-224.yaml +0 -0
  78. {sie_server-0.4.2 → sie_server-0.5.0}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  79. {sie_server-0.4.2 → sie_server-0.5.0}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  80. {sie_server-0.4.2 → sie_server-0.5.0}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  81. {sie_server-0.4.2 → sie_server-0.5.0}/models/intfloat__e5-base-v2.yaml +0 -0
  82. {sie_server-0.4.2 → sie_server-0.5.0}/models/intfloat__e5-large-v2.yaml +0 -0
  83. {sie_server-0.4.2 → sie_server-0.5.0}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  84. {sie_server-0.4.2 → sie_server-0.5.0}/models/intfloat__e5-small-v2.yaml +0 -0
  85. {sie_server-0.4.2 → sie_server-0.5.0}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  86. {sie_server-0.4.2 → sie_server-0.5.0}/models/intfloat__multilingual-e5-large.yaml +0 -0
  87. {sie_server-0.4.2 → sie_server-0.5.0}/models/jackboyla__glirel-large-v0.yaml +0 -0
  88. {sie_server-0.4.2 → sie_server-0.5.0}/models/jinaai__jina-colbert-v2.yaml +0 -0
  89. {sie_server-0.4.2 → sie_server-0.5.0}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
  90. {sie_server-0.4.2 → sie_server-0.5.0}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
  91. {sie_server-0.4.2 → sie_server-0.5.0}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
  92. {sie_server-0.4.2 → sie_server-0.5.0}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
  93. {sie_server-0.4.2 → sie_server-0.5.0}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
  94. {sie_server-0.4.2 → sie_server-0.5.0}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
  95. {sie_server-0.4.2 → sie_server-0.5.0}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
  96. {sie_server-0.4.2 → sie_server-0.5.0}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  97. {sie_server-0.4.2 → sie_server-0.5.0}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  98. {sie_server-0.4.2 → sie_server-0.5.0}/models/lightonai__GTE-ModernColBERT-v1.yaml +0 -0
  99. {sie_server-0.4.2 → sie_server-0.5.0}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  100. {sie_server-0.4.2 → sie_server-0.5.0}/models/lightonai__Reason-ModernColBERT.yaml +0 -0
  101. {sie_server-0.4.2 → sie_server-0.5.0}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  102. {sie_server-0.4.2 → sie_server-0.5.0}/models/microsoft__Florence-2-base.yaml +0 -0
  103. {sie_server-0.4.2 → sie_server-0.5.0}/models/microsoft__Florence-2-large.yaml +0 -0
  104. {sie_server-0.4.2 → sie_server-0.5.0}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +0 -0
  105. {sie_server-0.4.2 → sie_server-0.5.0}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +0 -0
  106. {sie_server-0.4.2 → sie_server-0.5.0}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  107. {sie_server-0.4.2 → sie_server-0.5.0}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  108. {sie_server-0.4.2 → sie_server-0.5.0}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  109. {sie_server-0.4.2 → sie_server-0.5.0}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  110. {sie_server-0.4.2 → sie_server-0.5.0}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  111. {sie_server-0.4.2 → sie_server-0.5.0}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  112. {sie_server-0.4.2 → sie_server-0.5.0}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  113. {sie_server-0.4.2 → sie_server-0.5.0}/models/naver__splade-v3.yaml +0 -0
  114. {sie_server-0.4.2 → sie_server-0.5.0}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  115. {sie_server-0.4.2 → sie_server-0.5.0}/models/numind__NuNER_Zero-span.yaml +0 -0
  116. {sie_server-0.4.2 → sie_server-0.5.0}/models/numind__NuNER_Zero.yaml +0 -0
  117. {sie_server-0.4.2 → sie_server-0.5.0}/models/nvidia__NV-Embed-v2.yaml +0 -0
  118. {sie_server-0.4.2 → sie_server-0.5.0}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
  119. {sie_server-0.4.2 → sie_server-0.5.0}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  120. {sie_server-0.4.2 → sie_server-0.5.0}/models/nvidia__nemotron-colembed-vl-4b-v2.yaml +0 -0
  121. {sie_server-0.4.2 → sie_server-0.5.0}/models/openai__clip-vit-base-patch32.yaml +0 -0
  122. {sie_server-0.4.2 → sie_server-0.5.0}/models/openai__clip-vit-large-patch14.yaml +0 -0
  123. {sie_server-0.4.2 → sie_server-0.5.0}/models/opendatalab__MinerU2.5-Pro-2604-1.2B.yaml +0 -0
  124. {sie_server-0.4.2 → sie_server-0.5.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  125. {sie_server-0.4.2 → sie_server-0.5.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  126. {sie_server-0.4.2 → sie_server-0.5.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  127. {sie_server-0.4.2 → sie_server-0.5.0}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  128. {sie_server-0.4.2 → sie_server-0.5.0}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  129. {sie_server-0.4.2 → sie_server-0.5.0}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  130. {sie_server-0.4.2 → sie_server-0.5.0}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  131. {sie_server-0.4.2 → sie_server-0.5.0}/models/rasyosef__splade-mini.yaml +0 -0
  132. {sie_server-0.4.2 → sie_server-0.5.0}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  133. {sie_server-0.4.2 → sie_server-0.5.0}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
  134. {sie_server-0.4.2 → sie_server-0.5.0}/models/urchade__gliner_large-v2.1.yaml +0 -0
  135. {sie_server-0.4.2 → sie_server-0.5.0}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  136. {sie_server-0.4.2 → sie_server-0.5.0}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  137. {sie_server-0.4.2 → sie_server-0.5.0}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  138. {sie_server-0.4.2 → sie_server-0.5.0}/models/urchade__gliner_small-v2.1.yaml +0 -0
  139. {sie_server-0.4.2 → sie_server-0.5.0}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  140. {sie_server-0.4.2 → sie_server-0.5.0}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  141. {sie_server-0.4.2 → sie_server-0.5.0}/models/zai-org__GLM-OCR.yaml +0 -0
  142. {sie_server-0.4.2 → sie_server-0.5.0}/scripts/generate_tokenize_fixture.py +0 -0
  143. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/__init__.py +0 -0
  144. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/_ipc_test_harness.py +0 -0
  145. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapter_call_loop.py +0 -0
  146. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/__init__.py +0 -0
  147. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/_base_adapter.py +0 -0
  148. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/_flash_base.py +0 -0
  149. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/_generation_base.py +0 -0
  150. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/_spec.py +0 -0
  151. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/_types.py +0 -0
  152. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/_utils.py +0 -0
  153. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/base.py +0 -0
  154. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/bert_flash/__init__.py +0 -0
  155. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
  156. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
  157. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
  158. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
  159. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
  160. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/clip/__init__.py +0 -0
  161. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/colbert/__init__.py +0 -0
  162. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  163. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  164. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/colpali/__init__.py +0 -0
  165. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  166. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/colqwen3/__init__.py +0 -0
  167. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  168. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/docling/__init__.py +0 -0
  169. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/donut/__init__.py +0 -0
  170. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/errors.py +0 -0
  171. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/gliclass/__init__.py +0 -0
  172. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/gliner/__init__.py +0 -0
  173. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/gliner2/__init__.py +0 -0
  174. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
  175. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/glirel/__init__.py +0 -0
  176. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
  177. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  178. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
  179. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
  180. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  181. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/mineru_vl/__init__.py +0 -0
  182. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/modernbert_flash/__init__.py +0 -0
  183. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
  184. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
  185. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  186. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
  187. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
  188. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/owlv2/__init__.py +0 -0
  189. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/paddleocr_vl/__init__.py +0 -0
  190. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  191. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  192. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
  193. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
  194. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
  195. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
  196. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
  197. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/sentence_transformer/__init__.py +0 -0
  198. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/sglang/__init__.py +0 -0
  199. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/sglang/_server.py +0 -0
  200. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/sglang/embedding.py +0 -0
  201. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/siglip/__init__.py +0 -0
  202. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
  203. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
  204. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
  205. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/__init__.py +0 -0
  206. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/encode.py +0 -0
  207. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/extract.py +0 -0
  208. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/generate.py +0 -0
  209. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/health.py +0 -0
  210. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/helpers.py +0 -0
  211. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/metrics.py +0 -0
  212. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/openai_compat.py +0 -0
  213. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/openapi.py +0 -0
  214. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/options.py +0 -0
  215. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/root.py +0 -0
  216. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/score.py +0 -0
  217. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/serialization.py +0 -0
  218. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/validation.py +0 -0
  219. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/api/ws.py +0 -0
  220. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/app/__init__.py +0 -0
  221. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/app/app_factory.py +0 -0
  222. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/app/app_state_config.py +0 -0
  223. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/cli.py +0 -0
  224. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/config/__init__.py +0 -0
  225. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/config/engine.py +0 -0
  226. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/__init__.py +0 -0
  227. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/adaptive_batching.py +0 -0
  228. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/batcher.py +0 -0
  229. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/deps.py +0 -0
  230. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/disk_cache.py +0 -0
  231. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/encode_pipeline.py +0 -0
  232. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/extract_cost.py +0 -0
  233. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/gpu_health.py +0 -0
  234. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/hf_env.py +0 -0
  235. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/hot_reload.py +0 -0
  236. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/inference.py +0 -0
  237. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/inference_output.py +0 -0
  238. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/load_errors.py +0 -0
  239. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/loader.py +0 -0
  240. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/logging.py +0 -0
  241. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/memory.py +0 -0
  242. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/model_loader.py +0 -0
  243. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/oom.py +0 -0
  244. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/pool_isolation.py +0 -0
  245. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/postprocessor.py +0 -0
  246. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/postprocessor_registry.py +0 -0
  247. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/prepared.py +0 -0
  248. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/preprocessor/__init__.py +0 -0
  249. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/preprocessor/base.py +0 -0
  250. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/preprocessor/image.py +0 -0
  251. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/preprocessor/text.py +0 -0
  252. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/preprocessor_registry.py +0 -0
  253. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/readiness.py +0 -0
  254. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/registry.py +0 -0
  255. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/shutdown.py +0 -0
  256. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/text_tokens.py +0 -0
  257. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/timing.py +0 -0
  258. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/tokenizer.py +0 -0
  259. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/watcher.py +0 -0
  260. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/__init__.py +0 -0
  261. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  262. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/handlers/base.py +0 -0
  263. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/handlers/encode.py +0 -0
  264. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/handlers/extract.py +0 -0
  265. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/handlers/score.py +0 -0
  266. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/model_worker.py +0 -0
  267. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/oom_recovery.py +0 -0
  268. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/core/worker/types.py +0 -0
  269. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/health/__init__.py +0 -0
  270. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/health/nats_publisher.py +0 -0
  271. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/health/saturation.py +0 -0
  272. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/ipc_server.py +0 -0
  273. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/ipc_types.py +0 -0
  274. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/main.py +0 -0
  275. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/observability/__init__.py +0 -0
  276. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/observability/gpu.py +0 -0
  277. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/observability/metrics.py +0 -0
  278. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/observability/prometheus.py +0 -0
  279. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/observability/telemetry.py +0 -0
  280. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/observability/tracing.py +0 -0
  281. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/__init__.py +0 -0
  282. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/admission.py +0 -0
  283. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/base.py +0 -0
  284. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/grammar_cache.py +0 -0
  285. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/grammar_compile.py +0 -0
  286. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/streaming.py +0 -0
  287. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/tool_call_grammar.py +0 -0
  288. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/tool_call_parser.py +0 -0
  289. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/processors/work_class_scheduler.py +0 -0
  290. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/queue_executor.py +0 -0
  291. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/static/__init__.py +0 -0
  292. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/static/index.html +0 -0
  293. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/__init__.py +0 -0
  294. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/grammar.py +0 -0
  295. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/inputs.py +0 -0
  296. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/openapi.py +0 -0
  297. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/outputs.py +0 -0
  298. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/overflow_policy.py +0 -0
  299. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/requests.py +0 -0
  300. {sie_server-0.4.2 → sie_server-0.5.0}/src/sie_server/types/responses.py +0 -0
  301. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/__init__.py +0 -0
  302. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_base.py +0 -0
  303. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_bge_m3.py +0 -0
  304. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_bge_m3_flash.py +0 -0
  305. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_clip.py +0 -0
  306. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_colbert.py +0 -0
  307. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_docling.py +0 -0
  308. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_docling_smoke.py +0 -0
  309. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_donut.py +0 -0
  310. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_factory_integration.py +0 -0
  311. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_flash_base.py +0 -0
  312. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_gliclass_overflow_policy.py +0 -0
  313. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_glirel.py +0 -0
  314. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_glm_ocr.py +0 -0
  315. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_grounding_dino.py +0 -0
  316. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_gte_sparse.py +0 -0
  317. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
  318. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_lighton_ocr.py +0 -0
  319. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_lora.py +0 -0
  320. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_lora_integration.py +0 -0
  321. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_mineru_vl.py +0 -0
  322. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_paddleocr_vl.py +0 -0
  323. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_pytorch_embedding_revision.py +0 -0
  324. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_runtime_options.py +0 -0
  325. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_sentence_transformer.py +0 -0
  326. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_sglang.py +0 -0
  327. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_siglip.py +0 -0
  328. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_sparse_aggregation.py +0 -0
  329. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_stablebridge_integration.py +0 -0
  330. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_stablebridge_pruner.py +0 -0
  331. {sie_server-0.4.2 → sie_server-0.5.0}/tests/adapters/test_visual_document.py +0 -0
  332. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/__init__.py +0 -0
  333. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_encode_dtype.py +0 -0
  334. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_encode_endpoint.py +0 -0
  335. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_encode_json_schema.py +0 -0
  336. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_encode_timing.py +0 -0
  337. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_encode_validation.py +0 -0
  338. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_extract.py +0 -0
  339. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_extract_integration.py +0 -0
  340. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_extract_oom.py +0 -0
  341. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_generate.py +0 -0
  342. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_health.py +0 -0
  343. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_openai_compat.py +0 -0
  344. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_score.py +0 -0
  345. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_version_header.py +0 -0
  346. {sie_server-0.4.2 → sie_server-0.5.0}/tests/api/test_ws.py +0 -0
  347. {sie_server-0.4.2 → sie_server-0.5.0}/tests/app/__init__.py +0 -0
  348. {sie_server-0.4.2 → sie_server-0.5.0}/tests/app/test_app_factory.py +0 -0
  349. {sie_server-0.4.2 → sie_server-0.5.0}/tests/config/__init__.py +0 -0
  350. {sie_server-0.4.2 → sie_server-0.5.0}/tests/config/test_bundle_coverage.py +0 -0
  351. {sie_server-0.4.2 → sie_server-0.5.0}/tests/config/test_config.py +0 -0
  352. {sie_server-0.4.2 → sie_server-0.5.0}/tests/config/test_model_prewarm_grammars.py +0 -0
  353. {sie_server-0.4.2 → sie_server-0.5.0}/tests/conftest.py +0 -0
  354. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/__init__.py +0 -0
  355. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_adaptive_batching.py +0 -0
  356. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_batcher.py +0 -0
  357. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_disk_cache.py +0 -0
  358. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_gpu_health.py +0 -0
  359. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_hot_reload.py +0 -0
  360. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_idle_evict.py +0 -0
  361. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_inference.py +0 -0
  362. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_loader.py +0 -0
  363. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_logging.py +0 -0
  364. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_lora_generation_exclusion.py +0 -0
  365. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_memory.py +0 -0
  366. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_model_load_timeout.py +0 -0
  367. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_oom_detection.py +0 -0
  368. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_pool_isolation.py +0 -0
  369. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_postprocessor.py +0 -0
  370. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_postprocessor_registry.py +0 -0
  371. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_prepared.py +0 -0
  372. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_preprocessor.py +0 -0
  373. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_preprocessor_registry.py +0 -0
  374. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_quantization.py +0 -0
  375. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_readiness.py +0 -0
  376. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_registry_async.py +0 -0
  377. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_registry_core.py +0 -0
  378. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_registry_deps.py +0 -0
  379. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_registry_failed_state.py +0 -0
  380. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_registry_memory.py +0 -0
  381. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_registry_multi_model.py +0 -0
  382. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_shutdown.py +0 -0
  383. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_timing.py +0 -0
  384. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_watcher.py +0 -0
  385. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_backpressure.py +0 -0
  386. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_core.py +0 -0
  387. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_extract.py +0 -0
  388. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_lora.py +0 -0
  389. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_options.py +0 -0
  390. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_passthrough.py +0 -0
  391. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/test_worker_score.py +0 -0
  392. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/worker/__init__.py +0 -0
  393. {sie_server-0.4.2 → sie_server-0.5.0}/tests/core/worker/test_oom_recovery.py +0 -0
  394. {sie_server-0.4.2 → sie_server-0.5.0}/tests/health/__init__.py +0 -0
  395. {sie_server-0.4.2 → sie_server-0.5.0}/tests/health/test_nats_publisher.py +0 -0
  396. {sie_server-0.4.2 → sie_server-0.5.0}/tests/health/test_saturation.py +0 -0
  397. {sie_server-0.4.2 → sie_server-0.5.0}/tests/health/test_worker_id_consistency.py +0 -0
  398. {sie_server-0.4.2 → sie_server-0.5.0}/tests/integration/__init__.py +0 -0
  399. {sie_server-0.4.2 → sie_server-0.5.0}/tests/integration/test_chat_completions.py +0 -0
  400. {sie_server-0.4.2 → sie_server-0.5.0}/tests/integration/test_grammar_generate.py +0 -0
  401. {sie_server-0.4.2 → sie_server-0.5.0}/tests/observability/__init__.py +0 -0
  402. {sie_server-0.4.2 → sie_server-0.5.0}/tests/observability/test_generation_metrics.py +0 -0
  403. {sie_server-0.4.2 → sie_server-0.5.0}/tests/observability/test_metrics.py +0 -0
  404. {sie_server-0.4.2 → sie_server-0.5.0}/tests/observability/test_telemetry.py +0 -0
  405. {sie_server-0.4.2 → sie_server-0.5.0}/tests/observability/test_trace_propagation.py +0 -0
  406. {sie_server-0.4.2 → sie_server-0.5.0}/tests/observability/test_tracing.py +0 -0
  407. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/__init__.py +0 -0
  408. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_grammar_cache.py +0 -0
  409. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_grammar_compile.py +0 -0
  410. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_grammar_prewarm.py +0 -0
  411. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_streaming.py +0 -0
  412. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_streaming_admission.py +0 -0
  413. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_streaming_integration.py +0 -0
  414. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_tool_call_grammar.py +0 -0
  415. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_tool_call_parser.py +0 -0
  416. {sie_server-0.4.2 → sie_server-0.5.0}/tests/processors/test_work_class_scheduler.py +0 -0
  417. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_adapter_call_loop.py +0 -0
  418. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_all_models.py +0 -0
  419. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_docker_integration.py +0 -0
  420. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_ipc_server.py +0 -0
  421. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_ipc_types_raw_output.py +0 -0
  422. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_model_yaml_filenames.py +0 -0
  423. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_openapi_export.py +0 -0
  424. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_parity_run_batch.py +0 -0
  425. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_queue_executor.py +0 -0
  426. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_queue_executor_stage1d.py +0 -0
  427. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_readiness.py +0 -0
  428. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_sdk_integration.py +0 -0
  429. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_server_smoke.py +0 -0
  430. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_sparse_integration.py +0 -0
  431. {sie_server-0.4.2 → sie_server-0.5.0}/tests/test_stage1d_byte_identity.py +0 -0
  432. {sie_server-0.4.2 → sie_server-0.5.0}/tests/type_defs/__init__.py +0 -0
  433. {sie_server-0.4.2 → sie_server-0.5.0}/tests/type_defs/test_inputs.py +0 -0
  434. {sie_server-0.4.2 → sie_server-0.5.0}/tests/type_defs/test_inputs_json_decode.py +0 -0
  435. {sie_server-0.4.2 → sie_server-0.5.0}/tests/type_defs/test_media_bytes.py +0 -0
  436. {sie_server-0.4.2 → sie_server-0.5.0}/tests/type_defs/test_types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-server
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Search Inference Engine - GPU inference server for search workloads
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -14,12 +14,18 @@ tasks:
14
14
  context_length: 32768
15
15
  max_output_tokens: 4096
16
16
  capabilities:
17
- # Outlines-backed JSON Schema, regex, and EBNF grammars are
18
- # all supported by the SGLang adapter (Outlines and XGrammar
19
- # both accept EBNF natively). The gateway gates requests on
20
- # this exact list — adding a new ``grammar.kind`` variant
21
- # requires both the gateway parser and this list to be updated.
22
- grammar: ["json_schema", "regex", "ebnf"]
17
+ # Same constraint as Qwen3.5-4B / Qwen3.6-27B: this profile runs the
18
+ # default ``grammar_backend: outlines``, and SGLang's outlines_backend
19
+ # does NOT implement ebnf (it logs ``Skip unsupported key_type='ebnf'``
20
+ # then fails the compile — confirmed on A100 smoke 2026-05-26). Only
21
+ # advertise ``"ebnf"`` here once a profile pins an EBNF-capable backend
22
+ # (``grammar_backend: xgrammar``) AND a via-SIE EBNF smoke passes; the
23
+ # gateway gates requests on this exact list, so advertising ebnf without
24
+ # an ebnf-capable backend admits requests the worker then fails to serve.
25
+ # The ``test_advertised_ebnf_requires_capable_backend`` consistency test
26
+ # guards this invariant. Adding a new ``grammar.kind`` variant requires
27
+ # both the gateway parser and this list to be updated.
28
+ grammar: ["json_schema", "regex"]
23
29
  streaming: true
24
30
  # Qwen3-4B-Instruct's chat template emits OpenAI-compatible
25
31
  # ``<tool_call>{...}</tool_call>`` blocks when ``tools`` is
@@ -28,6 +34,16 @@ tasks:
28
34
  # them on ``delta.tool_calls`` for SSE and on
29
35
  # ``message.tool_calls`` for non-streaming requests.
30
36
  tools: true
37
+ # Validated for code generation — MEASURED HumanEval 0.866 / MBPP 0.74 on
38
+ # Modal A100 (see benchmarks/generation/.../code/measured_baseline.json).
39
+ # Backs the model="code" alias. Informational (not request-gated).
40
+ code: true
41
+ # Backs the model="sql" alias. NOTE: this profile runs the outlines
42
+ # backend (no ebnf), so SQL output here relies on the model's native
43
+ # text-to-SQL ability, NOT an EBNF grammar constraint. Re-add ``"ebnf"``
44
+ # to ``grammar`` above (behind an xgrammar profile + via-SIE EBNF smoke)
45
+ # to enable grammar-constrained SQL. Informational (not request-gated).
46
+ sql: true
31
47
  # Forwarded verbatim to ``tokenizer.apply_chat_template(**kwargs)`` when
32
48
  # the worker renders an OpenAI-shaped ``messages`` request.
33
49
  # Qwen3's chat template emits a ``<think>``/``</think>`` reasoning block
@@ -29,6 +29,11 @@ tasks:
29
29
  grammar: ["json_schema", "regex"]
30
30
  streaming: true
31
31
  tools: true
32
+ # NOTE: not advertising code:true here yet — Qwen3.5-4B is the strongest
33
+ # model on paper but its NEXTN/hybrid serving path does not come up
34
+ # reliably on the current eval image, so it has no measured HumanEval/MBPP
35
+ # baseline. The ``model="code"`` alias points to the measured
36
+ # Qwen3-4B-Instruct-2507; promote this once 3.5-4B is measured.
32
37
  # Grammar backend: ``outlines`` (set per-profile under
33
38
  # ``adapter_options.loadtime``). Earlier revisions forced ``xgrammar``
34
39
  # here because the worker-side Outlines preflight (``compile_outlines``)
@@ -34,6 +34,23 @@ tasks:
34
34
  grammar: ["json_schema", "regex"]
35
35
  streaming: true
36
36
  tools: true
37
+ # Validated on code + text-to-SQL — MEASURED on Modal H100 (bf16 no-spec,
38
+ # greedy+min_tokens=10; accuracy is GPU-invariant + FP8≈BF16 per ADR
39
+ # 0001): HumanEval 0.933, MBPP 0.81 (beats the 4B code default 0.866/0.74),
40
+ # Spider exec-acc 0.693. See code_sql_tools/measured_baseline.json.
41
+ # Informational (not request-gated); surfaces the high-end code/SQL option.
42
+ code: true
43
+ # CAVEAT: this flag is precision-agnostic, but FP8 weight quant regresses
44
+ # SQL ~13pts (Spider 0.71 BF16 -> 0.58 FP8, same-subset control; see the
45
+ # ADR + baseline). Code/tools/MC are FP8-safe. The default rtx-pro-6000
46
+ # profile is FP8, so route SQL-critical traffic to the BF16+NEXTN variant
47
+ # (the 96GB card fits it). The `:profile` variant suffix can't route by
48
+ # precision (it's stripped before worker dispatch), so deploy the BF16
49
+ # variant under a distinct bundle and point the `sql` job alias at it via
50
+ # a bundle-qualified target, e.g.
51
+ # SIE_GATEWAY_MODEL_ALIASES={"sql":"<bf16-bundle>:/Qwen/Qwen3.6-27B"}
52
+ # (gateway resolve_model_spec_with_aliases; see ADR 0001).
53
+ sql: true
37
54
  # Qwen3.6 emits ``<think>...</think>`` reasoning by default. We
38
55
  # disable it for the OpenAI-compat path so visible output is the
39
56
  # answer only. Operators wanting CoT can flip this profile-side.
@@ -0,0 +1,70 @@
1
+ sie_id: defog/sqlcoder-7b-2
2
+ hf_id: defog/sqlcoder-7b-2
3
+ # SQLCoder-7B-2 (Defog) — a CodeLlama-7B base fine-tuned for text-to-SQL.
4
+ # Served via the existing SGLang generation adapter (LlamaForCausalLM is native
5
+ # to SGLang); onboarding is config-only.
6
+ #
7
+ # CAVEATS (read before pointing model="sql" at this):
8
+ # * COMPLETION model, not chat-tuned. It expects its verbatim template
9
+ # (``### Task ... ### Database Schema ... ### Answer ...[SQL]``) via the
10
+ # completions path — a chat-completions wrapper will underperform. The
11
+ # model="sql" alias currently targets Qwen3-4B-Instruct (chat + grammar
12
+ # path); repoint here only once the SQLCoder prompt template + completions
13
+ # rendering are wired and measured on Spider.
14
+ # * dtype float16 (CodeLlama base; the card pins fp16).
15
+ # * License CC-BY-SA-4.0 (share-alike copyleft) — clear distribution/bundle
16
+ # use before shipping.
17
+ # * Card recommends num_beams=4 (beam search); SGLang serves greedy, so
18
+ # expect a small delta vs the published SQL-Eval numbers.
19
+ # * MEASURED in SIE (Modal A100): serves via the sglang.generation adapter.
20
+ # The native ### Task...[SQL] completions template is ESSENTIAL — generic
21
+ # chat prompt scored 0.025, the native template (spider_sqlcoder task,
22
+ # --mode completions) scored 0.467 (140/300, greedy) on 2026-06-03. But that
23
+ # still LOSES to general Qwen3-4B-Instruct at 0.70 on Spider exec-acc, so the
24
+ # model="sql" alias STAYS on Qwen3-4B-Instruct. (The card recommends
25
+ # num_beams=4; SGLang serves greedy, so 0.467 is a lower bound, but beam
26
+ # search is a serving change and would still likely trail.) See
27
+ # benchmarks/generation/Qwen__Qwen3-4B-Instruct-2507/sql/measured_baseline.json.
28
+ inputs:
29
+ text: true
30
+ image: false
31
+ audio: false
32
+ video: false
33
+ tasks:
34
+ generate:
35
+ # Base model supports 16384; 8192 comfortably fits a Spider schema + question.
36
+ context_length: 8192
37
+ max_output_tokens: 512
38
+ capabilities:
39
+ # Grammar-constrained SQL (the "any LLM + grammar" path) is plausible via
40
+ # the SGLang outlines/xgrammar backend but unverified for this model, so
41
+ # nothing is advertised here yet.
42
+ grammar: []
43
+ streaming: true
44
+ tools: false
45
+ # NOT advertising sql:true. The capability flag would expose this model as
46
+ # SQL-ready to capability-based SDK/UI selection, but the currently-wired
47
+ # served path (generic chat) scores only 0.025 on Spider, and even with its
48
+ # native completions template (0.467) it loses to Qwen3-4B-Instruct (0.70 —
49
+ # the actual model="sql" target). Flip on only once SQLCoder beats the
50
+ # incumbent on a wired, measured path.
51
+ profiles:
52
+ default:
53
+ # Generation does not batch at the SIE layer (SGLang batches internally) but
54
+ # the validator requires the field.
55
+ max_batch_tokens: 16384
56
+ compute_precision: float16
57
+ adapter_path: sie_server.adapters.sglang.generation:SGLangGenerationAdapter
58
+ kv_budget_tokens: 32768
59
+ adapter_options:
60
+ loadtime:
61
+ mem_fraction_static: 0.85
62
+ served_model_name: defog/sqlcoder-7b-2
63
+ runtime:
64
+ first_chunk_timeout_s: 30
65
+ inter_chunk_timeout_s: 10
66
+ overall_timeout_s: 300
67
+ # Deterministic decode for text-to-SQL (the card uses do_sample=False).
68
+ default_sampling:
69
+ temperature: 0.0
70
+ top_p: 1.0
@@ -0,0 +1,93 @@
1
+ sie_id: ibm-granite/granite-guardian-3.0-2b
2
+ hf_id: ibm-granite/granite-guardian-3.0-2b
3
+ inputs:
4
+ text: true
5
+ image: false
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ # CHECK POLICY job: a generative guard model. It takes a conversation (or a
10
+ # single user message) and *generates* a one-token verdict — "Yes" (unsafe)
11
+ # or "No" (safe) — under a risk taxonomy. Its chat template defaults to the
12
+ # ``harm`` risk when no ``guardian_config`` is supplied, so the standard
13
+ # OpenAI ``/v1/chat/completions`` path elicits a moderation verdict with no
14
+ # special kwargs. Served on the same SGLang generation adapter as the other
15
+ # decoder-only LLMs (architecture: GraniteForCausalLM).
16
+ generate:
17
+ context_length: 8192
18
+ # Moderation needs only a couple of tokens ("Yes"/"No"); the cap is a
19
+ # generous ceiling, not a target.
20
+ max_output_tokens: 512
21
+ capabilities:
22
+ # Guard verdicts are free-form single tokens — no grammar/tools needed.
23
+ grammar: []
24
+ streaming: true
25
+ tools: false
26
+ # Content-moderation / policy-check job — MEASURED on ToxicChat via the
27
+ # generation gate (see safety/measured_baseline.json): high-recall (0.97)
28
+ # / low-precision (0.16) under the default 'harm' risk. Backs the
29
+ # model="guard" alias. Informational (not request-gated).
30
+ guard: true
31
+ max_sequence_length: 8192
32
+ profiles:
33
+ default:
34
+ # max_batch_tokens is a generic engine knob; generation does not batch at
35
+ # the SIE layer (SGLang batches internally) but the validator requires it.
36
+ max_batch_tokens: 16384
37
+ compute_precision: bfloat16
38
+ adapter_path: sie_server.adapters.sglang.generation:SGLangGenerationAdapter
39
+ # Conservative L4 baseline; moderation prompts are short so a small KV
40
+ # budget is ample. Re-calibrate with a concurrency/OOM sweep if promoted.
41
+ kv_budget_tokens: 8192
42
+ adapter_options:
43
+ loadtime:
44
+ mem_fraction_static: 0.85
45
+ served_model_name: ibm-granite/granite-guardian-3.0-2b
46
+ # CHECK POLICY precision dial. The adapter reads the verdict-token
47
+ # logprobs, computes P(unsafe) over Yes/No, and returns "Yes" iff
48
+ # P(unsafe) >= threshold. threshold=0.5 == the model's argmax (the
49
+ # current high-recall default: recall 0.97 / precision 0.16). Measured
50
+ # ToxicChat trade-off (guard baseline logprob_threshold_sweep):
51
+ # 0.5 -> F1 0.26 recall 0.97 precision 0.15 (catch-everything)
52
+ # 0.8 -> F1 0.38 recall 0.54 precision 0.29 (best F1)
53
+ # 0.95 -> F1 0.31 recall 0.20 precision 0.70 (precision-critical)
54
+ # Raising it trades recall for precision — a PRODUCT/safety decision per
55
+ # deployment (a moderation guard usually favours recall), so the shipped
56
+ # default stays at argmax. Operators raise it to taste.
57
+ guard:
58
+ threshold: 0.5
59
+ runtime:
60
+ first_chunk_timeout_s: 30
61
+ inter_chunk_timeout_s: 10
62
+ overall_timeout_s: 300
63
+ # Greedy: a guard verdict must be deterministic.
64
+ default_sampling:
65
+ temperature: 0.0
66
+ stop_tokens:
67
+ - "<|end_of_text|>"
68
+ a100-40gb:
69
+ max_batch_tokens: 32768
70
+ compute_precision: bfloat16
71
+ adapter_path: sie_server.adapters.sglang.generation:SGLangGenerationAdapter
72
+ # ~2.5B weights leave most of a 40 GB card for KV; moderation traffic is
73
+ # short-context so this budget is set for many concurrent admissions.
74
+ kv_budget_tokens: 65536
75
+ adapter_options:
76
+ loadtime:
77
+ mem_fraction_static: 0.85
78
+ served_model_name: ibm-granite/granite-guardian-3.0-2b
79
+ # Duplicated from ``default`` — this is a NON-extending profile, and
80
+ # loadtime blocks are whole-dict replaced (not deep-merged), so the
81
+ # CHECK POLICY precision dial would otherwise be dropped here and the
82
+ # A100 variant would silently fall back to raw argmax (self._guard={}).
83
+ # Keep in sync with the ``default`` profile's guard.threshold.
84
+ guard:
85
+ threshold: 0.5
86
+ runtime:
87
+ first_chunk_timeout_s: 30
88
+ inter_chunk_timeout_s: 10
89
+ overall_timeout_s: 300
90
+ default_sampling:
91
+ temperature: 0.0
92
+ stop_tokens:
93
+ - "<|end_of_text|>"
@@ -3,7 +3,7 @@
3
3
  "info": {
4
4
  "title": "SIE Server",
5
5
  "description": "Search Inference Engine - GPU inference server for search workloads",
6
- "version": "0.4.2"
6
+ "version": "0.5.0"
7
7
  },
8
8
  "paths": {
9
9
  "/": {
@@ -848,6 +848,41 @@
848
848
  "type": "object",
849
849
  "title": "HTTPValidationError"
850
850
  },
851
+ "ModelCapabilities": {
852
+ "properties": {
853
+ "grammar": {
854
+ "items": {
855
+ "type": "string"
856
+ },
857
+ "type": "array",
858
+ "title": "Grammar",
859
+ "default": []
860
+ },
861
+ "tools": {
862
+ "type": "boolean",
863
+ "title": "Tools",
864
+ "default": false
865
+ },
866
+ "code": {
867
+ "type": "boolean",
868
+ "title": "Code",
869
+ "default": false
870
+ },
871
+ "sql": {
872
+ "type": "boolean",
873
+ "title": "Sql",
874
+ "default": false
875
+ },
876
+ "guard": {
877
+ "type": "boolean",
878
+ "title": "Guard",
879
+ "default": false
880
+ }
881
+ },
882
+ "type": "object",
883
+ "title": "ModelCapabilities",
884
+ "description": "Advertised generation capabilities for a model.\n\nMirrors the gateway ``capabilities`` wire shape\n(``ModelCapabilitiesWire``) for the keys derivable from the loaded\nmodel config's :class:`~sie_server.config.model.GenerateCapabilities`.\n``code``/``sql``/``guard`` are informational flags advertising\nvalidated generation jobs that back the ``model=\"code\"`` /\n``model=\"sql\"`` / ``model=\"guard\"`` aliases. Populated only for\nmodels that declare ``tasks.generate``; ``None`` otherwise.\n\nThese flags mean the model *supports* a task \u2014 they are NOT a\nprecision-independent quality SLA. A flag is true at the model level even\nwhen quality is profile/precision-dependent (e.g. ``sql`` quality regresses\nunder FP8; route SQL-critical traffic to a BF16 bundle via the ``sql``\nalias)."
885
+ },
851
886
  "ModelInfo": {
852
887
  "properties": {
853
888
  "name": {
@@ -919,6 +954,16 @@
919
954
  "type": "object",
920
955
  "title": "Profiles",
921
956
  "default": {}
957
+ },
958
+ "capabilities": {
959
+ "anyOf": [
960
+ {
961
+ "$ref": "#/components/schemas/ModelCapabilities"
962
+ },
963
+ {
964
+ "type": "null"
965
+ }
966
+ ]
922
967
  }
923
968
  },
924
969
  "type": "object",
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-server"
3
- version = "0.4.2"
3
+ version = "0.5.0"
4
4
  description = "Search Inference Engine - GPU inference server for search workloads"
5
5
  requires-python = ">=3.12,<3.13"
6
6
  license = { text = "Apache-2.0" }
@@ -29,6 +29,7 @@ from sie_server.adapters._base_adapter import BaseAdapter
29
29
  from sie_server.adapters._spec import AdapterSpec
30
30
  from sie_server.adapters._types import ERR_NOT_LOADED, ComputePrecision
31
31
  from sie_server.core.inference_output import EncodeOutput, ExtractOutput
32
+ from sie_server.core.preprocessor.vision import resolve_florence2_prompt
32
33
  from sie_server.types.inputs import media_bytes
33
34
  from sie_server.types.responses import DetectedObject, Entity
34
35
 
@@ -247,15 +248,18 @@ class Florence2Adapter(BaseAdapter):
247
248
  max_new_tokens = options.get("max_new_tokens", self._max_new_tokens)
248
249
  num_beams = options.get("num_beams", self._num_beams)
249
250
 
250
- # Build task prompt
251
- prompt = self._build_prompt(task, labels, instruction)
251
+ # Resolve the prompt and the effective task token. A free-text instruction
252
+ # is answered via DocVQA, so the effective task may differ from the
253
+ # configured one — post-processing must use the effective task to match
254
+ # what the prompt asked the model to do.
255
+ prompt, effective_task = resolve_florence2_prompt(task, labels, instruction)
252
256
 
253
257
  # Use preprocessed items if available
254
258
  if prepared_items is not None and len(prepared_items) > 0:
255
259
  return self._extract_preprocessed(
256
260
  items=items,
257
261
  prepared_items=prepared_items,
258
- task=task,
262
+ task=effective_task,
259
263
  max_new_tokens=max_new_tokens,
260
264
  num_beams=num_beams,
261
265
  )
@@ -267,7 +271,7 @@ class Florence2Adapter(BaseAdapter):
267
271
  entities, objects = self._extract_single(
268
272
  item,
269
273
  prompt=prompt,
270
- task=task,
274
+ task=effective_task,
271
275
  max_new_tokens=max_new_tokens,
272
276
  num_beams=num_beams,
273
277
  )
@@ -388,16 +392,8 @@ class Florence2Adapter(BaseAdapter):
388
392
  Returns:
389
393
  Complete prompt string.
390
394
  """
391
- # Use instruction as custom prompt if provided
392
- if instruction:
393
- return f"{task}{instruction}"
394
-
395
- # For phrase grounding, append labels
396
- if task == TASK_CAPTION_TO_PHRASE_GROUNDING and labels:
397
- label_text = ", ".join(labels)
398
- return f"{task}{label_text}"
399
-
400
- return task
395
+ prompt, _ = resolve_florence2_prompt(task, labels, instruction)
396
+ return prompt
401
397
 
402
398
  def _extract_single(
403
399
  self,