sie-server 0.3.1__tar.gz → 0.3.2__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (357)
  1. {sie_server-0.3.1 → sie_server-0.3.2}/.gitignore +3 -0
  2. {sie_server-0.3.1 → sie_server-0.3.2}/Dockerfile.cpu +4 -1
  3. {sie_server-0.3.1 → sie_server-0.3.2}/Dockerfile.cuda11 +4 -1
  4. {sie_server-0.3.1 → sie_server-0.3.2}/Dockerfile.cuda12 +4 -1
  5. {sie_server-0.3.1 → sie_server-0.3.2}/PKG-INFO +1 -1
  6. {sie_server-0.3.1 → sie_server-0.3.2}/models/answerdotai__answerai-colbert-small-v1.yaml +1 -1
  7. {sie_server-0.3.1 → sie_server-0.3.2}/models/colbert-ir__colbertv2.0.yaml +1 -1
  8. {sie_server-0.3.1 → sie_server-0.3.2}/models/docling.yaml +8 -0
  9. {sie_server-0.3.1 → sie_server-0.3.2}/models/jinaai__jina-colbert-v2.yaml +1 -1
  10. {sie_server-0.3.1 → sie_server-0.3.2}/models/lightonai__GTE-ModernColBERT-v1.yaml +1 -1
  11. {sie_server-0.3.1 → sie_server-0.3.2}/models/lightonai__Reason-ModernColBERT.yaml +1 -1
  12. {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +1 -1
  13. {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +1 -1
  14. {sie_server-0.3.1 → sie_server-0.3.2}/openapi.json +1 -1
  15. {sie_server-0.3.1 → sie_server-0.3.2}/pyproject.toml +5 -1
  16. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_base_adapter.py +49 -1
  17. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_utils.py +79 -1
  18. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/base.py +14 -0
  19. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash/__init__.py +1 -0
  20. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/docling/__init__.py +72 -21
  21. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/modernbert_flash/__init__.py +1 -0
  22. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/paddleocr_vl/__init__.py +2 -0
  23. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/sentence_transformer/__init__.py +5 -1
  24. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/app/app_factory.py +65 -7
  25. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/cli.py +14 -3
  26. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/model_loader.py +20 -3
  27. sie_server-0.3.2/tests/adapters/test_docling.py +356 -0
  28. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_paddleocr_vl.py +45 -0
  29. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_sentence_transformer.py +2 -1
  30. {sie_server-0.3.1 → sie_server-0.3.2}/tests/app/test_app_factory.py +65 -0
  31. {sie_server-0.3.1 → sie_server-0.3.2}/tests/conftest.py +5 -0
  32. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_all_models.py +39 -0
  33. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_docker_integration.py +3 -3
  34. sie_server-0.3.1/tests/adapters/test_docling.py +0 -194
  35. {sie_server-0.3.1 → sie_server-0.3.2}/CONTRIBUTING.md +0 -0
  36. {sie_server-0.3.1 → sie_server-0.3.2}/LICENSE +0 -0
  37. {sie_server-0.3.1 → sie_server-0.3.2}/README.md +0 -0
  38. {sie_server-0.3.1 → sie_server-0.3.2}/bundles/default.yaml +0 -0
  39. {sie_server-0.3.1 → sie_server-0.3.2}/bundles/sglang.yaml +0 -0
  40. {sie_server-0.3.1 → sie_server-0.3.2}/bundles/transformers5.yaml +0 -0
  41. {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  42. {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  43. {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  44. {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  45. {sie_server-0.3.1 → sie_server-0.3.2}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  46. {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-m3.yaml +0 -0
  47. {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-reranker-base.yaml +0 -0
  48. {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-reranker-large.yaml +0 -0
  49. {sie_server-0.3.1 → sie_server-0.3.2}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  50. {sie_server-0.3.1 → sie_server-0.3.2}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  51. {sie_server-0.3.1 → sie_server-0.3.2}/models/GritLM__GritLM-7B.yaml +0 -0
  52. {sie_server-0.3.1 → sie_server-0.3.2}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  53. {sie_server-0.3.1 → sie_server-0.3.2}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  54. {sie_server-0.3.1 → sie_server-0.3.2}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  55. {sie_server-0.3.1 → sie_server-0.3.2}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  56. {sie_server-0.3.1 → sie_server-0.3.2}/models/Marqo__marqo-ecommerce-embeddings-B.yaml +0 -0
  57. {sie_server-0.3.1 → sie_server-0.3.2}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  58. {sie_server-0.3.1 → sie_server-0.3.2}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  59. {sie_server-0.3.1 → sie_server-0.3.2}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  60. {sie_server-0.3.1 → sie_server-0.3.2}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  61. {sie_server-0.3.1 → sie_server-0.3.2}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  62. {sie_server-0.3.1 → sie_server-0.3.2}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
  63. {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  64. {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  65. {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
  66. {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
  67. {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
  68. {sie_server-0.3.1 → sie_server-0.3.2}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
  69. {sie_server-0.3.1 → sie_server-0.3.2}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  70. {sie_server-0.3.1 → sie_server-0.3.2}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  71. {sie_server-0.3.1 → sie_server-0.3.2}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  72. {sie_server-0.3.1 → sie_server-0.3.2}/models/answerdotai__ModernBERT-base.yaml +0 -0
  73. {sie_server-0.3.1 → sie_server-0.3.2}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  74. {sie_server-0.3.1 → sie_server-0.3.2}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  75. {sie_server-0.3.1 → sie_server-0.3.2}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  76. {sie_server-0.3.1 → sie_server-0.3.2}/models/fastino__gliner2-base-v1.yaml +0 -0
  77. {sie_server-0.3.1 → sie_server-0.3.2}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  78. {sie_server-0.3.1 → sie_server-0.3.2}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  79. {sie_server-0.3.1 → sie_server-0.3.2}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  80. {sie_server-0.3.1 → sie_server-0.3.2}/models/google__embeddinggemma-300m.yaml +0 -0
  81. {sie_server-0.3.1 → sie_server-0.3.2}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  82. {sie_server-0.3.1 → sie_server-0.3.2}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  83. {sie_server-0.3.1 → sie_server-0.3.2}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  84. {sie_server-0.3.1 → sie_server-0.3.2}/models/google__siglip2-base-patch16-224.yaml +0 -0
  85. {sie_server-0.3.1 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  86. {sie_server-0.3.1 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  87. {sie_server-0.3.1 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  88. {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-base-v2.yaml +0 -0
  89. {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-large-v2.yaml +0 -0
  90. {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  91. {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__e5-small-v2.yaml +0 -0
  92. {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  93. {sie_server-0.3.1 → sie_server-0.3.2}/models/intfloat__multilingual-e5-large.yaml +0 -0
  94. {sie_server-0.3.1 → sie_server-0.3.2}/models/jackboyla__glirel-large-v0.yaml +0 -0
  95. {sie_server-0.3.1 → sie_server-0.3.2}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
  96. {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
  97. {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
  98. {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
  99. {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
  100. {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
  101. {sie_server-0.3.1 → sie_server-0.3.2}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
  102. {sie_server-0.3.1 → sie_server-0.3.2}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  103. {sie_server-0.3.1 → sie_server-0.3.2}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  104. {sie_server-0.3.1 → sie_server-0.3.2}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  105. {sie_server-0.3.1 → sie_server-0.3.2}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  106. {sie_server-0.3.1 → sie_server-0.3.2}/models/microsoft__Florence-2-base.yaml +0 -0
  107. {sie_server-0.3.1 → sie_server-0.3.2}/models/microsoft__Florence-2-large.yaml +0 -0
  108. {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  109. {sie_server-0.3.1 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  110. {sie_server-0.3.1 → sie_server-0.3.2}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  111. {sie_server-0.3.1 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  112. {sie_server-0.3.1 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  113. {sie_server-0.3.1 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  114. {sie_server-0.3.1 → sie_server-0.3.2}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  115. {sie_server-0.3.1 → sie_server-0.3.2}/models/naver__splade-v3.yaml +0 -0
  116. {sie_server-0.3.1 → sie_server-0.3.2}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  117. {sie_server-0.3.1 → sie_server-0.3.2}/models/numind__NuNER_Zero-span.yaml +0 -0
  118. {sie_server-0.3.1 → sie_server-0.3.2}/models/numind__NuNER_Zero.yaml +0 -0
  119. {sie_server-0.3.1 → sie_server-0.3.2}/models/nvidia__NV-Embed-v2.yaml +0 -0
  120. {sie_server-0.3.1 → sie_server-0.3.2}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
  121. {sie_server-0.3.1 → sie_server-0.3.2}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  122. {sie_server-0.3.1 → sie_server-0.3.2}/models/openai__clip-vit-base-patch32.yaml +0 -0
  123. {sie_server-0.3.1 → sie_server-0.3.2}/models/openai__clip-vit-large-patch14.yaml +0 -0
  124. {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  125. {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  126. {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  127. {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  128. {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  129. {sie_server-0.3.1 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  130. {sie_server-0.3.1 → sie_server-0.3.2}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  131. {sie_server-0.3.1 → sie_server-0.3.2}/models/rasyosef__splade-mini.yaml +0 -0
  132. {sie_server-0.3.1 → sie_server-0.3.2}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  133. {sie_server-0.3.1 → sie_server-0.3.2}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
  134. {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_large-v2.1.yaml +0 -0
  135. {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  136. {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  137. {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  138. {sie_server-0.3.1 → sie_server-0.3.2}/models/urchade__gliner_small-v2.1.yaml +0 -0
  139. {sie_server-0.3.1 → sie_server-0.3.2}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  140. {sie_server-0.3.1 → sie_server-0.3.2}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  141. {sie_server-0.3.1 → sie_server-0.3.2}/models/zai-org__GLM-OCR.yaml +0 -0
  142. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/__init__.py +0 -0
  143. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/__init__.py +0 -0
  144. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_flash_base.py +0 -0
  145. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_spec.py +0 -0
  146. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/_types.py +0 -0
  147. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
  148. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3/__init__.py +0 -0
  149. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flag/__init__.py +0 -0
  150. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flash/__init__.py +0 -0
  151. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_score_mixin.py +0 -0
  152. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/clip/__init__.py +0 -0
  153. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colbert/__init__.py +0 -0
  154. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  155. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  156. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colpali/__init__.py +0 -0
  157. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  158. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  159. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/donut/__init__.py +0 -0
  160. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/florence2/__init__.py +0 -0
  161. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliclass/__init__.py +0 -0
  162. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliner/__init__.py +0 -0
  163. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliner2/__init__.py +0 -0
  164. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
  165. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/glirel/__init__.py +0 -0
  166. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
  167. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  168. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
  169. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
  170. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  171. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
  172. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
  173. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  174. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
  175. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
  176. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/owlv2/__init__.py +0 -0
  177. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  178. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  179. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
  180. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
  181. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
  182. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
  183. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
  184. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/sglang/__init__.py +0 -0
  185. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/siglip/__init__.py +0 -0
  186. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
  187. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
  188. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
  189. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/__init__.py +0 -0
  190. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/encode.py +0 -0
  191. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/extract.py +0 -0
  192. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/health.py +0 -0
  193. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/helpers.py +0 -0
  194. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/metrics.py +0 -0
  195. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/models.py +0 -0
  196. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/openai_compat.py +0 -0
  197. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/openapi.py +0 -0
  198. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/options.py +0 -0
  199. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/root.py +0 -0
  200. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/score.py +0 -0
  201. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/serialization.py +0 -0
  202. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/validation.py +0 -0
  203. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/api/ws.py +0 -0
  204. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/app/__init__.py +0 -0
  205. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/app/app_state_config.py +0 -0
  206. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/config/__init__.py +0 -0
  207. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/config/engine.py +0 -0
  208. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/config/model.py +0 -0
  209. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/__init__.py +0 -0
  210. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/adaptive_batching.py +0 -0
  211. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/batcher.py +0 -0
  212. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/deps.py +0 -0
  213. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/disk_cache.py +0 -0
  214. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/encode_pipeline.py +0 -0
  215. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/extract_cost.py +0 -0
  216. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/hot_reload.py +0 -0
  217. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/inference.py +0 -0
  218. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/inference_output.py +0 -0
  219. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/load_errors.py +0 -0
  220. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/loader.py +0 -0
  221. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/logging.py +0 -0
  222. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/memory.py +0 -0
  223. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/oom.py +0 -0
  224. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/postprocessor.py +0 -0
  225. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/postprocessor_registry.py +0 -0
  226. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/prepared.py +0 -0
  227. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/__init__.py +0 -0
  228. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/base.py +0 -0
  229. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/image.py +0 -0
  230. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/text.py +0 -0
  231. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor/vision.py +0 -0
  232. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/preprocessor_registry.py +0 -0
  233. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/readiness.py +0 -0
  234. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/registry.py +0 -0
  235. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/shutdown.py +0 -0
  236. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/timing.py +0 -0
  237. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/tokenizer.py +0 -0
  238. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/watcher.py +0 -0
  239. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/__init__.py +0 -0
  240. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  241. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/base.py +0 -0
  242. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/encode.py +0 -0
  243. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/extract.py +0 -0
  244. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/score.py +0 -0
  245. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/model_worker.py +0 -0
  246. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/oom_recovery.py +0 -0
  247. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/core/worker/types.py +0 -0
  248. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/main.py +0 -0
  249. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/nats_pull_loop.py +0 -0
  250. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/nats_subscriber.py +0 -0
  251. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/__init__.py +0 -0
  252. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/gpu.py +0 -0
  253. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/metrics.py +0 -0
  254. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/prometheus.py +0 -0
  255. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/telemetry.py +0 -0
  256. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/observability/tracing.py +0 -0
  257. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/static/__init__.py +0 -0
  258. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/static/index.html +0 -0
  259. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/__init__.py +0 -0
  260. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/inputs.py +0 -0
  261. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/openapi.py +0 -0
  262. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/outputs.py +0 -0
  263. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/requests.py +0 -0
  264. {sie_server-0.3.1 → sie_server-0.3.2}/src/sie_server/types/responses.py +0 -0
  265. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/__init__.py +0 -0
  266. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_base.py +0 -0
  267. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_bge_m3.py +0 -0
  268. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_bge_m3_flash.py +0 -0
  269. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_clip.py +0 -0
  270. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_colbert.py +0 -0
  271. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_docling_smoke.py +0 -0
  272. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_donut.py +0 -0
  273. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_factory_integration.py +0 -0
  274. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_flash_base.py +0 -0
  275. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_florence2.py +0 -0
  276. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_glirel.py +0 -0
  277. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_glm_ocr.py +0 -0
  278. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_grounding_dino.py +0 -0
  279. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_gte_sparse.py +0 -0
  280. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
  281. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_lighton_ocr.py +0 -0
  282. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_lora.py +0 -0
  283. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_lora_integration.py +0 -0
  284. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_runtime_options.py +0 -0
  285. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_sglang.py +0 -0
  286. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_siglip.py +0 -0
  287. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_sparse_aggregation.py +0 -0
  288. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_stablebridge_integration.py +0 -0
  289. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_stablebridge_pruner.py +0 -0
  290. {sie_server-0.3.1 → sie_server-0.3.2}/tests/adapters/test_visual_document.py +0 -0
  291. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/__init__.py +0 -0
  292. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_dtype.py +0 -0
  293. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_endpoint.py +0 -0
  294. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_json_schema.py +0 -0
  295. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_timing.py +0 -0
  296. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_encode_validation.py +0 -0
  297. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_extract.py +0 -0
  298. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_extract_integration.py +0 -0
  299. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_extract_oom.py +0 -0
  300. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_health.py +0 -0
  301. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_models.py +0 -0
  302. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_openai_compat.py +0 -0
  303. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_score.py +0 -0
  304. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_version_header.py +0 -0
  305. {sie_server-0.3.1 → sie_server-0.3.2}/tests/api/test_ws.py +0 -0
  306. {sie_server-0.3.1 → sie_server-0.3.2}/tests/app/__init__.py +0 -0
  307. {sie_server-0.3.1 → sie_server-0.3.2}/tests/config/__init__.py +0 -0
  308. {sie_server-0.3.1 → sie_server-0.3.2}/tests/config/test_bundle_coverage.py +0 -0
  309. {sie_server-0.3.1 → sie_server-0.3.2}/tests/config/test_config.py +0 -0
  310. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/__init__.py +0 -0
  311. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_adaptive_batching.py +0 -0
  312. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_batcher.py +0 -0
  313. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_disk_cache.py +0 -0
  314. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_hot_reload.py +0 -0
  315. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_idle_evict.py +0 -0
  316. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_inference.py +0 -0
  317. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_loader.py +0 -0
  318. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_logging.py +0 -0
  319. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_memory.py +0 -0
  320. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_oom_detection.py +0 -0
  321. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_postprocessor.py +0 -0
  322. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_postprocessor_registry.py +0 -0
  323. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_prepared.py +0 -0
  324. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_preprocessor.py +0 -0
  325. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_preprocessor_registry.py +0 -0
  326. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_quantization.py +0 -0
  327. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_readiness.py +0 -0
  328. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_async.py +0 -0
  329. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_core.py +0 -0
  330. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_deps.py +0 -0
  331. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_failed_state.py +0 -0
  332. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_memory.py +0 -0
  333. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_registry_multi_model.py +0 -0
  334. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_shutdown.py +0 -0
  335. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_timing.py +0 -0
  336. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_watcher.py +0 -0
  337. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_backpressure.py +0 -0
  338. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_core.py +0 -0
  339. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_extract.py +0 -0
  340. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_lora.py +0 -0
  341. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_options.py +0 -0
  342. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/test_worker_score.py +0 -0
  343. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/worker/__init__.py +0 -0
  344. {sie_server-0.3.1 → sie_server-0.3.2}/tests/core/worker/test_oom_recovery.py +0 -0
  345. {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/__init__.py +0 -0
  346. {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/test_metrics.py +0 -0
  347. {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/test_telemetry.py +0 -0
  348. {sie_server-0.3.1 → sie_server-0.3.2}/tests/observability/test_tracing.py +0 -0
  349. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_nats_pull_loop.py +0 -0
  350. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_nats_pull_loop_batching.py +0 -0
  351. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_openapi_export.py +0 -0
  352. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_sdk_integration.py +0 -0
  353. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_server_smoke.py +0 -0
  354. {sie_server-0.3.1 → sie_server-0.3.2}/tests/test_sparse_integration.py +0 -0
  355. {sie_server-0.3.1 → sie_server-0.3.2}/tests/type_defs/__init__.py +0 -0
  356. {sie_server-0.3.1 → sie_server-0.3.2}/tests/type_defs/test_inputs.py +0 -0
  357. {sie_server-0.3.1 → sie_server-0.3.2}/tests/type_defs/test_types.py +0 -0
@@ -16,6 +16,9 @@ eggs/
16
16
  .eggs/
17
17
  lib/
18
18
  lib64/
19
+ # JS/TS projects under tools/ legitimately use a `lib/` directory.
20
+ !tools/*/lib/
21
+ !tools/*/lib/**
19
22
  parts/
20
23
  sdist/
21
24
  var/
@@ -39,7 +39,10 @@ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
39
39
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
40
40
 
41
41
  # Stub source trees so pip accepts the editable installs during dep resolution.
42
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
42
+ # Also create empty bundles/ and models/ — referenced by force-include in
43
+ # pyproject.toml; hatchling resolves them at editable-metadata time even though
44
+ # real contents only land in the `base` stage.
45
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
43
46
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
44
47
  && touch src/sie_server/__init__.py
45
48
 
@@ -41,7 +41,10 @@ WORKDIR /app
41
41
  COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
42
42
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
43
43
 
44
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
44
+ # Stubs for editable metadata generation — bundles/ and models/ are referenced
45
+ # by force-include in pyproject.toml and must exist; real contents are copied
46
+ # in the base stage.
47
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
45
48
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
46
49
  && touch src/sie_server/__init__.py
47
50
 
@@ -44,7 +44,10 @@ WORKDIR /app
44
44
  COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
45
45
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
46
46
 
47
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
47
+ # Stubs for editable metadata generation — bundles/ and models/ are referenced
48
+ # by force-include in pyproject.toml and must exist; real contents are copied
49
+ # in the base stage.
50
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
48
51
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
49
52
  && touch src/sie_server/__init__.py
50
53
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-server
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Search Inference Engine - GPU inference server for search workloads
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 96
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -18,3 +18,11 @@ profiles:
18
18
  adapter_options:
19
19
  loadtime: {}
20
20
  runtime: {}
21
+ ocr:
22
+ max_batch_tokens: 1
23
+ compute_precision: null
24
+ adapter_path: sie_server.adapters.docling:DoclingAdapter
25
+ adapter_options:
26
+ loadtime: {}
27
+ runtime:
28
+ ocr: true
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 64
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -3,7 +3,7 @@
3
3
  "info": {
4
4
  "title": "SIE Server",
5
5
  "description": "Search Inference Engine - GPU inference server for search workloads",
6
- "version": "0.3.1"
6
+ "version": "0.3.2"
7
7
  },
8
8
  "paths": {
9
9
  "/": {
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-server"
3
- version = "0.3.1"
3
+ version = "0.3.2"
4
4
  description = "Search Inference Engine - GPU inference server for search workloads"
5
5
  requires-python = ">=3.12,<3.13"
6
6
  license = { text = "Apache-2.0" }
@@ -80,6 +80,10 @@ build-backend = "hatchling.build"
80
80
  [tool.hatch.build.targets.wheel]
81
81
  packages = ["src/sie_server"]
82
82
 
83
+ [tool.hatch.build.targets.wheel.force-include]
84
+ "models" = "sie_server/models"
85
+ "bundles" = "sie_server/bundles"
86
+
83
87
  [tool.uv.sources]
84
88
  # Prebuilt flash-attn wheel for torch 2.9 + cu128 (official wheels only go up to torch 2.8)
85
89
  # Platform-specific: Linux x86_64 only. Non-Linux users should not install the flash-attn extra.
@@ -6,11 +6,15 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
6
6
 
7
7
  from sie_server.adapters._spec import AdapterSpec
8
8
  from sie_server.adapters._types import ERR_NOT_LOADED
9
+ from sie_server.adapters._utils import grouped_score_pairs
9
10
  from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  import torch
13
14
 
15
+ from sie_server.core.inference_output import ScoreOutput
16
+ from sie_server.types.inputs import Item
17
+
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
@@ -57,7 +61,16 @@ class BaseAdapter(ModelAdapter):
57
61
  raise TypeError(msg)
58
62
 
59
63
  if "score" in spec.outputs:
60
- if cls.score is ModelAdapter.score and cls.score_pairs is ModelAdapter.score_pairs:
64
+ # BaseAdapter ships a default score_pairs() that delegates to score().
65
+ # Treat that default as "not implemented" for validation purposes:
66
+ # subclasses must override either score() or score_pairs() so the
67
+ # default delegate doesn't bottom out in ModelAdapter.score().
68
+ score_overridden = cls.score is not ModelAdapter.score
69
+ score_pairs_overridden = cls.score_pairs not in (
70
+ ModelAdapter.score_pairs,
71
+ BaseAdapter.score_pairs,
72
+ )
73
+ if not score_overridden and not score_pairs_overridden:
61
74
  msg = f"{cls.__name__} declares 'score' in outputs but does not implement score() or score_pairs()"
62
75
  raise TypeError(msg)
63
76
 
@@ -117,6 +130,41 @@ class BaseAdapter(ModelAdapter):
117
130
  model_name=getattr(self, "_model_name_or_path", ""),
118
131
  )
119
132
 
133
+ # -- Default batched scoring ---------------------------------------------
134
+
135
+ def score_pairs(
136
+ self,
137
+ queries: list[Item],
138
+ docs: list[Item],
139
+ *,
140
+ instruction: str | None = None,
141
+ options: dict[str, Any] | None = None,
142
+ ) -> ScoreOutput:
143
+ """Default ``score_pairs()`` that batches via per-query grouping.
144
+
145
+ Groups parallel ``(query, doc)`` pairs by ``(text, id, instruction)``
146
+ so each unique query is encoded once and its docs are scored as a
147
+ single ``score()`` call. Subclasses with a more efficient native
148
+ cross-batch path (e.g. cross-encoders that pack queries and docs
149
+ into one transformer pass) should override this.
150
+
151
+ Per-call ``options`` are not supported by this default delegate
152
+ (it dispatches per-query and cannot route options into ``score()``
153
+ without subclass-specific knowledge). If ``options`` is a non-empty
154
+ mapping, this raises ``NotImplementedError`` to surface the
155
+ unsupported configuration; pass ``options=None`` (or ``{}``) or
156
+ override ``score_pairs()`` with an options-aware implementation.
157
+ """
158
+ if options:
159
+ msg = (
160
+ f"{type(self).__name__}.score_pairs(): per-call options are "
161
+ f"not supported by the default batching path "
162
+ f"(got options={options!r}). Override score_pairs() with an "
163
+ f"options-aware implementation."
164
+ )
165
+ raise NotImplementedError(msg)
166
+ return grouped_score_pairs(self.score, queries, docs, instruction=instruction)
167
+
120
168
  # -- Shared helpers ------------------------------------------------------
121
169
 
122
170
  def _check_loaded(self) -> None:
@@ -1,6 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any
3
+ from typing import TYPE_CHECKING, Any, Protocol
4
+
5
+ import numpy as np
6
+
7
+ from sie_server.core.inference_output import ScoreOutput
4
8
 
5
9
  if TYPE_CHECKING:
6
10
  import torch
@@ -8,6 +12,16 @@ if TYPE_CHECKING:
8
12
  from sie_server.types.inputs import Item
9
13
 
10
14
 
15
+ class _ScoreFn(Protocol):
16
+ def __call__(
17
+ self,
18
+ query: Item,
19
+ items: list[Item],
20
+ *,
21
+ instruction: str | None = ...,
22
+ ) -> list[float]: ...
23
+
24
+
11
25
  # ---------------------------------------------------------------------------
12
26
  # RoPE utilities (eliminates 7 identical copies)
13
27
  # ---------------------------------------------------------------------------
@@ -140,3 +154,67 @@ def resolve_embedding_options(
140
154
  opts.get("query_template", default_query_template),
141
155
  opts.get("doc_template", default_doc_template),
142
156
  )
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Score-pair grouping (shared by ColBERT-family adapters)
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
+ def grouped_score_pairs(
165
+ score_fn: _ScoreFn,
166
+ queries: list[Item],
167
+ docs: list[Item],
168
+ *,
169
+ instruction: str | None = None,
170
+ ) -> ScoreOutput:
171
+ """Run a per-query ``score()`` callable over parallel (query, doc) pairs.
172
+
173
+ Groups pairs by ``(query.text, query.id, instruction)`` so each unique
174
+ query is encoded once and its docs are scored as one batch. Used by
175
+ ColBERT-family adapters to satisfy the worker's ``score_pairs()``
176
+ contract while reusing the optimized batched ``score()``.
177
+
178
+ Queries with ``text is None`` are not supported and raise ``ValueError``
179
+ (ColBERT scoring requires text). The grouping key is
180
+ ``(query.text, query.id or "", instruction or "")`` — two distinct
181
+ ``Item`` objects with identical text/id/instruction collapse to one
182
+ encoding pass.
183
+
184
+ Args:
185
+ score_fn: Bound ``adapter.score(query, items, *, instruction=None)``.
186
+ queries: Query items (parallel to docs).
187
+ docs: Document items to score.
188
+ instruction: Optional instruction passed through to ``score_fn``.
189
+
190
+ Returns:
191
+ ``ScoreOutput`` with one float per pair, in the original input order.
192
+
193
+ Raises:
194
+ ValueError: If ``queries`` and ``docs`` lengths differ, or any query
195
+ lacks text.
196
+ """
197
+ if len(queries) != len(docs):
198
+ msg = f"queries and docs must be parallel; got {len(queries)} vs {len(docs)}"
199
+ raise ValueError(msg)
200
+
201
+ if not docs:
202
+ return ScoreOutput(scores=np.zeros(0, dtype=np.float32), batch_size=0)
203
+
204
+ groups: dict[tuple[str, str, str], list[int]] = {}
205
+ for i, q in enumerate(queries):
206
+ if q.text is None:
207
+ msg = f"grouped_score_pairs requires queries[{i}].text; got None"
208
+ raise ValueError(msg)
209
+ key = (q.text, q.id or "", instruction or "")
210
+ groups.setdefault(key, []).append(i)
211
+
212
+ scores = np.zeros(len(docs), dtype=np.float32)
213
+ for indices in groups.values():
214
+ q = queries[indices[0]]
215
+ group_docs = [docs[i] for i in indices]
216
+ group_scores = score_fn(q, group_docs, instruction=instruction)
217
+ for idx, s in zip(indices, group_scores, strict=True):
218
+ scores[idx] = float(s)
219
+
220
+ return ScoreOutput(scores=scores, batch_size=len(docs))
@@ -127,6 +127,20 @@ class ModelAdapter(ABC):
127
127
  device: Device string (e.g., "cuda:0", "cpu").
128
128
  """
129
129
 
130
+ def warmup(self) -> None:
131
+ """Run a warmup forward pass on the loaded model.
132
+
133
+ Called by the model loader after ``load()`` has completed. The default
134
+ implementation is a no-op for adapters that do not need warmup. Adapters
135
+ that compile kernels on first call (e.g. flash-attention) or otherwise
136
+ benefit from a priming pass should override this and run a single
137
+ inference pass against a tiny synthetic input.
138
+
139
+ Splitting this from ``load()`` lets the cold-start instrumentation
140
+ attribute deserialize and warmup time separately.
141
+ """
142
+ return
143
+
130
144
  @abstractmethod
131
145
  def unload(self) -> None:
132
146
  """Unload the model and free resources.
@@ -172,6 +172,7 @@ class BertFlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
172
172
  self._max_seq_length,
173
173
  )
174
174
 
175
+ def warmup(self) -> None:
175
176
  # Warmup flash attention kernels
176
177
  logger.info("Warming up CUDA kernels...")
177
178
  warmup_items = [Item(text="warmup")]
@@ -2,7 +2,6 @@ from __future__ import annotations
2
2
 
3
3
  import io
4
4
  import logging
5
- from concurrent.futures import ThreadPoolExecutor
6
5
  from typing import TYPE_CHECKING, Any, ClassVar
7
6
 
8
7
  from sie_server.adapters._base_adapter import BaseAdapter
@@ -52,11 +51,17 @@ class DoclingAdapter(BaseAdapter):
52
51
  OCR is disabled by default for speed and predictability. Pass
53
52
  ``options={"ocr": True}`` per request to enable it.
54
53
 
55
- Concurrency: a fresh ``DocumentConverter`` is built per item rather than
56
- sharing one across threads. Construction is ~10 ms once Docling's
57
- layout/table models have been pre-warmed (they cache globally), and this
58
- sidesteps thread-safety concerns reported upstream
59
- (https://github.com/docling-project/docling/issues/115).
54
+ Concurrency: one ``DocumentConverter`` is cached per ``ocr_enabled`` value
55
+ on the adapter instance. ``self._device`` is set once in ``load()`` and is
56
+ stable for the adapter's lifetime, so the effective cache key is
57
+ ``(self._device, ocr_enabled)`` and at most two converters ever exist per
58
+ adapter instance. Cross-request serialization is provided by
59
+ ``ModelWorker._inference_executor`` (max_workers=1), so the cache itself
60
+ does not need a lock. Items within one batch are processed serially
61
+ (rather than via a per-item thread pool) to sidestep the converter's known
62
+ thread-safety issue (https://github.com/docling-project/docling/issues/115);
63
+ at GPU-bound concurrency the upstream worker is already saturating the
64
+ device, so intra-batch parallelism does not buy real throughput.
60
65
  """
61
66
 
62
67
  spec: ClassVar[AdapterSpec] = AdapterSpec(
@@ -69,12 +74,13 @@ class DoclingAdapter(BaseAdapter):
69
74
  self,
70
75
  model_name_or_path: str | None = None, # unused; Docling is package-backed
71
76
  *,
72
- compute_precision: str | None = None, # unused; Docling runs on CPU
77
+ compute_precision: str | None = None, # unused; device is threaded via load()
73
78
  **kwargs: Any,
74
79
  ) -> None:
75
80
  _ = (model_name_or_path, compute_precision, kwargs)
76
81
  self._loaded = False
77
82
  self._device: str | None = None
83
+ self._converters: dict[bool, Any] = {}
78
84
 
79
85
  def load(self, device: str) -> None:
80
86
  self._device = device
@@ -82,12 +88,20 @@ class DoclingAdapter(BaseAdapter):
82
88
  # the first real request doesn't block on a multi-hundred-MB pull.
83
89
  # Models cache globally, so subsequent per-task converters are cheap.
84
90
  try:
85
- warm_converter = self._make_converter(ocr_enabled=False)
91
+ warm_converter = self._get_converter(ocr_enabled=False)
86
92
  self._convert_bytes(warm_converter, _TINY_PDF_BYTES, format_hint="pdf")
93
+ # Also build the OCR-enabled converter so the first ocr-profile
94
+ # request doesn't pay layout+OCR model-init latency.
95
+ self._get_converter(ocr_enabled=True)
87
96
  except Exception:
88
97
  logger.exception("Docling pre-warm failed; first real request may be slow")
89
98
  self._loaded = True
90
99
 
100
+ def unload(self) -> None:
101
+ self._converters.clear()
102
+ self._loaded = False
103
+ super().unload()
104
+
91
105
  def extract(
92
106
  self,
93
107
  items: list[Item],
@@ -113,28 +127,35 @@ class DoclingAdapter(BaseAdapter):
113
127
  )
114
128
 
115
129
  def _run_extract(self, items: list[Item], *, ocr_enabled: bool) -> list[dict[str, Any]]:
116
- """Run extract per-item, parallelized across the batch.
130
+ """Run extract per-item, serially.
117
131
 
118
- Each task gets its own DocumentConverter (see class docstring).
132
+ Items are processed one at a time so we can share a single cached
133
+ DocumentConverter (see class docstring). At GPU-bound concurrency the
134
+ worker-level inference executor is already saturating the device, so
135
+ intra-batch parallelism does not buy real throughput.
119
136
  """
120
- if len(items) <= 1:
121
- return [self._extract_one(item, ocr_enabled=ocr_enabled) for item in items]
122
-
123
- with ThreadPoolExecutor(max_workers=min(len(items), 4)) as pool:
124
- futures = [pool.submit(self._extract_one, item, ocr_enabled=ocr_enabled) for item in items]
125
- return [f.result() for f in futures]
137
+ return [self._extract_one(item, ocr_enabled=ocr_enabled) for item in items]
126
138
 
127
139
  def _extract_one(self, item: Item, *, ocr_enabled: bool) -> dict[str, Any]:
128
140
  document = item.document
129
141
  if not is_document_input(document):
130
142
  return {"error": _ERR_REQUIRES_DOCUMENT}
131
143
  try:
132
- converter = self._make_converter(ocr_enabled=ocr_enabled)
144
+ converter = self._get_converter(ocr_enabled=ocr_enabled)
133
145
  return self._convert_bytes(converter, document["data"], format_hint=document.get("format"))
134
146
  except Exception as e: # noqa: BLE001 - per-item failure must not poison the batch
135
147
  logger.warning("Docling extract failed for item id=%s: %s", item.id, e)
136
148
  return {"error": str(e)}
137
149
 
150
+ def _get_converter(self, *, ocr_enabled: bool) -> Any:
151
+ """Return the cached DocumentConverter for this ocr_enabled value, building lazily on first use."""
152
+ cached = self._converters.get(ocr_enabled)
153
+ if cached is not None:
154
+ return cached
155
+ converter = self._make_converter(ocr_enabled=ocr_enabled)
156
+ self._converters[ocr_enabled] = converter
157
+ return converter
158
+
138
159
  def _convert_bytes(self, converter: Any, data: bytes, *, format_hint: str | None) -> dict[str, Any]:
139
160
  from docling.datamodel.base_models import DocumentStream # ty: ignore[unresolved-import]
140
161
 
@@ -151,15 +172,45 @@ class DoclingAdapter(BaseAdapter):
151
172
  }
152
173
 
153
174
  def _make_converter(self, *, ocr_enabled: bool) -> Any:
154
- """Build a fresh DocumentConverter. One per task see class docstring."""
175
+ """Build a fresh DocumentConverter. Callers should usually go through _get_converter() for caching.
176
+
177
+ Threads self._device through Docling's AcceleratorOptions so layout, table,
178
+ and OCR models actually run on the configured device. Without this, Docling
179
+ silently defaults to CPU regardless of how SIE was launched.
180
+ """
155
181
  from docling.document_converter import DocumentConverter # ty: ignore[unresolved-import]
156
182
 
157
- if not ocr_enabled:
158
- return DocumentConverter()
183
+ accelerator_options = self._build_accelerator_options()
159
184
 
160
185
  from docling.datamodel.base_models import InputFormat # ty: ignore[unresolved-import]
161
186
  from docling.datamodel.pipeline_options import PdfPipelineOptions # ty: ignore[unresolved-import]
162
187
  from docling.document_converter import PdfFormatOption # ty: ignore[unresolved-import]
163
188
 
164
- pdf_opts = PdfPipelineOptions(do_ocr=True)
189
+ # Pass do_ocr explicitly on both paths. Docling's PdfPipelineOptions defaults
190
+ # do_ocr=True, so an unset default would silently OCR every PDF and make the
191
+ # `ocr` profile a no-op vs. the default profile.
192
+ pdf_kwargs: dict[str, Any] = {"do_ocr": ocr_enabled}
193
+ if accelerator_options is not None:
194
+ pdf_kwargs["accelerator_options"] = accelerator_options
195
+ pdf_opts = PdfPipelineOptions(**pdf_kwargs)
165
196
  return DocumentConverter(format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_opts)})
197
+
198
+ def _build_accelerator_options(self) -> Any:
199
+ """Translate self._device into a Docling AcceleratorOptions, or None."""
200
+ if not self._device:
201
+ return None
202
+ from docling.datamodel.accelerator_options import AcceleratorOptions # ty: ignore[unresolved-import]
203
+
204
+ try:
205
+ return AcceleratorOptions(device=str(self._device))
206
+ except Exception as e: # noqa: BLE001 - pydantic validation; fall back to auto
207
+ logger.warning(
208
+ "Docling: invalid device %r, falling back to 'auto' (%s)",
209
+ self._device,
210
+ e,
211
+ )
212
+ try:
213
+ return AcceleratorOptions(device="auto")
214
+ except Exception:
215
+ logger.exception("Docling: failed to build AcceleratorOptions even with 'auto'")
216
+ return None
@@ -142,6 +142,7 @@ class ModernBERTFlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
142
142
  self._max_seq_length,
143
143
  )
144
144
 
145
+ def warmup(self) -> None:
145
146
  # Warmup flash attention kernels
146
147
  logger.info("Warming up CUDA kernels...")
147
148
  warmup_items = [Item(text="warmup")]
@@ -293,6 +293,7 @@ class PaddleOCRVLAdapter(BaseAdapter):
293
293
  max_new_tokens=max_new_tokens,
294
294
  do_sample=False,
295
295
  num_beams=num_beams,
296
+ use_cache=True,
296
297
  )
297
298
 
298
299
  generated_ids = output_ids[0, prompt_len:]
@@ -344,6 +345,7 @@ class PaddleOCRVLAdapter(BaseAdapter):
344
345
  max_new_tokens=max_new_tokens,
345
346
  do_sample=False,
346
347
  num_beams=num_beams,
348
+ use_cache=True,
347
349
  )
348
350
 
349
351
  generated_ids = output_ids[0, prompt_len:]
@@ -82,13 +82,17 @@ class SentenceTransformerDenseAdapter(BaseAdapter):
82
82
  trust_remote_code=self._trust_remote_code,
83
83
  config_kwargs=self._config_kwargs,
84
84
  )
85
- _ = self._model.encode(["warmup"], convert_to_numpy=True, show_progress_bar=False)
86
85
 
87
86
  if self._max_seq_length is not None:
88
87
  self._model.max_seq_length = self._max_seq_length
89
88
 
90
89
  self._dense_dim = self._model.get_embedding_dimension()
91
90
 
91
+ def warmup(self) -> None:
92
+ if self._model is None:
93
+ return
94
+ _ = self._model.encode(["warmup"], convert_to_numpy=True, show_progress_bar=False)
95
+
92
96
  def encode(
93
97
  self,
94
98
  items: list[Item],