sie-server 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (361)
  1. {sie_server-0.3.0 → sie_server-0.3.2}/.gitignore +3 -0
  2. {sie_server-0.3.0 → sie_server-0.3.2}/Dockerfile.cpu +4 -1
  3. {sie_server-0.3.0 → sie_server-0.3.2}/Dockerfile.cuda11 +4 -1
  4. {sie_server-0.3.0 → sie_server-0.3.2}/Dockerfile.cuda12 +4 -1
  5. {sie_server-0.3.0 → sie_server-0.3.2}/PKG-INFO +2 -1
  6. {sie_server-0.3.0 → sie_server-0.3.2}/bundles/default.yaml +2 -0
  7. {sie_server-0.3.0 → sie_server-0.3.2}/models/BAAI__bge-m3.yaml +1 -1
  8. sie_server-0.3.2/models/Marqo__marqo-ecommerce-embeddings-B.yaml +28 -0
  9. {sie_server-0.3.0 → sie_server-0.3.2}/models/answerdotai__answerai-colbert-small-v1.yaml +1 -1
  10. {sie_server-0.3.0 → sie_server-0.3.2}/models/colbert-ir__colbertv2.0.yaml +1 -1
  11. {sie_server-0.3.0 → sie_server-0.3.2}/models/docling.yaml +8 -0
  12. sie_server-0.3.2/models/google__embeddinggemma-300m.yaml +49 -0
  13. {sie_server-0.3.0 → sie_server-0.3.2}/models/jinaai__jina-colbert-v2.yaml +1 -1
  14. {sie_server-0.3.0 → sie_server-0.3.2}/models/lightonai__GTE-ModernColBERT-v1.yaml +1 -1
  15. {sie_server-0.3.0 → sie_server-0.3.2}/models/lightonai__Reason-ModernColBERT.yaml +1 -1
  16. {sie_server-0.3.0 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml +1 -1
  17. {sie_server-0.3.0 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml +1 -1
  18. {sie_server-0.3.0 → sie_server-0.3.2}/openapi.json +64 -1
  19. {sie_server-0.3.0 → sie_server-0.3.2}/pyproject.toml +7 -1
  20. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_base_adapter.py +49 -1
  21. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_utils.py +79 -1
  22. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/base.py +14 -0
  23. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash/__init__.py +1 -0
  24. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3/__init__.py +6 -2
  25. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flag/__init__.py +6 -2
  26. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flash/__init__.py +5 -2
  27. sie_server-0.3.2/src/sie_server/adapters/bge_m3_score_mixin.py +283 -0
  28. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/docling/__init__.py +72 -21
  29. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/modernbert_flash/__init__.py +1 -0
  30. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/paddleocr_vl/__init__.py +2 -0
  31. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/sentence_transformer/__init__.py +5 -1
  32. sie_server-0.3.2/src/sie_server/adapters/siglip/__init__.py +507 -0
  33. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/encode.py +8 -0
  34. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/extract.py +8 -0
  35. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/helpers.py +87 -14
  36. sie_server-0.3.2/src/sie_server/api/models.py +198 -0
  37. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/openai_compat.py +8 -0
  38. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/score.py +8 -0
  39. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/ws.py +7 -1
  40. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/app/app_factory.py +65 -7
  41. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/cli.py +14 -3
  42. sie_server-0.3.2/src/sie_server/core/load_errors.py +196 -0
  43. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/model_loader.py +20 -3
  44. sie_server-0.3.2/src/sie_server/core/preprocessor/image.py +260 -0
  45. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/registry.py +153 -2
  46. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/types/responses.py +1 -0
  47. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_bge_m3.py +2 -2
  48. sie_server-0.3.2/tests/adapters/test_bge_m3_flash.py +507 -0
  49. sie_server-0.3.2/tests/adapters/test_docling.py +356 -0
  50. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_paddleocr_vl.py +45 -0
  51. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_sentence_transformer.py +2 -1
  52. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_encode_dtype.py +4 -0
  53. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_encode_endpoint.py +44 -0
  54. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_encode_json_schema.py +2 -0
  55. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_encode_timing.py +4 -0
  56. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_encode_validation.py +2 -0
  57. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_extract.py +2 -0
  58. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_extract_oom.py +2 -0
  59. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_models.py +87 -0
  60. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_openai_compat.py +2 -0
  61. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_score.py +2 -0
  62. {sie_server-0.3.0 → sie_server-0.3.2}/tests/app/test_app_factory.py +65 -0
  63. {sie_server-0.3.0 → sie_server-0.3.2}/tests/conftest.py +5 -0
  64. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_preprocessor.py +126 -0
  65. sie_server-0.3.2/tests/core/test_registry_failed_state.py +217 -0
  66. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_all_models.py +65 -0
  67. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_docker_integration.py +3 -3
  68. sie_server-0.3.0/models/google__embeddinggemma-300m.yaml +0 -29
  69. sie_server-0.3.0/src/sie_server/adapters/siglip/__init__.py +0 -316
  70. sie_server-0.3.0/src/sie_server/api/models.py +0 -112
  71. sie_server-0.3.0/src/sie_server/core/preprocessor/image.py +0 -129
  72. sie_server-0.3.0/tests/adapters/test_docling.py +0 -194
  73. {sie_server-0.3.0 → sie_server-0.3.2}/CONTRIBUTING.md +0 -0
  74. {sie_server-0.3.0 → sie_server-0.3.2}/LICENSE +0 -0
  75. {sie_server-0.3.0 → sie_server-0.3.2}/README.md +0 -0
  76. {sie_server-0.3.0 → sie_server-0.3.2}/bundles/sglang.yaml +0 -0
  77. {sie_server-0.3.0 → sie_server-0.3.2}/bundles/transformers5.yaml +0 -0
  78. {sie_server-0.3.0 → sie_server-0.3.2}/models/Alibaba-NLP__gte-Qwen2-1.5B-instruct.yaml +0 -0
  79. {sie_server-0.3.0 → sie_server-0.3.2}/models/Alibaba-NLP__gte-Qwen2-7B-instruct.yaml +0 -0
  80. {sie_server-0.3.0 → sie_server-0.3.2}/models/Alibaba-NLP__gte-modernbert-base.yaml +0 -0
  81. {sie_server-0.3.0 → sie_server-0.3.2}/models/Alibaba-NLP__gte-multilingual-base.yaml +0 -0
  82. {sie_server-0.3.0 → sie_server-0.3.2}/models/Alibaba-NLP__gte-reranker-modernbert-base.yaml +0 -0
  83. {sie_server-0.3.0 → sie_server-0.3.2}/models/BAAI__bge-reranker-base.yaml +0 -0
  84. {sie_server-0.3.0 → sie_server-0.3.2}/models/BAAI__bge-reranker-large.yaml +0 -0
  85. {sie_server-0.3.0 → sie_server-0.3.2}/models/BAAI__bge-reranker-v2-m3.yaml +0 -0
  86. {sie_server-0.3.0 → sie_server-0.3.2}/models/EmergentMethods__gliner_large_news-v2.1.yaml +0 -0
  87. {sie_server-0.3.0 → sie_server-0.3.2}/models/GritLM__GritLM-7B.yaml +0 -0
  88. {sie_server-0.3.0 → sie_server-0.3.2}/models/IDEA-Research__grounding-dino-base.yaml +0 -0
  89. {sie_server-0.3.0 → sie_server-0.3.2}/models/IDEA-Research__grounding-dino-tiny.yaml +0 -0
  90. {sie_server-0.3.0 → sie_server-0.3.2}/models/Ihor__gliner-biomed-large-v1.0.yaml +0 -0
  91. {sie_server-0.3.0 → sie_server-0.3.2}/models/Linq-AI-Research__Linq-Embed-Mistral.yaml +0 -0
  92. {sie_server-0.3.0 → sie_server-0.3.2}/models/MoritzLaurer__deberta-v3-base-zeroshot-v2.0.yaml +0 -0
  93. {sie_server-0.3.0 → sie_server-0.3.2}/models/MoritzLaurer__deberta-v3-large-zeroshot-v2.0.yaml +0 -0
  94. {sie_server-0.3.0 → sie_server-0.3.2}/models/NeuML__gliner-bert-tiny.yaml +0 -0
  95. {sie_server-0.3.0 → sie_server-0.3.2}/models/NovaSearch__stella_en_1.5B_v5.yaml +0 -0
  96. {sie_server-0.3.0 → sie_server-0.3.2}/models/NovaSearch__stella_en_400M_v5.yaml +0 -0
  97. {sie_server-0.3.0 → sie_server-0.3.2}/models/PaddlePaddle__PaddleOCR-VL-1.5.yaml +0 -0
  98. {sie_server-0.3.0 → sie_server-0.3.2}/models/Qwen__Qwen3-Embedding-0.6B.yaml +0 -0
  99. {sie_server-0.3.0 → sie_server-0.3.2}/models/Qwen__Qwen3-Embedding-4B.yaml +0 -0
  100. {sie_server-0.3.0 → sie_server-0.3.2}/models/Qwen__Qwen3-Reranker-0.6B.yaml +0 -0
  101. {sie_server-0.3.0 → sie_server-0.3.2}/models/Qwen__Qwen3-Reranker-4B.yaml +0 -0
  102. {sie_server-0.3.0 → sie_server-0.3.2}/models/Qwen__Qwen3-VL-Embedding-2B.yaml +0 -0
  103. {sie_server-0.3.0 → sie_server-0.3.2}/models/Qwen__Qwen3-VL-Reranker-2B.yaml +0 -0
  104. {sie_server-0.3.0 → sie_server-0.3.2}/models/Salesforce__SFR-Embedding-2_R.yaml +0 -0
  105. {sie_server-0.3.0 → sie_server-0.3.2}/models/Salesforce__SFR-Embedding-Mistral.yaml +0 -0
  106. {sie_server-0.3.0 → sie_server-0.3.2}/models/Snowflake__snowflake-arctic-embed-m-v2.0.yaml +0 -0
  107. {sie_server-0.3.0 → sie_server-0.3.2}/models/answerdotai__ModernBERT-base.yaml +0 -0
  108. {sie_server-0.3.0 → sie_server-0.3.2}/models/cross-encoder__ms-marco-MiniLM-L-12-v2.yaml +0 -0
  109. {sie_server-0.3.0 → sie_server-0.3.2}/models/cross-encoder__ms-marco-MiniLM-L-6-v2.yaml +0 -0
  110. {sie_server-0.3.0 → sie_server-0.3.2}/models/cross-encoder__nli-deberta-v3-base.yaml +0 -0
  111. {sie_server-0.3.0 → sie_server-0.3.2}/models/fastino__gliner2-base-v1.yaml +0 -0
  112. {sie_server-0.3.0 → sie_server-0.3.2}/models/gliner-community__gliner_large-v2.5.yaml +0 -0
  113. {sie_server-0.3.0 → sie_server-0.3.2}/models/gliner-community__gliner_medium-v2.5.yaml +0 -0
  114. {sie_server-0.3.0 → sie_server-0.3.2}/models/gliner-community__gliner_small-v2.5.yaml +0 -0
  115. {sie_server-0.3.0 → sie_server-0.3.2}/models/google__owlv2-base-patch16-ensemble.yaml +0 -0
  116. {sie_server-0.3.0 → sie_server-0.3.2}/models/google__siglip-so400m-patch14-224.yaml +0 -0
  117. {sie_server-0.3.0 → sie_server-0.3.2}/models/google__siglip-so400m-patch14-384.yaml +0 -0
  118. {sie_server-0.3.0 → sie_server-0.3.2}/models/google__siglip2-base-patch16-224.yaml +0 -0
  119. {sie_server-0.3.0 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-30m-sparse.yaml +0 -0
  120. {sie_server-0.3.0 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-english-r2.yaml +0 -0
  121. {sie_server-0.3.0 → sie_server-0.3.2}/models/ibm-granite__granite-embedding-small-english-r2.yaml +0 -0
  122. {sie_server-0.3.0 → sie_server-0.3.2}/models/intfloat__e5-base-v2.yaml +0 -0
  123. {sie_server-0.3.0 → sie_server-0.3.2}/models/intfloat__e5-large-v2.yaml +0 -0
  124. {sie_server-0.3.0 → sie_server-0.3.2}/models/intfloat__e5-mistral-7b-instruct.yaml +0 -0
  125. {sie_server-0.3.0 → sie_server-0.3.2}/models/intfloat__e5-small-v2.yaml +0 -0
  126. {sie_server-0.3.0 → sie_server-0.3.2}/models/intfloat__multilingual-e5-large-instruct.yaml +0 -0
  127. {sie_server-0.3.0 → sie_server-0.3.2}/models/intfloat__multilingual-e5-large.yaml +0 -0
  128. {sie_server-0.3.0 → sie_server-0.3.2}/models/jackboyla__glirel-large-v0.yaml +0 -0
  129. {sie_server-0.3.0 → sie_server-0.3.2}/models/jinaai__jina-reranker-v2-base-multilingual.yaml +0 -0
  130. {sie_server-0.3.0 → sie_server-0.3.2}/models/knowledgator__gliclass-base-v1.0.yaml +0 -0
  131. {sie_server-0.3.0 → sie_server-0.3.2}/models/knowledgator__gliclass-large-v1.0.yaml +0 -0
  132. {sie_server-0.3.0 → sie_server-0.3.2}/models/knowledgator__gliclass-large-v3.0.yaml +0 -0
  133. {sie_server-0.3.0 → sie_server-0.3.2}/models/knowledgator__gliclass-small-v1.0.yaml +0 -0
  134. {sie_server-0.3.0 → sie_server-0.3.2}/models/knowledgator__gliner-bi-base-v2.0.yaml +0 -0
  135. {sie_server-0.3.0 → sie_server-0.3.2}/models/knowledgator__modern-gliner-bi-base-v1.0.yaml +0 -0
  136. {sie_server-0.3.0 → sie_server-0.3.2}/models/laion__CLIP-ViT-B-32-laion2B-s34B-b79K.yaml +0 -0
  137. {sie_server-0.3.0 → sie_server-0.3.2}/models/laion__CLIP-ViT-H-14-laion2B-s32B-b79K.yaml +0 -0
  138. {sie_server-0.3.0 → sie_server-0.3.2}/models/lightonai__LightOnOCR-2-1B.yaml +0 -0
  139. {sie_server-0.3.0 → sie_server-0.3.2}/models/microsoft__Florence-2-base-ft.yaml +0 -0
  140. {sie_server-0.3.0 → sie_server-0.3.2}/models/microsoft__Florence-2-base.yaml +0 -0
  141. {sie_server-0.3.0 → sie_server-0.3.2}/models/microsoft__Florence-2-large.yaml +0 -0
  142. {sie_server-0.3.0 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-rerank-base-v2.yaml +0 -0
  143. {sie_server-0.3.0 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-rerank-large-v2.yaml +0 -0
  144. {sie_server-0.3.0 → sie_server-0.3.2}/models/mynkchaudhry__Florence-2-FT-DocVQA.yaml +0 -0
  145. {sie_server-0.3.0 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-cord-v2.yaml +0 -0
  146. {sie_server-0.3.0 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-docvqa.yaml +0 -0
  147. {sie_server-0.3.0 → sie_server-0.3.2}/models/naver-clova-ix__donut-base-finetuned-rvlcdip.yaml +0 -0
  148. {sie_server-0.3.0 → sie_server-0.3.2}/models/naver__splade-cocondenser-selfdistil.yaml +0 -0
  149. {sie_server-0.3.0 → sie_server-0.3.2}/models/naver__splade-v3.yaml +0 -0
  150. {sie_server-0.3.0 → sie_server-0.3.2}/models/nomic-ai__nomic-embed-text-v2-moe.yaml +0 -0
  151. {sie_server-0.3.0 → sie_server-0.3.2}/models/numind__NuNER_Zero-span.yaml +0 -0
  152. {sie_server-0.3.0 → sie_server-0.3.2}/models/numind__NuNER_Zero.yaml +0 -0
  153. {sie_server-0.3.0 → sie_server-0.3.2}/models/nvidia__NV-Embed-v2.yaml +0 -0
  154. {sie_server-0.3.0 → sie_server-0.3.2}/models/nvidia__llama-embed-nemotron-8b.yaml +0 -0
  155. {sie_server-0.3.0 → sie_server-0.3.2}/models/nvidia__llama-nemoretriever-colembed-3b-v1.yaml +0 -0
  156. {sie_server-0.3.0 → sie_server-0.3.2}/models/openai__clip-vit-base-patch32.yaml +0 -0
  157. {sie_server-0.3.0 → sie_server-0.3.2}/models/openai__clip-vit-large-patch14.yaml +0 -0
  158. {sie_server-0.3.0 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-distill.yaml +0 -0
  159. {sie_server-0.3.0 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v2-mini.yaml +0 -0
  160. {sie_server-0.3.0 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-distill.yaml +0 -0
  161. {sie_server-0.3.0 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-doc-v3-gte.yaml +0 -0
  162. {sie_server-0.3.0 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-v1.yaml +0 -0
  163. {sie_server-0.3.0 → sie_server-0.3.2}/models/opensearch-project__opensearch-neural-sparse-encoding-v2-distill.yaml +0 -0
  164. {sie_server-0.3.0 → sie_server-0.3.2}/models/prithivida__Splade_PP_en_v2.yaml +0 -0
  165. {sie_server-0.3.0 → sie_server-0.3.2}/models/rasyosef__splade-mini.yaml +0 -0
  166. {sie_server-0.3.0 → sie_server-0.3.2}/models/sentence-transformers__all-MiniLM-L6-v2.yaml +0 -0
  167. {sie_server-0.3.0 → sie_server-0.3.2}/models/sugiv__stablebridge-pruner-highlighter.yaml +0 -0
  168. {sie_server-0.3.0 → sie_server-0.3.2}/models/urchade__gliner_large-v2.1.yaml +0 -0
  169. {sie_server-0.3.0 → sie_server-0.3.2}/models/urchade__gliner_medium-v2.1.yaml +0 -0
  170. {sie_server-0.3.0 → sie_server-0.3.2}/models/urchade__gliner_multi-v2.1.yaml +0 -0
  171. {sie_server-0.3.0 → sie_server-0.3.2}/models/urchade__gliner_multi_pii-v1.yaml +0 -0
  172. {sie_server-0.3.0 → sie_server-0.3.2}/models/urchade__gliner_small-v2.1.yaml +0 -0
  173. {sie_server-0.3.0 → sie_server-0.3.2}/models/vidore__colpali-v1.3-hf.yaml +0 -0
  174. {sie_server-0.3.0 → sie_server-0.3.2}/models/vidore__colqwen2.5-v0.2.yaml +0 -0
  175. {sie_server-0.3.0 → sie_server-0.3.2}/models/zai-org__GLM-OCR.yaml +0 -0
  176. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/__init__.py +0 -0
  177. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/__init__.py +0 -0
  178. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_flash_base.py +0 -0
  179. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_spec.py +0 -0
  180. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_types.py +0 -0
  181. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash_cross_encoder/__init__.py +0 -0
  182. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/clip/__init__.py +0 -0
  183. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/colbert/__init__.py +0 -0
  184. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/colbert_modernbert_flash/__init__.py +0 -0
  185. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/colbert_rotary_flash/__init__.py +0 -0
  186. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/colpali/__init__.py +0 -0
  187. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/colqwen2/__init__.py +0 -0
  188. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/cross_encoder/__init__.py +0 -0
  189. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/donut/__init__.py +0 -0
  190. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/florence2/__init__.py +0 -0
  191. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/gliclass/__init__.py +0 -0
  192. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/gliner/__init__.py +0 -0
  193. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/gliner2/__init__.py +0 -0
  194. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/gliner_bi/__init__.py +0 -0
  195. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/glirel/__init__.py +0 -0
  196. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/glm_ocr/__init__.py +0 -0
  197. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/grounding_dino/__init__.py +0 -0
  198. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/gte_sparse_flash/__init__.py +0 -0
  199. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/jina_flash_cross_encoder/__init__.py +0 -0
  200. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/lighton_ocr/__init__.py +0 -0
  201. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/modernbert_flash_cross_encoder/__init__.py +0 -0
  202. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/nemo_colembed/__init__.py +0 -0
  203. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/nli_classification/__init__.py +0 -0
  204. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/nli_classification_flash/__init__.py +0 -0
  205. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/nomic_flash/__init__.py +0 -0
  206. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/owlv2/__init__.py +0 -0
  207. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/peft_lora_mixin.py +0 -0
  208. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/pytorch_embedding/__init__.py +0 -0
  209. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/qwen2_flash/__init__.py +0 -0
  210. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/qwen2_flash_cross_encoder/__init__.py +0 -0
  211. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/qwen3_vl_embedding/__init__.py +0 -0
  212. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/qwen3_vl_reranker/__init__.py +0 -0
  213. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/rope_flash/__init__.py +0 -0
  214. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/sglang/__init__.py +0 -0
  215. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/splade_flash/__init__.py +0 -0
  216. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/stablebridge_pruner/__init__.py +0 -0
  217. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/xlm_roberta_flash/__init__.py +0 -0
  218. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/__init__.py +0 -0
  219. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/health.py +0 -0
  220. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/metrics.py +0 -0
  221. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/openapi.py +0 -0
  222. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/options.py +0 -0
  223. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/root.py +0 -0
  224. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/serialization.py +0 -0
  225. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/api/validation.py +0 -0
  226. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/app/__init__.py +0 -0
  227. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/app/app_state_config.py +0 -0
  228. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/config/__init__.py +0 -0
  229. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/config/engine.py +0 -0
  230. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/config/model.py +0 -0
  231. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/__init__.py +0 -0
  232. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/adaptive_batching.py +0 -0
  233. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/batcher.py +0 -0
  234. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/deps.py +0 -0
  235. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/disk_cache.py +0 -0
  236. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/encode_pipeline.py +0 -0
  237. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/extract_cost.py +0 -0
  238. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/hot_reload.py +0 -0
  239. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/inference.py +0 -0
  240. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/inference_output.py +0 -0
  241. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/loader.py +0 -0
  242. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/logging.py +0 -0
  243. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/memory.py +0 -0
  244. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/oom.py +0 -0
  245. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/postprocessor.py +0 -0
  246. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/postprocessor_registry.py +0 -0
  247. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/prepared.py +0 -0
  248. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/preprocessor/__init__.py +0 -0
  249. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/preprocessor/base.py +0 -0
  250. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/preprocessor/text.py +0 -0
  251. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/preprocessor/vision.py +0 -0
  252. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/preprocessor_registry.py +0 -0
  253. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/readiness.py +0 -0
  254. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/shutdown.py +0 -0
  255. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/timing.py +0 -0
  256. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/tokenizer.py +0 -0
  257. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/watcher.py +0 -0
  258. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/__init__.py +0 -0
  259. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/__init__.py +0 -0
  260. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/base.py +0 -0
  261. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/encode.py +0 -0
  262. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/extract.py +0 -0
  263. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/handlers/score.py +0 -0
  264. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/model_worker.py +0 -0
  265. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/oom_recovery.py +0 -0
  266. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/core/worker/types.py +0 -0
  267. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/main.py +0 -0
  268. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/nats_pull_loop.py +0 -0
  269. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/nats_subscriber.py +0 -0
  270. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/observability/__init__.py +0 -0
  271. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/observability/gpu.py +0 -0
  272. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/observability/metrics.py +0 -0
  273. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/observability/prometheus.py +0 -0
  274. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/observability/telemetry.py +0 -0
  275. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/observability/tracing.py +0 -0
  276. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/static/__init__.py +0 -0
  277. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/static/index.html +0 -0
  278. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/types/__init__.py +0 -0
  279. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/types/inputs.py +0 -0
  280. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/types/openapi.py +0 -0
  281. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/types/outputs.py +0 -0
  282. {sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/types/requests.py +0 -0
  283. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/__init__.py +0 -0
  284. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_base.py +0 -0
  285. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_clip.py +0 -0
  286. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_colbert.py +0 -0
  287. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_docling_smoke.py +0 -0
  288. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_donut.py +0 -0
  289. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_factory_integration.py +0 -0
  290. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_flash_base.py +0 -0
  291. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_florence2.py +0 -0
  292. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_glirel.py +0 -0
  293. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_glm_ocr.py +0 -0
  294. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_grounding_dino.py +0 -0
  295. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_gte_sparse.py +0 -0
  296. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_jina_flash_cross_encoder.py +0 -0
  297. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_lighton_ocr.py +0 -0
  298. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_lora.py +0 -0
  299. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_lora_integration.py +0 -0
  300. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_runtime_options.py +0 -0
  301. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_sglang.py +0 -0
  302. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_siglip.py +0 -0
  303. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_sparse_aggregation.py +0 -0
  304. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_stablebridge_integration.py +0 -0
  305. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_stablebridge_pruner.py +0 -0
  306. {sie_server-0.3.0 → sie_server-0.3.2}/tests/adapters/test_visual_document.py +0 -0
  307. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/__init__.py +0 -0
  308. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_extract_integration.py +0 -0
  309. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_health.py +0 -0
  310. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_version_header.py +0 -0
  311. {sie_server-0.3.0 → sie_server-0.3.2}/tests/api/test_ws.py +0 -0
  312. {sie_server-0.3.0 → sie_server-0.3.2}/tests/app/__init__.py +0 -0
  313. {sie_server-0.3.0 → sie_server-0.3.2}/tests/config/__init__.py +0 -0
  314. {sie_server-0.3.0 → sie_server-0.3.2}/tests/config/test_bundle_coverage.py +0 -0
  315. {sie_server-0.3.0 → sie_server-0.3.2}/tests/config/test_config.py +0 -0
  316. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/__init__.py +0 -0
  317. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_adaptive_batching.py +0 -0
  318. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_batcher.py +0 -0
  319. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_disk_cache.py +0 -0
  320. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_hot_reload.py +0 -0
  321. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_idle_evict.py +0 -0
  322. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_inference.py +0 -0
  323. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_loader.py +0 -0
  324. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_logging.py +0 -0
  325. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_memory.py +0 -0
  326. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_oom_detection.py +0 -0
  327. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_postprocessor.py +0 -0
  328. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_postprocessor_registry.py +0 -0
  329. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_prepared.py +0 -0
  330. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_preprocessor_registry.py +0 -0
  331. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_quantization.py +0 -0
  332. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_readiness.py +0 -0
  333. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_registry_async.py +0 -0
  334. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_registry_core.py +0 -0
  335. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_registry_deps.py +0 -0
  336. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_registry_memory.py +0 -0
  337. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_registry_multi_model.py +0 -0
  338. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_shutdown.py +0 -0
  339. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_timing.py +0 -0
  340. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_watcher.py +0 -0
  341. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_worker_backpressure.py +0 -0
  342. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_worker_core.py +0 -0
  343. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_worker_extract.py +0 -0
  344. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_worker_lora.py +0 -0
  345. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_worker_options.py +0 -0
  346. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/test_worker_score.py +0 -0
  347. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/worker/__init__.py +0 -0
  348. {sie_server-0.3.0 → sie_server-0.3.2}/tests/core/worker/test_oom_recovery.py +0 -0
  349. {sie_server-0.3.0 → sie_server-0.3.2}/tests/observability/__init__.py +0 -0
  350. {sie_server-0.3.0 → sie_server-0.3.2}/tests/observability/test_metrics.py +0 -0
  351. {sie_server-0.3.0 → sie_server-0.3.2}/tests/observability/test_telemetry.py +0 -0
  352. {sie_server-0.3.0 → sie_server-0.3.2}/tests/observability/test_tracing.py +0 -0
  353. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_nats_pull_loop.py +0 -0
  354. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_nats_pull_loop_batching.py +0 -0
  355. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_openapi_export.py +0 -0
  356. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_sdk_integration.py +0 -0
  357. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_server_smoke.py +0 -0
  358. {sie_server-0.3.0 → sie_server-0.3.2}/tests/test_sparse_integration.py +0 -0
  359. {sie_server-0.3.0 → sie_server-0.3.2}/tests/type_defs/__init__.py +0 -0
  360. {sie_server-0.3.0 → sie_server-0.3.2}/tests/type_defs/test_inputs.py +0 -0
  361. {sie_server-0.3.0 → sie_server-0.3.2}/tests/type_defs/test_types.py +0 -0
@@ -16,6 +16,9 @@ eggs/
16
16
  .eggs/
17
17
  lib/
18
18
  lib64/
19
+ # JS/TS projects under tools/ legitimately use a `lib/` directory.
20
+ !tools/*/lib/
21
+ !tools/*/lib/**
19
22
  parts/
20
23
  sdist/
21
24
  var/
@@ -39,7 +39,10 @@ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
39
39
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
40
40
 
41
41
  # Stub source trees so pip accepts the editable installs during dep resolution.
42
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
42
+ # Also create empty bundles/ and models/ — referenced by force-include in
43
+ # pyproject.toml; hatchling resolves them at editable-metadata time even though
44
+ # real contents only land in the `base` stage.
45
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
43
46
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
44
47
  && touch src/sie_server/__init__.py
45
48
 
@@ -41,7 +41,10 @@ WORKDIR /app
41
41
  COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
42
42
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
43
43
 
44
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
44
+ # Stubs for editable metadata generation — bundles/ and models/ are referenced
45
+ # by force-include in pyproject.toml and must exist; real contents are copied
46
+ # in the base stage.
47
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
45
48
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
46
49
  && touch src/sie_server/__init__.py
47
50
 
@@ -44,7 +44,10 @@ WORKDIR /app
44
44
  COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
45
45
  COPY packages/sie_server/pyproject.toml ./pyproject.toml
46
46
 
47
- RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
47
+ # Stubs for editable metadata generation — bundles/ and models/ are referenced
48
+ # by force-include in pyproject.toml and must exist; real contents are copied
49
+ # in the base stage.
50
+ RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
48
51
  && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
49
52
  && touch src/sie_server/__init__.py
50
53
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-server
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Search Inference Engine - GPU inference server for search workloads
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
@@ -19,6 +19,7 @@ Requires-Dist: msgpack<2,>=1.1
19
19
  Requires-Dist: msgspec>=0.20.0
20
20
  Requires-Dist: nats-py<3,>=2.9
21
21
  Requires-Dist: numpy<3,>=2
22
+ Requires-Dist: open-clip-torch>=2.24
22
23
  Requires-Dist: opentelemetry-api<2,>=1.28
23
24
  Requires-Dist: opentelemetry-exporter-otlp<2,>=1.28
24
25
  Requires-Dist: opentelemetry-instrumentation-fastapi<1,>=0.49b0
@@ -71,6 +71,8 @@ deps:
71
71
  loguru: '>=0.7,<1'
72
72
  # donut, florence2
73
73
  timm: '>=0.9.0,<1.0'
74
+ # siglip (Marqo/marqo-ecommerce-embeddings-B uses open_clip native loader)
75
+ open-clip-torch: '>=2.24'
74
76
  # docling — composite-document parser (PDF/DOCX/HTML)
75
77
  docling: '>=2,<3'
76
78
  # Flash Attention 2 — CUDA only, prebuilt wheel
@@ -13,7 +13,7 @@ tasks:
13
13
  dim: 250002
14
14
  multivector:
15
15
  dim: 1024
16
- score: null
16
+ score: {}
17
17
  extract: null
18
18
  max_sequence_length: 8192
19
19
  profiles:
@@ -0,0 +1,28 @@
1
+ sie_id: Marqo/marqo-ecommerce-embeddings-B
2
+ hf_id: Marqo/marqo-ecommerce-embeddings-B
3
+ inputs:
4
+ text: true
5
+ image: true
6
+ audio: false
7
+ video: false
8
+ tasks:
9
+ encode:
10
+ dense:
11
+ dim: 768
12
+ sparse: null
13
+ multivector: null
14
+ score: null
15
+ extract: null
16
+ max_sequence_length: 64
17
+ profiles:
18
+ default:
19
+ max_batch_tokens: 16384
20
+ compute_precision: float16
21
+ adapter_path: sie_server.adapters.siglip:SiglipAdapter
22
+ adapter_options:
23
+ loadtime:
24
+ backend: open_clip
25
+ open_clip_model_id: hf-hub:Marqo/marqo-ecommerce-embeddings-B
26
+ dense_dim: 768
27
+ runtime:
28
+ normalize: true
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 96
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -18,3 +18,11 @@ profiles:
18
18
  adapter_options:
19
19
  loadtime: {}
20
20
  runtime: {}
21
+ ocr:
22
+ max_batch_tokens: 1
23
+ compute_precision: null
24
+ adapter_path: sie_server.adapters.docling:DoclingAdapter
25
+ adapter_options:
26
+ loadtime: {}
27
+ runtime:
28
+ ocr: true
@@ -0,0 +1,49 @@
1
+ # NOTE: ``google/embeddinggemma-300m`` is a *gated* HuggingFace repo. The
2
+ # server requires ``HF_TOKEN`` (with the model license accepted on the
3
+ # HF account) to load this model. Without it, the registry records a
4
+ # terminal ``GATED`` failure and the API returns ``MODEL_LOAD_FAILED``
5
+ # (502, no Retry-After) so the SDK does not loop.
6
+ #
7
+ # Architecture support: needs ``transformers>=4.56`` for
8
+ # ``Gemma3TextModel``. Older versions raise an unsupported-model error
9
+ # which the registry classifies as ``DEPENDENCY``.
10
+ sie_id: google/embeddinggemma-300m
11
+ hf_id: google/embeddinggemma-300m
12
+ # Track the default branch. Note: ``main`` is mutable on the Hub, so
13
+ # this does NOT guarantee bit-for-bit reproducibility — it merely names
14
+ # the branch we expect HuggingFace to resolve. For a true pin, replace
15
+ # this with an immutable commit SHA after verifying the new revision
16
+ # against ``test_google_embeddinggemma_300m_dense``.
17
+ hf_revision: main
18
+ inputs:
19
+ text: true
20
+ image: false
21
+ audio: false
22
+ video: false
23
+ tasks:
24
+ encode:
25
+ dense:
26
+ dim: 768
27
+ sparse: null
28
+ multivector: null
29
+ score: null
30
+ extract: null
31
+ max_sequence_length: 2048
32
+ profiles:
33
+ default:
34
+ max_batch_tokens: 16384
35
+ # bfloat16 on CUDA matches the captured reference embedding in
36
+ # ``test_all_models.py``. On CPU the adapter falls back to fp32
37
+ # automatically (see pytorch_embedding adapter); the loaded model
38
+ # still works, but numerical-equivalence tests should be gated on
39
+ # CUDA availability if drift becomes an issue.
40
+ compute_precision: bfloat16
41
+ adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
42
+ adapter_options:
43
+ loadtime:
44
+ attn_implementation: sdpa
45
+ runtime:
46
+ pooling: mean
47
+ normalize: true
48
+ query_template: 'task: search result | query: {text}'
49
+ doc_template: 'title: none | text: {text}'
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 128
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 512
17
17
  profiles:
@@ -11,7 +11,7 @@ tasks:
11
11
  sparse: null
12
12
  multivector:
13
13
  dim: 64
14
- score: null
14
+ score: {}
15
15
  extract: null
16
16
  max_sequence_length: 8192
17
17
  profiles:
@@ -3,7 +3,7 @@
3
3
  "info": {
4
4
  "title": "SIE Server",
5
5
  "description": "Search Inference Engine - GPU inference server for search workloads",
6
- "version": "0.3.0"
6
+ "version": "0.3.2"
7
7
  },
8
8
  "paths": {
9
9
  "/": {
@@ -134,6 +134,9 @@
134
134
  "404": {
135
135
  "description": "Model not found"
136
136
  },
137
+ "502": {
138
+ "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
139
+ },
137
140
  "503": {
138
141
  "description": "Model not loaded or service unavailable"
139
142
  },
@@ -234,6 +237,9 @@
234
237
  "404": {
235
238
  "description": "Model not found"
236
239
  },
240
+ "502": {
241
+ "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
242
+ },
237
243
  "503": {
238
244
  "description": "Model not loaded or service unavailable"
239
245
  },
@@ -334,6 +340,9 @@
334
340
  "404": {
335
341
  "description": "Model not found"
336
342
  },
343
+ "502": {
344
+ "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
345
+ },
337
346
  "503": {
338
347
  "description": "Model not loaded or service unavailable"
339
348
  },
@@ -504,6 +513,9 @@
504
513
  "404": {
505
514
  "description": "Model not found"
506
515
  },
516
+ "502": {
517
+ "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
518
+ },
507
519
  "503": {
508
520
  "description": "Service unavailable"
509
521
  },
@@ -847,6 +859,28 @@
847
859
  "type": "boolean",
848
860
  "title": "Loaded"
849
861
  },
862
+ "state": {
863
+ "type": "string",
864
+ "enum": [
865
+ "available",
866
+ "loading",
867
+ "loaded",
868
+ "unloading",
869
+ "failed"
870
+ ],
871
+ "title": "State",
872
+ "default": "available"
873
+ },
874
+ "last_error": {
875
+ "anyOf": [
876
+ {
877
+ "$ref": "#/components/schemas/ModelLoadError"
878
+ },
879
+ {
880
+ "type": "null"
881
+ }
882
+ ]
883
+ },
850
884
  "max_sequence_length": {
851
885
  "anyOf": [
852
886
  {
@@ -878,6 +912,35 @@
878
912
  "title": "ModelInfo",
879
913
  "description": "Information about a model."
880
914
  },
915
+ "ModelLoadError": {
916
+ "properties": {
917
+ "code": {
918
+ "type": "string",
919
+ "title": "Code"
920
+ },
921
+ "message": {
922
+ "type": "string",
923
+ "title": "Message"
924
+ },
925
+ "attempts": {
926
+ "type": "integer",
927
+ "title": "Attempts"
928
+ },
929
+ "permanent": {
930
+ "type": "boolean",
931
+ "title": "Permanent"
932
+ }
933
+ },
934
+ "type": "object",
935
+ "required": [
936
+ "code",
937
+ "message",
938
+ "attempts",
939
+ "permanent"
940
+ ],
941
+ "title": "ModelLoadError",
942
+ "description": "Diagnostic detail for a recorded load failure.\n\nSurfaced in :class:`ModelInfo` when the registry has a sticky\nfailure for the model. Attributes mirror the server-side\n:class:`sie_server.core.load_errors.LoadFailure`."
943
+ },
881
944
  "ModelsListResponse": {
882
945
  "properties": {
883
946
  "models": {
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-server"
3
- version = "0.3.0"
3
+ version = "0.3.2"
4
4
  description = "Search Inference Engine - GPU inference server for search workloads"
5
5
  requires-python = ">=3.12,<3.13"
6
6
  license = { text = "Apache-2.0" }
@@ -29,6 +29,8 @@ dependencies = [
29
29
  # Docling — composite-document parser (PDF/DOCX/HTML) for extract()
30
30
  "docling>=2,<3",
31
31
  "loguru>=0.7,<1",
32
+ # SigLIP (Marqo/marqo-ecommerce-embeddings-B native open_clip loader)
33
+ "open-clip-torch>=2.24",
32
34
  # Image processing
33
35
  "pillow>=11,<12",
34
36
  "numpy>=2,<3",
@@ -78,6 +80,10 @@ build-backend = "hatchling.build"
78
80
  [tool.hatch.build.targets.wheel]
79
81
  packages = ["src/sie_server"]
80
82
 
83
+ [tool.hatch.build.targets.wheel.force-include]
84
+ "models" = "sie_server/models"
85
+ "bundles" = "sie_server/bundles"
86
+
81
87
  [tool.uv.sources]
82
88
  # Prebuilt flash-attn wheel for torch 2.9 + cu128 (official wheels only go up to torch 2.8)
83
89
  # Platform-specific: Linux x86_64 only. Non-Linux users should not install the flash-attn extra.
@@ -6,11 +6,15 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
6
6
 
7
7
  from sie_server.adapters._spec import AdapterSpec
8
8
  from sie_server.adapters._types import ERR_NOT_LOADED
9
+ from sie_server.adapters._utils import grouped_score_pairs
9
10
  from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  import torch
13
14
 
15
+ from sie_server.core.inference_output import ScoreOutput
16
+ from sie_server.types.inputs import Item
17
+
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
@@ -57,7 +61,16 @@ class BaseAdapter(ModelAdapter):
57
61
  raise TypeError(msg)
58
62
 
59
63
  if "score" in spec.outputs:
60
- if cls.score is ModelAdapter.score and cls.score_pairs is ModelAdapter.score_pairs:
64
+ # BaseAdapter ships a default score_pairs() that delegates to score().
65
+ # Treat that default as "not implemented" for validation purposes:
66
+ # subclasses must override either score() or score_pairs() so the
67
+ # default delegate doesn't bottom out in ModelAdapter.score().
68
+ score_overridden = cls.score is not ModelAdapter.score
69
+ score_pairs_overridden = cls.score_pairs not in (
70
+ ModelAdapter.score_pairs,
71
+ BaseAdapter.score_pairs,
72
+ )
73
+ if not score_overridden and not score_pairs_overridden:
61
74
  msg = f"{cls.__name__} declares 'score' in outputs but does not implement score() or score_pairs()"
62
75
  raise TypeError(msg)
63
76
 
@@ -117,6 +130,41 @@ class BaseAdapter(ModelAdapter):
117
130
  model_name=getattr(self, "_model_name_or_path", ""),
118
131
  )
119
132
 
133
+ # -- Default batched scoring ---------------------------------------------
134
+
135
+ def score_pairs(
136
+ self,
137
+ queries: list[Item],
138
+ docs: list[Item],
139
+ *,
140
+ instruction: str | None = None,
141
+ options: dict[str, Any] | None = None,
142
+ ) -> ScoreOutput:
143
+ """Default ``score_pairs()`` that batches via per-query grouping.
144
+
145
+ Groups parallel ``(query, doc)`` pairs by ``(text, id, instruction)``
146
+ so each unique query is encoded once and its docs are scored as a
147
+ single ``score()`` call. Subclasses with a more efficient native
148
+ cross-batch path (e.g. cross-encoders that pack queries and docs
149
+ into one transformer pass) should override this.
150
+
151
+ Per-call ``options`` are not supported by this default delegate
152
+ (it dispatches per-query and cannot route options into ``score()``
153
+ without subclass-specific knowledge). If ``options`` is a non-empty
154
+ mapping, this raises ``NotImplementedError`` to surface the
155
+ unsupported configuration; pass ``options=None`` (or ``{}``) or
156
+ override ``score_pairs()`` with an options-aware implementation.
157
+ """
158
+ if options:
159
+ msg = (
160
+ f"{type(self).__name__}.score_pairs(): per-call options are "
161
+ f"not supported by the default batching path "
162
+ f"(got options={options!r}). Override score_pairs() with an "
163
+ f"options-aware implementation."
164
+ )
165
+ raise NotImplementedError(msg)
166
+ return grouped_score_pairs(self.score, queries, docs, instruction=instruction)
167
+
120
168
  # -- Shared helpers ------------------------------------------------------
121
169
 
122
170
  def _check_loaded(self) -> None:
@@ -1,6 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any
3
+ from typing import TYPE_CHECKING, Any, Protocol
4
+
5
+ import numpy as np
6
+
7
+ from sie_server.core.inference_output import ScoreOutput
4
8
 
5
9
  if TYPE_CHECKING:
6
10
  import torch
@@ -8,6 +12,16 @@ if TYPE_CHECKING:
8
12
  from sie_server.types.inputs import Item
9
13
 
10
14
 
15
+ class _ScoreFn(Protocol):
16
+ def __call__(
17
+ self,
18
+ query: Item,
19
+ items: list[Item],
20
+ *,
21
+ instruction: str | None = ...,
22
+ ) -> list[float]: ...
23
+
24
+
11
25
  # ---------------------------------------------------------------------------
12
26
  # RoPE utilities (eliminates 7 identical copies)
13
27
  # ---------------------------------------------------------------------------
@@ -140,3 +154,67 @@ def resolve_embedding_options(
140
154
  opts.get("query_template", default_query_template),
141
155
  opts.get("doc_template", default_doc_template),
142
156
  )
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Score-pair grouping (shared by BaseAdapter.score_pairs() and ColBERT-family adapters)
161
+ # ---------------------------------------------------------------------------
162
+
163
+
164
+ def grouped_score_pairs(
165
+ score_fn: _ScoreFn,
166
+ queries: list[Item],
167
+ docs: list[Item],
168
+ *,
169
+ instruction: str | None = None,
170
+ ) -> ScoreOutput:
171
+ """Run a per-query ``score()`` callable over parallel (query, doc) pairs.
172
+
173
+ Groups pairs by ``(query.text, query.id, instruction)`` so each unique
174
+ query is encoded once and its docs are scored as one batch. Used by
175
+ ColBERT-family adapters to satisfy the worker's ``score_pairs()``
176
+ contract while reusing the optimized batched ``score()``.
177
+
178
+ Queries with ``text is None`` are not supported and raise ``ValueError``
179
+ (ColBERT scoring requires text). The grouping key is
180
+ ``(query.text, query.id or "", instruction or "")`` — two distinct
181
+ ``Item`` objects with identical text/id/instruction collapse to one
182
+ encoding pass.
183
+
184
+ Args:
185
+ score_fn: Bound ``adapter.score(query, items, *, instruction=None)``.
186
+ queries: Query items (parallel to docs).
187
+ docs: Document items to score.
188
+ instruction: Optional instruction passed through to ``score_fn``.
189
+
190
+ Returns:
191
+ ``ScoreOutput`` with one float per pair, in the original input order.
192
+
193
+ Raises:
194
+ ValueError: If ``queries`` and ``docs`` lengths differ, or any query
195
+ lacks text.
196
+ """
197
+ if len(queries) != len(docs):
198
+ msg = f"queries and docs must be parallel; got {len(queries)} vs {len(docs)}"
199
+ raise ValueError(msg)
200
+
201
+ if not docs:
202
+ return ScoreOutput(scores=np.zeros(0, dtype=np.float32), batch_size=0)
203
+
204
+ groups: dict[tuple[str, str, str], list[int]] = {}
205
+ for i, q in enumerate(queries):
206
+ if q.text is None:
207
+ msg = f"grouped_score_pairs requires queries[{i}].text; got None"
208
+ raise ValueError(msg)
209
+ key = (q.text, q.id or "", instruction or "")
210
+ groups.setdefault(key, []).append(i)
211
+
212
+ scores = np.zeros(len(docs), dtype=np.float32)
213
+ for indices in groups.values():
214
+ q = queries[indices[0]]
215
+ group_docs = [docs[i] for i in indices]
216
+ group_scores = score_fn(q, group_docs, instruction=instruction)
217
+ for idx, s in zip(indices, group_scores, strict=True):
218
+ scores[idx] = float(s)
219
+
220
+ return ScoreOutput(scores=scores, batch_size=len(docs))
@@ -127,6 +127,20 @@ class ModelAdapter(ABC):
127
127
  device: Device string (e.g., "cuda:0", "cpu").
128
128
  """
129
129
 
130
+ def warmup(self) -> None:
131
+ """Run a warmup forward pass on the loaded model.
132
+
133
+ Called by the model loader after ``load()`` has completed. The default
134
+ implementation is a no-op for adapters that do not need warmup. Adapters
135
+ that compile kernels on first call (e.g. flash-attention) or otherwise
136
+ benefit from a priming pass should override this and run a single
137
+ inference pass against a tiny synthetic input.
138
+
139
+ Splitting this from ``load()`` lets the cold-start instrumentation
140
+ attribute deserialize and warmup time separately.
141
+ """
142
+ return
143
+
130
144
  @abstractmethod
131
145
  def unload(self) -> None:
132
146
  """Unload the model and free resources.
@@ -172,6 +172,7 @@ class BertFlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
172
172
  self._max_seq_length,
173
173
  )
174
174
 
175
+ def warmup(self) -> None:
175
176
  # Warmup flash attention kernels
176
177
  logger.info("Warming up CUDA kernels...")
177
178
  warmup_items = [Item(text="warmup")]
@@ -26,6 +26,7 @@ from torch.nn import functional
26
26
  from sie_server.adapters._base_adapter import BaseAdapter
27
27
  from sie_server.adapters._spec import AdapterSpec
28
28
  from sie_server.adapters._types import ERR_NOT_LOADED, ERR_REQUIRES_TEXT, ComputePrecision
29
+ from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
29
30
  from sie_server.core.inference_output import EncodeOutput, SparseVector
30
31
  from sie_server.types.inputs import Item
31
32
 
@@ -35,16 +36,19 @@ if TYPE_CHECKING:
35
36
  logger = logging.getLogger(__name__)
36
37
 
37
38
 
38
- class BGEM3Adapter(BaseAdapter):
39
+ class BGEM3Adapter(BGEM3ScoreMixin, BaseAdapter):
39
40
  """Adapter for BAAI/bge-m3 model.
40
41
 
41
42
  This adapter uses direct PyTorch inference with Flash Attention 2
42
43
  for optimal performance (dense, sparse, and multi-vector outputs).
44
+
45
+ Scoring (`/v1/score`) is supported via :class:`BGEM3ScoreMixin`, which
46
+ composes scores from the encoder outputs (dense / sparse / multivector).
43
47
  """
44
48
 
45
49
  spec = AdapterSpec(
46
50
  inputs=("text",),
47
- outputs=("dense", "sparse", "multivector"),
51
+ outputs=("dense", "sparse", "multivector", "score"),
48
52
  dense_dim=1024,
49
53
  sparse_dim=250002,
50
54
  multivector_dim=1024,
@@ -23,6 +23,7 @@ import torch
23
23
  from sie_server.adapters._base_adapter import BaseAdapter
24
24
  from sie_server.adapters._spec import AdapterSpec
25
25
  from sie_server.adapters._types import ERR_NOT_LOADED, ERR_REQUIRES_TEXT, ComputePrecision
26
+ from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
26
27
  from sie_server.core.inference_output import EncodeOutput, SparseVector
27
28
 
28
29
  if TYPE_CHECKING:
@@ -35,16 +36,19 @@ if TYPE_CHECKING:
35
36
  logger = logging.getLogger(__name__)
36
37
 
37
38
 
38
- class BGEM3FlagAdapter(BaseAdapter):
39
+ class BGEM3FlagAdapter(BGEM3ScoreMixin, BaseAdapter):
39
40
  """Adapter for BAAI/bge-m3 using FlagEmbedding library.
40
41
 
41
42
  This adapter uses the FlagEmbedding library's BGEM3FlagModel.
42
43
  For better performance, use BGEM3Adapter which uses Flash Attention 2.
44
+
45
+ Scoring (`/v1/score`) is supported via :class:`BGEM3ScoreMixin`, which
46
+ composes scores from the encoder outputs (dense / sparse / multivector).
43
47
  """
44
48
 
45
49
  spec = AdapterSpec(
46
50
  inputs=("text",),
47
- outputs=("dense", "sparse", "multivector"),
51
+ outputs=("dense", "sparse", "multivector", "score"),
48
52
  dense_dim=1024,
49
53
  sparse_dim=250002,
50
54
  multivector_dim=1024,