PyPI - sie-server - Versions diffs - 0.3.0__tar.gz → 0.3.2__tar.gz - Mend

sie-server 0.3.0.tar.gz → 0.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (361)

{sie_server-0.3.0 → sie_server-0.3.2}/.gitignore RENAMED Viewed

@@ -16,6 +16,9 @@ eggs/
 .eggs/
 lib/
 lib64/
+# JS/TS projects under tools/ legitimately use a `lib/` directory.
+!tools/*/lib/
+!tools/*/lib/**
 parts/
 sdist/
 var/

{sie_server-0.3.0 → sie_server-0.3.2}/Dockerfile.cpu RENAMED Viewed

@@ -39,7 +39,10 @@ COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
 COPY packages/sie_server/pyproject.toml ./pyproject.toml
 # Stub source trees so pip accepts the editable installs during dep resolution.
-RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
+# Also create empty bundles/ and models/ — referenced by force-include in
+# pyproject.toml; hatchling resolves them at editable-metadata time even though
+# real contents only land in the `base` stage.
+RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
     && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
     && touch src/sie_server/__init__.py

{sie_server-0.3.0 → sie_server-0.3.2}/Dockerfile.cuda11 RENAMED Viewed

@@ -41,7 +41,10 @@ WORKDIR /app
 COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
 COPY packages/sie_server/pyproject.toml ./pyproject.toml
-RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
+# Stubs for editable metadata generation — bundles/ and models/ are referenced
+# by force-include in pyproject.toml and must exist; real contents are copied
+# in the base stage.
+RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
     && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
     && touch src/sie_server/__init__.py

{sie_server-0.3.0 → sie_server-0.3.2}/Dockerfile.cuda12 RENAMED Viewed

@@ -44,7 +44,10 @@ WORKDIR /app
 COPY packages/sie_sdk/pyproject.toml /tmp/sie_sdk/pyproject.toml
 COPY packages/sie_server/pyproject.toml ./pyproject.toml
-RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server \
+# Stubs for editable metadata generation — bundles/ and models/ are referenced
+# by force-include in pyproject.toml and must exist; real contents are copied
+# in the base stage.
+RUN mkdir -p /tmp/sie_sdk/src/sie_sdk src/sie_server bundles models \
     && touch /tmp/sie_sdk/src/sie_sdk/__init__.py \
     && touch src/sie_server/__init__.py

{sie_server-0.3.0 → sie_server-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sie-server
-Version: 0.3.0
+Version: 0.3.2
 Summary: Search Inference Engine - GPU inference server for search workloads
 License: Apache-2.0
 License-File: LICENSE
@@ -19,6 +19,7 @@ Requires-Dist: msgpack<2,>=1.1
 Requires-Dist: msgspec>=0.20.0
 Requires-Dist: nats-py<3,>=2.9
 Requires-Dist: numpy<3,>=2
+Requires-Dist: open-clip-torch>=2.24
 Requires-Dist: opentelemetry-api<2,>=1.28
 Requires-Dist: opentelemetry-exporter-otlp<2,>=1.28
 Requires-Dist: opentelemetry-instrumentation-fastapi<1,>=0.49b0

{sie_server-0.3.0 → sie_server-0.3.2}/bundles/default.yaml RENAMED Viewed

@@ -71,6 +71,8 @@ deps:
   loguru: '>=0.7,<1'
   # donut, florence2
   timm: '>=0.9.0,<1.0'
+  # siglip (Marqo/marqo-ecommerce-embeddings-B uses open_clip native loader)
+  open-clip-torch: '>=2.24'
   # docling — composite-document parser (PDF/DOCX/HTML)
   docling: '>=2,<3'
   # Flash Attention 2 — CUDA only, prebuilt wheel

{sie_server-0.3.0 → sie_server-0.3.2}/models/BAAI__bge-m3.yaml RENAMED Viewed

@@ -13,7 +13,7 @@ tasks:
       dim: 250002
     multivector:
       dim: 1024
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 8192
 profiles:

sie_server-0.3.2/models/Marqo__marqo-ecommerce-embeddings-B.yaml ADDED Viewed

@@ -0,0 +1,28 @@
+sie_id: Marqo/marqo-ecommerce-embeddings-B
+hf_id: Marqo/marqo-ecommerce-embeddings-B
+inputs:
+  text: true
+  image: true
+  audio: false
+  video: false
+tasks:
+  encode:
+    dense:
+      dim: 768
+    sparse: null
+    multivector: null
+  score: null
+  extract: null
+max_sequence_length: 64
+profiles:
+  default:
+    max_batch_tokens: 16384
+    compute_precision: float16
+    adapter_path: sie_server.adapters.siglip:SiglipAdapter
+    adapter_options:
+      loadtime:
+        backend: open_clip
+        open_clip_model_id: hf-hub:Marqo/marqo-ecommerce-embeddings-B
+        dense_dim: 768
+      runtime:
+        normalize: true

{sie_server-0.3.0 → sie_server-0.3.2}/models/answerdotai__answerai-colbert-small-v1.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 96
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 512
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/models/colbert-ir__colbertv2.0.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 128
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 512
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/models/docling.yaml RENAMED Viewed

@@ -18,3 +18,11 @@ profiles:
     adapter_options:
       loadtime: {}
       runtime: {}
+  ocr:
+    max_batch_tokens: 1
+    compute_precision: null
+    adapter_path: sie_server.adapters.docling:DoclingAdapter
+    adapter_options:
+      loadtime: {}
+      runtime:
+        ocr: true

sie_server-0.3.2/models/google__embeddinggemma-300m.yaml ADDED Viewed

@@ -0,0 +1,49 @@
+# NOTE: ``google/embeddinggemma-300m`` is a *gated* HuggingFace repo. The
+# server requires ``HF_TOKEN`` (with the model license accepted on the
+# HF account) to load this model. Without it, the registry records a
+# terminal ``GATED`` failure and the API returns ``MODEL_LOAD_FAILED``
+# (502, no Retry-After) so the SDK does not loop.
+#
+# Architecture support: needs ``transformers>=4.56`` for
+# ``Gemma3TextModel``. Older versions raise an unsupported-model error
+# which the registry classifies as ``DEPENDENCY``.
+sie_id: google/embeddinggemma-300m
+hf_id: google/embeddinggemma-300m
+# Track the default branch. Note: ``main`` is mutable on the Hub, so
+# this does NOT guarantee bit-for-bit reproducibility — it merely names
+# the branch we expect HuggingFace to resolve. For a true pin, replace
+# this with an immutable commit SHA after verifying the new revision
+# against ``test_google_embeddinggemma_300m_dense``.
+hf_revision: main
+inputs:
+  text: true
+  image: false
+  audio: false
+  video: false
+tasks:
+  encode:
+    dense:
+      dim: 768
+    sparse: null
+    multivector: null
+  score: null
+  extract: null
+max_sequence_length: 2048
+profiles:
+  default:
+    max_batch_tokens: 16384
+    # bfloat16 on CUDA matches the captured reference embedding in
+    # ``test_all_models.py``. On CPU the adapter falls back to fp32
+    # automatically (see pytorch_embedding adapter); the loaded model
+    # still works, but numerical-equivalence tests should be gated on
+    # CUDA availability if drift becomes an issue.
+    compute_precision: bfloat16
+    adapter_path: sie_server.adapters.pytorch_embedding:PyTorchEmbeddingAdapter
+    adapter_options:
+      loadtime:
+        attn_implementation: sdpa
+      runtime:
+        pooling: mean
+        normalize: true
+        query_template: 'task: search result | query: {text}'
+        doc_template: 'title: none | text: {text}'

{sie_server-0.3.0 → sie_server-0.3.2}/models/jinaai__jina-colbert-v2.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 128
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 8192
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/models/lightonai__GTE-ModernColBERT-v1.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 128
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 8192
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/models/lightonai__Reason-ModernColBERT.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 128
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 8192
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-colbert-large-v1.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 128
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 512
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/models/mixedbread-ai__mxbai-edge-colbert-v0-32m.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ tasks:
     sparse: null
     multivector:
       dim: 64
-  score: null
+  score: {}
   extract: null
 max_sequence_length: 8192
 profiles:

{sie_server-0.3.0 → sie_server-0.3.2}/openapi.json RENAMED Viewed

@@ -3,7 +3,7 @@
   "info": {
     "title": "SIE Server",
     "description": "Search Inference Engine - GPU inference server for search workloads",
-    "version": "0.3.0"
+    "version": "0.3.2"
   },
   "paths": {
     "/": {
@@ -134,6 +134,9 @@
           "404": {
             "description": "Model not found"
           },
+          "502": {
+            "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
+          },
           "503": {
             "description": "Model not loaded or service unavailable"
           },
@@ -234,6 +237,9 @@
           "404": {
             "description": "Model not found"
           },
+          "502": {
+            "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
+          },
           "503": {
             "description": "Model not loaded or service unavailable"
           },
@@ -334,6 +340,9 @@
           "404": {
             "description": "Model not found"
           },
+          "502": {
+            "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
+          },
           "503": {
             "description": "Model not loaded or service unavailable"
           },
@@ -504,6 +513,9 @@
           "404": {
             "description": "Model not found"
           },
+          "502": {
+            "description": "Terminal model-load failure (MODEL_LOAD_FAILED). Carried in the ``detail`` envelope: ``{code, message, error_class, permanent, attempts}``. No ``Retry-After`` header \u2014 clients MUST NOT auto-retry. See sie-test#85."
+          },
           "503": {
             "description": "Service unavailable"
           },
@@ -847,6 +859,28 @@
             "type": "boolean",
             "title": "Loaded"
           },
+          "state": {
+            "type": "string",
+            "enum": [
+              "available",
+              "loading",
+              "loaded",
+              "unloading",
+              "failed"
+            ],
+            "title": "State",
+            "default": "available"
+          },
+          "last_error": {
+            "anyOf": [
+              {
+                "$ref": "#/components/schemas/ModelLoadError"
+              },
+              {
+                "type": "null"
+              }
+            ]
+          },
           "max_sequence_length": {
             "anyOf": [
               {
@@ -878,6 +912,35 @@
         "title": "ModelInfo",
         "description": "Information about a model."
       },
+      "ModelLoadError": {
+        "properties": {
+          "code": {
+            "type": "string",
+            "title": "Code"
+          },
+          "message": {
+            "type": "string",
+            "title": "Message"
+          },
+          "attempts": {
+            "type": "integer",
+            "title": "Attempts"
+          },
+          "permanent": {
+            "type": "boolean",
+            "title": "Permanent"
+          }
+        },
+        "type": "object",
+        "required": [
+          "code",
+          "message",
+          "attempts",
+          "permanent"
+        ],
+        "title": "ModelLoadError",
+        "description": "Diagnostic detail for a recorded load failure.\n\nSurfaced in :class:`ModelInfo` when the registry has a sticky\nfailure for the model. Attributes mirror the server-side\n:class:`sie_server.core.load_errors.LoadFailure`."
+      },
       "ModelsListResponse": {
         "properties": {
           "models": {

{sie_server-0.3.0 → sie_server-0.3.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "sie-server"
-version = "0.3.0"
+version = "0.3.2"
 description = "Search Inference Engine - GPU inference server for search workloads"
 requires-python = ">=3.12,<3.13"
 license = { text = "Apache-2.0" }
@@ -29,6 +29,8 @@ dependencies = [
     # Docling — composite-document parser (PDF/DOCX/HTML) for extract()
     "docling>=2,<3",
     "loguru>=0.7,<1",
+    # SigLIP (Marqo/marqo-ecommerce-embeddings-B native open_clip loader)
+    "open-clip-torch>=2.24",
     # Image processing
     "pillow>=11,<12",
     "numpy>=2,<3",
@@ -78,6 +80,10 @@ build-backend = "hatchling.build"
 [tool.hatch.build.targets.wheel]
 packages = ["src/sie_server"]
+[tool.hatch.build.targets.wheel.force-include]
+"models" = "sie_server/models"
+"bundles" = "sie_server/bundles"
 [tool.uv.sources]
 # Prebuilt flash-attn wheel for torch 2.9 + cu128 (official wheels only go up to torch 2.8)
 # Platform-specific: Linux x86_64 only. Non-Linux users should not install the flash-attn extra.

{sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_base_adapter.py RENAMED Viewed

@@ -6,11 +6,15 @@ from typing import TYPE_CHECKING, Any, ClassVar, cast
 from sie_server.adapters._spec import AdapterSpec
 from sie_server.adapters._types import ERR_NOT_LOADED
+from sie_server.adapters._utils import grouped_score_pairs
 from sie_server.adapters.base import ModelAdapter, ModelCapabilities, ModelDims
 if TYPE_CHECKING:
     import torch
+    from sie_server.core.inference_output import ScoreOutput
+    from sie_server.types.inputs import Item
 logger = logging.getLogger(__name__)
@@ -57,7 +61,16 @@ class BaseAdapter(ModelAdapter):
             raise TypeError(msg)
         if "score" in spec.outputs:
-            if cls.score is ModelAdapter.score and cls.score_pairs is ModelAdapter.score_pairs:
+            # BaseAdapter ships a default score_pairs() that delegates to score().
+            # Treat that default as "not implemented" for validation purposes:
+            # subclasses must override either score() or score_pairs() so the
+            # default delegate doesn't bottom out in ModelAdapter.score().
+            score_overridden = cls.score is not ModelAdapter.score
+            score_pairs_overridden = cls.score_pairs not in (
+                ModelAdapter.score_pairs,
+                BaseAdapter.score_pairs,
+            )
+            if not score_overridden and not score_pairs_overridden:
                 msg = f"{cls.__name__} declares 'score' in outputs but does not implement score() or score_pairs()"
                 raise TypeError(msg)
@@ -117,6 +130,41 @@ class BaseAdapter(ModelAdapter):
             model_name=getattr(self, "_model_name_or_path", ""),
         )
+    # -- Default batched scoring ---------------------------------------------
+    def score_pairs(
+        self,
+        queries: list[Item],
+        docs: list[Item],
+        *,
+        instruction: str | None = None,
+        options: dict[str, Any] | None = None,
+    ) -> ScoreOutput:
+        """Default ``score_pairs()`` that batches via per-query grouping.
+        Groups parallel ``(query, doc)`` pairs by ``(text, id, instruction)``
+        so each unique query is encoded once and its docs are scored as a
+        single ``score()`` call. Subclasses with a more efficient native
+        cross-batch path (e.g. cross-encoders that pack queries and docs
+        into one transformer pass) should override this.
+        Per-call ``options`` are not supported by this default delegate
+        (it dispatches per-query and cannot route options into ``score()``
+        without subclass-specific knowledge). If ``options`` is a non-empty
+        mapping, this raises ``NotImplementedError`` to surface the
+        unsupported configuration; pass ``options=None`` (or ``{}``) or
+        override ``score_pairs()`` with an options-aware implementation.
+        """
+        if options:
+            msg = (
+                f"{type(self).__name__}.score_pairs(): per-call options are "
+                f"not supported by the default batching path "
+                f"(got options={options!r}). Override score_pairs() with an "
+                f"options-aware implementation."
+            )
+            raise NotImplementedError(msg)
+        return grouped_score_pairs(self.score, queries, docs, instruction=instruction)
     # -- Shared helpers ------------------------------------------------------
     def _check_loaded(self) -> None:

{sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/_utils.py RENAMED Viewed

@@ -1,6 +1,10 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Protocol
+import numpy as np
+from sie_server.core.inference_output import ScoreOutput
 if TYPE_CHECKING:
     import torch
@@ -8,6 +12,16 @@ if TYPE_CHECKING:
     from sie_server.types.inputs import Item
+class _ScoreFn(Protocol):
+    def __call__(
+        self,
+        query: Item,
+        items: list[Item],
+        *,
+        instruction: str | None = ...,
+    ) -> list[float]: ...
 # ---------------------------------------------------------------------------
 # RoPE utilities (eliminates 7 identical copies)
 # ---------------------------------------------------------------------------
@@ -140,3 +154,67 @@ def resolve_embedding_options(
         opts.get("query_template", default_query_template),
         opts.get("doc_template", default_doc_template),
     )
+# ---------------------------------------------------------------------------
+# Score-pair grouping (shared by ColBERT-family adapters)
+# ---------------------------------------------------------------------------
+def grouped_score_pairs(
+    score_fn: _ScoreFn,
+    queries: list[Item],
+    docs: list[Item],
+    *,
+    instruction: str | None = None,
+) -> ScoreOutput:
+    """Run a per-query ``score()`` callable over parallel (query, doc) pairs.
+    Groups pairs by ``(query.text, query.id, instruction)`` so each unique
+    query is encoded once and its docs are scored as one batch. Used by
+    ColBERT-family adapters to satisfy the worker's ``score_pairs()``
+    contract while reusing the optimized batched ``score()``.
+    Queries with ``text is None`` are not supported and raise ``ValueError``
+    (ColBERT scoring requires text). The grouping key is
+    ``(query.text, query.id or "", instruction or "")`` — two distinct
+    ``Item`` objects with identical text/id/instruction collapse to one
+    encoding pass.
+    Args:
+        score_fn: Bound ``adapter.score(query, items, *, instruction=None)``.
+        queries: Query items (parallel to docs).
+        docs: Document items to score.
+        instruction: Optional instruction passed through to ``score_fn``.
+    Returns:
+        ``ScoreOutput`` with one float per pair, in the original input order.
+    Raises:
+        ValueError: If ``queries`` and ``docs`` lengths differ, or any query
+            lacks text.
+    """
+    if len(queries) != len(docs):
+        msg = f"queries and docs must be parallel; got {len(queries)} vs {len(docs)}"
+        raise ValueError(msg)
+    if not docs:
+        return ScoreOutput(scores=np.zeros(0, dtype=np.float32), batch_size=0)
+    groups: dict[tuple[str, str, str], list[int]] = {}
+    for i, q in enumerate(queries):
+        if q.text is None:
+            msg = f"grouped_score_pairs requires queries[{i}].text; got None"
+            raise ValueError(msg)
+        key = (q.text, q.id or "", instruction or "")
+        groups.setdefault(key, []).append(i)
+    scores = np.zeros(len(docs), dtype=np.float32)
+    for indices in groups.values():
+        q = queries[indices[0]]
+        group_docs = [docs[i] for i in indices]
+        group_scores = score_fn(q, group_docs, instruction=instruction)
+        for idx, s in zip(indices, group_scores, strict=True):
+            scores[idx] = float(s)
+    return ScoreOutput(scores=scores, batch_size=len(docs))

{sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/base.py RENAMED Viewed

@@ -127,6 +127,20 @@ class ModelAdapter(ABC):
             device: Device string (e.g., "cuda:0", "cpu").
         """
+    def warmup(self) -> None:
+        """Run a warmup forward pass on the loaded model.
+        Called by the model loader after ``load()`` has completed. The default
+        implementation is a no-op for adapters that do not need warmup. Adapters
+        that compile kernels on first call (e.g. flash-attention) or otherwise
+        benefit from a priming pass should override this and run a single
+        inference pass against a tiny synthetic input.
+        Splitting this from ``load()`` lets the cold-start instrumentation
+        attribute deserialize and warmup time separately.
+        """
+        return
     @abstractmethod
     def unload(self) -> None:
         """Unload the model and free resources.

{sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bert_flash/__init__.py RENAMED Viewed

@@ -172,6 +172,7 @@ class BertFlashAdapter(PEFTLoRAMixin, FlashBaseAdapter):
             self._max_seq_length,
         )
+    def warmup(self) -> None:
         # Warmup flash attention kernels
         logger.info("Warming up CUDA kernels...")
         warmup_items = [Item(text="warmup")]

{sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3/__init__.py RENAMED Viewed

@@ -26,6 +26,7 @@ from torch.nn import functional
 from sie_server.adapters._base_adapter import BaseAdapter
 from sie_server.adapters._spec import AdapterSpec
 from sie_server.adapters._types import ERR_NOT_LOADED, ERR_REQUIRES_TEXT, ComputePrecision
+from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
 from sie_server.core.inference_output import EncodeOutput, SparseVector
 from sie_server.types.inputs import Item
@@ -35,16 +36,19 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-class BGEM3Adapter(BaseAdapter):
+class BGEM3Adapter(BGEM3ScoreMixin, BaseAdapter):
     """Adapter for BAAI/bge-m3 model.
     This adapter uses direct PyTorch inference with Flash Attention 2
     for optimal performance (dense, sparse, and multi-vector outputs).
+    Scoring (`/v1/score`) is supported via :class:`BGEM3ScoreMixin`, which
+    composes scores from the encoder outputs (dense / sparse / multivector).
     """
     spec = AdapterSpec(
         inputs=("text",),
-        outputs=("dense", "sparse", "multivector"),
+        outputs=("dense", "sparse", "multivector", "score"),
         dense_dim=1024,
         sparse_dim=250002,
         multivector_dim=1024,

{sie_server-0.3.0 → sie_server-0.3.2}/src/sie_server/adapters/bge_m3_flag/__init__.py RENAMED Viewed

@@ -23,6 +23,7 @@ import torch
 from sie_server.adapters._base_adapter import BaseAdapter
 from sie_server.adapters._spec import AdapterSpec
 from sie_server.adapters._types import ERR_NOT_LOADED, ERR_REQUIRES_TEXT, ComputePrecision
+from sie_server.adapters.bge_m3_score_mixin import BGEM3ScoreMixin
 from sie_server.core.inference_output import EncodeOutput, SparseVector
 if TYPE_CHECKING:
@@ -35,16 +36,19 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-class BGEM3FlagAdapter(BaseAdapter):
+class BGEM3FlagAdapter(BGEM3ScoreMixin, BaseAdapter):
     """Adapter for BAAI/bge-m3 using FlagEmbedding library.
     This adapter uses the FlagEmbedding library's BGEM3FlagModel.
     For better performance, use BGEM3Adapter which uses Flash Attention 2.
+    Scoring (`/v1/score`) is supported via :class:`BGEM3ScoreMixin`, which
+    composes scores from the encoder outputs (dense / sparse / multivector).
     """
     spec = AdapterSpec(
         inputs=("text",),
-        outputs=("dense", "sparse", "multivector"),
+        outputs=("dense", "sparse", "multivector", "score"),
         dense_dim=1024,
         sparse_dim=250002,
         multivector_dim=1024,

sie-server 0.3.0__tar.gz → 0.3.2__tar.gz

sie-server 0.3.0.tar.gz → 0.3.2.tar.gz