PyPI - haystack-velesdb - Versions diffs - 2.0.0__tar.gz → 3.0.0__tar.gz - Mend

haystack-velesdb 2.0.0.tar.gz → 3.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

{haystack_velesdb-2.0.0/src/haystack_velesdb.egg-info → haystack_velesdb-3.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haystack-velesdb
-Version: 2.0.0
+Version: 3.0.0
 Summary: Haystack 2.x DocumentStore for VelesDB: The Local AI Memory Database.
 Author-email: VelesDB Team <contact@wiscale.fr>
 License: MIT

{haystack_velesdb-2.0.0 → haystack_velesdb-3.0.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "haystack-velesdb"
-version = "2.0.0"
+version = "3.0.0"
 description = "Haystack 2.x DocumentStore for VelesDB: The Local AI Memory Database."
 readme = "README.md"
 license = {text = "MIT"}

{haystack_velesdb-2.0.0 → haystack_velesdb-3.0.0}/src/haystack_velesdb/__init__.py RENAMED Viewed

@@ -3,4 +3,4 @@
 from haystack_velesdb.document_store import VelesDBDocumentStore
 __all__ = ["VelesDBDocumentStore"]
-__version__ = "2.0.0"
+__version__ = "3.0.0"

{haystack_velesdb-2.0.0 → haystack_velesdb-3.0.0}/src/haystack_velesdb/document_store.py RENAMED Viewed

@@ -5,7 +5,6 @@ as the vector backend in any Haystack 2.x indexing or retrieval pipeline.
 """
 from __future__ import annotations
-import hashlib
 import logging
 from typing import Any, Dict, List, Optional
@@ -15,9 +14,12 @@ from haystack.document_stores.errors import DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
 import velesdb
+from velesdb_common.fusion import build_fusion_strategy
+from velesdb_common.ids import stable_hash_id
 from velesdb_common.security import (
     validate_collection_name,
     validate_metric,
+    validate_named_sparse_vector,
     validate_path,
 )
@@ -29,7 +31,6 @@ _DEFAULT_COLLECTION = "haystack_documents"
 _DEFAULT_DIMENSION = 768
 _DEFAULT_METRIC = "cosine"
 _DEFAULT_SCROLL_LIMIT = 10_000
-_INT63_MASK = (1 << 63) - 1
 # Reserved keys stored by this integration in the VelesDB payload.
 _RESERVED_PAYLOAD_KEYS = frozenset({"_doc_id", "content"})
@@ -193,25 +194,17 @@ def _translate_haystack_filter(
     return {"condition": _translate_condition(filters)}
-def _str_id_to_int(doc_id: str) -> int:
-    """Map a Haystack string document ID to a stable positive 63-bit integer.
-    Uses the first 8 bytes of SHA-256, masked to 63 bits (~9.2 × 10¹⁸ slots).
-    Collision probability for a 1 M-document collection is roughly 5 × 10⁻¹⁴ —
-    negligible for typical RAG workloads but not zero.  If two distinct string
-    IDs produce the same integer ID, :meth:`write_documents` raises
-    :class:`ValueError` rather than silently overwriting the existing document.
-    """
-    return int.from_bytes(hashlib.sha256(doc_id.encode()).digest()[:8], "big") & _INT63_MASK
-def _doc_to_point(doc: Document) -> dict:
+def _doc_to_point(doc: Document, sparse_vector: Optional[dict] = None) -> dict:
     """Convert a Haystack Document to a VelesDB point dict.
     Reserved payload keys (``_doc_id``, ``content``) are always written from
     the document's canonical fields, not from ``doc.meta``.  Any meta entry
     that shares a reserved name is silently dropped from the payload to
     prevent round-trip corruption.
+    When *sparse_vector* is given (a flat ``dict[int, float]`` or a named
+    ``dict[str, dict[int, float]]`` mapping) it is attached so the upsert
+    creates the matching sparse index for hybrid retrieval.
     """
     payload: dict = {}
     # Merge meta first; reserved keys are excluded so they cannot
@@ -223,9 +216,11 @@ def _doc_to_point(doc: Document) -> dict:
     payload["_doc_id"] = doc.id
     if doc.content is not None:
         payload["content"] = doc.content
-    point: dict = {"id": _str_id_to_int(doc.id), "payload": payload}
+    point: dict = {"id": stable_hash_id(doc.id), "payload": payload}
     if doc.embedding is not None:
         point["vector"] = list(doc.embedding)
+    if sparse_vector is not None:
+        point["sparse_vector"] = sparse_vector
     return point
@@ -281,7 +276,7 @@ def _build_int_id_map(documents: List[Document]) -> Dict[int, str]:
     """
     int_id_map: Dict[int, str] = {}
     for doc in documents:
-        iid = _str_id_to_int(doc.id)
+        iid = stable_hash_id(doc.id)
         if iid in int_id_map and int_id_map[iid] != doc.id:
             raise ValueError(
                 f"SHA-256 collision in write batch: '{int_id_map[iid]}' and "
@@ -342,18 +337,47 @@ def _filter_skip_policy(
     return [doc for doc in documents if str_to_int[doc.id] not in existing_int_ids]
-def _documents_to_points(documents: List[Document]) -> List[dict]:
+def _build_sparse_by_id(
+    documents: List[Document],
+    sparse_vectors: Optional[List[dict]],
+) -> Dict[str, dict]:
+    """Map each document id to its validated sparse vector.
+    Keying by document id (rather than list position) keeps the sparse
+    vectors aligned with their documents even when ``DuplicatePolicy.SKIP``
+    drops a subset before upsert. Each entry is validated as a flat
+    ``dict[int, float]`` or a named ``dict[str, dict[int, float]]`` mapping.
+    """
+    if sparse_vectors is None:
+        return {}
+    sparse_by_id: Dict[str, dict] = {}
+    for idx, doc in enumerate(documents):
+        if idx >= len(sparse_vectors):
+            break
+        sparse_by_id[doc.id] = validate_named_sparse_vector(sparse_vectors[idx])
+    return sparse_by_id
+def _documents_to_points(
+    documents: List[Document],
+    sparse_by_id: Optional[Dict[str, dict]] = None,
+) -> List[dict]:
     """Convert each document to its VelesDB point dict, logging documents
     that lack an embedding so the caller still gets feedback when the
     underlying SDK accepts vector-less points.
+    *sparse_by_id* (when given) maps document ids to their sparse vector dict;
+    each is attached to its point so the upsert creates the corresponding
+    sparse index.
     """
+    sparse_by_id = sparse_by_id or {}
     points: List[dict] = []
     for doc in documents:
         if doc.embedding is None:
             logger.warning(
                 "Document '%s' has no embedding; stored without vector.", doc.id
             )
-        points.append(_doc_to_point(doc))
+        points.append(_doc_to_point(doc, sparse_vector=sparse_by_id.get(doc.id)))
     return points
@@ -460,6 +484,7 @@ class VelesDBDocumentStore:
         self,
         documents: List[Document],
         policy: DuplicatePolicy = DuplicatePolicy.NONE,
+        sparse_vectors: Optional[List[dict]] = None,
     ) -> int:
         """Write *documents* to VelesDB and return the number written.
@@ -476,6 +501,16 @@ class VelesDBDocumentStore:
           incoming document already exists. Prefer ``OVERWRITE`` or
           ``NONE`` for large batches to avoid the pre-scan cost.
+        Args:
+            documents: Documents to write.
+            policy: Duplicate-handling policy (see above).
+            sparse_vectors: Optional list aligned with *documents*; each entry
+                is a flat ``dict[int, float]`` or a named
+                ``dict[str, dict[int, float]]`` mapping (e.g.
+                ``{"bge_m3": {0: 1.5}}``). A named mapping creates the named
+                sparse index so it can later be queried with
+                ``sparse_index_name="bge_m3"``.
         Raises:
             DuplicateDocumentError: When *policy* is ``FAIL`` and at least
                 one document already exists in the store.
@@ -484,6 +519,7 @@ class VelesDBDocumentStore:
         """
         if not documents:
             return 0
+        sparse_by_id = _build_sparse_by_id(documents, sparse_vectors)
         int_id_map = _build_int_id_map(documents)
         col = self._get_collection()
         if policy == DuplicatePolicy.FAIL:
@@ -495,7 +531,7 @@ class VelesDBDocumentStore:
                 return 0
         else:
             survivors = documents
-        points = _documents_to_points(survivors)
+        points = _documents_to_points(survivors, sparse_by_id)
         result = col.upsert(points)
         return result if isinstance(result, int) else len(points)
@@ -506,7 +542,7 @@ class VelesDBDocumentStore:
         """Delete documents identified by their Haystack string IDs."""
         if not document_ids:
             return
-        int_ids = [_str_id_to_int(did) for did in document_ids]
+        int_ids = [stable_hash_id(did) for did in document_ids]
         self._get_collection().delete(int_ids)
     def embedding_retrieval(
@@ -516,6 +552,8 @@ class VelesDBDocumentStore:
         top_k: int = 10,
         filters: Optional[Dict[str, Any]] = None,
         scale_score: bool = True,
+        fusion: Optional[str] = None,
+        fusion_params: Optional[dict] = None,
     ) -> List[Document]:
         """Return the *top_k* documents most similar to *query_embedding*.
@@ -527,13 +565,29 @@ class VelesDBDocumentStore:
                 forwarded; ``meta.<key>`` is stripped to ``<key>``.
             scale_score: When ``True`` and ``metric="cosine"``, scores are
                 normalised from ``[-1, 1]`` to ``[0, 1]``. Ignored for other
-                metrics, where raw scores are returned unchanged.
+                metrics, where raw scores are returned unchanged. Score
+                scaling does not apply to fused (``fusion``) results, whose
+                scores come from the fusion strategy rather than the metric.
+            fusion: Optional fusion strategy name applied to the ranking —
+                one of ``"average"``, ``"maximum"``, ``"rrf"``,
+                ``"weighted"``, ``"relative_score"`` / ``"rsf"``. When set,
+                the query is ranked through the chosen
+                :class:`velesdb.FusionStrategy`, which changes the result
+                ordering relative to the default dense ranking. ``filters``
+                are not supported together with ``fusion``.
+            fusion_params: Optional parameters for *fusion* (see
+                :func:`velesdb_common.fusion.build_fusion_strategy`).
         Raises:
             NotImplementedError: When *filters* uses an operator VelesDB
                 does not support.
-            ValueError: When *filters* is structurally malformed.
+            ValueError: When *filters* is structurally malformed, or when
+                *filters* is combined with *fusion*.
         """
+        if fusion is not None:
+            return self._fusion_retrieval(
+                query_embedding, top_k, filters, fusion, fusion_params
+            )
         veles_filter = _translate_haystack_filter(filters)
         results: List[dict] = self._get_collection().search_request(
             velesdb.SearchOptions(
@@ -544,6 +598,36 @@ class VelesDBDocumentStore:
         )
         return [_result_to_doc(r, scale_score=scale_score, metric=self._metric) for r in results]
+    def _fusion_retrieval(
+        self,
+        query_embedding: List[float],
+        top_k: int,
+        filters: Optional[Dict[str, Any]],
+        fusion: str,
+        fusion_params: Optional[dict],
+    ) -> List[Document]:
+        """Rank a single query through a :class:`velesdb.FusionStrategy`.
+        Delegates to ``Collection.multi_query_search`` with a one-element
+        query list so the chosen strategy decides the fused scores. The
+        shared :func:`velesdb_common.fusion.build_fusion_strategy` builder is
+        reused (same as the LangChain and LlamaIndex integrations).
+        """
+        if filters is not None:
+            raise ValueError(
+                "fusion cannot be combined with filters; apply filters in a "
+                "separate dense embedding_retrieval call or omit fusion."
+            )
+        strategy = build_fusion_strategy(fusion, fusion_params)
+        results: List[dict] = self._get_collection().multi_query_search(
+            vectors=[query_embedding],
+            top_k=top_k,
+            fusion=strategy,
+        )
+        # Fused scores are strategy-derived, not metric similarities, so the
+        # cosine [-1, 1] -> [0, 1] rescaling is intentionally not applied.
+        return [_result_to_doc(r, metric=self._metric) for r in results]
     # ------------------------------------------------------------------
     # Haystack pipeline serialisation
     # ------------------------------------------------------------------

{haystack_velesdb-2.0.0 → haystack_velesdb-3.0.0/src/haystack_velesdb.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: haystack-velesdb
-Version: 2.0.0
+Version: 3.0.0
 Summary: Haystack 2.x DocumentStore for VelesDB: The Local AI Memory Database.
 Author-email: VelesDB Team <contact@wiscale.fr>
 License: MIT

{haystack_velesdb-2.0.0 → haystack_velesdb-3.0.0}/tests/test_document_store.py RENAMED Viewed

@@ -43,6 +43,78 @@ class DuplicateDocumentError(Exception):
 # ---------------------------------------------------------------------------
+class _FakeFusionStrategy:
+    """Minimal stand-in for velesdb.FusionStrategy.
+    Records the strategy name so a fake ``multi_query_search`` can vary its
+    result ordering by strategy (mirrors the real binding, where different
+    strategies produce different fused scores).
+    """
+    def __init__(self, name: str, params: Optional[dict] = None) -> None:
+        self.name = name
+        self.params = params or {}
+    @staticmethod
+    def average() -> "_FakeFusionStrategy":
+        return _FakeFusionStrategy("average")
+    @staticmethod
+    def maximum() -> "_FakeFusionStrategy":
+        return _FakeFusionStrategy("maximum")
+    @staticmethod
+    def rrf(k: int = 60) -> "_FakeFusionStrategy":
+        return _FakeFusionStrategy("rrf", {"k": k})
+    @staticmethod
+    def weighted(
+        avg_weight: float = 0.6,
+        max_weight: float = 0.3,
+        hit_weight: float = 0.1,
+    ) -> "_FakeFusionStrategy":
+        return _FakeFusionStrategy(
+            "weighted",
+            {
+                "avg_weight": avg_weight,
+                "max_weight": max_weight,
+                "hit_weight": hit_weight,
+            },
+        )
+    @staticmethod
+    def relative_score(
+        dense_weight: float, sparse_weight: float
+    ) -> "_FakeFusionStrategy":
+        return _FakeFusionStrategy(
+            "relative_score",
+            {"dense_weight": dense_weight, "sparse_weight": sparse_weight},
+        )
+def _build_fake_fusion(
+    fusion: str, fusion_params: Optional[dict] = None
+) -> _FakeFusionStrategy:
+    """Stand-in for velesdb_common.fusion.build_fusion_strategy.
+    Maps the strategy name to the matching FusionStrategy factory so the
+    document store's fusion routing can be exercised without the real
+    velesdb_common package.
+    """
+    params = fusion_params or {}
+    if fusion in ("relative_score", "rsf"):
+        return _FakeFusionStrategy.relative_score(
+            params.get("dense_weight", 0.5), params.get("sparse_weight", 0.5)
+        )
+    if fusion == "weighted":
+        return _FakeFusionStrategy.weighted()
+    if fusion == "maximum":
+        return _FakeFusionStrategy.maximum()
+    if fusion == "average":
+        return _FakeFusionStrategy.average()
+    return _FakeFusionStrategy.rrf(params.get("k", 60))
 class _FakeSearchOptions:
     """Minimal stand-in for velesdb.SearchOptions used by search_request."""
@@ -95,6 +167,33 @@ class _FakeCollection:
         """Canonical search entry point — delegate to the legacy `search`."""
         return self.search(opts.vector, top_k=opts.top_k, filter=opts.filter)
+    def multi_query_search(
+        self,
+        vectors: list,
+        top_k: int = 10,
+        fusion: Any = None,
+        filter: Any = None,  # pylint: disable=redefined-builtin
+    ) -> list:
+        """Fused multi-query search whose ordering depends on the strategy.
+        The real binding produces strategy-dependent fused scores. This fake
+        reproduces that observable behaviour: the points are sorted by a
+        per-strategy key so callers can assert that ``fusion='rsf'`` and
+        ``fusion='weighted'`` yield different orderings.
+        """
+        del vectors, filter  # the fake ignores these
+        points = list(self._points.values())
+        name = getattr(fusion, "name", "rrf")
+        # Reverse the order for relative_score so the resulting ranking
+        # differs from the default (rrf) and from weighted.
+        reverse = name in ("relative_score", "rsf")
+        ordered = points[::-1] if reverse else points
+        results = [
+            {"id": p["id"], "score": 0.9, "payload": p.get("payload", {})}
+            for p in ordered[:top_k]
+        ]
+        return results
     def scroll(  # pylint: disable=redefined-builtin
         self,
         *,
@@ -172,7 +271,9 @@ def _load_module() -> types.ModuleType:
     sys.modules["haystack.document_stores.errors"] = errors_mod
     sys.modules["velesdb"] = types.SimpleNamespace(  # type: ignore
-        Database=_FakeDatabase, SearchOptions=_FakeSearchOptions
+        Database=_FakeDatabase,
+        SearchOptions=_FakeSearchOptions,
+        FusionStrategy=_FakeFusionStrategy,
     )
     # Stub velesdb_common.security with no-op validators (real package has its own tests).
@@ -181,13 +282,29 @@ def _load_module() -> types.ModuleType:
     vc_mod = types.ModuleType("velesdb_common")
     sys.modules["velesdb_common"] = vc_mod
+    # Load the REAL velesdb_common.ids (pure stdlib) so the store exercises the
+    # canonical stable_hash_id rather than a forked copy (single-source-of-truth
+    # + license hygiene — see docs/planning/CORE_PARITY_REMEDIATION.md T3).
+    _ids_path = Path(__file__).resolve().parents[2] / "common" / "src" / "velesdb_common" / "ids.py"
+    _ids_spec = importlib.util.spec_from_file_location("velesdb_common.ids", _ids_path)
+    assert _ids_spec and _ids_spec.loader
+    vc_ids = importlib.util.module_from_spec(_ids_spec)
+    sys.modules["velesdb_common.ids"] = vc_ids
+    _ids_spec.loader.exec_module(vc_ids)
     vc_sec = types.ModuleType("velesdb_common.security")
     vc_sec.validate_path = _passthrough  # type: ignore[attr-defined]
     vc_sec.validate_collection_name = _passthrough  # type: ignore[attr-defined]
     vc_sec.validate_metric = _passthrough  # type: ignore[attr-defined]
+    vc_sec.validate_named_sparse_vector = _passthrough  # type: ignore[attr-defined]
     vc_sec.SecurityError = ValueError  # type: ignore[attr-defined]
     sys.modules["velesdb_common.security"] = vc_sec
+    vc_fusion = types.ModuleType("velesdb_common.fusion")
+    vc_fusion.build_fusion_strategy = _build_fake_fusion  # type: ignore[attr-defined]
+    sys.modules["velesdb_common.fusion"] = vc_fusion
     pkg = types.ModuleType("haystack_velesdb")
     pkg.__path__ = [str(root)]  # type: ignore[attr-defined]
     sys.modules["haystack_velesdb"] = pkg
@@ -782,3 +899,169 @@ def test_embedding_retrieval_translates_haystack_filter_to_veles_shape() -> None
         }, "embedding_retrieval must translate Haystack filter to VelesDB Filter shape"
     finally:
         _MOD.velesdb = original_velesdb
+# ---------------------------------------------------------------------------
+# I1: fusion (RSF / Weighted) on embedding_retrieval
+# ---------------------------------------------------------------------------
+def _store_with_three_docs(name: str) -> Any:
+    store = _MOD.VelesDBDocumentStore(path="/tmp/hs", collection_name=name)
+    store.write_documents([
+        Document(id="d1", content="one", embedding=[0.1]),
+        Document(id="d2", content="two", embedding=[0.2]),
+        Document(id="d3", content="three", embedding=[0.3]),
+    ])
+    return store
+def test_embedding_retrieval_fusion_changes_ordering_vs_default() -> None:
+    """fusion='rsf' must reorder results relative to the default ranking."""
+    store = _store_with_three_docs("t_fusion_rsf")
+    default_ids = [d.id for d in store.embedding_retrieval([0.1], top_k=3)]
+    rsf_ids = [
+        d.id for d in store.embedding_retrieval([0.1], top_k=3, fusion="rsf")
+    ]
+    assert rsf_ids != default_ids, "fusion='rsf' must change result ordering"
+    assert sorted(rsf_ids) == sorted(default_ids), "same doc set, different order"
+def test_embedding_retrieval_rsf_and_weighted_differ() -> None:
+    """rsf and weighted fusion must produce different orderings."""
+    store = _store_with_three_docs("t_fusion_pair")
+    rsf_ids = [
+        d.id for d in store.embedding_retrieval([0.1], top_k=3, fusion="rsf")
+    ]
+    weighted_ids = [
+        d.id
+        for d in store.embedding_retrieval([0.1], top_k=3, fusion="weighted")
+    ]
+    assert rsf_ids != weighted_ids, "rsf and weighted must differ in ordering"
+def test_embedding_retrieval_fusion_passes_params() -> None:
+    """fusion_params must reach build_fusion_strategy and the collection."""
+    captured: dict = {}
+    class _CapturingCollection(_FakeCollection):
+        def multi_query_search(
+            self,
+            vectors: list,
+            top_k: int = 10,
+            fusion: Any = None,
+            filter: Any = None,  # pylint: disable=redefined-builtin
+        ) -> list:
+            captured["fusion_name"] = getattr(fusion, "name", None)
+            captured["fusion_params"] = getattr(fusion, "params", None)
+            return super().multi_query_search(
+                vectors, top_k=top_k, fusion=fusion, filter=filter
+            )
+    class _CapturingDatabase:
+        def __init__(self, path: str) -> None:
+            self._col = _CapturingCollection()
+        def get_collection(self, name: str) -> _CapturingCollection:
+            return self._col
+        def create_collection(
+            self, name: str, dimension: int, metric: str
+        ) -> _CapturingCollection:
+            return self._col
+    original_velesdb = _MOD.velesdb
+    try:
+        _MOD.velesdb = types.SimpleNamespace(  # type: ignore
+            Database=_CapturingDatabase,
+            SearchOptions=_FakeSearchOptions,
+            FusionStrategy=_FakeFusionStrategy,
+        )
+        store = _MOD.VelesDBDocumentStore(
+            path="/tmp/hs", collection_name="t_fusion_params"
+        )
+        store.write_documents([Document(id="p", content="x", embedding=[0.5])])
+        store.embedding_retrieval(
+            [0.5],
+            top_k=3,
+            fusion="rsf",
+            fusion_params={"dense_weight": 0.7, "sparse_weight": 0.3},
+        )
+        assert captured["fusion_name"] == "relative_score"
+        assert captured["fusion_params"]["dense_weight"] == 0.7
+    finally:
+        _MOD.velesdb = original_velesdb
+# ---------------------------------------------------------------------------
+# I2: named-sparse-index creation on write_documents
+# ---------------------------------------------------------------------------
+def test_write_documents_forwards_named_sparse_vectors() -> None:
+    """A named sparse vector dict must reach the upserted point so the
+    underlying named sparse index is created.
+    """
+    captured: dict = {}
+    class _CapturingCollection(_FakeCollection):
+        def upsert(self, points: list) -> int:
+            captured["points"] = points
+            return super().upsert(points)
+    class _CapturingDatabase:
+        def __init__(self, path: str) -> None:
+            self._col = _CapturingCollection()
+        def get_collection(self, name: str) -> _CapturingCollection:
+            return self._col
+        def create_collection(
+            self, name: str, dimension: int, metric: str
+        ) -> _CapturingCollection:
+            return self._col
+    original_velesdb = _MOD.velesdb
+    try:
+        _MOD.velesdb = types.SimpleNamespace(  # type: ignore
+            Database=_CapturingDatabase,
+            SearchOptions=_FakeSearchOptions,
+            FusionStrategy=_FakeFusionStrategy,
+        )
+        store = _MOD.VelesDBDocumentStore(
+            path="/tmp/hs", collection_name="t_named_sparse"
+        )
+        store.write_documents(
+            [Document(id="s1", content="hi", embedding=[0.5])],
+            sparse_vectors=[{"bge_m3": {0: 1.5, 7: 0.8}}],
+        )
+        point = captured["points"][0]
+        assert point["sparse_vector"] == {"bge_m3": {0: 1.5, 7: 0.8}}
+    finally:
+        _MOD.velesdb = original_velesdb
+def test_id_hashing_uses_canonical_stable_hash_id():
+    """T3: the store delegates string->int ID hashing to the shared
+    velesdb_common.ids.stable_hash_id, not a forked local copy. This keeps the
+    same logical document mapped to the same VelesDB point ID across every
+    integration (single source of truth) and avoids re-implementing the hash in
+    an MIT package (license hygiene). See docs/planning/CORE_PARITY_REMEDIATION.md.
+    """
+    import hashlib
+    import velesdb_common.ids as canonical_ids
+    # the module imported the canonical helper, and the forked copy is gone
+    assert _MOD.stable_hash_id is canonical_ids.stable_hash_id
+    assert not hasattr(_MOD, "_str_id_to_int")
+    assert not hasattr(_MOD, "_INT63_MASK")
+    # and it yields the canonical positive-63-bit value
+    for doc_id in ["", "doc-1", "héllo-世界", "Document_42::chunk#3", "a" * 500]:
+        expected = (
+            int.from_bytes(hashlib.sha256(doc_id.encode("utf-8")).digest()[:8], "big")
+            & 0x7FFFFFFFFFFFFFFF
+        )
+        assert _MOD.stable_hash_id(doc_id) == expected
+        assert 0 <= _MOD.stable_hash_id(doc_id) <= 0x7FFFFFFFFFFFFFFF