perseus-vault-haystack 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: '3.12'
18
+
19
+ - name: Install build
20
+ run: pip install build==1.2.2.post1
21
+
22
+ - name: Build
23
+ run: python -m build
24
+
25
+ - uses: actions/upload-artifact@v4
26
+ with:
27
+ name: dist
28
+ path: dist/
29
+
30
+ publish:
31
+ needs: build
32
+ runs-on: ubuntu-latest
33
+ environment:
34
+ name: pypi
35
+ url: https://pypi.org/p/perseus-vault-haystack
36
+ permissions:
37
+ id-token: write
38
+ steps:
39
+ - uses: actions/download-artifact@v4
40
+ with:
41
+ name: dist
42
+ path: dist/
43
+
44
+ - name: Publish to PyPI
45
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,28 @@
1
+ name: Test
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ os: [ubuntu-latest, windows-latest, macos-latest]
16
+ python-version: ['3.10', '3.12']
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install
25
+ run: pip install -e ".[test]"
26
+
27
+ - name: Run tests
28
+ run: python -m pytest tests/ -q
@@ -0,0 +1,14 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ build/
6
+ dist/
7
+ .pytest_cache/
8
+ .venv/
9
+ venv/
10
+ *.db
11
+ perseus-vault
12
+ perseus-vault.exe
13
+ mimir
14
+ mimir.exe
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Perseus Computing LLC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: perseus-vault-haystack
3
+ Version: 0.1.0
4
+ Summary: Local-first, encrypted persistent memory for Haystack 2.x pipelines — backed by Perseus Vault.
5
+ Project-URL: Homepage, https://github.com/Perseus-Computing-LLC/mimir-haystack
6
+ Project-URL: Repository, https://github.com/Perseus-Computing-LLC/mimir-haystack
7
+ Project-URL: Bug Tracker, https://github.com/Perseus-Computing-LLC/mimir-haystack/issues
8
+ Project-URL: Perseus Vault, https://github.com/Perseus-Computing-LLC/perseus-vault
9
+ Author-email: Perseus Computing LLC <hermes@perseus.observer>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: agents,haystack,mcp,memory,perseus-vault,rag
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: haystack-ai>=2.0.0
25
+ Provides-Extra: test
26
+ Requires-Dist: pytest>=7.0; extra == 'test'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # perseus-vault-haystack
30
+
31
+ Local-first, encrypted **persistent memory for [Haystack](https://haystack.deepset.ai/) 2.x pipelines**, backed by [Perseus Vault](https://github.com/Perseus-Computing-LLC/perseus-vault) (formerly "Mimir"/"Mneme").
32
+
33
+ Perseus Vault is an open-source (MIT) memory engine that runs entirely on your machine, stores data in an encrypted SQLite database, and exposes 40+ tools over the Model Context Protocol (MCP). This package wraps Perseus Vault's `remember` / `recall` / `forget` tools as Haystack components so your pipelines can persist and retrieve documents across runs — no external vector database or API key required.
34
+
35
+ ## Components
36
+
37
+ | Class | Type | Role |
38
+ | --- | --- | --- |
39
+ | `PerseusVaultMemoryStore` | Memory store | Owns the `perseus-vault` subprocess and config; holds `add_memories` / `search_memories` / `delete_all_memories`. |
40
+ | `PerseusVaultMemoryWriter` | `@component` | Pipeline sink that persists `Document`s into the store. |
41
+ | `PerseusVaultMemoryRetriever` | `@component` | Pipeline source that retrieves the most relevant `Document`s for a query. |
42
+
43
+ ## Prerequisite: the `perseus-vault` binary
44
+
45
+ These components talk to a local `perseus-vault` executable over stdio. Install it first:
46
+
47
+ 1. Download a pre-built binary from the [Perseus Vault releases page](https://github.com/Perseus-Computing-LLC/perseus-vault/releases) (or build from source).
48
+ 2. Put it on your `$PATH` (so `perseus-vault` resolves), **or** pass its absolute path via `perseus_vault_binary=`.
49
+
50
+ You can verify it works with:
51
+
52
+ ```bash
53
+ perseus-vault --version
54
+ ```
55
+
56
+ ## Install
57
+
58
+ ```bash
59
+ pip install perseus-vault-haystack
60
+ ```
61
+
62
+ This pulls in `haystack-ai`. The `perseus-vault` binary is a separate, language-agnostic dependency (see above).
63
+
64
+ ## Quickstart — write then read in a pipeline
65
+
66
+ ```python
67
+ from haystack import Pipeline, Document
68
+ from perseus_vault_haystack import (
69
+ PerseusVaultMemoryStore,
70
+ PerseusVaultMemoryWriter,
71
+ PerseusVaultMemoryRetriever,
72
+ )
73
+
74
+ # One store, shared by both components (single perseus-vault subprocess).
75
+ store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db", category="docs")
76
+
77
+ # --- Write documents into persistent memory ---
78
+ write_pipe = Pipeline()
79
+ write_pipe.add_component("writer", PerseusVaultMemoryWriter(memory_store=store))
80
+ write_pipe.run(
81
+ {
82
+ "writer": {
83
+ "documents": [
84
+ Document(content="Perseus Vault is a local-first, encrypted memory engine."),
85
+ Document(content="Haystack is an open-source LLM framework by deepset."),
86
+ ]
87
+ }
88
+ }
89
+ )
90
+
91
+ # --- Retrieve them later (even in a separate process / run) ---
92
+ read_pipe = Pipeline()
93
+ read_pipe.add_component("retriever", PerseusVaultMemoryRetriever(memory_store=store, top_k=3))
94
+ result = read_pipe.run({"retriever": {"query": "What is Perseus Vault?"}})
95
+
96
+ for doc in result["retriever"]["documents"]:
97
+ print(doc.score, doc.content)
98
+ ```
99
+
100
+ Because Perseus Vault persists to an encrypted SQLite file, documents written in one run are available in any future run pointed at the same `db_path`.
101
+
102
+ ### Use directly (without a pipeline)
103
+
104
+ ```python
105
+ from haystack import Document
106
+ from perseus_vault_haystack import PerseusVaultMemoryStore
107
+
108
+ store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
109
+ store.add_memories([Document(content="Remember this fact.")])
110
+ hits = store.search_memories("fact", top_k=5)
111
+ ```
112
+
113
+ ## Configuration
114
+
115
+ `PerseusVaultMemoryStore` accepts:
116
+
117
+ - `db_path` — path to the Perseus Vault SQLite database (default `~/.mimir/haystack.db`).
118
+ - `perseus_vault_binary` — name on `$PATH` or absolute path to the executable (default `perseus-vault`).
119
+ - `category` — Perseus Vault category scoping all writes/recalls for this store (default `haystack-memory`). Use distinct categories to isolate corpora.
120
+ - `top_k` — default number of documents returned by retrieval (default `10`).
121
+ - `timeout_s` — per-RPC timeout for the subprocess (default `30`).
122
+
123
+ ## Serialization
124
+
125
+ All three classes implement `to_dict()` / `from_dict()` and round-trip through `Pipeline.dumps()` / `Pipeline.loads()`.
126
+
127
+ ## License
128
+
129
+ MIT © 2026 Perseus Computing LLC. Perseus Vault (formerly Mimir/Mneme) is also MIT-licensed.
@@ -0,0 +1,101 @@
1
+ # perseus-vault-haystack
2
+
3
+ Local-first, encrypted **persistent memory for [Haystack](https://haystack.deepset.ai/) 2.x pipelines**, backed by [Perseus Vault](https://github.com/Perseus-Computing-LLC/perseus-vault) (formerly "Mimir"/"Mneme").
4
+
5
+ Perseus Vault is an open-source (MIT) memory engine that runs entirely on your machine, stores data in an encrypted SQLite database, and exposes 40+ tools over the Model Context Protocol (MCP). This package wraps Perseus Vault's `remember` / `recall` / `forget` tools as Haystack components so your pipelines can persist and retrieve documents across runs — no external vector database or API key required.
6
+
7
+ ## Components
8
+
9
+ | Class | Type | Role |
10
+ | --- | --- | --- |
11
+ | `PerseusVaultMemoryStore` | Memory store | Owns the `perseus-vault` subprocess and config; holds `add_memories` / `search_memories` / `delete_all_memories`. |
12
+ | `PerseusVaultMemoryWriter` | `@component` | Pipeline sink that persists `Document`s into the store. |
13
+ | `PerseusVaultMemoryRetriever` | `@component` | Pipeline source that retrieves the most relevant `Document`s for a query. |
14
+
15
+ ## Prerequisite: the `perseus-vault` binary
16
+
17
+ These components talk to a local `perseus-vault` executable over stdio. Install it first:
18
+
19
+ 1. Download a pre-built binary from the [Perseus Vault releases page](https://github.com/Perseus-Computing-LLC/perseus-vault/releases) (or build from source).
20
+ 2. Put it on your `$PATH` (so `perseus-vault` resolves), **or** pass its absolute path via `perseus_vault_binary=`.
21
+
22
+ You can verify it works with:
23
+
24
+ ```bash
25
+ perseus-vault --version
26
+ ```
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ pip install perseus-vault-haystack
32
+ ```
33
+
34
+ This pulls in `haystack-ai`. The `perseus-vault` binary is a separate, language-agnostic dependency (see above).
35
+
36
+ ## Quickstart — write then read in a pipeline
37
+
38
+ ```python
39
+ from haystack import Pipeline, Document
40
+ from perseus_vault_haystack import (
41
+ PerseusVaultMemoryStore,
42
+ PerseusVaultMemoryWriter,
43
+ PerseusVaultMemoryRetriever,
44
+ )
45
+
46
+ # One store, shared by both components (single perseus-vault subprocess).
47
+ store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db", category="docs")
48
+
49
+ # --- Write documents into persistent memory ---
50
+ write_pipe = Pipeline()
51
+ write_pipe.add_component("writer", PerseusVaultMemoryWriter(memory_store=store))
52
+ write_pipe.run(
53
+ {
54
+ "writer": {
55
+ "documents": [
56
+ Document(content="Perseus Vault is a local-first, encrypted memory engine."),
57
+ Document(content="Haystack is an open-source LLM framework by deepset."),
58
+ ]
59
+ }
60
+ }
61
+ )
62
+
63
+ # --- Retrieve them later (even in a separate process / run) ---
64
+ read_pipe = Pipeline()
65
+ read_pipe.add_component("retriever", PerseusVaultMemoryRetriever(memory_store=store, top_k=3))
66
+ result = read_pipe.run({"retriever": {"query": "What is Perseus Vault?"}})
67
+
68
+ for doc in result["retriever"]["documents"]:
69
+ print(doc.score, doc.content)
70
+ ```
71
+
72
+ Because Perseus Vault persists to an encrypted SQLite file, documents written in one run are available in any future run pointed at the same `db_path`.
73
+
74
+ ### Use directly (without a pipeline)
75
+
76
+ ```python
77
+ from haystack import Document
78
+ from perseus_vault_haystack import PerseusVaultMemoryStore
79
+
80
+ store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
81
+ store.add_memories([Document(content="Remember this fact.")])
82
+ hits = store.search_memories("fact", top_k=5)
83
+ ```
84
+
85
+ ## Configuration
86
+
87
+ `PerseusVaultMemoryStore` accepts:
88
+
89
+ - `db_path` — path to the Perseus Vault SQLite database (default `~/.mimir/haystack.db`).
90
+ - `perseus_vault_binary` — name on `$PATH` or absolute path to the executable (default `perseus-vault`).
91
+ - `category` — Perseus Vault category scoping all writes/recalls for this store (default `haystack-memory`). Use distinct categories to isolate corpora.
92
+ - `top_k` — default number of documents returned by retrieval (default `10`).
93
+ - `timeout_s` — per-RPC timeout for the subprocess (default `30`).
94
+
95
+ ## Serialization
96
+
97
+ All three classes implement `to_dict()` / `from_dict()` and round-trip through `Pipeline.dumps()` / `Pipeline.loads()`.
98
+
99
+ ## License
100
+
101
+ MIT © 2026 Perseus Computing LLC. Perseus Vault (formerly Mimir/Mneme) is also MIT-licensed.
@@ -0,0 +1,77 @@
1
+ ---
2
+ layout: integration
3
+ name: Perseus Vault
4
+ description: Add local-first, encrypted, persistent memory to your Haystack agents and pipelines with Perseus Vault
5
+ authors:
6
+ - name: Perseus Computing LLC
7
+ socials:
8
+ github: Perseus-Computing-LLC
9
+ pypi: https://pypi.org/project/perseus-vault-haystack/
10
+ repo: https://github.com/Perseus-Computing-LLC/mimir-haystack
11
+ type: Memory Store
12
+ report_issue: https://github.com/Perseus-Computing-LLC/mimir-haystack/issues
13
+ logo: /logos/perseus-vault.png
14
+ version: Haystack 2.0
15
+ toc: true
16
+ ---
17
+
18
+ ### **Table of Contents**
19
+
20
+ - [Overview](#overview)
21
+ - [Installation](#installation)
22
+ - [Usage](#usage)
23
+ - [Available Classes](#available-classes)
24
+ - [Use in a Pipeline](#use-in-a-pipeline)
25
+ - [License](#license)
26
+
27
+ ## Overview
28
+
29
+ [Perseus Vault](https://github.com/Perseus-Computing-LLC/perseus-vault) is an open-source (MIT), local-first, encrypted persistent memory engine with 40+ tools exposed over the Model Context Protocol (MCP). It runs entirely on your machine and stores data in an encrypted SQLite database — no external vector database or API key required.
30
+
31
+ The `perseus-vault-haystack` package provides:
32
+
33
+ - `PerseusVaultMemoryStore`: A persistent memory store backed by a local Perseus Vault engine.
34
+ - `PerseusVaultMemoryWriter`: A component that persists `Document`s into the store.
35
+ - `PerseusVaultMemoryRetriever`: A component that retrieves the most relevant `Document`s for a query.
36
+
37
+ ## Installation
38
+
39
+ ```bash
40
+ pip install perseus-vault-haystack
41
+ ```
42
+
43
+ You also need the `perseus-vault` binary on your `$PATH` (download from the [Perseus Vault releases page](https://github.com/Perseus-Computing-LLC/perseus-vault/releases)).
44
+
45
+ ## Usage
46
+
47
+ ### Available Classes
48
+
49
+ - `PerseusVaultMemoryStore` — owns the `perseus-vault` subprocess and configuration.
50
+ - `PerseusVaultMemoryWriter` — pipeline sink that writes documents to memory.
51
+ - `PerseusVaultMemoryRetriever` — pipeline source that recalls documents by query.
52
+
53
+ ### Use in a Pipeline
54
+
55
+ ```python
56
+ from haystack import Pipeline, Document
57
+ from perseus_vault_haystack import (
58
+ PerseusVaultMemoryStore,
59
+ PerseusVaultMemoryWriter,
60
+ PerseusVaultMemoryRetriever,
61
+ )
62
+
63
+ store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db", category="docs")
64
+
65
+ write_pipe = Pipeline()
66
+ write_pipe.add_component("writer", PerseusVaultMemoryWriter(memory_store=store))
67
+ write_pipe.run({"writer": {"documents": [Document(content="Perseus Vault is local-first.")]}})
68
+
69
+ read_pipe = Pipeline()
70
+ read_pipe.add_component("retriever", PerseusVaultMemoryRetriever(memory_store=store, top_k=3))
71
+ result = read_pipe.run({"retriever": {"query": "What is Perseus Vault?"}})
72
+ print(result["retriever"]["documents"])
73
+ ```
74
+
75
+ ## License
76
+
77
+ `perseus-vault-haystack` is distributed under the terms of the [MIT license](https://opensource.org/licenses/MIT).
@@ -0,0 +1,27 @@
1
+ # SPDX-FileCopyrightText: 2026 Perseus Computing LLC
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """perseus-vault-haystack — Perseus Vault persistent memory for Haystack 2.x.
6
+
7
+ Perseus Vault (https://github.com/Perseus-Computing-LLC/perseus-vault) is an
8
+ open-source (MIT) local-first, encrypted persistent memory engine with 40+ MCP
9
+ tools. This package exposes Perseus Vault to Haystack 2.x pipelines as a memory
10
+ store plus two ``@component`` adapters.
11
+
12
+ Requirements:
13
+ A ``perseus-vault`` binary must be on ``$PATH`` or passed explicitly via
14
+ ``perseus_vault_binary``. Download from:
15
+ https://github.com/Perseus-Computing-LLC/perseus-vault/releases
16
+ """
17
+
18
+ from .components import PerseusVaultMemoryRetriever, PerseusVaultMemoryWriter
19
+ from .memory_store import PerseusVaultMemoryStore
20
+
21
+ __all__ = [
22
+ "PerseusVaultMemoryStore",
23
+ "PerseusVaultMemoryWriter",
24
+ "PerseusVaultMemoryRetriever",
25
+ ]
26
+
27
+ __version__ = "0.1.0"
@@ -0,0 +1,237 @@
1
+ # SPDX-FileCopyrightText: 2026 Perseus Computing LLC
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """Low-level JSON-RPC (MCP stdio) client for the Perseus Vault memory engine.
6
+
7
+ Perseus Vault (https://github.com/Perseus-Computing-LLC/perseus-vault) is an
8
+ open-source (MIT) local-first, encrypted persistent memory engine exposing 40+
9
+ MCP tools. It runs as ``perseus-vault serve --db <path>`` and speaks JSON-RPC 2.0
10
+ over stdin/stdout (the MCP stdio transport).
11
+
12
+ This client spawns the ``perseus-vault`` binary and provides a thin, thread-safe
13
+ ``call_tool`` method. It is adapted from the proven client core in
14
+ ``Perseus-Computing-LLC/adk-perseus-vault-memory``.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import atexit
20
+ import json
21
+ import os
22
+ import queue
23
+ import shutil
24
+ import subprocess
25
+ import threading
26
+ import time
27
+
28
+
29
class PerseusVaultClient:
    """Thread-safe JSON-RPC client over a ``perseus-vault`` stdio subprocess.

    The client lazily spawns the subprocess on first use (``start``), performs
    the MCP ``initialize`` handshake, and exposes ``call_tool`` to invoke any
    Perseus Vault MCP tool. The subprocess is terminated at interpreter exit,
    or earlier via ``close``; after ``close`` the next call spawns a fresh one.

    Requests are serialized: a single re-entrant lock is held for the whole
    request/response round trip, so concurrent callers can never consume (and
    discard) each other's replies from the shared receive queue.
    """

    def __init__(
        self,
        db_path: str = "~/.mimir/haystack.db",
        perseus_vault_binary: str = "perseus-vault",
        timeout_s: float = 30.0,
    ) -> None:
        """Initialize the client (does not start the subprocess yet).

        :param db_path: Path to the Perseus Vault SQLite database file
            (``~`` is expanded).
        :param perseus_vault_binary: Name (resolved on ``$PATH``) or absolute path
            of the ``perseus-vault`` executable.
        :param timeout_s: Per-RPC timeout guarding against a hung subprocess.
        """
        self.db_path = os.path.expanduser(db_path)
        self.perseus_vault_binary = perseus_vault_binary
        self.timeout_s = timeout_s

        self._proc: subprocess.Popen | None = None
        # Re-entrant so start() can run the handshake RPC while already
        # holding the lock.
        self._lock = threading.RLock()
        self._request_id = 0
        self._recv: queue.Queue = queue.Queue()
        self._reader: threading.Thread | None = None
        self._started = False

    # ------------------------------------------------------------------ #
    # Lifecycle
    # ------------------------------------------------------------------ #
    def _resolve_binary(self) -> str:
        """Return the path of the executable to spawn.

        :raises RuntimeError: If the binary does not exist (absolute path) or
            cannot be found on ``$PATH`` (bare name).
        """
        # Expand "~" for consistency with db_path.
        binary = os.path.expanduser(self.perseus_vault_binary)
        if os.path.isabs(binary):
            if not os.path.exists(binary):
                msg = f"perseus-vault binary not found at '{self.perseus_vault_binary}'."
                raise RuntimeError(msg)
            return binary
        resolved = shutil.which(binary)
        if resolved is None and os.name == "nt" and not binary.lower().endswith(".exe"):
            # Windows fallback: retry with an explicit .exe suffix in case the
            # PATHEXT-based lookup above did not match it.
            resolved = shutil.which(binary + ".exe")
        if resolved is None:
            msg = (
                f"perseus-vault binary not found on $PATH (looked for "
                f"'{self.perseus_vault_binary}'). Install Perseus Vault from "
                "https://github.com/Perseus-Computing-LLC/perseus-vault/releases "
                "or pass an absolute path via perseus_vault_binary=."
            )
            raise RuntimeError(msg)
        return resolved

    def start(self) -> None:
        """Spawn the subprocess and perform the MCP handshake (idempotent).

        The lock is held until the handshake has completed, so no other thread
        can issue a tool call against a server that is not initialized yet.

        :raises RuntimeError: If the binary cannot be found or the handshake
            fails; in the latter case the subprocess is torn down again.
        """
        with self._lock:
            if self._started:
                return
            binary = self._resolve_binary()

            db_dir = os.path.dirname(self.db_path)
            if db_dir:
                os.makedirs(db_dir, exist_ok=True)

            # stderr is discarded: nothing drains it, so a chatty server filling
            # the OS pipe buffer would block on its stderr write while we wait on
            # stdout (a classic two-pipe deadlock).
            # MCP stdio messages are UTF-8; do not rely on the locale encoding.
            proc = subprocess.Popen(
                [binary, "serve", "--db", self.db_path],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                encoding="utf-8",
                errors="replace",
            )

            # Fresh queue per subprocess so nothing left over from a previous
            # run (including its EOF sentinel) can leak into this one.
            recv: queue.Queue = queue.Queue()
            proc_stdout = proc.stdout

            def _pump() -> None:
                try:
                    for line in proc_stdout:  # type: ignore[union-attr]
                        recv.put(line)
                except Exception:  # noqa: BLE001
                    pass
                finally:
                    recv.put(None)  # EOF sentinel

            self._proc = proc
            self._recv = recv
            self._reader = threading.Thread(target=_pump, daemon=True)
            self._reader.start()
            atexit.register(self.close)

            try:
                self._rpc(
                    "initialize",
                    {
                        "protocolVersion": "2024-11-05",
                        "capabilities": {},
                        "clientInfo": {"name": "perseus-vault-haystack", "version": "0.1.0"},
                    },
                )
                self._notify("notifications/initialized", {})
            except Exception:
                # Do not leave a half-initialized client or an orphaned process.
                self.close()
                raise
            self._started = True

    def close(self) -> None:
        """Terminate the Perseus Vault subprocess and reset the client.

        Safe to call repeatedly. After closing, the next ``call_tool`` starts
        a new subprocess.
        """
        proc = self._proc
        if proc is None:
            return
        # Terminate before taking the lock: a thread blocked inside an RPC
        # then sees EOF and releases the lock promptly.
        try:
            proc.terminate()
            proc.wait(timeout=5)
        except Exception:  # noqa: BLE001
            try:
                proc.kill()
                proc.wait(timeout=5)  # reap the killed child
            except Exception:  # noqa: BLE001
                pass

        with self._lock:
            reader = self._reader
            try:
                if proc.stdin is not None:
                    proc.stdin.close()
            except (OSError, ValueError):
                pass
            if reader is not None and reader is not threading.current_thread():
                reader.join(timeout=1)
            try:
                if proc.stdout is not None:
                    proc.stdout.close()
            except (OSError, ValueError):
                pass
            if self._proc is proc:
                self._proc = None
                self._reader = None
                self._started = False
        atexit.unregister(self.close)

    # ------------------------------------------------------------------ #
    # JSON-RPC plumbing
    # ------------------------------------------------------------------ #
    def _next_id(self) -> int:
        """Return the next request id (callers must hold the lock)."""
        self._request_id += 1
        return self._request_id

    def _rpc(self, method: str, params: object) -> dict:
        """Send a JSON-RPC request and return its ``result`` dict.

        The lock is held from the write until the matching reply has been
        read, so replies are never handed to the wrong caller.

        :param method: JSON-RPC method name.
        :param params: JSON-serializable parameters (non-serializable values
            are stringified).
        :returns: The reply's ``result``; ``{}`` when it is missing or not an
            object.
        :raises RuntimeError: If the subprocess is not running, the pipe is
            broken, the call times out, the subprocess closes its output, or
            the server answers with a JSON-RPC error.
        """
        with self._lock:
            proc = self._proc
            if proc is None or proc.stdin is None:
                msg = "Perseus Vault subprocess is not running. Call start() first."
                raise RuntimeError(msg)
            req_id = self._next_id()
            req = {"jsonrpc": "2.0", "id": req_id, "method": method, "params": params}
            payload = json.dumps(req, default=str)
            try:
                proc.stdin.write(payload + "\n")
                proc.stdin.flush()
            except (BrokenPipeError, OSError, ValueError) as e:
                # ValueError: the pipe object was already closed.
                msg = (
                    f"Perseus Vault subprocess communication failed: {e}. "
                    "The perseus-vault process may have crashed."
                )
                raise RuntimeError(msg) from e
            return self._await_response(req_id, method)

    def _await_response(self, req_id: int, method: str) -> dict:
        """Read stdout lines until the reply to ``req_id`` arrives."""
        timeout_msg = f"Perseus Vault RPC '{method}' timed out after {self.timeout_s}s."
        deadline = time.monotonic() + self.timeout_s
        while True:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise RuntimeError(timeout_msg)
            try:
                raw = self._recv.get(timeout=remaining)
            except queue.Empty:
                raise RuntimeError(timeout_msg) from None
            if raw is None:
                msg = "Perseus Vault subprocess closed its output (it may have crashed)."
                raise RuntimeError(msg)
            raw = raw.strip()
            if not raw:
                continue
            try:
                resp = json.loads(raw)
            except json.JSONDecodeError:
                continue  # non-JSON noise on stdout
            if not isinstance(resp, dict):
                continue  # valid JSON but not a JSON-RPC message (e.g. "123")
            if resp.get("id") != req_id:
                continue  # notification or a stale/other reply
            if "error" in resp:
                err = resp["error"]
                if isinstance(err, dict):
                    code, message = err.get("code"), err.get("message")
                else:
                    code, message = None, err
                msg = f"Perseus Vault RPC error [{code}]: {message}"
                raise RuntimeError(msg)
            result = resp.get("result", {})
            return result if isinstance(result, dict) else {}

    def _notify(self, method: str, params: object) -> None:
        """Send a JSON-RPC notification (no id, no response expected).

        Best effort: a missing subprocess or a broken pipe is ignored.
        """
        with self._lock:
            if self._proc is None or self._proc.stdin is None:
                return
            payload = json.dumps({"jsonrpc": "2.0", "method": method, "params": params})
            try:
                self._proc.stdin.write(payload + "\n")
                self._proc.stdin.flush()
            except (BrokenPipeError, OSError, ValueError):
                pass

    def call_tool(self, name: str, arguments: dict) -> dict:
        """Call a Perseus Vault MCP tool and return its ``structuredContent``.

        Falls back to parsing the first text content block as JSON if no
        structured content is present, and to ``{}`` if that fails too.

        :param name: MCP tool name.
        :param arguments: Tool arguments.
        :raises RuntimeError: If the subprocess cannot be started or the RPC
            fails.
        """
        if not self._started:
            self.start()
        result = self._rpc("tools/call", {"name": name, "arguments": arguments})
        sc = result.get("structuredContent")
        if sc is not None:
            return sc
        content = result.get("content") or []
        if content:
            try:
                return json.loads(content[0].get("text", "{}"))
            except (json.JSONDecodeError, TypeError, IndexError, KeyError, AttributeError):
                pass
        return {}
@@ -0,0 +1,122 @@
1
+ # SPDX-FileCopyrightText: 2026 Perseus Computing LLC
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """Haystack 2.x components wrapping the Perseus Vault memory store."""
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from haystack import component, default_from_dict, default_to_dict
12
+ from haystack.dataclasses import Document
13
+
14
+ from .memory_store import PerseusVaultMemoryStore
15
+
16
+
17
@component
class PerseusVaultMemoryWriter:
    """Haystack component that persists ``Document``s into a ``PerseusVaultMemoryStore``.

    Slots into a pipeline as a sink: it writes the incoming documents to Perseus
    Vault and passes them through unchanged (plus a count), so it can also sit
    mid-pipeline.

    Usage::

        from perseus_vault_haystack import PerseusVaultMemoryStore, PerseusVaultMemoryWriter

        store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
        writer = PerseusVaultMemoryWriter(memory_store=store)
        writer.run(documents=[Document(content="Perseus Vault is local-first.")])
    """

    def __init__(self, *, memory_store: PerseusVaultMemoryStore) -> None:
        """Initialize the writer.

        :param memory_store: Backing :class:`PerseusVaultMemoryStore` to write into.
        :raises ValueError: If ``memory_store`` is not a ``PerseusVaultMemoryStore``.
        """
        if not isinstance(memory_store, PerseusVaultMemoryStore):
            msg = "memory_store must be an instance of PerseusVaultMemoryStore"
            raise ValueError(msg)
        self._memory_store = memory_store

    @component.output_types(documents=list[Document], documents_written=int)
    def run(self, documents: list[Document]) -> dict[str, Any]:
        """Store ``documents`` in Perseus Vault and pass them through.

        :param documents: Documents to persist.
        :returns: ``{"documents": <same documents>, "documents_written": <count>}``,
            where the count is whatever the store's ``add_memories`` returns.
        """
        written = self._memory_store.add_memories(documents)
        return {"documents": documents, "documents_written": written}

    def to_dict(self) -> dict[str, Any]:
        """Serialize this component, nesting the store's own ``to_dict()`` output."""
        return default_to_dict(self, memory_store=self._memory_store.to_dict())

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PerseusVaultMemoryWriter:
        """Deserialize a component from a dictionary.

        ``data`` is left untouched: the nested store is rebuilt on a copy of
        ``init_parameters``, so the same dictionary can be deserialized again.

        :param data: Dictionary produced by :meth:`to_dict`.
        :raises KeyError: If ``init_parameters`` or its ``memory_store`` entry
            is missing.
        """
        init_params = dict(data["init_parameters"])
        init_params["memory_store"] = PerseusVaultMemoryStore.from_dict(init_params["memory_store"])
        return default_from_dict(cls, {**data, "init_parameters": init_params})
65
+
66
+
67
@component
class PerseusVaultMemoryRetriever:
    """Haystack component that retrieves ``Document``s from a ``PerseusVaultMemoryStore``.

    A thin pipeline adapter over :meth:`PerseusVaultMemoryStore.search_memories`.
    Takes a ``query`` and returns the most relevant stored documents — drop it in
    front of a prompt builder for retrieval-augmented generation over persistent
    memory.

    Usage::

        from perseus_vault_haystack import PerseusVaultMemoryStore, PerseusVaultMemoryRetriever

        store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
        retriever = PerseusVaultMemoryRetriever(memory_store=store, top_k=5)
        result = retriever.run(query="What is Perseus Vault?")
        docs = result["documents"]
    """

    def __init__(self, *, memory_store: PerseusVaultMemoryStore, top_k: int | None = None) -> None:
        """Initialize the retriever.

        :param memory_store: Backing :class:`PerseusVaultMemoryStore` to query.
        :param top_k: Default max results; falls back to the store's ``top_k``
            when ``None``.
        :raises ValueError: If ``memory_store`` is not a ``PerseusVaultMemoryStore``.
        """
        if not isinstance(memory_store, PerseusVaultMemoryStore):
            msg = "memory_store must be an instance of PerseusVaultMemoryStore"
            raise ValueError(msg)
        self._memory_store = memory_store
        self._top_k = top_k

    @component.output_types(documents=list[Document])
    def run(self, query: str, top_k: int | None = None) -> dict[str, list[Document]]:
        """Search the attached store and return matching documents.

        :param query: Natural-language / keyword query.
        :param top_k: Per-call override; falls back to init ``top_k``, then the
            store's default.
        :returns: ``{"documents": [Document, ...]}`` ordered by relevance.
        """
        # A still-None value is forwarded as-is; the store is expected to
        # apply its own default in that case.
        effective_top_k = top_k if top_k is not None else self._top_k
        documents = self._memory_store.search_memories(query=query, top_k=effective_top_k)
        return {"documents": documents}

    def to_dict(self) -> dict[str, Any]:
        """Serialize this component, nesting the store's own ``to_dict()`` output."""
        return default_to_dict(self, memory_store=self._memory_store.to_dict(), top_k=self._top_k)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PerseusVaultMemoryRetriever:
        """Deserialize a component from a dictionary.

        ``data`` is left untouched: the nested store is rebuilt on a copy of
        ``init_parameters``, so the same dictionary can be deserialized again.

        :param data: Dictionary produced by :meth:`to_dict`.
        :raises KeyError: If ``init_parameters`` or its ``memory_store`` entry
            is missing.
        """
        init_params = dict(data["init_parameters"])
        init_params["memory_store"] = PerseusVaultMemoryStore.from_dict(init_params["memory_store"])
        return default_from_dict(cls, {**data, "init_parameters": init_params})
@@ -0,0 +1,188 @@
1
+ # SPDX-FileCopyrightText: 2026 Perseus Computing LLC
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """Perseus Vault-backed memory store for Haystack 2.x."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ import time
12
+ from typing import Any
13
+
14
+ from haystack import default_from_dict, default_to_dict
15
+ from haystack.dataclasses import Document
16
+
17
+ from ._client import PerseusVaultClient
18
+
19
logger = logging.getLogger(__name__)

# Category used for every write and recall when the caller does not choose one.
_DEFAULT_CATEGORY = "haystack-memory"


class PerseusVaultMemoryStore:
    """Persistent memory backend backed by the Perseus Vault engine.

    Wraps the Perseus Vault MCP tools ``perseus_vault_remember`` (write),
    ``perseus_vault_recall`` (search) and ``perseus_vault_forget`` (delete). Each
    Haystack ``Document`` is stored as one Perseus Vault entity; the document's
    ``content`` becomes the entity body and its ``meta`` is preserved as JSON. On
    recall, entities are rehydrated back into ``Document`` objects with their
    original ``id``, ``content``, ``meta`` and a relevance ``score`` from Perseus
    Vault.

    The store owns the long-lived ``perseus-vault`` subprocess; the thin
    :class:`~perseus_vault_haystack.PerseusVaultMemoryWriter` and
    :class:`~perseus_vault_haystack.PerseusVaultMemoryRetriever` components
    delegate to it so a single store can back several pipeline components.

    This class is safe to use across threads (the underlying client is
    thread-safe).
    """

    def __init__(
        self,
        db_path: str = "~/.mimir/haystack.db",
        perseus_vault_binary: str = "perseus-vault",
        category: str = _DEFAULT_CATEGORY,
        top_k: int = 10,
        timeout_s: float = 30.0,
    ) -> None:
        """Initialize the store.

        :param db_path: Path to the Perseus Vault SQLite database file.
        :param perseus_vault_binary: Name (on ``$PATH``) or absolute path of the
            ``perseus-vault`` executable.
        :param category: Perseus Vault category that scopes every write and
            recall for this store. Use distinct categories to isolate corpora.
        :param top_k: Default maximum number of documents returned by
            :meth:`search_memories`.
        :param timeout_s: Per-RPC timeout for the underlying Perseus Vault
            subprocess.
        """
        # Kept as public attributes so ``to_dict`` can round-trip them.
        self.db_path = db_path
        self.perseus_vault_binary = perseus_vault_binary
        self.category = category
        self.top_k = top_k
        self.timeout_s = timeout_s

        self._client = PerseusVaultClient(
            db_path=db_path,
            perseus_vault_binary=perseus_vault_binary,
            timeout_s=timeout_s,
        )

    # ------------------------------------------------------------------ #
    # Write
    # ------------------------------------------------------------------ #
    def add_memories(self, documents: list[Document]) -> int:
        """Persist ``documents`` into Perseus Vault via ``perseus_vault_remember``.

        Documents with empty ``content`` are skipped. The document ``id`` is used
        as the Perseus Vault entity key so re-writing the same document updates it
        in place (idempotent upsert).

        :param documents: Documents to store.
        :returns: The number of documents actually written.
        """
        written = 0
        for doc in documents:
            if not doc.content:
                continue
            # Fall back to a timestamp-based key only when the document has no id.
            key = doc.id or f"doc:{int(time.time() * 1_000_000)}:{written}"
            body_json = json.dumps(
                {
                    "doc_id": doc.id,
                    "content": doc.content,
                    "meta": doc.meta or {},
                }
            )
            self._client.call_tool(
                "perseus_vault_remember",
                {
                    "category": self.category,
                    "key": key,
                    "body_json": body_json,
                    "tags": ["haystack"],
                },
            )
            written += 1
        logger.info("Stored %d documents in Perseus Vault category '%s'", written, self.category)
        return written

    # ------------------------------------------------------------------ #
    # Search
    # ------------------------------------------------------------------ #
    def search_memories(self, query: str, top_k: int | None = None) -> list[Document]:
        """Search Perseus Vault via ``perseus_vault_recall`` and return matching documents.

        Malformed replies are tolerated rather than raised: a reply that is not
        a dict, a reply whose ``items``/``results`` are both missing or null,
        and individual entries that are not dicts all contribute no documents.

        :param query: Natural-language / keyword query. Empty queries return
            ``[]``.
        :param top_k: Per-call override of the store's default ``top_k``.
        :returns: A list of :class:`~haystack.dataclasses.Document`, ordered by
            Perseus Vault relevance, each carrying a ``score`` when Perseus Vault
            provides one.
        """
        if not query:
            return []
        limit = top_k if top_k is not None else self.top_k
        result = self._client.call_tool(
            "perseus_vault_recall",
            {"query": query, "limit": limit, "category": self.category},
        )

        # The hit list may arrive under either key; ``or []`` covers the case
        # where both keys are present but null.
        items = []
        if isinstance(result, dict):
            items = result.get("items") or result.get("results") or []
        if not isinstance(items, (list, tuple)):
            items = []

        documents: list[Document] = []
        for item in items:
            if not isinstance(item, dict):
                continue
            body = item.get("body_json", "{}")
            try:
                body_data = json.loads(body) if isinstance(body, str) else body
            except (json.JSONDecodeError, TypeError):
                # Unparseable body: fall through to the item's own ``content``.
                body_data = {}
            if not isinstance(body_data, dict):
                continue
            content = body_data.get("content") or item.get("content")
            if not content:
                continue
            # Perseus Vault's recall ranks by relevance but names the field
            # differently across versions: prefer an explicit ``score``, else fall
            # back to ``certainty`` (relevance/confidence in v2.x).
            score = item.get("score")
            if score is None:
                score = item.get("certainty")
            # ``bool`` is a subclass of ``int``; a true/false flag is not a score.
            is_numeric = isinstance(score, (int, float)) and not isinstance(score, bool)
            documents.append(
                Document(
                    id=body_data.get("doc_id") or item.get("key", ""),
                    content=content,
                    meta=body_data.get("meta", {}) or {},
                    score=float(score) if is_numeric else None,
                )
            )
        logger.info("Recalled %d documents for query '%s'", len(documents), query[:80])
        return documents

    # ------------------------------------------------------------------ #
    # Delete
    # ------------------------------------------------------------------ #
    def delete_all_memories(self) -> None:
        """Delete every entity in this store's category via ``perseus_vault_forget``."""
        self._client.call_tool("perseus_vault_forget", {"category": self.category})
        logger.info("Deleted all documents in Perseus Vault category '%s'", self.category)

    # ------------------------------------------------------------------ #
    # Serialization
    # ------------------------------------------------------------------ #
    def to_dict(self) -> dict[str, Any]:
        """Serialize this store for pipeline persistence."""
        return default_to_dict(
            self,
            db_path=self.db_path,
            perseus_vault_binary=self.perseus_vault_binary,
            category=self.category,
            top_k=self.top_k,
            timeout_s=self.timeout_s,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PerseusVaultMemoryStore:
        """Deserialize a store from a dict produced by :meth:`to_dict`."""
        return default_from_dict(cls, data)
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "perseus-vault-haystack"
7
+ version = "0.1.0"
8
+ description = "Local-first, encrypted persistent memory for Haystack 2.x pipelines — backed by Perseus Vault."
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Perseus Computing LLC", email = "hermes@perseus.observer" },
14
+ ]
15
+ keywords = ["haystack", "perseus-vault", "memory", "rag", "mcp", "agents"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "License :: OSI Approved :: MIT License",
24
+ "Operating System :: OS Independent",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ "Intended Audience :: Developers",
27
+ ]
28
+ dependencies = [
29
+ "haystack-ai>=2.0.0",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ test = ["pytest>=7.0"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/Perseus-Computing-LLC/mimir-haystack"
37
+ Repository = "https://github.com/Perseus-Computing-LLC/mimir-haystack"
38
+ "Bug Tracker" = "https://github.com/Perseus-Computing-LLC/mimir-haystack/issues"
39
+ "Perseus Vault" = "https://github.com/Perseus-Computing-LLC/perseus-vault"
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["perseus_vault_haystack"]
43
+
44
+ [tool.pytest.ini_options]
45
+ testpaths = ["tests"]
File without changes
@@ -0,0 +1,284 @@
1
+ # SPDX-FileCopyrightText: 2026 Perseus Computing LLC
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """Unit tests for perseus-vault-haystack.
6
+
7
+ The Perseus Vault subprocess is mocked at the ``PerseusVaultClient`` boundary so
8
+ these tests run without the ``perseus-vault`` binary installed.
9
+ """
10
+
11
+ import json
12
+ from unittest.mock import MagicMock
13
+
14
+ import pytest
15
+ from haystack import Document
16
+
17
+
18
def _output_sockets(comp):
    """Map each output socket name declared on ``comp`` to that socket's type."""
    declared = comp.__haystack_output__._sockets_dict
    socket_types = {}
    for socket_name, socket in declared.items():
        socket_types[socket_name] = socket.type
    return socket_types
24
+
25
+ from perseus_vault_haystack import (
26
+ PerseusVaultMemoryRetriever,
27
+ PerseusVaultMemoryStore,
28
+ PerseusVaultMemoryWriter,
29
+ )
30
+
31
+
32
class FakePerseusVaultClient:
    """Dictionary-backed substitute for ``PerseusVaultClient.call_tool``.

    Supports only what the store needs from ``perseus_vault_remember``,
    ``perseus_vault_recall`` and ``perseus_vault_forget``, so the store can be
    exercised end-to-end with no subprocess. Recall is a case-insensitive
    substring match on the stored ``content``; category is ignored.
    """

    def __init__(self, *args, **kwargs):
        # Constructor arguments are accepted and discarded so the fake can be
        # built with the same call shape as the real client.
        self.store = {}  # key -> body_json
        self.calls = []  # every (tool name, arguments) pair, in call order

    def call_tool(self, name, arguments):
        self.calls.append((name, arguments))

        if name == "perseus_vault_remember":
            self.store[arguments["key"]] = arguments["body_json"]
            return {"ok": True}

        if name == "perseus_vault_recall":
            needle = arguments.get("query", "").lower()
            cap = arguments.get("limit", 10)
            matches = [
                {"key": key, "body_json": raw, "score": 0.9}
                for key, raw in self.store.items()
                if needle in json.loads(raw).get("content", "").lower()
            ]
            return {"items": matches[:cap]}

        if name == "perseus_vault_forget":
            self.store.clear()
            return {"ok": True}

        # Unknown tools answer with an empty payload.
        return {}
61
+
62
+
63
@pytest.fixture
def store(monkeypatch):
    """Provide a PerseusVaultMemoryStore wired to the in-memory fake client."""
    memory_store = PerseusVaultMemoryStore(db_path="/tmp/test.db", category="test")
    monkeypatch.setattr(memory_store, "_client", FakePerseusVaultClient())
    return memory_store
70
+
71
+
72
+ # --------------------------------------------------------------------------- #
73
+ # PerseusVaultMemoryStore
74
+ # --------------------------------------------------------------------------- #
75
def test_add_and_search(store):
    """Two documents are written and only the matching one is recalled."""
    docs = [
        Document(content="Perseus Vault is a local-first memory engine."),
        Document(content="Haystack is an LLM framework."),
    ]
    assert store.add_memories(docs) == 2

    results = store.search_memories("perseus vault")
    assert len(results) == 1
    top = results[0]
    assert "local-first" in top.content
    assert top.score == pytest.approx(0.9)
88
+
89
+
90
def test_add_skips_empty_content(store):
    """Documents whose content is empty or ``None`` are not written."""
    blank_docs = [Document(content=""), Document(content=None)]
    assert store.add_memories(blank_docs) == 0
93
+
94
+
95
def test_search_empty_query_returns_empty(store):
    """An empty query yields an empty result list."""
    results = store.search_memories("")
    assert results == []
97
+
98
+
99
def test_search_preserves_meta_and_id(store):
    """A recalled document carries the id and meta it was stored with."""
    original = Document(id="d1", content="alpha beta", meta={"k": "v"})
    store.add_memories([original])

    recalled = store.search_memories("alpha")[0]
    assert recalled.id == "d1"
    assert recalled.meta == {"k": "v"}
104
+
105
+
106
def test_top_k_override(store):
    """A per-call ``top_k`` caps the number of returned documents."""
    docs = []
    for i in range(5):
        docs.append(Document(content=f"item number {i}"))
    store.add_memories(docs)

    results = store.search_memories("item", top_k=2)
    assert len(results) == 2
110
+
111
+
112
def test_delete_all(store):
    """After ``delete_all_memories`` a previously stored document is gone."""
    doomed = Document(content="to be deleted")
    store.add_memories([doomed])
    store.delete_all_memories()

    remaining = store.search_memories("deleted")
    assert remaining == []
116
+
117
+
118
def test_store_to_dict_from_dict_roundtrip():
    """Every init parameter survives ``to_dict`` followed by ``from_dict``."""
    params = {
        "db_path": "/x/y.db",
        "perseus_vault_binary": "/opt/perseus-vault",
        "category": "c",
        "top_k": 7,
        "timeout_s": 12.0,
    }
    serialized = PerseusVaultMemoryStore(**params).to_dict()
    assert serialized["init_parameters"]["category"] == "c"
    assert serialized["init_parameters"]["top_k"] == 7

    restored = PerseusVaultMemoryStore.from_dict(serialized)
    for attribute, expected in params.items():
        assert getattr(restored, attribute) == expected
135
+
136
+
137
+ # --------------------------------------------------------------------------- #
138
+ # PerseusVaultMemoryWriter
139
+ # --------------------------------------------------------------------------- #
140
def test_writer_run(store):
    """The writer reports its count, passes documents through, and stores them."""
    docs = [Document(content="written via component")]
    result = PerseusVaultMemoryWriter(memory_store=store).run(documents=docs)

    assert result["documents_written"] == 1
    assert result["documents"] == docs
    # The document must be retrievable from the backing store afterwards.
    stored = store.search_memories("written")
    assert stored[0].content == "written via component"
148
+
149
+
150
def test_writer_output_types(store):
    """The writer declares ``documents`` and ``documents_written`` outputs."""
    writer = PerseusVaultMemoryWriter(memory_store=store)
    declared = _output_sockets(writer)
    assert declared["documents"] == list[Document]
    assert declared["documents_written"] is int
154
+
155
+
156
def test_writer_rejects_bad_store():
    """Constructing a writer with a non-store object raises ``ValueError``."""
    not_a_store = object()
    with pytest.raises(ValueError):
        PerseusVaultMemoryWriter(memory_store=not_a_store)
159
+
160
+
161
def test_writer_to_dict_from_dict():
    """A writer serializes its nested store and is rebuilt with it intact."""
    original = PerseusVaultMemoryWriter(memory_store=PerseusVaultMemoryStore(category="w"))
    serialized = original.to_dict()
    assert serialized["type"].endswith("PerseusVaultMemoryWriter")
    nested_store = serialized["init_parameters"]["memory_store"]
    assert nested_store["init_parameters"]["category"] == "w"

    restored = PerseusVaultMemoryWriter.from_dict(serialized)
    assert isinstance(restored._memory_store, PerseusVaultMemoryStore)
    assert restored._memory_store.category == "w"
170
+
171
+
172
+ # --------------------------------------------------------------------------- #
173
+ # PerseusVaultMemoryRetriever
174
+ # --------------------------------------------------------------------------- #
175
def test_retriever_run(store):
    """The retriever returns the single stored document matching the query."""
    store.add_memories([Document(content="findable content here")])

    result = PerseusVaultMemoryRetriever(memory_store=store).run(query="findable")
    documents = result["documents"]
    assert len(documents) == 1
    assert documents[0].content == "findable content here"
181
+
182
+
183
def test_retriever_output_types(store):
    """The retriever declares a ``documents`` output of type ``list[Document]``."""
    retriever = PerseusVaultMemoryRetriever(memory_store=store)
    declared = _output_sockets(retriever)
    assert declared["documents"] == list[Document]
186
+
187
+
188
def test_retriever_top_k_precedence(store):
    """A per-call ``top_k`` beats the init value; the init value is the fallback."""
    docs = []
    for i in range(5):
        docs.append(Document(content=f"match {i}"))
    store.add_memories(docs)
    retriever = PerseusVaultMemoryRetriever(memory_store=store, top_k=2)

    # init top_k=2, call override=1 -> override wins
    overridden = retriever.run(query="match", top_k=1)
    assert len(overridden["documents"]) == 1

    # no override -> init top_k=2
    defaulted = retriever.run(query="match")
    assert len(defaulted["documents"]) == 2
195
+
196
+
197
def test_retriever_rejects_bad_store():
    """Constructing a retriever with a non-store object raises ``ValueError``."""
    not_a_store = object()
    with pytest.raises(ValueError):
        PerseusVaultMemoryRetriever(memory_store=not_a_store)
200
+
201
+
202
def test_retriever_to_dict_from_dict():
    """A retriever's ``top_k`` and nested store survive a serialization round trip."""
    original = PerseusVaultMemoryRetriever(
        memory_store=PerseusVaultMemoryStore(category="r"),
        top_k=3,
    )
    serialized = original.to_dict()
    assert serialized["init_parameters"]["top_k"] == 3

    restored = PerseusVaultMemoryRetriever.from_dict(serialized)
    assert isinstance(restored._memory_store, PerseusVaultMemoryStore)
    assert restored._top_k == 3
210
+
211
+
212
+ # --------------------------------------------------------------------------- #
213
+ # Pipeline integration (serialization round-trip through a real Pipeline)
214
+ # --------------------------------------------------------------------------- #
215
def test_components_in_pipeline_dumps_loads():
    """Both components survive ``Pipeline.dumps`` followed by ``Pipeline.loads``."""
    from haystack import Pipeline

    shared_store = PerseusVaultMemoryStore(category="pipe")
    pipeline = Pipeline()
    pipeline.add_component("writer", PerseusVaultMemoryWriter(memory_store=shared_store))
    pipeline.add_component("retriever", PerseusVaultMemoryRetriever(memory_store=shared_store))

    dumped = pipeline.dumps()
    assert "PerseusVaultMemoryWriter" in dumped
    assert "PerseusVaultMemoryRetriever" in dumped

    reloaded = Pipeline.loads(dumped)
    assert reloaded.get_component("writer") is not None
    assert reloaded.get_component("retriever") is not None
228
+
229
+
230
+ # --------------------------------------------------------------------------- #
231
+ # Client binary resolution (no subprocess spawned)
232
+ # --------------------------------------------------------------------------- #
233
def test_client_missing_binary_raises():
    """Starting a client whose binary cannot be found raises ``RuntimeError``."""
    from perseus_vault_haystack._client import PerseusVaultClient

    client = PerseusVaultClient(perseus_vault_binary="definitely-not-a-real-binary-xyz")
    with pytest.raises(RuntimeError, match="perseus-vault binary not found"):
        client.start()
239
+
240
+
241
+ # --------------------------------------------------------------------------- #
242
+ # Real-binary smoke test (skipped automatically when no perseus-vault binary
243
+ # is found)
244
+ # --------------------------------------------------------------------------- #
245
def _resolve_real_perseus_vault():
    """Locate a real perseus-vault binary, honoring PERSEUS_VAULT_BINARY override.

    The override may be either a filesystem path or a bare command name that
    is looked up on ``$PATH``. Without a usable override, prefers the
    canonical ``perseus-vault`` name; falls back to the legacy ``mimir``
    compat symlink so the smoke test still runs on older installs.

    :returns: Path of the binary, or ``None`` when none can be found.
    """
    import os
    import shutil

    explicit = os.environ.get("PERSEUS_VAULT_BINARY")
    if explicit:
        if os.path.exists(explicit):
            return explicit
        # Not an existing path: the override may name a command on $PATH.
        on_path = shutil.which(explicit)
        if on_path:
            return on_path

    for candidate in ("perseus-vault", "perseus-vault.exe", "mimir", "mimir.exe"):
        found = shutil.which(candidate)
        if found:
            return found
    return None
264
+
265
+
266
@pytest.mark.skipif(
    _resolve_real_perseus_vault() is None, reason="perseus-vault binary not available"
)
def test_real_roundtrip(tmp_path):
    """End-to-end write+recall against a real perseus-vault subprocess."""
    binary = _resolve_real_perseus_vault()
    s = PerseusVaultMemoryStore(
        db_path=str(tmp_path / "real.db"),
        perseus_vault_binary=binary,
        category="pytest-smoke",
        timeout_s=20,
    )
    try:
        written = s.add_memories(
            [Document(id="r1", content="Perseus Vault provides persistent memory.", meta={"k": "v"})]
        )
        assert written == 1
        hits = s.search_memories("persistent memory", top_k=5)
        assert any(h.content == "Perseus Vault provides persistent memory." for h in hits)
    finally:
        # Close the client even when an assertion fails, so a failing run
        # does not leave the perseus-vault subprocess behind.
        s._client.close()