perseus-vault-haystack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perseus_vault_haystack/__init__.py +27 -0
- perseus_vault_haystack/_client.py +237 -0
- perseus_vault_haystack/components.py +122 -0
- perseus_vault_haystack/memory_store.py +188 -0
- perseus_vault_haystack/py.typed +0 -0
- perseus_vault_haystack-0.1.0.dist-info/METADATA +129 -0
- perseus_vault_haystack-0.1.0.dist-info/RECORD +9 -0
- perseus_vault_haystack-0.1.0.dist-info/WHEEL +4 -0
- perseus_vault_haystack-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Perseus Computing LLC
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
"""perseus-vault-haystack — Perseus Vault persistent memory for Haystack 2.x.
|
|
6
|
+
|
|
7
|
+
Perseus Vault (https://github.com/Perseus-Computing-LLC/perseus-vault) is an
|
|
8
|
+
open-source (MIT) local-first, encrypted persistent memory engine with 40+ MCP
|
|
9
|
+
tools. This package exposes Perseus Vault to Haystack 2.x pipelines as a memory
|
|
10
|
+
store plus two ``@component`` adapters.
|
|
11
|
+
|
|
12
|
+
Requirements:
|
|
13
|
+
A ``perseus-vault`` binary must be on ``$PATH`` or passed explicitly via
|
|
14
|
+
``perseus_vault_binary``. Download from:
|
|
15
|
+
https://github.com/Perseus-Computing-LLC/perseus-vault/releases
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .components import PerseusVaultMemoryRetriever, PerseusVaultMemoryWriter
|
|
19
|
+
from .memory_store import PerseusVaultMemoryStore
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"PerseusVaultMemoryStore",
|
|
23
|
+
"PerseusVaultMemoryWriter",
|
|
24
|
+
"PerseusVaultMemoryRetriever",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Perseus Computing LLC
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
"""Low-level JSON-RPC (MCP stdio) client for the Perseus Vault memory engine.
|
|
6
|
+
|
|
7
|
+
Perseus Vault (https://github.com/Perseus-Computing-LLC/perseus-vault) is an
|
|
8
|
+
open-source (MIT) local-first, encrypted persistent memory engine exposing 40+
|
|
9
|
+
MCP tools. It runs as ``perseus-vault serve --db <path>`` and speaks JSON-RPC 2.0
|
|
10
|
+
over stdin/stdout (the MCP stdio transport).
|
|
11
|
+
|
|
12
|
+
This client spawns the ``perseus-vault`` binary and provides a thin, thread-safe
|
|
13
|
+
``call_tool`` method. It is adapted from the proven client core in
|
|
14
|
+
``Perseus-Computing-LLC/adk-perseus-vault-memory``.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import atexit
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import queue
|
|
23
|
+
import shutil
|
|
24
|
+
import subprocess
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class PerseusVaultClient:
    """Thread-safe JSON-RPC client over a ``perseus-vault`` stdio subprocess.

    The client lazily spawns ``perseus-vault serve --db <path>`` on first use
    (``start``), performs the MCP ``initialize`` handshake, and exposes
    ``call_tool`` to invoke any Perseus Vault MCP tool. ``close`` is registered
    with ``atexit`` so the subprocess is terminated at interpreter exit; after
    ``close`` the client may be started again.

    Two locks are used: ``_start_lock`` serializes ``start`` (including the
    handshake) so no tool call can be sent before ``initialize`` completes,
    and ``_lock`` serializes individual request/response exchanges.
    """

    def __init__(
        self,
        db_path: str = "~/.mimir/haystack.db",
        perseus_vault_binary: str = "perseus-vault",
        timeout_s: float = 30.0,
    ) -> None:
        """Initialize the client (does not start the subprocess yet).

        :param db_path: Path to the Perseus Vault SQLite database file
            (``~`` is expanded).
        :param perseus_vault_binary: Name (resolved on ``$PATH``) or absolute path
            of the ``perseus-vault`` executable.
        :param timeout_s: Per-RPC timeout guarding against a hung subprocess.
        """
        self.db_path = os.path.expanduser(db_path)
        self.perseus_vault_binary = perseus_vault_binary
        self.timeout_s = timeout_s

        self._proc: subprocess.Popen | None = None
        # Serializes one request/response exchange at a time.
        self._lock = threading.Lock()
        # Serializes start() so the handshake finishes before any tool call.
        self._start_lock = threading.Lock()
        self._request_id = 0
        self._recv: queue.Queue = queue.Queue()
        self._reader: threading.Thread | None = None
        self._started = False
        self._atexit_registered = False

    # ------------------------------------------------------------------ #
    # Lifecycle
    # ------------------------------------------------------------------ #
    def _resolve_binary(self) -> str:
        """Return the path of the executable to spawn.

        :raises RuntimeError: If an absolute path does not exist, or the name
            cannot be found on ``$PATH``.
        """
        if os.path.isabs(self.perseus_vault_binary):
            if not os.path.exists(self.perseus_vault_binary):
                msg = f"perseus-vault binary not found at '{self.perseus_vault_binary}'."
                raise RuntimeError(msg)
            return self.perseus_vault_binary
        resolved = shutil.which(self.perseus_vault_binary)
        if (
            resolved is None
            and os.name == "nt"
            and not self.perseus_vault_binary.lower().endswith(".exe")
        ):
            # On Windows the binary may be installed without the .exe suffix
            # (shutil.which only matches PATHEXT extensions by default).
            resolved = shutil.which(self.perseus_vault_binary + ".exe")
        if resolved is None:
            msg = (
                f"perseus-vault binary not found on $PATH (looked for "
                f"'{self.perseus_vault_binary}'). Install Perseus Vault from "
                "https://github.com/Perseus-Computing-LLC/perseus-vault/releases "
                "or pass an absolute path via perseus_vault_binary=."
            )
            raise RuntimeError(msg)
        return resolved

    def start(self) -> None:
        """Spawn the subprocess and perform the MCP handshake (idempotent).

        The client is only marked as started once the handshake has succeeded.
        If the handshake fails, the subprocess is terminated and the error is
        re-raised, so a later call can retry from a clean state.

        :raises RuntimeError: If the binary cannot be found or the handshake
            fails or times out.
        """
        with self._start_lock:
            if self._started:
                return
            binary = self._resolve_binary()

            db_dir = os.path.dirname(self.db_path)
            if db_dir:
                os.makedirs(db_dir, exist_ok=True)

            # stderr is discarded: nothing drains it, so a chatty server filling
            # the OS pipe buffer would block on its stderr write while we wait on
            # stdout (a classic two-pipe deadlock).
            # The encoding is pinned to UTF-8 because the MCP stdio transport
            # requires it; the locale default (e.g. cp1252) would corrupt or
            # fail to decode non-ASCII payloads.
            proc = subprocess.Popen(
                [binary, "serve", "--db", self.db_path],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                encoding="utf-8",
            )

            # A fresh queue per subprocess: a previous reader's leftover lines
            # or EOF sentinel must never be mistaken for this process's output.
            recv: queue.Queue = queue.Queue()
            proc_stdout = proc.stdout

            def _pump() -> None:
                try:
                    for line in proc_stdout:  # type: ignore[union-attr]
                        recv.put(line)
                except Exception:  # noqa: BLE001
                    pass
                finally:
                    recv.put(None)  # EOF sentinel
                    try:
                        proc_stdout.close()  # type: ignore[union-attr]
                    except Exception:  # noqa: BLE001
                        pass

            reader = threading.Thread(target=_pump, daemon=True)
            with self._lock:
                self._proc = proc
                self._recv = recv
                self._reader = reader
            reader.start()
            if not self._atexit_registered:
                atexit.register(self.close)
                self._atexit_registered = True

            # Handshake (_rpc takes self._lock itself; we still hold
            # self._start_lock so concurrent callers wait for completion).
            try:
                self._rpc(
                    "initialize",
                    {
                        "protocolVersion": "2024-11-05",
                        "capabilities": {},
                        "clientInfo": {"name": "perseus-vault-haystack", "version": "0.1.0"},
                    },
                )
                self._notify("notifications/initialized", {})
            except BaseException:
                self.close()
                raise
            # Stay "not started" if close() swapped the process out meanwhile.
            self._started = self._proc is proc

    def close(self) -> None:
        """Terminate the Perseus Vault subprocess (best effort, idempotent).

        Resets the client so a later ``start`` / ``call_tool`` spawns a fresh
        subprocess. Never raises: this also runs as an ``atexit`` hook.
        """
        proc, self._proc = self._proc, None
        self._started = False
        if proc is None:
            return
        try:
            proc.terminate()
            proc.wait(timeout=5)
        except Exception:  # noqa: BLE001
            try:
                proc.kill()
            except Exception:  # noqa: BLE001
                pass
        finally:
            # stdout is closed by the reader thread once it reaches EOF.
            stdin = proc.stdin
            if stdin is not None:
                try:
                    stdin.close()
                except (OSError, ValueError):
                    pass

    # ------------------------------------------------------------------ #
    # JSON-RPC plumbing
    # ------------------------------------------------------------------ #
    def _next_id(self) -> int:
        """Return the next request id (callers must hold ``self._lock``)."""
        self._request_id += 1
        return self._request_id

    def _rpc(self, method: str, params: object) -> dict:
        """Send a JSON-RPC request and return its ``result`` dict.

        Lines on stdout that are not JSON objects, or whose ``id`` does not
        match this request (notifications, stale replies), are skipped.

        :raises RuntimeError: If the subprocess is not running, the write
            fails, the reply does not arrive within ``timeout_s``, the
            subprocess closes its output, or the server returns an error.
        """
        with self._lock:
            proc = self._proc
            if proc is None or proc.stdin is None:
                msg = "Perseus Vault subprocess is not running. Call start() first."
                raise RuntimeError(msg)
            recv = self._recv
            req_id = self._next_id()
            req = {"jsonrpc": "2.0", "id": req_id, "method": method, "params": params}
            payload = json.dumps(req, default=str)
            try:
                proc.stdin.write(payload + "\n")
                proc.stdin.flush()
            except (BrokenPipeError, OSError, ValueError) as e:
                # ValueError: the pipe was already closed by close().
                msg = (
                    f"Perseus Vault subprocess communication failed: {e}. "
                    "The perseus-vault process may have crashed."
                )
                raise RuntimeError(msg) from e

            deadline = time.monotonic() + self.timeout_s
            while True:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    msg = f"Perseus Vault RPC '{method}' timed out after {self.timeout_s}s."
                    raise RuntimeError(msg)
                try:
                    raw = recv.get(timeout=remaining)
                except queue.Empty:
                    msg = f"Perseus Vault RPC '{method}' timed out after {self.timeout_s}s."
                    raise RuntimeError(msg) from None
                if raw is None:
                    # Re-queue the sentinel so later calls also fail fast
                    # instead of waiting for the full timeout.
                    recv.put(None)
                    msg = "Perseus Vault subprocess closed its output (it may have crashed)."
                    raise RuntimeError(msg)
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    resp = json.loads(raw)
                except json.JSONDecodeError:
                    continue  # non-JSON noise on stdout
                if not isinstance(resp, dict):
                    continue  # valid JSON but not a JSON-RPC message
                if resp.get("id") != req_id:
                    continue  # notification or a stale/other reply
                if "error" in resp:
                    err = resp["error"]
                    if isinstance(err, dict):
                        code, message = err.get("code"), err.get("message")
                    else:
                        code, message = None, err
                    msg = f"Perseus Vault RPC error [{code}]: {message}"
                    raise RuntimeError(msg)
                result = resp.get("result", {})
                return result if isinstance(result, dict) else {}

    def _notify(self, method: str, params: object) -> None:
        """Send a JSON-RPC notification (no id, no response expected).

        Write failures are ignored on purpose: a notification has no reply to
        wait for, and a dead subprocess is reported by the next ``_rpc`` call.
        """
        with self._lock:
            proc = self._proc
            if proc is None or proc.stdin is None:
                return
            payload = json.dumps({"jsonrpc": "2.0", "method": method, "params": params})
            try:
                proc.stdin.write(payload + "\n")
                proc.stdin.flush()
            except (BrokenPipeError, OSError, ValueError):
                pass

    def call_tool(self, name: str, arguments: dict) -> dict:
        """Call a Perseus Vault MCP tool and return its ``structuredContent``.

        Falls back to parsing the first text content block as JSON if no
        structured content is present. Always returns a dict; ``{}`` when the
        tool produced nothing parseable as a JSON object.

        :param name: MCP tool name, e.g. ``perseus_vault_remember``.
        :param arguments: Tool arguments (JSON-serializable).
        :raises RuntimeError: On transport/RPC errors, or when the tool result
            is flagged with ``isError`` (MCP reports tool execution failures
            inside the result rather than as a JSON-RPC error).
        """
        if not self._started:
            self.start()
        result = self._rpc("tools/call", {"name": name, "arguments": arguments})

        content = result.get("content") or []
        text = None
        if isinstance(content, list):
            for block in content:
                if isinstance(block, dict) and isinstance(block.get("text"), str):
                    text = block["text"]
                    break

        if result.get("isError"):
            detail = text if text else "no error details provided"
            msg = f"Perseus Vault tool '{name}' failed: {detail}"
            raise RuntimeError(msg)

        structured = result.get("structuredContent")
        if isinstance(structured, dict):
            return structured
        if text is not None:
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                return {}
            if isinstance(parsed, dict):
                return parsed
        return {}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Perseus Computing LLC
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
"""Haystack 2.x components wrapping the Perseus Vault memory store."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from haystack import component, default_from_dict, default_to_dict
|
|
12
|
+
from haystack.dataclasses import Document
|
|
13
|
+
|
|
14
|
+
from .memory_store import PerseusVaultMemoryStore
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@component
class PerseusVaultMemoryWriter:
    """Haystack component that persists ``Document``s into a ``PerseusVaultMemoryStore``.

    Slots into a pipeline as a sink: it writes the incoming documents to Perseus
    Vault and passes them through unchanged (plus a count), so it can also sit
    mid-pipeline.

    Usage::

        from perseus_vault_haystack import PerseusVaultMemoryStore, PerseusVaultMemoryWriter

        store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
        writer = PerseusVaultMemoryWriter(memory_store=store)
        writer.run(documents=[Document(content="Perseus Vault is local-first.")])
    """

    def __init__(self, *, memory_store: PerseusVaultMemoryStore) -> None:
        """Initialize the writer.

        :param memory_store: Backing :class:`PerseusVaultMemoryStore` to write into.
        :raises ValueError: If ``memory_store`` is not a ``PerseusVaultMemoryStore``.
        """
        if not isinstance(memory_store, PerseusVaultMemoryStore):
            msg = "memory_store must be an instance of PerseusVaultMemoryStore"
            raise ValueError(msg)
        self._memory_store = memory_store

    @component.output_types(documents=list[Document], documents_written=int)
    def run(self, documents: list[Document]) -> dict[str, Any]:
        """Store ``documents`` in Perseus Vault and pass them through.

        :param documents: Documents to persist.
        :returns: ``{"documents": <same documents>, "documents_written": <count>}``,
            where the count is whatever the store's ``add_memories`` reports.
        """
        written = self._memory_store.add_memories(documents)
        return {"documents": documents, "documents_written": written}

    def to_dict(self) -> dict[str, Any]:
        """Serialize this component (and its nested store) to a dictionary."""
        return default_to_dict(self, memory_store=self._memory_store.to_dict())

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PerseusVaultMemoryWriter:
        """Deserialize a component from a dictionary produced by :meth:`to_dict`.

        The input dictionary is left untouched: the nested store is rebuilt on
        a shallow copy of ``init_parameters``, so the same ``data`` can be
        deserialized more than once.

        :raises KeyError: If ``init_parameters`` or its ``memory_store`` entry
            is missing.
        """
        init_params = dict(data["init_parameters"])
        init_params["memory_store"] = PerseusVaultMemoryStore.from_dict(
            init_params["memory_store"]
        )
        return default_from_dict(cls, {**data, "init_parameters": init_params})
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@component
class PerseusVaultMemoryRetriever:
    """Haystack component that retrieves ``Document``s from a ``PerseusVaultMemoryStore``.

    A thin pipeline adapter over :meth:`PerseusVaultMemoryStore.search_memories`.
    Takes a ``query`` and returns the most relevant stored documents — drop it in
    front of a prompt builder for retrieval-augmented generation over persistent
    memory.

    Usage::

        from perseus_vault_haystack import PerseusVaultMemoryStore, PerseusVaultMemoryRetriever

        store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
        retriever = PerseusVaultMemoryRetriever(memory_store=store, top_k=5)
        result = retriever.run(query="What is Perseus Vault?")
        docs = result["documents"]
    """

    def __init__(self, *, memory_store: PerseusVaultMemoryStore, top_k: int | None = None) -> None:
        """Initialize the retriever.

        :param memory_store: Backing :class:`PerseusVaultMemoryStore` to query.
        :param top_k: Default max results; falls back to the store's ``top_k``
            when ``None``.
        :raises ValueError: If ``memory_store`` is not a ``PerseusVaultMemoryStore``.
        """
        if not isinstance(memory_store, PerseusVaultMemoryStore):
            msg = "memory_store must be an instance of PerseusVaultMemoryStore"
            raise ValueError(msg)
        self._memory_store = memory_store
        self._top_k = top_k

    @component.output_types(documents=list[Document])
    def run(self, query: str, top_k: int | None = None) -> dict[str, list[Document]]:
        """Search the attached store and return matching documents.

        :param query: Natural-language / keyword query.
        :param top_k: Per-call override; falls back to init ``top_k``, then the
            store's default.
        :returns: ``{"documents": [Document, ...]}`` in the order returned by
            the store's ``search_memories``.
        """
        effective_top_k = top_k if top_k is not None else self._top_k
        documents = self._memory_store.search_memories(query=query, top_k=effective_top_k)
        return {"documents": documents}

    def to_dict(self) -> dict[str, Any]:
        """Serialize this component (and its nested store) to a dictionary."""
        return default_to_dict(self, memory_store=self._memory_store.to_dict(), top_k=self._top_k)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PerseusVaultMemoryRetriever:
        """Deserialize a component from a dictionary produced by :meth:`to_dict`.

        The input dictionary is left untouched: the nested store is rebuilt on
        a shallow copy of ``init_parameters``, so the same ``data`` can be
        deserialized more than once.

        :raises KeyError: If ``init_parameters`` or its ``memory_store`` entry
            is missing.
        """
        init_params = dict(data["init_parameters"])
        init_params["memory_store"] = PerseusVaultMemoryStore.from_dict(
            init_params["memory_store"]
        )
        return default_from_dict(cls, {**data, "init_parameters": init_params})
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Perseus Computing LLC
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: MIT
|
|
4
|
+
|
|
5
|
+
"""Perseus Vault-backed memory store for Haystack 2.x."""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import time
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from haystack import default_from_dict, default_to_dict
|
|
15
|
+
from haystack.dataclasses import Document
|
|
16
|
+
|
|
17
|
+
from ._client import PerseusVaultClient
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
_DEFAULT_CATEGORY = "haystack-memory"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class PerseusVaultMemoryStore:
    """Persistent memory backend backed by the Perseus Vault engine.

    Wraps the Perseus Vault MCP tools ``perseus_vault_remember`` (write),
    ``perseus_vault_recall`` (search) and ``perseus_vault_forget`` (delete). Each
    Haystack ``Document`` is stored as one Perseus Vault entity; the document's
    ``id``, ``content`` and ``meta`` are packed into the entity's ``body_json``.
    On recall, entities are rehydrated back into ``Document`` objects with
    ``id``, ``content``, ``meta`` and, when Perseus Vault supplies one, a
    relevance ``score``.

    The store owns the client for the long-lived ``perseus-vault`` subprocess;
    the thin :class:`~perseus_vault_haystack.PerseusVaultMemoryWriter` and
    :class:`~perseus_vault_haystack.PerseusVaultMemoryRetriever` components
    delegate to it so a single store can back several pipeline components.
    """

    def __init__(
        self,
        db_path: str = "~/.mimir/haystack.db",
        perseus_vault_binary: str = "perseus-vault",
        category: str = _DEFAULT_CATEGORY,
        top_k: int = 10,
        timeout_s: float = 30.0,
    ) -> None:
        """Initialize the store.

        :param db_path: Path to the Perseus Vault SQLite database file.
        :param perseus_vault_binary: Name (on ``$PATH``) or absolute path of the
            ``perseus-vault`` executable.
        :param category: Perseus Vault category that scopes every write and
            recall for this store. Use distinct categories to isolate corpora.
        :param top_k: Default maximum number of documents returned by
            :meth:`search_memories`.
        :param timeout_s: Per-RPC timeout for the underlying Perseus Vault
            subprocess.
        """
        # The raw constructor arguments are kept as given so to_dict() can
        # serialize them unchanged.
        self.db_path = db_path
        self.perseus_vault_binary = perseus_vault_binary
        self.category = category
        self.top_k = top_k
        self.timeout_s = timeout_s

        self._client = PerseusVaultClient(
            db_path=db_path,
            perseus_vault_binary=perseus_vault_binary,
            timeout_s=timeout_s,
        )

    # ------------------------------------------------------------------ #
    # Write
    # ------------------------------------------------------------------ #
    def add_memories(self, documents: list[Document]) -> int:
        """Persist ``documents`` into Perseus Vault via ``perseus_vault_remember``.

        Documents with empty ``content`` are skipped. The document ``id`` is
        used as the Perseus Vault entity key; a time-based key is generated
        when the document has no ``id``.

        :param documents: Documents to store.
        :returns: The number of documents for which a write was issued.
        """
        written = 0
        for doc in documents:
            if not doc.content:
                continue
            key = doc.id or f"doc:{int(time.time() * 1_000_000)}:{written}"
            self._client.call_tool(
                "perseus_vault_remember",
                {
                    "category": self.category,
                    "key": key,
                    "body_json": json.dumps(
                        {
                            "doc_id": doc.id,
                            "content": doc.content,
                            "meta": doc.meta or {},
                        }
                    ),
                    "tags": ["haystack"],
                },
            )
            written += 1
        logger.info("Stored %d documents in Perseus Vault category '%s'", written, self.category)
        return written

    # ------------------------------------------------------------------ #
    # Search
    # ------------------------------------------------------------------ #
    def search_memories(self, query: str, top_k: int | None = None) -> list[Document]:
        """Search Perseus Vault via ``perseus_vault_recall`` and return matching documents.

        Malformed entries in the tool response (non-dict items, bodies that do
        not decode to a JSON object, entries without content) are skipped
        rather than raising.

        :param query: Natural-language / keyword query. Empty queries return
            ``[]``.
        :param top_k: Per-call override of the store's default ``top_k``.
        :returns: A list of :class:`~haystack.dataclasses.Document` in the order
            returned by Perseus Vault, each carrying a ``score`` when Perseus
            Vault provides a numeric one.
        """
        if not query:
            return []
        limit = top_k if top_k is not None else self.top_k
        result = self._client.call_tool(
            "perseus_vault_recall",
            {"query": query, "limit": limit, "category": self.category},
        )
        if not isinstance(result, dict):
            return []
        items = result.get("items") or result.get("results") or []
        if not isinstance(items, list):
            return []

        documents: list[Document] = []
        for item in items:
            if not isinstance(item, dict):
                continue
            body = item.get("body_json", "{}")
            try:
                body_data = json.loads(body) if isinstance(body, str) else body
            except (json.JSONDecodeError, TypeError):
                body_data = {}
            if not isinstance(body_data, dict):
                continue
            content = body_data.get("content") or item.get("content")
            if not content:
                continue
            # Perseus Vault's recall ranks by relevance but names the field
            # differently across versions: prefer an explicit ``score``, else fall
            # back to ``certainty`` (relevance/confidence in v2.x).
            score = item.get("score")
            if score is None:
                score = item.get("certainty")
            # bool is a subclass of int; a True/False flag is not a relevance score.
            has_numeric_score = isinstance(score, (int, float)) and not isinstance(score, bool)
            meta = body_data.get("meta")
            documents.append(
                Document(
                    id=body_data.get("doc_id") or item.get("key", ""),
                    content=content,
                    meta=meta if isinstance(meta, dict) else {},
                    score=float(score) if has_numeric_score else None,
                )
            )
        logger.info("Recalled %d documents for query '%s'", len(documents), query[:80])
        return documents

    # ------------------------------------------------------------------ #
    # Delete
    # ------------------------------------------------------------------ #
    def delete_all_memories(self) -> None:
        """Call ``perseus_vault_forget`` scoped to this store's category."""
        self._client.call_tool("perseus_vault_forget", {"category": self.category})
        logger.info("Deleted all documents in Perseus Vault category '%s'", self.category)

    # ------------------------------------------------------------------ #
    # Serialization
    # ------------------------------------------------------------------ #
    def to_dict(self) -> dict[str, Any]:
        """Serialize this store's constructor arguments for pipeline persistence."""
        return default_to_dict(
            self,
            db_path=self.db_path,
            perseus_vault_binary=self.perseus_vault_binary,
            category=self.category,
            top_k=self.top_k,
            timeout_s=self.timeout_s,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> PerseusVaultMemoryStore:
        """Deserialize a store from a dict produced by :meth:`to_dict`."""
        return default_from_dict(cls, data)
|
|
File without changes
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: perseus-vault-haystack
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first, encrypted persistent memory for Haystack 2.x pipelines — backed by Perseus Vault.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Perseus-Computing-LLC/mimir-haystack
|
|
6
|
+
Project-URL: Repository, https://github.com/Perseus-Computing-LLC/mimir-haystack
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/Perseus-Computing-LLC/mimir-haystack/issues
|
|
8
|
+
Project-URL: Perseus Vault, https://github.com/Perseus-Computing-LLC/perseus-vault
|
|
9
|
+
Author-email: Perseus Computing LLC <hermes@perseus.observer>
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agents,haystack,mcp,memory,perseus-vault,rag
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: haystack-ai>=2.0.0
|
|
25
|
+
Provides-Extra: test
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'test'
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# perseus-vault-haystack
|
|
30
|
+
|
|
31
|
+
Local-first, encrypted **persistent memory for [Haystack](https://haystack.deepset.ai/) 2.x pipelines**, backed by [Perseus Vault](https://github.com/Perseus-Computing-LLC/perseus-vault) (formerly "Mimir"/"Mneme").
|
|
32
|
+
|
|
33
|
+
Perseus Vault is an open-source (MIT) memory engine that runs entirely on your machine, stores data in an encrypted SQLite database, and exposes 40+ tools over the Model Context Protocol (MCP). This package wraps Perseus Vault's `remember` / `recall` / `forget` tools as Haystack components so your pipelines can persist and retrieve documents across runs — no external vector database or API key required.
|
|
34
|
+
|
|
35
|
+
## Components
|
|
36
|
+
|
|
37
|
+
| Class | Type | Role |
|
|
38
|
+
| --- | --- | --- |
|
|
39
|
+
| `PerseusVaultMemoryStore` | Memory store | Owns the `perseus-vault` subprocess and config; holds `add_memories` / `search_memories` / `delete_all_memories`. |
|
|
40
|
+
| `PerseusVaultMemoryWriter` | `@component` | Pipeline sink that persists `Document`s into the store. |
|
|
41
|
+
| `PerseusVaultMemoryRetriever` | `@component` | Pipeline source that retrieves the most relevant `Document`s for a query. |
|
|
42
|
+
|
|
43
|
+
## Prerequisite: the `perseus-vault` binary
|
|
44
|
+
|
|
45
|
+
These components talk to a local `perseus-vault` executable over stdio. Install it first:
|
|
46
|
+
|
|
47
|
+
1. Download a pre-built binary from the [Perseus Vault releases page](https://github.com/Perseus-Computing-LLC/perseus-vault/releases) (or build from source).
|
|
48
|
+
2. Put it on your `$PATH` (so `perseus-vault` resolves), **or** pass its absolute path via `perseus_vault_binary=`.
|
|
49
|
+
|
|
50
|
+
You can verify it works with:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
perseus-vault --version
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install perseus-vault-haystack
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
This pulls in `haystack-ai`. The `perseus-vault` binary is a separate, language-agnostic dependency (see above).
|
|
63
|
+
|
|
64
|
+
## Quickstart — write then read in a pipeline
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from haystack import Pipeline, Document
|
|
68
|
+
from perseus_vault_haystack import (
|
|
69
|
+
PerseusVaultMemoryStore,
|
|
70
|
+
PerseusVaultMemoryWriter,
|
|
71
|
+
PerseusVaultMemoryRetriever,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# One store, shared by both components (single perseus-vault subprocess).
|
|
75
|
+
store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db", category="docs")
|
|
76
|
+
|
|
77
|
+
# --- Write documents into persistent memory ---
|
|
78
|
+
write_pipe = Pipeline()
|
|
79
|
+
write_pipe.add_component("writer", PerseusVaultMemoryWriter(memory_store=store))
|
|
80
|
+
write_pipe.run(
|
|
81
|
+
{
|
|
82
|
+
"writer": {
|
|
83
|
+
"documents": [
|
|
84
|
+
Document(content="Perseus Vault is a local-first, encrypted memory engine."),
|
|
85
|
+
Document(content="Haystack is an open-source LLM framework by deepset."),
|
|
86
|
+
]
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# --- Retrieve them later (even in a separate process / run) ---
|
|
92
|
+
read_pipe = Pipeline()
|
|
93
|
+
read_pipe.add_component("retriever", PerseusVaultMemoryRetriever(memory_store=store, top_k=3))
|
|
94
|
+
result = read_pipe.run({"retriever": {"query": "What is Perseus Vault?"}})
|
|
95
|
+
|
|
96
|
+
for doc in result["retriever"]["documents"]:
|
|
97
|
+
print(doc.score, doc.content)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Because Perseus Vault persists to an encrypted SQLite file, documents written in one run are available in any future run pointed at the same `db_path`.
|
|
101
|
+
|
|
102
|
+
### Use directly (without a pipeline)
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from haystack import Document
|
|
106
|
+
from perseus_vault_haystack import PerseusVaultMemoryStore
|
|
107
|
+
|
|
108
|
+
store = PerseusVaultMemoryStore(db_path="~/.mimir/haystack.db")
|
|
109
|
+
store.add_memories([Document(content="Remember this fact.")])
|
|
110
|
+
hits = store.search_memories("fact", top_k=5)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Configuration
|
|
114
|
+
|
|
115
|
+
`PerseusVaultMemoryStore` accepts:
|
|
116
|
+
|
|
117
|
+
- `db_path` — path to the Perseus Vault SQLite database (default `~/.mimir/haystack.db`).
|
|
118
|
+
- `perseus_vault_binary` — name on `$PATH` or absolute path to the executable (default `perseus-vault`).
|
|
119
|
+
- `category` — Perseus Vault category scoping all writes/recalls for this store (default `haystack-memory`). Use distinct categories to isolate corpora.
|
|
120
|
+
- `top_k` — default number of documents returned by retrieval (default `10`).
|
|
121
|
+
- `timeout_s` — per-RPC timeout for the subprocess (default `30`).
|
|
122
|
+
|
|
123
|
+
## Serialization
|
|
124
|
+
|
|
125
|
+
All three classes implement `to_dict()` / `from_dict()` and round-trip through `Pipeline.dumps()` / `Pipeline.loads()`.
|
|
126
|
+
|
|
127
|
+
## License
|
|
128
|
+
|
|
129
|
+
MIT © 2026 Perseus Computing LLC. Perseus Vault (formerly Mimir/Mneme) is also MIT-licensed.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
perseus_vault_haystack/__init__.py,sha256=dA1n9c26MMkpHBk7FVMNkKcuP9EH0FQLVjl1a4-bIFs,918
|
|
2
|
+
perseus_vault_haystack/_client.py,sha256=yeH9POfvgglFjYv1f3sKBqV7G7Zcfum5i8InZJGdbjs,9423
|
|
3
|
+
perseus_vault_haystack/components.py,sha256=uc8SsQpPup9bJmGI5PaBL1ofSY3PDtzNaR5fMEh995E,5017
|
|
4
|
+
perseus_vault_haystack/memory_store.py,sha256=S4X5NyeFm1wkyYHDMzP3maoncRs4tQGg48NyRBjot3A,7671
|
|
5
|
+
perseus_vault_haystack/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
perseus_vault_haystack-0.1.0.dist-info/METADATA,sha256=Ym_LeN2EGowKGsgld7ga49AIXtn47JwlJy2PgJ421Gs,5526
|
|
7
|
+
perseus_vault_haystack-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
perseus_vault_haystack-0.1.0.dist-info/licenses/LICENSE,sha256=aPjotXyzchCdsLnPGaLiCHAhdGQvIBywrdt0qJC9SuU,1078
|
|
9
|
+
perseus_vault_haystack-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Perseus Computing LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|