cryptomem 2026.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cryptomem/__init__.py +51 -0
- cryptomem/adapters/__init__.py +5 -0
- cryptomem/adapters/base.py +11 -0
- cryptomem/adapters/mock_adapter.py +14 -0
- cryptomem/adapters/ollama_adapter.py +41 -0
- cryptomem/cli.py +38 -0
- cryptomem/client.py +361 -0
- cryptomem/config.py +37 -0
- cryptomem/crypto/__init__.py +5 -0
- cryptomem/crypto/hashing.py +25 -0
- cryptomem/crypto/keys.py +63 -0
- cryptomem/crypto/merkle.py +59 -0
- cryptomem/crypto/signer.py +52 -0
- cryptomem/efficiency/__init__.py +12 -0
- cryptomem/efficiency/budgeter.py +40 -0
- cryptomem/efficiency/cache.py +32 -0
- cryptomem/efficiency/compressor.py +38 -0
- cryptomem/efficiency/deduper.py +25 -0
- cryptomem/embeddings/__init__.py +5 -0
- cryptomem/embeddings/base.py +26 -0
- cryptomem/embeddings/minilm.py +36 -0
- cryptomem/embeddings/stub.py +31 -0
- cryptomem/models.py +49 -0
- cryptomem/proactive/__init__.py +6 -0
- cryptomem/proactive/extractor.py +27 -0
- cryptomem/proactive/planner.py +45 -0
- cryptomem/proactive/triggers.py +59 -0
- cryptomem/proactive/writeback.py +35 -0
- cryptomem/py.typed +0 -0
- cryptomem/retrieval/__init__.py +3 -0
- cryptomem/retrieval/retriever.py +80 -0
- cryptomem/server/__init__.py +3 -0
- cryptomem/server/app.py +186 -0
- cryptomem/store/__init__.py +6 -0
- cryptomem/store/base.py +29 -0
- cryptomem/store/neo4j_store.py +156 -0
- cryptomem/store/remote_store.py +74 -0
- cryptomem/store/sqlite_store.py +119 -0
- cryptomem/verification/__init__.py +15 -0
- cryptomem/verification/citations.py +51 -0
- cryptomem/verification/cove.py +51 -0
- cryptomem/verification/entropy.py +65 -0
- cryptomem/verification/faithfulness.py +47 -0
- cryptomem/verification/grounding.py +26 -0
- cryptomem-2026.6.1.dist-info/METADATA +86 -0
- cryptomem-2026.6.1.dist-info/RECORD +48 -0
- cryptomem-2026.6.1.dist-info/WHEEL +4 -0
- cryptomem-2026.6.1.dist-info/entry_points.txt +2 -0
cryptomem/__init__.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from cryptomem.adapters.mock_adapter import MockAdapter
|
|
2
|
+
from cryptomem.adapters.ollama_adapter import OllamaAdapter
|
|
3
|
+
from cryptomem.client import ABSTAIN, MemoryClient
|
|
4
|
+
from cryptomem.config import Settings
|
|
5
|
+
from cryptomem.embeddings.minilm import MiniLMEmbedder
|
|
6
|
+
from cryptomem.embeddings.stub import StubEmbedder
|
|
7
|
+
from cryptomem.models import (
|
|
8
|
+
Contradiction,
|
|
9
|
+
CryptoEnvelope,
|
|
10
|
+
MemoryNode,
|
|
11
|
+
Relationship,
|
|
12
|
+
ScoredNode,
|
|
13
|
+
)
|
|
14
|
+
from cryptomem.proactive import Planner, TriggerEngine, WriteBack
|
|
15
|
+
from cryptomem.store.remote_store import RemoteStore
|
|
16
|
+
from cryptomem.store.sqlite_store import SqliteStore
|
|
17
|
+
from cryptomem.verification import (
|
|
18
|
+
ChainOfVerification,
|
|
19
|
+
Citer,
|
|
20
|
+
FaithfulnessChecker,
|
|
21
|
+
GroundingGate,
|
|
22
|
+
SemanticEntropy,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__version__ = "2026.06.1"
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"ABSTAIN",
|
|
29
|
+
"MemoryClient",
|
|
30
|
+
"Settings",
|
|
31
|
+
"MemoryNode",
|
|
32
|
+
"Relationship",
|
|
33
|
+
"CryptoEnvelope",
|
|
34
|
+
"ScoredNode",
|
|
35
|
+
"Contradiction",
|
|
36
|
+
"StubEmbedder",
|
|
37
|
+
"MiniLMEmbedder",
|
|
38
|
+
"MockAdapter",
|
|
39
|
+
"OllamaAdapter",
|
|
40
|
+
"GroundingGate",
|
|
41
|
+
"FaithfulnessChecker",
|
|
42
|
+
"Citer",
|
|
43
|
+
"SemanticEntropy",
|
|
44
|
+
"ChainOfVerification",
|
|
45
|
+
"Planner",
|
|
46
|
+
"TriggerEngine",
|
|
47
|
+
"WriteBack",
|
|
48
|
+
"SqliteStore",
|
|
49
|
+
"RemoteStore",
|
|
50
|
+
"__version__",
|
|
51
|
+
]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LLMAdapter(ABC):
    """Abstract base for model backends exposing a single completion call.

    Concrete adapters must implement :meth:`complete`; the class cannot be
    instantiated until they do.
    """

    @abstractmethod
    def complete(self, prompt: str) -> str:
        """Produce the backend's completion text for ``prompt``.

        Args:
            prompt: Full prompt text to send to the model.

        Returns:
            The completion as a string.
        """
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from cryptomem.adapters.base import LLMAdapter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MockAdapter(LLMAdapter):
    """Deterministic adapter that needs no model at all.

    The completion is simply the (whitespace-trimmed) prompt behind a fixed
    ``[mock]`` banner, so callers can assert on exactly what context was
    handed to the "model" without downloading or running one.
    """

    def complete(self, prompt: str) -> str:
        """Return the trimmed ``prompt`` prefixed with the mock banner."""
        trimmed = prompt.strip()
        return "[mock] grounded answer based on:\n" + trimmed
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
|
|
5
|
+
from cryptomem.adapters.base import LLMAdapter
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class OllamaAdapter(LLMAdapter):
    """Completion adapter that forwards prompts to an Ollama server.

    Posts to ``/api/generate`` with streaming disabled and returns the
    ``response`` field of the JSON reply. The adapter owns an
    :class:`httpx.Client`; release it with :meth:`close` or by using the
    adapter as a context manager::

        with OllamaAdapter() as llm:
            llm.complete("hello")
    """

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "qwen2.5:0.5b",
        timeout: float = 60.0,
    ):
        """Create the adapter and its underlying HTTP client.

        Args:
            base_url: Root URL of the Ollama server; a trailing ``/`` is removed.
            model: Model name sent with every generate request.
            timeout: Timeout (seconds) passed to the HTTP client.
        """
        self.base_url = base_url.rstrip("/")
        self.model = model
        self._client = httpx.Client(base_url=self.base_url, timeout=timeout)

    def __enter__(self) -> OllamaAdapter:
        """Return ``self`` so the adapter can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the HTTP client on leaving the ``with`` block."""
        self.close()

    def complete(self, prompt: str) -> str:
        """Send ``prompt`` to ``/api/generate`` and return the reply text.

        Returns:
            The ``response`` field as a string, or ``""`` when the field is
            missing or JSON ``null``.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        resp = self._client.post(
            "/api/generate",
            json={"model": self.model, "prompt": prompt, "stream": False},
        )
        resp.raise_for_status()
        answer = resp.json().get("response")
        # A JSON null would otherwise be stringified to the literal "None".
        return "" if answer is None else str(answer)

    def is_alive(self) -> bool:
        """Return ``True`` if the Ollama server answers ``GET /api/tags`` with 200."""
        try:
            return self._client.get("/api/tags").status_code == 200
        except httpx.HTTPError:
            return False

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._client.close()
|
cryptomem/cli.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
|
|
5
|
+
from cryptomem import __version__
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point for the ``cryptomem`` command.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code: ``0`` after serving or printing help, ``1`` when
        the ``serve`` command cannot import its optional dependencies.
    """
    import sys  # function-scope import: only the error path below needs it

    parser = argparse.ArgumentParser(prog="cryptomem", description="cryptomem CLI")
    parser.add_argument("--version", action="version", version=f"cryptomem {__version__}")
    sub = parser.add_subparsers(dest="command")

    serve = sub.add_parser("serve", help="run the Ollama-compatible sidecar")
    serve.add_argument("--host", default="127.0.0.1")
    serve.add_argument("--port", type=int, default=8088)
    serve.add_argument("--ollama-url", default="http://localhost:11434")

    args = parser.parse_args(argv)

    if args.command == "serve":
        try:
            import uvicorn

            from cryptomem.server.app import create_app
        except ImportError as exc:
            # Diagnostics belong on stderr so stdout stays clean for callers
            # that capture it; the underlying error shows which import failed.
            print(
                "The sidecar needs the 'serve' extra: pip install 'cryptomem[serve]'",
                file=sys.stderr,
            )
            print(f"(import failed: {exc})", file=sys.stderr)
            return 1
        app = create_app(ollama_url=args.ollama_url)
        uvicorn.run(app, host=args.host, port=args.port)
        return 0

    # No sub-command given: show usage and exit successfully.
    parser.print_help()
    return 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
if __name__ == "__main__":
|
|
38
|
+
raise SystemExit(main())
|
cryptomem/client.py
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
|
|
6
|
+
from cryptomem.adapters.base import LLMAdapter
|
|
7
|
+
from cryptomem.adapters.mock_adapter import MockAdapter
|
|
8
|
+
from cryptomem.config import Settings
|
|
9
|
+
from cryptomem.crypto.hashing import sha256
|
|
10
|
+
from cryptomem.crypto.keys import build_signer
|
|
11
|
+
from cryptomem.crypto.merkle import merkle_proof, merkle_root, verify_proof
|
|
12
|
+
from cryptomem.crypto.signer import Signer
|
|
13
|
+
from cryptomem.efficiency.budgeter import fit_to_budget
|
|
14
|
+
from cryptomem.efficiency.cache import SemanticCache
|
|
15
|
+
from cryptomem.efficiency.compressor import compress_heuristic
|
|
16
|
+
from cryptomem.efficiency.deduper import dedupe
|
|
17
|
+
from cryptomem.embeddings.base import Embedder, cosine_similarity
|
|
18
|
+
from cryptomem.embeddings.stub import StubEmbedder
|
|
19
|
+
from cryptomem.models import (
|
|
20
|
+
Contradiction,
|
|
21
|
+
CryptoEnvelope,
|
|
22
|
+
MemoryNode,
|
|
23
|
+
Relationship,
|
|
24
|
+
ScoredNode,
|
|
25
|
+
)
|
|
26
|
+
from cryptomem.proactive.planner import Planner
|
|
27
|
+
from cryptomem.proactive.triggers import TriggerEngine
|
|
28
|
+
from cryptomem.proactive.writeback import WriteBack
|
|
29
|
+
from cryptomem.retrieval.retriever import Retriever
|
|
30
|
+
from cryptomem.store.base import MemoryStore
|
|
31
|
+
from cryptomem.store.sqlite_store import SqliteStore
|
|
32
|
+
from cryptomem.verification.citations import Citer
|
|
33
|
+
from cryptomem.verification.cove import ChainOfVerification
|
|
34
|
+
from cryptomem.verification.entropy import SemanticEntropy
|
|
35
|
+
from cryptomem.verification.faithfulness import FaithfulnessChecker
|
|
36
|
+
from cryptomem.verification.grounding import GroundingGate, render_context
|
|
37
|
+
|
|
38
|
+
ABSTAIN = "I cannot answer that from verified memory."
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class MemoryClient:
|
|
42
|
+
"""High-level verifiable memory: archive, query, verify, traverse, ground.
|
|
43
|
+
|
|
44
|
+
Every fact is SHA-256 hashed and Ed25519 signed at write time. At read time
|
|
45
|
+
each node is re-verified; tampered or unsigned facts are dropped and the
|
|
46
|
+
agent abstains rather than guessing. Retrieval runs a
|
|
47
|
+
retrieve -> dedupe -> rank -> budget -> (compress) -> ground pipeline.
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
def __init__(
    self,
    settings: Settings | None = None,
    store: MemoryStore | None = None,
    signer: Signer | None = None,
    embedder: Embedder | None = None,
):
    """Wire up the client, building any collaborator that is not supplied.

    Args:
        settings: Configuration; a default :class:`Settings` is created when ``None``.
        store: Persistence backend; chosen by :meth:`_build_store` when ``None``.
        signer: Signing key wrapper; resolved via ``build_signer`` when ``None``.
        embedder: Text embedder; a :class:`StubEmbedder` is used when ``None``.
    """
    # Compare against None explicitly: an injected collaborator that happens
    # to be falsy (for example an empty store that defines ``__len__``) must
    # not be silently swapped for a default one.
    self.settings = settings if settings is not None else Settings()
    self.signer = signer if signer is not None else build_signer(self.settings)
    self.store = store if store is not None else self._build_store()
    # NOTE(review): ``settings.embedder`` is not consulted here; the default
    # is always the stub embedder — confirm that is intended.
    self.embedder = embedder if embedder is not None else StubEmbedder()
    self.retriever = Retriever(
        store=self.store,
        embedder=self.embedder,
        verify=self.verify,
        require_verification=self.settings.require_verification,
    )
    self.gate = GroundingGate()
    self.cache = SemanticCache()
    self.citer = Citer(self.embedder, min_support=self.settings.citation_min_support)
    self.faithfulness = FaithfulnessChecker(
        self.embedder, threshold=self.settings.faithfulness_threshold
    )
    self.entropy = SemanticEntropy(
        self.embedder, cluster_threshold=self.settings.entropy_cluster_threshold
    )
|
|
76
|
+
|
|
77
|
+
def _build_store(self) -> MemoryStore:
    """Select the store from ``settings.mode``; fall back to SQLite.

    * ``mode="neo4j"`` returns a ``Neo4jStore`` built from the ``neo4j_*`` settings.
    * ``mode="remote"`` with a ``backend_url`` returns a ``RemoteStore`` if
      its ``healthz()`` check passes at init.
    * Anything else — including a remote backend that fails its health
      check — returns a ``SqliteStore`` at ``settings.sqlite_path``.

    The remote-to-SQLite fallback is deliberate, but it is logged as a
    warning so an operator can tell that writes are going to the local
    store rather than the configured backend.
    """
    import logging  # function-scope import: only the fallback path logs

    mode = self.settings.mode.lower()
    if mode == "neo4j":
        from cryptomem.store.neo4j_store import Neo4jStore

        return Neo4jStore(
            uri=self.settings.neo4j_uri,
            user=self.settings.neo4j_user,
            password=self.settings.neo4j_password,
            database=self.settings.neo4j_database,
        )
    if mode == "remote" and self.settings.backend_url:
        from cryptomem.store.remote_store import RemoteStore

        remote = RemoteStore(self.settings.backend_url, self.settings.backend_api_key)
        if remote.healthz():
            return remote
        # The URL is left out of the message in case it embeds credentials.
        logging.getLogger(__name__).warning(
            "remote backend failed its health check; falling back to SQLite at %s",
            self.settings.sqlite_path,
        )
    return SqliteStore(self.settings.sqlite_path)
|
|
102
|
+
|
|
103
|
+
def archive(
    self,
    entity: str,
    content: str,
    relationships: list[Relationship] | None = None,
    metadata: dict | None = None,
    node_id: str | None = None,
) -> MemoryNode:
    """Build a new memory node from the arguments and seal it.

    The caller's ``metadata`` is copied, and a UTC ISO-8601 ``timestamp``
    is added when absent. When ``node_id`` is not given, an id of the form
    ``mem_<12 hex chars>`` is generated. Returns whatever :meth:`_seal`
    returns for the node.
    """
    links = relationships if relationships else []

    details = dict(metadata) if metadata else {}
    if "timestamp" not in details:
        details["timestamp"] = datetime.now(timezone.utc).isoformat()

    if node_id:
        identifier = node_id
    else:
        identifier = "mem_" + uuid.uuid4().hex[:12]

    draft = MemoryNode(
        node_id=identifier,
        entity=entity,
        content=content,
        relationships=links,
        metadata=details,
    )
    return self._seal(draft)
|
|
125
|
+
|
|
126
|
+
def _seal(self, node: MemoryNode) -> MemoryNode:
    """Hash, anchor, embed, sign and store ``node``, mutating it in place.

    Steps, in order: reset the derived ``crypto``/``embedding`` fields,
    hash the node dump, compute a Merkle root over the hashes of every
    other signed node in the store plus this one, embed
    ``"<entity> <content>"``, attach a signed envelope, and write the node
    to the store. The same node object is returned.
    """
    # Derived fields are recomputed from scratch below.
    node.crypto = None
    node.embedding = None
    leaf = sha256(node.model_dump())

    # Hashes of all other signed nodes; this node's own previous entry
    # (matched by id) is excluded so a re-seal does not count it twice.
    prior_hashes = []
    for other in self.store.all():
        if other.crypto is None or other.node_id == node.node_id:
            continue
        prior_hashes.append(other.crypto.hash)
    anchor = merkle_root(prior_hashes + [leaf])

    node.embedding = self.embedder.embed(f"{node.entity} {node.content}")
    envelope = CryptoEnvelope(
        hash=leaf,
        signature=self.signer.sign(leaf),
        public_key_ref=self.signer.public_key_hex,
        merkle_root=anchor,
    )
    node.crypto = envelope
    self.store.write(node)
    return node
|
|
146
|
+
|
|
147
|
+
def verify(self, node: MemoryNode) -> bool:
    """Check that ``node`` still matches its stored hash and signature.

    Returns ``False`` when the node has no crypto envelope or when the
    recomputed hash differs from the stored one; otherwise returns the
    result of ``Signer.verify`` for the envelope's key, hash and signature.
    """
    envelope = node.crypto
    if envelope is None:
        return False
    if sha256(node.model_dump()) != envelope.hash:
        return False
    return Signer.verify(envelope.public_key_ref, envelope.hash, envelope.signature)
|
|
155
|
+
|
|
156
|
+
def query(self, text: str, top_k: int = 5, depth: int = 0) -> list[ScoredNode]:
    """Delegate retrieval for ``text`` to the configured retriever.

    ``top_k`` and ``depth`` are forwarded unchanged as keyword arguments.
    """
    ranked = self.retriever.retrieve(text, top_k=top_k, depth=depth)
    return ranked
|
|
159
|
+
|
|
160
|
+
def neighbors(self, node_id: str, depth: int = 1) -> list[MemoryNode]:
    """List the store's neighbours of ``node_id`` that pass :meth:`verify`.

    ``depth`` is forwarded to the store's ``neighbors`` call; nodes that
    fail verification are dropped, order is otherwise preserved.
    """
    reachable = self.store.neighbors(node_id, depth=depth)
    return list(filter(self.verify, reachable))
|
|
163
|
+
|
|
164
|
+
def _ledger_leaves(self) -> tuple[list[MemoryNode], list[str]]:
    """Return the signed nodes ordered by ``node_id`` and their hashes.

    Nodes without a crypto envelope are skipped. The two returned lists
    are aligned: ``hashes[i]`` is the hash of ``nodes[i]``.
    """
    signed = [n for n in self.store.all() if n.crypto is not None]
    signed.sort(key=lambda n: n.node_id)

    hashes = []
    for entry in signed:
        envelope = entry.crypto
        if envelope is not None:
            hashes.append(envelope.hash)
    return signed, hashes
|
|
169
|
+
|
|
170
|
+
def ledger_root(self) -> str | None:
    """Compute the Merkle root over the hashes from :meth:`_ledger_leaves`."""
    _, leaves = self._ledger_leaves()
    return merkle_root(leaves)
|
|
173
|
+
|
|
174
|
+
def proof(self, node_id: str) -> dict | None:
    """Build a Merkle inclusion report for the node with ``node_id``.

    Returns ``None`` when the node is unknown or has no crypto envelope.
    Otherwise returns a dict with the node id, its leaf hash, the Merkle
    root recorded in its envelope (``merkle_root``), the root recomputed
    over the current ledger (``ledger_root``), the audit path as a list of
    ``{"sibling", "position"}`` steps, whether that path checks out against
    the current root (``included``), and the result of :meth:`verify`.
    """
    node = self.store.get(node_id)
    if node is None:
        return None
    if node.crypto is None:
        return None

    ordered, leaves = self._ledger_leaves()

    # Locate the node's position among the sorted ledger leaves.
    position = -1
    for slot, candidate in enumerate(ordered):
        if candidate.node_id == node_id:
            position = slot
            break

    audit_path = merkle_proof(leaves, position) if position >= 0 else []
    current_root = merkle_root(leaves)
    if current_root is None:
        is_member = False
    else:
        is_member = verify_proof(node.crypto.hash, audit_path, current_root)

    steps = [{"sibling": sibling, "position": side} for sibling, side in audit_path]
    return {
        "node_id": node.node_id,
        "leaf_hash": node.crypto.hash,
        "merkle_root": node.crypto.merkle_root,
        "ledger_root": current_root,
        "proof": steps,
        "included": is_member,
        "verified": self.verify(node),
    }
|
|
198
|
+
|
|
199
|
+
def contradictions(self) -> list[Contradiction]:
    """Report pairs of verified same-entity nodes with low embedding similarity.

    Verified nodes are grouped by ``entity``; every unordered pair within
    a group whose cosine similarity falls below
    ``settings.contradiction_threshold`` yields one :class:`Contradiction`
    (similarity rounded to four decimals). Pairs where either node has no
    embedding are skipped.
    """
    grouped: dict[str, list[MemoryNode]] = {}
    for node in self.store.all():
        if not self.verify(node):
            continue
        if node.entity not in grouped:
            grouped[node.entity] = []
        grouped[node.entity].append(node)

    limit = self.settings.contradiction_threshold
    conflicts: list[Contradiction] = []
    for entity, members in grouped.items():
        for offset, left in enumerate(members):
            # Only pair with later members so each pair is visited once.
            for right in members[offset + 1:]:
                if left.embedding is None or right.embedding is None:
                    continue
                closeness = cosine_similarity(left.embedding, right.embedding)
                if closeness >= limit:
                    continue
                conflicts.append(
                    Contradiction(
                        entity=entity,
                        left_id=left.node_id,
                        right_id=right.node_id,
                        similarity=round(closeness, 4),
                    )
                )
    return conflicts
|
|
225
|
+
|
|
226
|
+
def _ground(self, question: str, top_k: int, depth: int, compress: bool) -> list[ScoredNode]:
    """Run retrieve -> dedupe -> gate -> budget -> (compress) for ``question``.

    Returns an empty list when the grounding gate reports the results as
    not grounded; otherwise returns the admitted nodes trimmed to
    ``settings.max_context_tokens``. With ``compress`` set, each returned
    node's ``content`` is replaced by its ``compress_heuristic`` form.
    """
    candidates = self.query(question, top_k=top_k, depth=depth)
    unique = dedupe(candidates)
    admitted, grounded = self.gate.admit(unique)
    if not grounded:
        return []

    within_budget = fit_to_budget(admitted, self.settings.max_context_tokens)
    if not compress:
        return within_budget

    # NOTE(review): this rewrites ``content`` on the node objects themselves;
    # if the store hands out shared instances the stored node would change
    # too — confirm against the store implementations.
    for scored in within_budget:
        scored.node.content = compress_heuristic(scored.node.content)
    return within_budget
|
|
236
|
+
|
|
237
|
+
def respond(
    self,
    question: str,
    adapter: LLMAdapter | None = None,
    top_k: int = 5,
    depth: int = 0,
    compress: bool = False,
) -> tuple[str, dict]:
    """Answer ``question`` from grounded memory and describe the evidence used.

    Returns ``(text, provenance)``. When nothing grounds, ``text`` is the
    ``ABSTAIN`` message and ``provenance`` has an empty ``injected_nodes``
    list. Otherwise ``provenance`` lists the injected node ids, whether
    all of them were flagged verified, and the Merkle root recorded on the
    first admitted node; optional settings add ``citations``,
    ``faithfulness``/``faithful``, ``proactive_suggestions`` and
    ``staged_nodes``. ``MockAdapter`` is used when no adapter is given.
    """
    admitted = self._ground(question, top_k, depth, compress)
    if not admitted:
        return ABSTAIN, {"injected_nodes": [], "verified": False, "merkle_root": None}

    if not adapter:
        adapter = MockAdapter()
    facts = render_context(admitted)
    prompt = (
        "Answer using ONLY the verified facts below. "
        "If they do not cover the question, say you cannot answer.\n\n"
        f"Verified facts:\n{facts}\n\nQuestion: {question}"
    )
    text = adapter.complete(prompt)

    lead_envelope = admitted[0].node.crypto
    if lead_envelope:
        anchor = lead_envelope.merkle_root
    else:
        anchor = None
    provenance: dict = {
        "injected_nodes": [item.node.node_id for item in admitted],
        "verified": all(item.verified for item in admitted),
        "merkle_root": anchor,
    }

    # Optional post-processing. Order matters: citations may rewrite the
    # text that the later stages then score and extract from.
    options = self.settings
    if options.enable_citations:
        text, cited = self.citer.annotate(text, admitted)
        provenance["citations"] = cited
    if options.enable_faithfulness:
        rating, _ = self.faithfulness.score(text, admitted)
        provenance["faithfulness"] = rating
        provenance["faithful"] = rating >= options.faithfulness_threshold
    if options.enable_proactive:
        sources = [item.node for item in admitted]
        provenance["proactive_suggestions"] = self.suggest(
            sources, limit=options.proactive_suggestions
        )
    if options.enable_writeback:
        staged = self.stage_facts(text)
        provenance["staged_nodes"] = [node.node_id for node in staged]
    return text, provenance
|
|
277
|
+
|
|
278
|
+
def answer(
    self,
    question: str,
    adapter: LLMAdapter | None = None,
    top_k: int = 5,
    depth: int = 0,
    compress: bool = False,
    use_cache: bool = True,
) -> str:
    """Answer strictly from verified memory, or abstain if none is found.

    Delegates to :meth:`respond`, with a semantic answer cache in front
    when ``use_cache`` is true. The cache is keyed on the question
    embedding only; ``adapter``, ``top_k``, ``depth`` and ``compress`` are
    not part of the key.

    Only grounded answers are cached. An abstention (no injected nodes) is
    never stored, so a question that could not be answered is retried
    against memory on the next call instead of replaying the abstention
    after new facts have been archived.

    Args:
        question: The user question.
        adapter: Model backend forwarded to :meth:`respond`.
        top_k: Retrieval breadth forwarded to :meth:`respond`.
        depth: Graph expansion depth forwarded to :meth:`respond`.
        compress: Whether admitted context is compressed.
        use_cache: Consult and populate the answer cache.

    Returns:
        The answer text (or the abstention message).
    """
    query_vec = None
    if use_cache:
        # The embedding is only needed as the cache key, so skip the work
        # entirely when the cache is bypassed.
        query_vec = self.embedder.embed(question)
        cached = self.cache.get(query_vec)
        if cached is not None:
            return cached

    text, provenance = self.respond(
        question, adapter=adapter, top_k=top_k, depth=depth, compress=compress
    )
    # ``respond`` reports an empty ``injected_nodes`` list when it abstains.
    if use_cache and provenance.get("injected_nodes"):
        self.cache.put(query_vec, text)
    return text
|
|
304
|
+
|
|
305
|
+
def confidence(
    self,
    question: str,
    adapter: LLMAdapter | None = None,
    samples: int | None = None,
    top_k: int = 5,
    depth: int = 0,
) -> dict:
    """Estimate answer confidence through the semantic-entropy estimator.

    The question is grounded once (without compression). If nothing
    grounds, a fixed ``{"clusters": 0, "entropy": 1.0, "confidence": 0.0}``
    result is returned. Otherwise the grounded prompt, the adapter
    (``MockAdapter`` by default) and the sample count (``samples`` or
    ``settings.entropy_samples``) are handed to ``self.entropy.estimate``
    and its result is returned.
    """
    grounded = self._ground(question, top_k=top_k, depth=depth, compress=False)
    if not grounded:
        return {"clusters": 0, "entropy": 1.0, "confidence": 0.0}

    backend = adapter if adapter else MockAdapter()
    facts = render_context(grounded)
    prompt = (
        "Answer using ONLY the verified facts below.\n\n"
        f"Verified facts:\n{facts}\n\nQuestion: {question}"
    )
    if samples is None:
        draws = self.settings.entropy_samples
    else:
        draws = samples
    return self.entropy.estimate(prompt, backend, samples=draws)
|
|
330
|
+
|
|
331
|
+
def verify_answer(self, draft: str, top_k: int = 3) -> dict:
    """Run a chain-of-verification pass over ``draft`` and return its report.

    A ``ChainOfVerification`` is built from this client, its embedder and
    ``settings.faithfulness_threshold``; ``top_k`` is forwarded to its
    ``verify`` call.
    """
    checker = ChainOfVerification(self, self.embedder, self.settings.faithfulness_threshold)
    report = checker.verify(draft, top_k=top_k)
    return report
|
|
335
|
+
|
|
336
|
+
def suggest(self, injected: list[MemoryNode], limit: int | None = None) -> list[dict]:
    """Ask a ``Planner`` for follow-up suggestions based on ``injected`` nodes.

    ``limit`` defaults to ``settings.proactive_suggestions`` when ``None``.
    """
    if limit is None:
        limit = self.settings.proactive_suggestions
    planner = Planner(self.store, self.verify)
    return planner.suggest(injected, limit=limit)
|
|
340
|
+
|
|
341
|
+
def triggers(self) -> list[dict]:
    """Build a ``TriggerEngine`` around this client and return its evaluation."""
    engine = TriggerEngine(self)
    return engine.evaluate()
|
|
344
|
+
|
|
345
|
+
def stage_facts(self, text: str, entity: str | None = None) -> list[MemoryNode]:
    """Hand ``text`` to a ``WriteBack`` helper and return the nodes it stages.

    ``entity`` is forwarded unchanged to the helper's ``stage`` call.
    """
    stager = WriteBack(self)
    return stager.stage(text, entity=entity)
|
|
348
|
+
|
|
349
|
+
def pending(self) -> list[MemoryNode]:
    """List stored nodes that verify and carry ``metadata["status"] == "pending"``."""
    awaiting = []
    for node in self.store.all():
        if not self.verify(node):
            continue
        if node.metadata.get("status") == "pending":
            awaiting.append(node)
    return awaiting
|
|
354
|
+
|
|
355
|
+
def confirm(self, node_id: str) -> MemoryNode | None:
    """Promote a pending node to ``confirmed`` and re-sign it.

    The node must exist, be marked ``status="pending"`` and still pass
    :meth:`verify`. The verification check matters because confirming
    re-seals the node: without it, a node whose stored content had been
    tampered with would receive a fresh, valid signature.

    Args:
        node_id: Id of the node to confirm.

    Returns:
        The re-sealed node, or ``None`` if the node is missing, is not
        pending, or fails verification.
    """
    node = self.store.get(node_id)
    if node is None or node.metadata.get("status") != "pending":
        return None
    # Never re-sign content whose existing signature does not check out.
    if not self.verify(node):
        return None
    node.metadata["status"] = "confirmed"
    return self._seal(node)
|
cryptomem/config.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Settings(BaseSettings):
    """Typed configuration, overridable via ``CRYPTOMEM_*`` env vars or ``.env``.

    Field defaults below apply when neither source provides a value; extra
    keys found in the environment file are ignored.
    """

    model_config = SettingsConfigDict(env_prefix="CRYPTOMEM_", env_file=".env", extra="ignore")

    # NOTE(review): ``profile`` is not read anywhere in the code visible
    # here — presumably a deployment-size preset; confirm against its users.
    profile: str = "potato"
    # Store selection: "neo4j" and "remote" pick those backends; any other
    # value results in the SQLite store.
    mode: str = "sqlite"
    # Path handed to the SQLite store.
    sqlite_path: str = ":memory:"
    # NOTE(review): ``ollama_url``, ``default_model`` and ``embedder`` are not
    # consumed by the client code visible here — verify where they are used.
    ollama_url: str = "http://localhost:11434"
    default_model: str = "qwen2.5:0.5b"
    embedder: str = "stub"
    # Remote backend address and API key; the remote store is only
    # attempted when ``mode`` is "remote" and ``backend_url`` is set.
    backend_url: str | None = None
    backend_api_key: str | None = None
    # Connection parameters passed to the Neo4j store when ``mode`` is "neo4j".
    neo4j_uri: str = "bolt://localhost:7687"
    neo4j_user: str = "neo4j"
    neo4j_password: str = "neo4j"
    neo4j_database: str = "neo4j"
    # Key file used by the "file" key provider (the default provider).
    signing_key_path: str = "./cryptomem.key"
    # Key provider name: "file" (also used when unset) or "env".
    byok_provider: str | None = None
    # Name of the environment variable the "env" key provider reads.
    signing_seed_env: str = "CRYPTOMEM_SIGNING_SEED"
    # Token budget applied to admitted context before prompting.
    max_context_tokens: int = 1500
    # Forwarded to the retriever as ``require_verification``.
    require_verification: bool = True
    # Same-entity node pairs with cosine similarity below this are reported
    # as contradictions.
    contradiction_threshold: float = 0.6
    # When true, responses are annotated with citations; ``citation_min_support``
    # is passed to the citer as ``min_support``.
    enable_citations: bool = False
    citation_min_support: float = 0.2
    # When true, responses get a faithfulness score; the answer counts as
    # faithful when the score is >= ``faithfulness_threshold``.
    enable_faithfulness: bool = False
    faithfulness_threshold: float = 0.25
    # Default sample count and clustering threshold for the entropy estimator.
    entropy_samples: int = 5
    entropy_cluster_threshold: float = 0.8
    # When true, responses include up to ``proactive_suggestions`` suggestions.
    enable_proactive: bool = False
    proactive_suggestions: int = 3
    # When true, facts extracted from the answer text are staged as nodes.
    enable_writeback: bool = False
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def canonical_bytes(node: dict[str, Any]) -> bytes:
    """Render the hashed fields of ``node`` as deterministic compact JSON bytes.

    Only ``entity``, ``content``, ``relationships`` and ``metadata`` take
    part. Relationships are reduced to sorted ``(type, target_id)`` pairs
    and object keys are emitted in sorted order, so the same logical node
    always serializes to the same bytes. Missing ``relationships`` or
    ``metadata`` are treated as empty.
    """
    entity = node["entity"]
    content = node["content"]
    edges = [(rel["type"], rel["target_id"]) for rel in node.get("relationships", [])]
    edges.sort()
    extras = node.get("metadata", {})

    canonical = {
        "entity": entity,
        "content": content,
        "relationships": edges,
        "metadata": extras,
    }
    text = json.dumps(canonical, separators=(",", ":"), sort_keys=True)
    return text.encode()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def sha256(node: dict[str, Any]) -> str:
    """Hash ``node``'s canonical byte form with SHA-256 and return the hex digest."""
    hasher = hashlib.sha256()
    hasher.update(canonical_bytes(node))
    return hasher.hexdigest()
|
cryptomem/crypto/keys.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from nacl.signing import SigningKey
|
|
8
|
+
|
|
9
|
+
from cryptomem.crypto.signer import Signer
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from cryptomem.config import Settings
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class KeyProvider(ABC):
    """Abstract source of the signing key (the bring-your-own-key seam).

    Subclasses decide where the private key lives — a local file, an
    environment-injected seed, or some external key service — and expose
    it solely through :meth:`get_signer`, so the rest of the engine never
    deals with key storage directly.
    """

    @abstractmethod
    def get_signer(self) -> Signer:
        """Build and return the :class:`Signer` backed by this provider's key."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class LocalFileKeyProvider(KeyProvider):
    """Key provider backed by a key file on the local filesystem.

    Loading (and any creation of a missing file) is delegated entirely to
    ``Signer.from_key_file``.
    """

    def __init__(self, path: str):
        """Remember the key file location ``path``."""
        self.path = path

    def get_signer(self) -> Signer:
        """Return the signer produced by ``Signer.from_key_file`` for ``path``."""
        key_file = self.path
        return Signer.from_key_file(key_file)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class EnvKeyProvider(KeyProvider):
    """Read a hex-encoded Ed25519 seed from an environment variable.

    Suited to containerised/secret-manager workflows where the key is
    injected at runtime and never written to disk.
    """

    def __init__(self, env_var: str):
        """Remember the name of the environment variable holding the seed."""
        self.env_var = env_var

    def get_signer(self) -> Signer:
        """Build a :class:`Signer` from the hex seed in the environment.

        Surrounding whitespace is stripped before the value is checked, so
        a variable that is unset, empty, or contains only whitespace is
        reported uniformly as not set rather than failing later with a
        key-length error.

        Raises:
            RuntimeError: If the variable is unset or blank.
            ValueError: If the value is not valid hex (from ``bytes.fromhex``).
        """
        raw = os.environ.get(self.env_var)
        seed = raw.strip() if raw else ""
        if not seed:
            raise RuntimeError(f"BYOK env provider: ${self.env_var} is not set")
        return Signer(SigningKey(bytes.fromhex(seed)))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def build_signer(settings: Settings) -> Signer:
    """Pick a key provider from ``settings.byok_provider`` and return its signer.

    The provider name is matched case-insensitively; an unset or empty
    value means ``"file"``. ``"file"`` uses ``settings.signing_key_path``
    and ``"env"`` uses ``settings.signing_seed_env``.

    Raises:
        ValueError: If the provider name is anything else.
    """
    configured = settings.byok_provider
    kind = configured.lower() if configured else "file"

    if kind == "env":
        return EnvKeyProvider(settings.signing_seed_env).get_signer()
    if kind != "file":
        raise ValueError(f"unknown byok_provider: {settings.byok_provider!r}")
    return LocalFileKeyProvider(settings.signing_key_path).get_signer()
|