loci_memory 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +1 -0
- holographic/__init__.py +325 -0
- holographic/holographic.py +203 -0
- holographic/retrieval.py +593 -0
- holographic/store.py +573 -0
- loci_memory/__init__.py +7 -0
- loci_memory/loci.py +628 -0
- loci_memory-0.0.1.dist-info/METADATA +78 -0
- loci_memory-0.0.1.dist-info/RECORD +13 -0
- loci_memory-0.0.1.dist-info/WHEEL +5 -0
- loci_memory-0.0.1.dist-info/entry_points.txt +2 -0
- loci_memory-0.0.1.dist-info/licenses/LICENSE +21 -0
- loci_memory-0.0.1.dist-info/top_level.txt +3 -0
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Empty
|
holographic/__init__.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any, Dict, List
|
|
7
|
+
|
|
8
|
+
from .store import MemoryStore
|
|
9
|
+
from .retrieval import FactRetriever
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# ---------------------------------------------------------------------------
|
|
14
|
+
# Tool schemas
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
# Tool schema advertised for the ``fact_store`` tool.
# ``parameters`` is a JSON-Schema style object; only ``action`` is mandatory
# at the schema level, the per-action requirements are spelled out in the
# individual property descriptions.
FACT_STORE_SCHEMA = {
    "name": "fact_store",
    "description": (
        "Deep structured memory with algebraic reasoning. "
        "Use alongside the memory tool — memory for always-on context, "
        "fact_store for deep recall and compositional queries.\n\n"
        "ACTIONS (simple → powerful):\n"
        "• add — Store a fact the user would expect you to remember.\n"
        "• search — Keyword lookup ('editor config', 'deploy process').\n"
        "• probe — Entity recall: ALL facts about a person/thing.\n"
        "• related — What connects to an entity? Structural adjacency.\n"
        "• reason — Compositional: facts connected to MULTIPLE entities simultaneously.\n"
        "• contradict — Memory hygiene: find facts making conflicting claims.\n"
        "• update/remove/list — CRUD operations.\n\n"
        "IMPORTANT: Before answering questions about the user, ALWAYS probe or reason first."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            # Selector for the operation to run.
            "action": {
                "type": "string",
                "enum": [
                    "add",
                    "search",
                    "probe",
                    "related",
                    "reason",
                    "contradict",
                    "update",
                    "remove",
                    "list",
                ],
            },
            # Free-text inputs.
            "content": {
                "type": "string",
                "description": "Fact content (required for 'add').",
            },
            "query": {
                "type": "string",
                "description": "Search query (required for 'search').",
            },
            # Entity selectors.
            "entity": {
                "type": "string",
                "description": "Entity name for 'probe'/'related'.",
            },
            "entities": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Entity names for 'reason'.",
            },
            # Fact addressing and metadata.
            "fact_id": {
                "type": "integer",
                "description": "Fact ID for 'update'/'remove'.",
            },
            "category": {
                "type": "string",
                "enum": ["user_pref", "project", "tool", "general"],
            },
            "tags": {
                "type": "string",
                "description": "Comma-separated tags.",
            },
            # Trust controls and result sizing.
            "trust_delta": {
                "type": "number",
                "description": "Trust adjustment for 'update'.",
            },
            "min_trust": {
                "type": "number",
                "description": "Minimum trust filter (default: 0.3).",
            },
            "limit": {
                "type": "integer",
                "description": "Max results (default: 10).",
            },
        },
        "required": ["action"],
    },
}
|
|
54
|
+
|
|
55
|
+
# Tool schema advertised for the ``fact_feedback`` tool.
# Both ``action`` (the rating) and ``fact_id`` (the fact being rated) are
# required by the schema.
FACT_FEEDBACK_SCHEMA = {
    "name": "fact_feedback",
    "description": (
        "Rate a fact after using it. Mark 'helpful' if accurate, 'unhelpful' if outdated. "
        "This trains the memory — good facts rise, bad facts sink."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["helpful", "unhelpful"],
            },
            "fact_id": {
                "type": "integer",
                "description": "The fact ID to rate.",
            },
        },
        "required": ["action", "fact_id"],
    },
}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# Holographic Memory implementation
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
class HolographicMemoryProvider():
    """Holographic memory with structured facts, entity resolution, and HRR retrieval.

    Lifecycle: construct, call :meth:`initialize` to open the fact store and
    build the retriever, use the prompt/prefetch/tool hooks, then call
    :meth:`shutdown`.  All state is created in ``__init__`` so every hook can
    be called before ``initialize`` without raising ``AttributeError``; in
    that state hooks return an empty string, ``None`` or an error string.
    """

    # Trust floor applied to searches when the caller supplies none.  Mirrors
    # the "default: 0.3" advertised for ``min_trust`` in FACT_STORE_SCHEMA.
    _DEFAULT_MIN_TRUST = 0.3

    # Message returned by the tool handlers when initialize() has not run.
    _NOT_INITIALIZED = "Holographic memory is not initialized"

    # Phrases that suggest a personal preference.  Compiled once at class
    # creation rather than on every auto-extraction pass.
    _PREF_PATTERNS = (
        re.compile(r'\bI\s+(?:prefer|like|love|use|want|need)\s+(.+)', re.IGNORECASE),
        re.compile(r'\bmy\s+(?:favorite|preferred|default)\s+\w+\s+is\s+(.+)', re.IGNORECASE),
        re.compile(r'\bI\s+(?:always|never|usually)\s+(.+)', re.IGNORECASE),
    )
    # Phrases that suggest a project-level decision.
    _DECISION_PATTERNS = (
        re.compile(r'\bwe\s+(?:decided|agreed|chose)\s+(?:to\s+)?(.+)', re.IGNORECASE),
        re.compile(r'\bthe\s+project\s+(?:uses|needs|requires)\s+(.+)', re.IGNORECASE),
    )

    def __init__(self):
        # Populated by initialize(); None means "not initialized / shut down".
        self._store = None
        self._retriever = None
        self._session_id = ""
        # Provider options.  Only "auto_extract" is read (by on_session_end);
        # an empty dict therefore leaves auto-extraction disabled.
        self._config = {}
        # Read by prefetch() and by the 'search' action as the default floor.
        self._min_trust = self._DEFAULT_MIN_TRUST

    @property
    def name(self) -> str:
        """Provider identifier."""
        return "holographic"

    def is_available(self) -> bool:
        """Report availability; this provider always reports True."""
        return True  # SQLite is always available, numpy is optional

    def initialize(self, session_id: str) -> None:
        """Create the fact store and retriever and remember *session_id*.

        The database path is the relative path ``.loci_memory/memory_store.db``
        (presumably resolved against the working directory by MemoryStore —
        confirm against store.py).
        """
        loci_home = ".loci_memory"
        db_path = loci_home + "/memory_store.db"
        default_trust = 0.5
        hrr_dim = 1024
        hrr_weight = 0.3
        temporal_decay = 0

        self._store = MemoryStore(db_path=db_path, default_trust=default_trust, hrr_dim=hrr_dim)
        self._retriever = FactRetriever(
            store=self._store,
            temporal_decay_half_life=temporal_decay,
            hrr_weight=hrr_weight,
            hrr_dim=hrr_dim,
        )
        self._session_id = session_id

    def system_prompt_block(self) -> str:
        """Return the system-prompt text describing the memory's current state.

        Returns ``""`` when no store is open.  A failing count query is
        treated the same as an empty store.
        """
        if not self._store:
            return ""
        try:
            total = self._store._conn.execute(
                "SELECT COUNT(*) FROM facts"
            ).fetchone()[0]
        except Exception:
            total = 0
        if total == 0:
            return (
                "# Holographic Memory\n"
                "Active. Empty fact store — proactively add facts the user would expect you to remember.\n"
                "Use fact_store(action='add') to store durable structured facts about people, projects, preferences, decisions.\n"
                "Use fact_feedback to rate facts after using them (trains trust scores)."
            )
        return (
            "# Holographic Memory\n"
            f"Active. {total} facts stored with entity resolution and trust scoring.\n"
            "Use fact_store to search, probe entities, reason across entities, or add facts.\n"
            "Use fact_feedback to rate facts after using them (trains trust scores)."
        )

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Return up to five matching facts for *query* as a markdown block.

        Returns ``""`` when there is no retriever, no query, no results, or
        when the search fails (the failure is logged at debug level).
        """
        if not self._retriever or not query:
            return ""
        try:
            results = self._retriever.search(query, min_trust=self._min_trust, limit=5)
            if not results:
                return ""
            lines = []
            for r in results:
                # Results may expose the score under either key.
                trust = r.get("trust_score", r.get("trust", 0))
                lines.append(f"- [{trust:.1f}] {r.get('content', '')}")
            return "## Holographic Memory\n" + "\n".join(lines)
        except Exception as e:
            logger.debug("Holographic prefetch failed: %s", e)
            return ""

    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """No-op: facts are stored explicitly via tools, not per turn."""
        # The on_session_end hook handles auto-extraction if configured.
        pass

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Return the schemas of the two tools this provider handles."""
        return [FACT_STORE_SCHEMA, FACT_FEEDBACK_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
        """Dispatch a tool call by name; unknown names yield an error string."""
        if tool_name == "fact_store":
            return self._handle_fact_store(args)
        elif tool_name == "fact_feedback":
            return self._handle_fact_feedback(args)
        return (f"Unknown tool: {tool_name}")

    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
        """Run auto-extraction over *messages* when ``auto_extract`` is enabled."""
        if not self._config.get("auto_extract", False):
            return
        if not self._store or not messages:
            return
        self._auto_extract_facts(messages)

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror built-in memory writes as facts."""
        if action == "add" and self._store and content:
            try:
                category = "user_pref" if target == "user" else "general"
                self._store.add_fact(content, category=category)
            except Exception as e:
                logger.debug("Holographic memory_write mirror failed: %s", e)

    def shutdown(self) -> None:
        """Drop the store and retriever, closing the store if it supports it."""
        store = self._store
        self._store = None
        self._retriever = None
        # MemoryStore is defined elsewhere; close() is only used if present.
        close = getattr(store, "close", None)
        if callable(close):
            try:
                close()
            except Exception as e:
                logger.debug("Holographic store close failed: %s", e)

    #
    # -- Tool handlers -------------------------------------------------------
    #
    @staticmethod
    def _results_json(results) -> str:
        """Serialize a result list together with its length."""
        return json.dumps({"results": results, "count": len(results)})

    def _handle_fact_store(self, args: dict) -> str:
        """Execute one ``fact_store`` action and return a JSON or error string.

        Successful actions return JSON; a missing argument, an unknown action
        or any raised exception is reported as a plain message string.
        """
        store = self._store
        retriever = self._retriever
        if store is None or retriever is None:
            return self._NOT_INITIALIZED
        try:
            action = args["action"]

            if action == "add":
                fact_id = store.add_fact(
                    args["content"],
                    category=args.get("category", "general"),
                    tags=args.get("tags", ""),
                )
                return json.dumps({"fact_id": fact_id, "status": "added"})

            elif action == "search":
                results = retriever.search(
                    args["query"],
                    category=args.get("category"),
                    min_trust=float(args.get("min_trust", self._min_trust)),
                    limit=int(args.get("limit", 10)),
                )
                return self._results_json(results)

            elif action == "probe":
                results = retriever.probe(
                    args["entity"],
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return self._results_json(results)

            elif action == "related":
                results = retriever.related(
                    args["entity"],
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return self._results_json(results)

            elif action == "reason":
                entities = args.get("entities", [])
                if not entities:
                    return ("reason requires 'entities' list")
                results = retriever.reason(
                    entities,
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return self._results_json(results)

            elif action == "contradict":
                results = retriever.contradict(
                    category=args.get("category"),
                    limit=int(args.get("limit", 10)),
                )
                return self._results_json(results)

            elif action == "update":
                updated = store.update_fact(
                    int(args["fact_id"]),
                    content=args.get("content"),
                    trust_delta=float(args["trust_delta"]) if "trust_delta" in args else None,
                    tags=args.get("tags"),
                    category=args.get("category"),
                )
                return json.dumps({"updated": updated})

            elif action == "remove":
                removed = store.remove_fact(int(args["fact_id"]))
                return json.dumps({"removed": removed})

            elif action == "list":
                facts = store.list_facts(
                    category=args.get("category"),
                    min_trust=float(args.get("min_trust", 0.0)),
                    limit=int(args.get("limit", 10)),
                )
                return json.dumps({"facts": facts, "count": len(facts)})

            else:
                return (f"Unknown action: {action}")

        except KeyError as exc:
            return (f"Missing required argument: {exc}")
        except Exception as exc:
            return (str(exc))

    def _handle_fact_feedback(self, args: dict) -> str:
        """Record helpful/unhelpful feedback for a fact; return JSON or an error string."""
        if self._store is None:
            return self._NOT_INITIALIZED
        try:
            fact_id = int(args["fact_id"])
            # Anything other than the literal "helpful" counts as unhelpful.
            helpful = args["action"] == "helpful"
            result = self._store.record_feedback(fact_id, helpful=helpful)
            return json.dumps(result)
        except KeyError as exc:
            return (f"Missing required argument: {exc}")
        except Exception as exc:
            return (str(exc))

    # -- Auto-extraction (on_session_end) ------------------------------------

    def _auto_extract_facts(self, messages: list) -> None:
        """Store user messages that look like preferences or decisions.

        Only ``role == "user"`` messages with string content of at least ten
        characters are considered.  A message is stored (truncated to 400
        characters) at most once per category: as ``user_pref`` if any
        preference pattern matches and as ``project`` if any decision pattern
        matches.  Store failures are logged at debug level and skipped.
        """
        extracted = 0
        for msg in messages:
            if msg.get("role") != "user":
                continue
            content = msg.get("content", "")
            if not isinstance(content, str) or len(content) < 10:
                continue

            for category, patterns in (
                ("user_pref", self._PREF_PATTERNS),
                ("project", self._DECISION_PATTERNS),
            ):
                if not any(pattern.search(content) for pattern in patterns):
                    continue
                try:
                    self._store.add_fact(content[:400], category=category)
                    extracted += 1
                except Exception as e:
                    # Best-effort: one failed insert must not abort the pass.
                    logger.debug("Holographic auto-extract failed: %s", e)

        if extracted:
            logger.info("Auto-extracted %d facts from conversation", extracted)
|
|
325
|
+
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Holographic Reduced Representations (HRR) with phase encoding.
|
|
2
|
+
|
|
3
|
+
HRRs are a vector symbolic architecture for encoding compositional structure
|
|
4
|
+
into fixed-width distributed representations. This module uses *phase vectors*:
|
|
5
|
+
each concept is a vector of angles in [0, 2π). The algebraic operations are:
|
|
6
|
+
|
|
7
|
+
bind — circular convolution (phase addition) — associates two concepts
|
|
8
|
+
unbind — circular correlation (phase subtraction) — retrieves a bound value
|
|
9
|
+
bundle — superposition (circular mean) — merges multiple concepts
|
|
10
|
+
|
|
11
|
+
Phase encoding is numerically stable, avoids the magnitude collapse of
|
|
12
|
+
traditional complex-number HRRs, and maps cleanly to cosine similarity.
|
|
13
|
+
|
|
14
|
+
Atoms are generated deterministically from SHA-256 so representations are
|
|
15
|
+
identical across processes, machines, and language versions.
|
|
16
|
+
|
|
17
|
+
References:
|
|
18
|
+
Plate (1995) — Holographic Reduced Representations
|
|
19
|
+
Gayler (2004) — Vector Symbolic Architectures answer Jackendoff's challenges
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import hashlib
|
|
23
|
+
import logging
|
|
24
|
+
import struct
|
|
25
|
+
import math
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
import numpy as np
|
|
29
|
+
_HAS_NUMPY = True
|
|
30
|
+
except ImportError:
|
|
31
|
+
_HAS_NUMPY = False
|
|
32
|
+
|
|
33
|
+
logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
_TWO_PI = 2.0 * math.pi
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _require_numpy() -> None:
    """Raise ``RuntimeError`` unless the module-level numpy import succeeded."""
    if _HAS_NUMPY:
        return
    raise RuntimeError("numpy is required for holographic operations")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def encode_atom(word: str, dim: int = 1024) -> "np.ndarray":
    """Build the deterministic phase vector for *word*.

    The phases are derived purely from SHA-256 (no random number generator):

    1. Hash the strings ``f"{word}:{i}"`` for ``i = 0, 1, 2, ...`` until at
       least ``dim`` 16-bit samples are available.
    2. Read every 32-byte digest as sixteen little-endian unsigned 16-bit
       integers.
    3. Keep the first ``dim`` samples and scale them by ``2π / 65536`` so each
       lands in ``[0, 2π)``.

    Returns a ``float64`` array of shape ``(dim,)``.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()

    # One SHA-256 digest (32 bytes) yields 16 uint16 samples.
    samples_per_digest = 16
    digest_count = math.ceil(dim / samples_per_digest)

    samples = [
        sample
        for counter in range(digest_count)
        for sample in struct.unpack(
            "<16H", hashlib.sha256(f"{word}:{counter}".encode()).digest()
        )
    ]

    radians_per_step = _TWO_PI / 65536.0
    return np.array(samples[:dim], dtype=np.float64) * radians_per_step
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def bind(a: "np.ndarray", b: "np.ndarray") -> "np.ndarray":
    """Associate two phase vectors by adding their phases element-wise.

    The sum is wrapped back into ``[0, 2π)``.  In the phase representation
    this addition plays the role of circular convolution.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()
    phase_sum = a + b
    return phase_sum % _TWO_PI
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def unbind(memory: "np.ndarray", key: "np.ndarray") -> "np.ndarray":
    """Remove *key* from *memory* by subtracting its phases element-wise.

    The difference is wrapped back into ``[0, 2π)``.  This is the inverse of
    :func:`bind`: ``unbind(bind(a, b), a)`` recovers ``b`` (approximately,
    once other vectors have been superimposed on the memory).
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()
    phase_diff = memory - key
    return phase_diff % _TWO_PI
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def bundle(*vectors: "np.ndarray") -> "np.ndarray":
    """Superimpose phase vectors via their element-wise circular mean.

    Each vector is mapped to unit phasors ``exp(i·phase)``, the phasors are
    summed across vectors, and the angle of the sum (wrapped into
    ``[0, 2π)``) is returned.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()
    phasors = [np.exp(1j * phase_vector) for phase_vector in vectors]
    resultant = np.sum(phasors, axis=0)
    return np.angle(resultant) % _TWO_PI
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def similarity(a: "np.ndarray", b: "np.ndarray") -> float:
    """Return the mean cosine of the element-wise phase difference.

    The value lies in ``[-1, 1]``: ``1.0`` when the phases are identical and
    ``-1.0`` when every phase differs by π.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()
    alignment = np.cos(a - b)
    return float(np.mean(alignment))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def encode_text(text: str, dim: int = 1024) -> "np.ndarray":
    """Encode *text* as a bag of words: the bundle of its token atoms.

    Tokens are produced by lower-casing, splitting on whitespace and
    stripping leading/trailing punctuation; tokens that end up empty are
    dropped.  When nothing remains, the reserved atom ``"__hrr_empty__"`` is
    returned instead.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()

    strip_chars = ".,!?;:\"'()[]{}"
    words = []
    for raw in text.lower().split():
        cleaned = raw.strip(strip_chars)
        if cleaned:
            words.append(cleaned)

    if words:
        return bundle(*(encode_atom(word, dim) for word in words))
    return encode_atom("__hrr_empty__", dim)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def encode_fact(content: str, entities: list[str], dim: int = 1024) -> "np.ndarray":
    """Encode a fact as one bundle of role-bound components.

    Two reserved atoms act as roles: ``"__hrr_role_content__"`` and
    ``"__hrr_role_entity__"``.  The bundle contains:

    1. ``encode_text(content)`` bound to the content role, and
    2. for every entity, the atom of the lower-cased entity name bound to the
       entity role.

    NOTE(review): binding each part to a role is presumably what lets
    retrieval code unbind a fact vector with a role/key to probe for a
    component — confirm the exact extraction identity against the callers.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()

    content_role = encode_atom("__hrr_role_content__", dim)
    entity_role = encode_atom("__hrr_role_entity__", dim)

    bound_content = bind(encode_text(content, dim), content_role)
    bound_entities = [
        bind(encode_atom(entity_name.lower(), dim), entity_role)
        for entity_name in entities
    ]
    return bundle(bound_content, *bound_entities)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def phases_to_bytes(phases: "np.ndarray") -> bytes:
    """Serialize a phase vector to its raw bytes via ``ndarray.tobytes()``.

    A float64 vector of length 1024 serializes to 8 KB.  The dtype is not
    converted here.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()
    raw = phases.tobytes()
    return raw
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def bytes_to_phases(data: bytes) -> "np.ndarray":
    """Rebuild a float64 phase vector from bytes (inverse of phases_to_bytes).

    ``np.frombuffer`` yields a read-only view backed by *data*; the view is
    copied so the caller receives an independent, writable array.
    Raises ``RuntimeError`` when numpy is unavailable.
    """
    _require_numpy()
    readonly_view = np.frombuffer(data, dtype=np.float64)
    return readonly_view.copy()
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def snr_estimate(dim: int, n_items: int) -> float:
    """Estimate the signal-to-noise ratio of holographic storage.

    ``SNR = sqrt(dim / n_items)``.  The estimate drops below 2.0 once
    ``n_items > dim / 4``; a warning is logged whenever the returned value is
    below that threshold.

    This is pure arithmetic, so unlike the vector operations in this module
    it does not require numpy.

    Args:
        dim: Vector dimensionality; must not be negative.
        n_items: Number of stored items.

    Returns:
        The estimated SNR, or ``inf`` when ``n_items`` is zero or negative.

    Raises:
        ValueError: If ``dim`` is negative (and ``n_items`` is positive).
    """
    if n_items <= 0:
        return float("inf")

    if dim < 0:
        # math.sqrt would raise the same exception type with the opaque
        # message "math domain error"; name the offending argument instead.
        raise ValueError(f"dim must be non-negative, got {dim}")

    snr = math.sqrt(dim / n_items)

    if snr < 2.0:
        logger.warning(
            "HRR storage near capacity: SNR=%.2f (dim=%d, n_items=%d). "
            "Retrieval accuracy may degrade. Consider increasing dim or reducing stored items.",
            snr,
            dim,
            n_items,
        )

    return snr
|