flatmem 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flatmem/__init__.py ADDED
@@ -0,0 +1,50 @@
1
+ """
2
+ flatmem -- Constant-RAM content-addressable memory for digital organisms.
3
+
4
+ A universal flat-memory substrate based on Sparse Distributed Memory + VSA
5
+ role-binding + computed-identity keys + read-time mean-removal. Designed as
6
+ a drop-in lifelong-learning memory for Artificial Life simulations,
7
+ agent-based models, robotics, and language organisms.
8
+
9
+ Two-function interface:
10
+ write(addr, data)
11
+ read(addr) -> cleaned data
12
+
13
+ Substrate size is FIXED regardless of how many items are written.
14
+ Modality-blind: addresses can be any high-dimensional pattern.
15
+
16
+ Quick start:
17
+ from flatmem import MultiRoleMemory, ComputedKey
18
+ mem = MultiRoleMemory(d=512, M=16384, k=64, roles=('isa', 'next', 'reward'))
19
+ mem.relate('cat', 'isa', 'mammal')
20
+ print(mem.query('cat', 'isa')) # ('mammal', 1.0)
21
+ """
22
+
23
+ from .core import (
24
+ ComputedKey,
25
+ VSASDM,
26
+ MultiRoleMemory,
27
+ tokenize,
28
+ cos,
29
+ renorm,
30
+ )
31
+
32
+ from .encoders import (
33
+ scalar_phasor,
34
+ decode_scalar,
35
+ position_phasor,
36
+ random_projection_encoder,
37
+ bind,
38
+ unbind,
39
+ bundle,
40
+ permute,
41
+ )
42
+
43
+ __version__ = "0.1.0"
44
+ __all__ = [
45
+ "ComputedKey", "VSASDM", "MultiRoleMemory",
46
+ "tokenize", "cos", "renorm",
47
+ "scalar_phasor", "decode_scalar", "position_phasor",
48
+ "random_projection_encoder",
49
+ "bind", "unbind", "bundle", "permute",
50
+ ]
flatmem/core.py ADDED
@@ -0,0 +1,479 @@
1
+ """
2
+ flatmem.core -- Core flat-memory substrate.
3
+
4
+ Three building blocks:
5
+ 1. ComputedKey -- zero-storage identity (char-trigram + whole-word phasor)
6
+ 2. VSASDM -- fixed Sparse Distributed Memory bank
7
+ 3. MultiRoleMemory -- multi-channel substrate via VSA role-binding,
8
+ with traffic-class bank separation, dense
9
+ (cooccur/next) vs sparse (isa/sensory/property/verb)
10
+
11
+ Recall is reconstructive: read returns superposed sum + optional
12
+ mean-removal of common directions for scale-invariant adaptation.
13
+ """
14
+
15
+ import re
16
+ import hashlib
17
+ import numpy as np
18
+
19
+
20
+ # ── utilities ────────────────────────────────────────────────────────────────
21
+
22
def tokenize(text):
    """Split `text` into lowercase tokens.

    Every character other than a-z, 0-9, apostrophe or whitespace is replaced
    by a space (after lowercasing), then the result is split on whitespace.
    """
    cleaned = re.sub(r"[^a-z0-9'\s]", ' ', text.lower())
    # str.split() with no separator never yields empty strings.
    return cleaned.split()
25
+
26
+
27
def renorm(hv):
    """Scale each component of `hv` to unit magnitude (phasor normalization).

    Components whose magnitude is at most 1e-9 are divided by 1.0 instead, so
    near-zero entries pass through unchanged rather than dividing by zero.
    Returns a complex64 array.
    """
    magnitude = np.abs(hv)
    divisor = np.where(magnitude > 1e-9, magnitude, 1.0)
    normalized = hv / divisor
    return normalized.astype(np.complex64)
32
+
33
+
34
def cos(a, b, d):
    """Phasor cosine similarity: Re<a, b> / d.

    np.vdot conjugates its first argument, so the inner product is
    sum(conj(a) * b); only its real part is kept and divided by `d`.
    """
    inner = np.vdot(a, b)
    return float(np.real(inner)) / d
37
+
38
+
39
+ # ── 1. Computed (zero-storage) identity ──────────────────────────────────────
40
+
41
class ComputedKey:
    """
    Deterministic phasor hypervector for any identifier, computed on demand.

        key(w) = renorm( Σ phasor(char_trigrams(w)) + word_weight · phasor(WORD:w) )

    - The trigram terms are shared between identifiers with overlapping
      characters.
    - The whole-word term (used when word_weight is non-zero) is seeded by the
      full identifier.

    Seeds are derived with hashlib.sha256 rather than Python's salted hash().
    The _cache dict only memoizes computed keys; it is dropped on pickling and
    rebuilt lazily.
    """

    def __init__(self, d=512, seed=114, word_weight=4.0):
        self.d, self.seed = int(d), int(seed)
        self.word_weight = float(word_weight)
        self._cache = {}

    def _seeded_phasor(self, tag):
        """Return a unit-phasor complex64 vector of length d seeded by (tag, seed)."""
        digest = hashlib.sha256(f'{tag}:{self.seed}'.encode()).digest()
        # First 8 digest bytes (little-endian) seed the generator.
        generator = np.random.default_rng(int.from_bytes(digest[:8], 'little'))
        angles = generator.uniform(-np.pi, np.pi, self.d).astype(np.float32)
        return np.exp(1j * angles).astype(np.complex64)

    def key(self, word):
        """Return the deterministic HV for `word` (memoized in _cache)."""
        cached = self._cache.get(word)
        if cached is not None:
            return cached
        padded = f'#{word}#'
        # An empty `word` yields '##', which has no trigram; fall back to the
        # padded string itself.
        grams = [padded[i:i + 3] for i in range(len(padded) - 2)] or [padded]
        total = np.zeros(self.d, dtype=np.complex64)
        for gram in grams:
            total += self._seeded_phasor(gram)
        if self.word_weight:
            total += self.word_weight * self._seeded_phasor(f'WORD:{word}')
        hv = renorm(total)
        self._cache[word] = hv
        return hv

    def __getstate__(self):
        # Only the generating parameters are pickled; the cache is omitted.
        return {'d': self.d, 'seed': self.seed, 'word_weight': self.word_weight}

    def __setstate__(self, s):
        self.d = s['d']
        self.seed = s['seed']
        # States without 'word_weight' restore with the whole-word term off.
        self.word_weight = float(s.get('word_weight', 0.0))
        self._cache = {}
93
+
94
+ # ── 2. Sparse Distributed Memory (Kanerva 1988, complex variant) ────────────
95
+
96
class VSASDM:
    """
    Fixed bank of M hard-location addresses + counter rows.

    write(addr, data, word=None):
        Activate the top-k hard locations by Re<H[m], addr>;
        add `data` to the counter rows at those locations.
    read(addr, word=None):
        Sum the counter rows at the activated locations, return renormalized.

    Substrate size = M · d · 8 bytes (Hconj) + same (C). FIXED.
    The Hconj address bank is regenerable from `seed` — dropped from pickle.
    Per-word activation sets are cached in _loc_cache (regenerable); the cache
    is keyed by `word` only, so a given word must always be paired with the
    same address.

    consolidate_every (optional): auto-call consolidate() after every N calls
    to write(). Per-row L2 renorm. Off by default (0).

    If k >= M every hard location is activated (np.argpartition would
    otherwise raise ValueError for an out-of-range kth).
    """

    def __init__(self, d=512, M=16384, k=64, seed=114, consolidate_every=0):
        self.d = int(d)
        self.M = int(M)
        self.k = int(k)
        self.seed = int(seed)
        self.consolidate_every = int(consolidate_every)
        self._writes_since = 0          # write() calls since last auto-consolidate
        self.n_consolidations = 0       # number of auto-consolidations so far
        self.Hconj = self._make_Hconj()
        self.C = np.zeros((self.M, self.d), dtype=np.complex64)
        self._loc_cache = {}

    def _make_Hconj(self):
        """Build the (M, d) conjugated hard-location bank from `seed`."""
        rng = np.random.default_rng(self.seed + 7)
        ph = rng.uniform(-np.pi, np.pi, (self.M, self.d)).astype(np.float32)
        return np.exp(-1j * ph).astype(np.complex64)  # conj(exp(i*ph))

    def _top_k(self, sims):
        """Return the (unordered) indices of the k largest entries of 1-D `sims`."""
        n = sims.shape[0]
        if self.k >= n:
            # argpartition requires kth < n; with k >= n every row is selected.
            return np.arange(n)
        return np.argpartition(-sims, self.k)[:self.k]

    def _activate(self, addr):
        """Indices of the hard locations most similar to `addr`."""
        sims = (self.Hconj @ addr).real
        return self._top_k(sims)

    def locs(self, addr, word=None):
        """Activated location indices for `addr`, memoized under `word` if given."""
        if word is not None and word in self._loc_cache:
            return self._loc_cache[word]
        idx = self._activate(addr)
        if word is not None:
            self._loc_cache[word] = idx
        return idx

    def locs_batch(self, addrs, words):
        """Batch-activate many addresses in ONE matmul (speed win for cold cache).

        `addrs[i]` is the address for `words[i]`. Returns a list of index
        arrays aligned with `words`; newly computed entries are cached.
        """
        out = [None] * len(words)
        need_rows, need_i = [], []
        for i, wd in enumerate(words):
            cached = self._loc_cache.get(wd)
            if cached is not None:
                out[i] = cached
            else:
                need_rows.append(addrs[i])
                need_i.append(i)
        if need_rows:
            A = np.stack(need_rows)               # (m, d)
            sims = (A @ self.Hconj.T).real        # (m, M)
            for r, i in enumerate(need_i):
                idx = self._top_k(sims[r])
                self._loc_cache[words[i]] = idx
                out[i] = idx
        return out

    def write(self, addr, data, word=None):
        """Add `data` to the counter rows activated by `addr`."""
        self.C[self.locs(addr, word)] += data
        if self.consolidate_every:
            self._writes_since += 1
            if self._writes_since >= self.consolidate_every:
                self.consolidate()
                self._writes_since = 0
                self.n_consolidations += 1

    def read(self, addr, word=None):
        """Return the renormalized sum of the counter rows activated by `addr`."""
        return renorm(self.C[self.locs(addr, word)].sum(axis=0))

    def consolidate(self):
        """Per-row L2 renormalization. Bounds magnitude drift. Loc cache stays valid."""
        nrm = np.linalg.norm(self.C, axis=1, keepdims=True)
        # Rows with (near-)zero norm are left untouched.
        nrm = np.where(nrm > 1e-9, nrm, 1.0)
        self.C = (self.C / nrm).astype(np.complex64)

    def substrate_bytes(self):
        """Total fixed bytes of the substrate."""
        return self.Hconj.nbytes + self.C.nbytes

    def __getstate__(self):
        s = self.__dict__.copy()
        # Both are regenerable, so they are not pickled.
        s['Hconj'] = None
        s['_loc_cache'] = {}
        return s

    def __setstate__(self, s):
        self.__dict__.update(s)
        if getattr(self, 'Hconj', None) is None:
            self.Hconj = self._make_Hconj()
        if not hasattr(self, '_loc_cache') or self._loc_cache is None:
            self._loc_cache = {}
193
+
194
+
195
+ # ── 3. Multi-role substrate ──────────────────────────────────────────────────
196
+
197
class MultiRoleMemory:
    """
    A flat memory holding multiple relation types in fixed-size SDM banks via
    VSA role-binding (key(item) ⊙ ROLE_phasor = unique address).

    Two banks separated by traffic class:
      - dense  (roles listed in DENSE_ROLES: cooccur, next) -> self.sdm
      - sparse (every other role: isa, sensory, property, verb, ...) -> self.sdm_rel

    Why two banks: a shared bank lets a high-traffic channel swamp a
    low-traffic one at shared hard locations. Both banks are fixed-size.

    Every role used in a call must have been listed in `roles` at
    construction; an unknown role raises KeyError when its address is built.

    Methods:
      relate(item, role, target)         -- store item-role->target (a string)
      write_relation(item, role, hv)     -- store item-role->arbitrary HV
      recall(item, role)                 -- raw reconstructive read
      query(item, role, candidates=None) -- recall + cleanup -> best candidate
      chain(item, role_a, c_a, role_b, c_b) -- two-hop cross-channel
      expose_cooccur(text)               -- co-occurrence window write
      expose_transitions(text)           -- per-token (prev,next) write
      next_word_candidates(prev, top_k)  -- bigram-equivalent, vectorized cleanup
      similarity(w1, w2)                 -- mean-removed cooccur cosine
      neighbors(word, k)                 -- nearest seen words
      consolidate()                      -- per-bank consolidation
      expose_verb_observation(verb, c)   -- Channel 2 scalar via phase encoding
      predict_verb_coefficient(verb)     -- decode learned scalar
      assert_relation(item, role, target, n=20) -- reinforced fact injection
    """

    DEFAULT_ROLES = ('cooccur', 'next', 'isa', 'sensory', 'property', 'verb')
    DENSE_ROLES = {'cooccur', 'next'}

    def __init__(self, d=512, M=16384, k=64, seed=114, window=3, remove_r=1,
                 svd_sample=2000, roles=DEFAULT_ROLES, M_rel=8192,
                 consolidate_every=0, q_omega=0.05, q_seed=12345):
        self.d = int(d)
        self.window = int(window)
        self.remove_r = int(remove_r)
        self.svd_sample = int(svd_sample)
        self.ck = ComputedKey(d=d, seed=seed)
        # Two banks: dense (cooccur/next) vs sparse (isa/sensory/...)
        self.sdm = VSASDM(d=d, M=M, k=k, seed=seed, consolidate_every=consolidate_every)
        self.sdm_rel = VSASDM(d=d, M=M_rel, k=k, seed=seed + 1)
        # Role phasors (fixed); drawn in the order `roles` is iterated.
        rng = np.random.default_rng(seed + 999)
        self.roles = {}
        for name in roles:
            ph = rng.uniform(-np.pi, np.pi, self.d).astype(np.float32)
            self.roles[name] = np.exp(1j * ph).astype(np.complex64)
        self._seen = set()              # items written under any role
        self._cooccur_seen = set()      # tokens written by expose_cooccur
        self._role_targets = {}         # role -> set of target strings (cleanup pool)
        self._verb_seen = set()         # verbs with at least one observation
        self._dirs = None               # cached common directions for mean-removal
        self._dirty = True              # True when _dirs must be recomputed
        # Verb-rotor quantity axis
        self.q_omega = float(q_omega)
        rng_axis = np.random.default_rng(q_seed)
        self.q_axis = (rng_axis.integers(0, 2, size=self.d) * 2 - 1).astype(np.float32)

    # ── routing ───────────────────────────────────────────────────────────
    def _bank(self, role):
        """Return the SDM bank that stores `role`."""
        return self.sdm if role in self.DENSE_ROLES else self.sdm_rel

    def _bind(self, a, r):
        return (a * r).astype(np.complex64)

    def _unbind(self, c, r):
        return (c * np.conj(r)).astype(np.complex64)

    def _addr(self, item, role):
        """Address of (item, role): key(item) bound with the role phasor."""
        return self._bind(self.ck.key(item), self.roles[role])

    def _slot(self, item, role):
        """Cache key for the activation set of (item, role)."""
        return f'{item}\x00{role}'

    # ── writing ──────────────────────────────────────────────────────────
    def write_relation(self, item, role, value_hv):
        """Store item-role -> arbitrary HV value."""
        self._bank(role).write(
            self._addr(item, role),
            np.asarray(value_hv, dtype=np.complex64),
            word=self._slot(item, role),
        )
        self._seen.add(item)

    def relate(self, item, role, target):
        """Store item-role -> target (string). Tracks targets for cleanup."""
        self.write_relation(item, role, self.ck.key(target))
        self._role_targets.setdefault(role, set()).add(target)

    def assert_relation(self, item, role, target, n=20):
        """Reinforced injection: writes the same relation n times."""
        for _ in range(n):
            self.relate(item, role, target)

    def targets(self, role):
        """Set of target strings recorded for `role` (empty set if none)."""
        return self._role_targets.get(role, set())

    # ── Channel 1: co-occurrence ─────────────────────────────────────────
    def expose_cooccur(self, text):
        """Sliding-window co-occurrence write. Returns token count.

        For each token, the keys of the other tokens within `window`
        positions on either side are summed and added at the token's
        'cooccur' address. Contexts of repeated tokens are aggregated so each
        distinct token is written once per call.
        """
        tokens = tokenize(text)
        if not tokens:
            return 0
        n = len(tokens)
        K = np.stack([self.ck.key(t) for t in tokens])
        w = self.window
        # Prefix sums: P[j] = sum of K[0:j], so a window sum is P[hi] - P[lo].
        P = np.empty((n + 1, self.d), dtype=np.complex128)
        P[0] = 0
        P[1:] = np.cumsum(K.astype(np.complex128), axis=0)
        agg, order = {}, []
        for i in range(n):
            lo = max(i - w, 0)
            hi = min(i + w + 1, n)
            ctx = (P[hi] - P[lo]) - K[i]      # window sum minus the token itself
            t = tokens[i]
            if t in agg:
                agg[t] = agg[t] + ctx
            else:
                agg[t] = ctx
                order.append(t)
            self._seen.add(t)
            self._cooccur_seen.add(t)
        # Route through _bank so writes land in the same bank recall() reads.
        bank = self._bank('cooccur')
        ukeys = np.stack([self._bind(self.ck.key(t), self.roles['cooccur']) for t in order])
        slots = [self._slot(t, 'cooccur') for t in order]
        locs = bank.locs_batch(ukeys, slots)
        # NOTE(review): counters are updated directly, so these writes do not
        # advance the bank's consolidate_every counter — confirm intended.
        for t, idx in zip(order, locs):
            bank.C[idx] += agg[t].astype(np.complex64)
        self._dirty = True
        return n

    # ── Channel 2: verb rotor (scalar arithmetic) ────────────────────────
    def _encode_scalar(self, c):
        """Encode scalar `c` as a phasor with per-component phase c·q_omega·q_axis."""
        phase = float(c) * self.q_omega * self.q_axis
        return np.exp(1j * phase).astype(np.complex64)

    def _decode_scalar(self, hv):
        """Invert _encode_scalar: median over components of angle / (q_omega·q_axis)."""
        phases = np.angle(hv).astype(np.float32)
        cs = phases / (self.q_omega * self.q_axis + 1e-9)
        return float(np.median(cs))

    def expose_verb_observation(self, verb, c_est):
        """Channel 2: encode scalar coefficient as phasor, superpose in (verb,'verb')."""
        self.write_relation(verb, 'verb', self._encode_scalar(c_est))
        self._verb_seen.add(verb)

    def predict_verb_coefficient(self, verb):
        """Decode the scalar stored for `verb`. None if unseen."""
        if verb not in self._verb_seen:
            return None
        return self._decode_scalar(self.recall(verb, 'verb'))

    # ── Channel 5: transitions / generation ──────────────────────────────
    def expose_transitions(self, text):
        """Per-token (prev → curr) write under the 'next' role.

        For every adjacent token pair the key of `curr` is added at the
        address of (prev, 'next'). Each `curr` is recorded as a 'next' target
        so that query(prev, 'next') cleans up against the words that were
        actually stored as successors. Returns the number of pairs written.
        """
        tokens = tokenize(text)
        if len(tokens) < 2:
            return 0
        bank = self._bank('next')
        next_targets = self._role_targets.setdefault('next', set())
        agg, order = {}, []
        for prev, curr in zip(tokens, tokens[1:]):
            ck_curr = self.ck.key(curr)
            if prev in agg:
                agg[prev] = agg[prev] + ck_curr
            else:
                # Copy so the cached key array is never aliased by the sum.
                agg[prev] = ck_curr.astype(np.complex64).copy()
                order.append(prev)
            self._seen.add(prev)
            # The stored value is `curr`, so `curr` is the cleanup candidate.
            next_targets.add(curr)
        ukeys = np.stack([self._bind(self.ck.key(p), self.roles['next']) for p in order])
        slots = [self._slot(p, 'next') for p in order]
        locs = bank.locs_batch(ukeys, slots)
        for p, idx in zip(order, locs):
            bank.C[idx] += agg[p]
        return len(tokens) - 1

    def next_word_candidates(self, prev_word, candidates=None, top_k=20):
        """Vectorized bigram-equivalent: recall + score candidate keys.

        When `candidates` is None the pool is the co-occurrence vocabulary,
        or, if that is empty, the recorded 'next' targets. Returns up to
        `top_k` (word, score) pairs sorted by descending score; [] when the
        pool is empty.
        """
        if candidates is None:
            candidates = self._cooccur_seen or self.targets('next')
        if not candidates:
            return []
        r = self.recall(prev_word, 'next')
        cands = list(candidates)
        K = np.stack([self.ck.key(c) for c in cands])
        sims = (np.conj(r) @ K.T).real / self.d
        if top_k >= len(cands):
            order = np.argsort(-sims)
        else:
            # Partition first, then sort only the selected top_k entries.
            top = np.argpartition(-sims, top_k)[:top_k]
            order = top[np.argsort(-sims[top])]
        return [(cands[i], float(sims[i])) for i in order]

    # ── reading ──────────────────────────────────────────────────────────
    def recall(self, item, role):
        """Raw reconstructive read of (item, role) from the role's bank."""
        return self._bank(role).read(self._addr(item, role),
                                     word=self._slot(item, role))

    def query(self, item, role, candidates=None):
        """Recall + cleanup against candidates. Returns (best, score).

        `candidates` defaults to the targets recorded for `role`. With an
        empty pool the result is (None, -9.0).
        """
        if candidates is None:
            candidates = self.targets(role)
        r = self.recall(item, role)
        best, bscore = None, -9.0
        for c in candidates:
            s = cos(r, self.ck.key(c), self.d)
            if s > bscore:
                bscore, best = s, c
        return best, bscore

    def chain(self, item, role_a, cands_a, role_b, cands_b):
        """Two-hop: role_b(role_a(item)). Returns (mid, end).

        If the first hop finds no candidate, returns (None, None) rather
        than running the second hop on the literal item None.
        """
        mid, _ = self.query(item, role_a, cands_a)
        if mid is None:
            return None, None
        end, _ = self.query(mid, role_b, cands_b)
        return mid, end

    # ── cooccur similarity (Channel 1) with mean-removal ─────────────────
    def _refresh_dirs(self):
        """Recompute the top `remove_r` common directions of cooccur recalls if stale."""
        if not self._dirty and self._dirs is not None:
            return
        if self.remove_r <= 0 or not self._cooccur_seen:
            self._dirs = np.zeros((0, self.d), dtype=np.complex64)
            self._dirty = False
            return
        words = list(self._cooccur_seen)
        if len(words) > self.svd_sample:
            # Set iteration order of strings varies between processes; sort
            # so the fixed-seed subsample is the same on every run.
            words.sort()
            rng = np.random.default_rng(0)
            words = [words[i] for i in rng.choice(len(words), self.svd_sample,
                                                  replace=False)]
        Mtx = np.stack([self.recall(w, 'cooccur') for w in words])
        _, _, Vh = np.linalg.svd(Mtx, full_matrices=False)
        self._dirs = Vh[:self.remove_r].astype(np.complex64)
        self._dirty = False

    def cooccur_recall(self, word):
        """Cooccur recall of `word` with the common directions projected out."""
        self._refresh_dirs()
        m = self.recall(word, 'cooccur')
        for v in self._dirs:
            m = m - np.vdot(v, m) * v
        return renorm(m)

    def similarity(self, w1, w2):
        """Mean-removed cooccur cosine of two words; None if either is unseen."""
        if w1 not in self._cooccur_seen or w2 not in self._cooccur_seen:
            return None
        return cos(self.cooccur_recall(w1), self.cooccur_recall(w2), self.d)

    def neighbors(self, word, k=10, candidates=None):
        """Nearest words by cooccur similarity, against optional candidate pool."""
        if word not in self._cooccur_seen:
            return []
        pool = list(candidates) if candidates is not None else list(self._cooccur_seen)
        target = self.cooccur_recall(word)
        out = [(w, cos(target, self.cooccur_recall(w), self.d))
               for w in pool if w != word]
        return sorted(out, key=lambda x: -x[1])[:k]

    def consolidate(self):
        """Consolidate both banks. Marks dirty so common-directions rebuild."""
        self.sdm.consolidate()
        self.sdm_rel.consolidate()
        self._dirty = True

    # ── introspection ────────────────────────────────────────────────────
    def role_orthogonality(self, item, role_a, role_b):
        """Cosine between the addresses of `item` under two roles."""
        a = self._addr(item, role_a)
        b = self._addr(item, role_b)
        return cos(a, b, self.d)

    def substrate_bytes(self):
        """Total bytes of both banks."""
        return self.sdm.substrate_bytes() + self.sdm_rel.substrate_bytes()

    @property
    def vocab_size(self):
        return len(self._seen)

    def status(self):
        """Summary dict: roles, vocabulary counts and bank sizes in MiB."""
        return {
            'roles': list(self.roles.keys()),
            'vocab': len(self._seen),
            'cooccur_vocab': len(self._cooccur_seen),
            'verb_vocab': len(self._verb_seen),
            'dense_mb': round(self.sdm.substrate_bytes() / 1_048_576, 1),
            'sparse_mb': round(self.sdm_rel.substrate_bytes() / 1_048_576, 1),
            'substrate_mb': round(self.substrate_bytes() / 1_048_576, 1),
            'flat': True,
        }
flatmem/encoders.py ADDED
@@ -0,0 +1,117 @@
1
+ """
2
+ flatmem.encoders -- Helper encoders for converting raw state to phasor HVs.
3
+
4
+ Domain-agnostic primitives + common patterns for ALife state encoding.
5
+ All return component-wise unit-phasor complex64 arrays of dimension d.
6
+
7
+ VSA ops (bind, bundle, permute) provided as convenience wrappers around
8
+ component-wise complex multiply / sum / np.roll.
9
+ """
10
+
11
+ import hashlib
12
+ import numpy as np
13
+
14
+ from .core import renorm
15
+
16
+
17
+ # ── primitives ───────────────────────────────────────────────────────────────
18
+
19
+ def _seeded_phasor(tag, d, seed):
20
+ """Deterministic random phasor HV from a tag (string or int)."""
21
+ h = hashlib.sha256(f'{tag}:{seed}'.encode()).digest()
22
+ s = int.from_bytes(h[:8], 'little')
23
+ rng = np.random.default_rng(s)
24
+ ph = rng.uniform(-np.pi, np.pi, d).astype(np.float32)
25
+ return np.exp(1j * ph).astype(np.complex64)
26
+
27
+
28
+ def _qaxis(d, seed=12345):
29
+ """Bipolar random ±1 axis vector for fractional-power scalar encoding."""
30
+ rng = np.random.default_rng(seed)
31
+ return (rng.integers(0, 2, size=d) * 2 - 1).astype(np.float32)
32
+
33
+
34
+ # ── scalar encoding (Fractional Power Encoding) ──────────────────────────────
35
+
36
def scalar_phasor(value, d=512, omega=0.05, seed=12345, q_axis=None):
    """
    Encode a real scalar as a phasor HV with per-component phase
    value * omega * q_axis.

    When `q_axis` is None the axis is generated from (d, seed); when it is
    supplied, `d` and `seed` are not used.

    Reversible: median(angle / (omega * q_axis)) recovers value (modulo 2pi/omega).
    Linear: scalar_phasor(a+b) ≈ scalar_phasor(a) * scalar_phasor(b) (componentwise).
    """
    axis = _qaxis(d, seed) if q_axis is None else q_axis
    angles = float(value) * omega * axis
    return np.exp(1j * angles).astype(np.complex64)
49
+
50
+
51
def decode_scalar(hv, omega=0.05, seed=12345, q_axis=None):
    """Inverse of scalar_phasor.

    Divides each component's angle by omega * q_axis (plus a 1e-9 guard) and
    returns the median of the per-component estimates as a float. When
    `q_axis` is None the axis is generated from (len(hv), seed).
    """
    axis = _qaxis(len(hv), seed) if q_axis is None else q_axis
    angles = np.angle(hv).astype(np.float32)
    estimates = angles / (omega * axis + 1e-9)
    return float(np.median(estimates))
58
+
59
+
60
+ # ── 2D position encoding ─────────────────────────────────────────────────────
61
+
62
def position_phasor(x, y, d=512, omega=0.05, scale=1.0, seed=12345):
    """
    Encode a 2D position (x, y) as the component-wise product of two scalar
    phasors, one per coordinate:
        phasor(x, x_axis) ⊙ phasor(y, y_axis)

    The x axis is generated from `seed` and the y axis from `seed + 1`; each
    coordinate contributes a phase of coordinate * scale * omega * axis.
    """
    axes = (_qaxis(d, seed), _qaxis(d, seed + 1))
    encoded = []
    for coord, axis in zip((x, y), axes):
        # Same left-to-right evaluation order as a single inline expression.
        coeff = 1j * float(coord) * scale * omega
        encoded.append(np.exp(coeff * axis).astype(np.complex64))
    x_hv, y_hv = encoded
    return (x_hv * y_hv).astype(np.complex64)
73
+
74
+
75
+ # ── arbitrary vector → HV (random projection) ────────────────────────────────
76
+
77
def random_projection_encoder(vector, d=512, seed=42, project_matrix_cache={}):
    """
    Map an arbitrary numeric array to a phasor HV via a fixed random projection:
        hv = exp(i * (proj_matrix @ vector))

    `vector` is flattened to 1-D float32. The (d, n) standard-normal matrix is
    generated from `seed` and memoized under the key (n, d, seed). The default
    `project_matrix_cache` dict is deliberately shared across calls so the
    matrix is reused within the process; pass your own dict to isolate it.
    The result is already unit-magnitude per component.
    """
    flat = np.asarray(vector, dtype=np.float32).ravel()
    n_inputs = flat.shape[0]
    cache_key = (n_inputs, d, seed)
    matrix = project_matrix_cache.get(cache_key)
    if matrix is None:
        generator = np.random.default_rng(seed)
        matrix = generator.standard_normal((d, n_inputs)).astype(np.float32)
        project_matrix_cache[cache_key] = matrix
    angles = (matrix @ flat).astype(np.float32)
    return np.exp(1j * angles).astype(np.complex64)
93
+
94
+
95
+ # ── VSA operations ───────────────────────────────────────────────────────────
96
+
97
def bind(a, b):
    """VSA binding: component-wise complex product of `a` and `b`, as complex64.

    Inverse: bind(c, conj(b)) when `b` is a unit phasor.
    """
    product = a * b
    return product.astype(np.complex64)
100
+
101
+
102
def unbind(c, b):
    """Inverse of bind: multiply `c` by conj(b), returned as complex64.

    Recovers a from c = a * b when every component of `b` has unit magnitude.
    """
    recovered = c * np.conj(b)
    return recovered.astype(np.complex64)
105
+
106
+
107
def bundle(*vs):
    """VSA bundling: sum the given HVs, then renormalize component-wise.

    At least one vector is required; the accumulator takes its shape and
    dtype from the first argument.
    """
    # sum() with a start value adds left to right: ((0 + v0) + v1) + ...
    total = sum(vs, np.zeros_like(vs[0]))
    return renorm(total)
113
+
114
+
115
def permute(hv, shift=1):
    """Cyclically shift `hv` by `shift` positions (np.roll), returned as complex64.

    Used to mark sequence position: applying it i times stands for position i.
    """
    rolled = np.roll(hv, shift)
    return rolled.astype(np.complex64)
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: flatmem
3
+ Version: 0.1.0
4
+ Summary: Constant-RAM content-addressable memory substrate for digital organisms and Artificial Life agents.
5
+ Author-email: Prince Siddhpara <princesiddhpara67@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/HitoshiFTW/Flatmem-Mura-ALife-Labs
8
+ Project-URL: Repository, https://github.com/HitoshiFTW/Flatmem-Mura-ALife-Labs
9
+ Project-URL: Paper, https://github.com/HitoshiFTW/Flatmem-Mura-ALife-Labs/blob/main/PAPER.md
10
+ Keywords: memory,sdm,vsa,hdc,alife,artificial-life,sparse-distributed-memory,vector-symbolic,hyperdimensional,agent-based,online-learning
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Life
17
+ Requires-Python: >=3.8
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: numpy>=1.20
21
+ Provides-Extra: test
22
+ Requires-Dist: pytest>=7.0; extra == "test"
23
+ Dynamic: license-file
24
+
25
+ # flatmem
26
+
27
+ **A brain you can fit in 233 MB that absorbs frontier-scale text and memory never grows.**
28
+ **Not a model. An organism.**
29
+
30
+ `flatmem` is a constant-RAM, content-addressable memory substrate for digital organisms, Artificial Life simulations, agent-based models, robotics, and language organisms. The total memory footprint is **fixed** regardless of how much data is written. Inspired by, and mathematically grounded in, the Marr-Albus-Kanerva cerebellar model.
31
+
32
+ ---
33
+
34
+ ## Why this exists
35
+
36
+ Traditional memory in AI grows with data: embedding tables (`{id: vector}`), bigram dicts, replay buffers, vector databases. RAM scales linearly with experience. Brains do not — adult human cortex is ~86 billion neurons fixed; lifetime memories live in **synaptic strengths in a fixed matrix**, not in growing dictionaries.
37
+
38
+ `flatmem` is that fixed matrix in software. Every memory you write superposes into existing counter weights at sparse locations. Recall is reconstructive (gist-based, like human memory). The substrate is **modality-blind**: text, sensor arrays, chemical gradients, robot joint angles — anything that can be encoded as a high-dimensional pattern.
39
+
40
+ ## The 5-piece architecture
41
+
42
+ 1. **Sparse Distributed Memory (SDM)** — fixed bank of `M` random hard-location addresses + counter rows. Writes activate top-`k` nearest by cosine; counters accumulate.
43
+ 2. **VSA role-binding** — `addr(item, role) = key(item) ⊙ ROLE` (Hadamard complex multiply). Multiple relation types in ONE substrate without interference.
44
+ 3. **Traffic-class bank separation** — high-traffic channels (co-occurrence) and low-traffic channels (sparse facts) live in separate fixed banks; otherwise dense traffic swamps sparse signal at shared locations.
45
+ 4. **Computed-identity keys** — zero stored bytes per item. Identity = char-trigram + whole-word random phasor projection. Infinite vocabulary, no embedding table.
46
+ 5. **Read-time mean-removal** — Arora's all-but-the-top, adapted to live recall. Subtracts the dominant common direction; scale-invariant sensory adaptation.
47
+
48
+ Each piece has prior art (Kanerva 1988, Plate 1995, Frady-Sommer TPAM, Arora 2017). The **integration** as a working lifelong-learning constant-RAM memory organism is novel.
49
+
50
+ ## Install
51
+
52
+ ```bash
53
+ pip install flatmem
54
+ # or from source
55
+ git clone https://github.com/HitoshiFTW/Flatmem-Mura-ALife-Labs
56
+ cd Flatmem-Mura-ALife-Labs
57
+ pip install -e .
58
+ ```
59
+
60
+ Only dependency: `numpy>=1.20`.
61
+
62
+ ## Quick start
63
+
64
+ ```python
65
+ from flatmem import MultiRoleMemory
66
+
67
+ mem = MultiRoleMemory(d=512, M=16384, k=64)
68
+
69
+ # Inject knowledge with reinforcement
70
+ mem.assert_relation('cat', 'isa', 'mammal', n=20)
71
+ mem.assert_relation('dog', 'isa', 'mammal', n=20)
72
+ mem.assert_relation('mammal', 'property', 'warm', n=20)
73
+
74
+ # Query
75
+ print(mem.query('cat', 'isa')) # ('mammal', 1.0)
76
+ print(mem.query('dog', 'isa')) # ('mammal', 1.0)
77
+
78
+ # Cross-channel composition (the property of a cat's hypernym)
79
+ mid, end = mem.chain('cat', 'isa', ['mammal', 'flower', 'tree'],
80
+ 'property', ['warm', 'cold', 'tall'])
81
+ print(f'{mid} -> {end}') # 'mammal -> warm'
82
+
83
+ # Read text — co-occurrence accumulates in fixed substrate
84
+ for sentence in ["the cat sat on the mat", "the dog ran fast"] * 50:
85
+ mem.expose_cooccur(sentence)
86
+ print(mem.similarity('cat', 'dog')) # > 0 (semantic neighbors)
87
+
88
+ # Substrate is FIXED regardless of how much you write
89
+ print(f'{mem.substrate_bytes() / 1_048_576:.0f} MB') # always the same
90
+ ```
91
+
92
+ ## Universal interface
93
+
94
+ The substrate exposes **two functions**:
95
+
96
+ ```python
97
+ mem.write(addr, data) # via .relate / .write_relation / .expose_*
98
+ mem.read(addr) # via .recall / .query / .similarity / .neighbors
99
+ ```
100
+
101
+ Anything that can be encoded as a `d`-dim phasor HV can address it. `flatmem.encoders` provides helpers for the common ALife encodings:
102
+
103
+ ```python
104
+ from flatmem.encoders import (
105
+ scalar_phasor, # encode a scalar (reward, concentration, joint angle)
106
+ position_phasor, # encode 2D position with spatial topology
107
+ random_projection_encoder,# encode an arbitrary numeric vector
108
+ bind, unbind, bundle, permute, # VSA primitives
109
+ )
110
+ ```
111
+
112
+ ## ALife integration patterns
113
+
114
+ The `examples/` directory shows drop-in patterns for several ALife domains:
115
+
116
+ | File | Paradigm | What it shows |
117
+ |------|----------|---------------|
118
+ | `01_text_organism.py` | Language organism | Co-occurrence + IS-A + similarity in one substrate |
119
+ | `02_maze_agent.py` | Grid-world RL | Q-values per (state, action) without a Q-table dict |
120
+ | `03_boids_memory.py` | Flocking / swarms | Experiential steering from past visual states |
121
+ | `04_chemotaxis.py` | Chemical gradients | Running-mean concentration via phasor superposition |
122
+
123
+ The same substrate object handles all four. No special-case code.
124
+
125
+ ## What it's NOT for
126
+
127
+ `flatmem` is a **cognitive / episodic / symbolic** memory substrate. It is the **wrong** choice for:
128
+ - Mass-cellular-automata grid storage at 60 Hz (use raw VRAM arrays for the grid; put `flatmem` in the agents that navigate it).
129
+ - Lookup tables requiring **exact** retrieval (recall is reconstructive / gist).
130
+ - Anything where you need 100% precision and have unlimited RAM.
131
+
132
+ If your problem is "I want to remember the GIST of a lifetime of experience in fixed bytes" — this is the right tool.
133
+
134
+ ## Engineering notes
135
+
136
+ - **Real-time loops**: top-`k` activation is O(M·d). Cache strategically; don't query per frame at 60 Hz.
137
+ - **Multi-agent**: each agent has its own `MultiRoleMemory` (`~192 MB default`). Decentralize.
138
+ - **Federated merge**: agents with same `seed` have aligned hard locations; counter banks can be summed (`C_merged = C_a + C_b`) for emergent hive intelligence or generational inheritance. Capacity wall at ~hundreds of agents (noise floor grows as √N).
139
+ - **GPU**: not optimized for GPU yet. SDM top-`k` selection causes warp divergence; replace with differentiable softmax for GPU port.
140
+
141
+ ## Run the tests
142
+
143
+ ```bash
144
+ python tests/test_basics.py # core correctness
145
+ python tests/test_universality.py # ALife integration patterns
146
+ ```
147
+
148
+ ## Research paper
149
+
150
+ See [PAPER.md](./PAPER.md) for the full architecture rationale, empirical results, prior-art discussion, and novelty analysis. Originally developed inside the **Ikigai** organism project at **Mura ALife Labs** as the constant-RAM memory substrate for a language-grounded digital organism.
151
+
152
+ ## Cite
153
+
154
+ If you use `flatmem` in research:
155
+
156
+ ```bibtex
157
+ @misc{siddhpara2026flatmem,
158
+ author = {Siddhpara, Prince},
159
+ title = {flatmem: A Constant-RAM Content-Addressable Memory Substrate for Digital Organisms},
160
+ year = {2026},
161
+ publisher = {Mura ALife Labs},
162
+ howpublished = {\url{https://github.com/HitoshiFTW/Flatmem-Mura-ALife-Labs}},
163
+ }
164
+ ```
165
+
166
+ ## License
167
+
168
+ MIT. See [LICENSE](./LICENSE).
169
+
170
+ ---
171
+
172
+ <p align="center">
173
+ <b>Mura ALife Labs</b> · 2026<br>
174
+ <i>Building digital organisms, not models.</i>
175
+ </p>
@@ -0,0 +1,8 @@
1
+ flatmem/__init__.py,sha256=kxxm1vAP1hZu-q0vSs4kY9-leVn4eOdJwrx8WmsOu1c,1321
2
+ flatmem/core.py,sha256=IjNLfMrUbnI3-z2UzW_zQLsWyqlkyO2-epW9LjjWeRY,20301
3
+ flatmem/encoders.py,sha256=-Wyis_nh1iRw1TsOrt6ayalEzaaU3i-IW5PX-I4o3ic,4690
4
+ flatmem-0.1.0.dist-info/licenses/LICENSE,sha256=ASo4cQymLpKzYbZVBvwTi9JMZ44UFvH0c6jWY8HsXXY,1093
5
+ flatmem-0.1.0.dist-info/METADATA,sha256=JRZncKbF-c5jfOtrV-RMyBEL3mfzUD4b7yTF5yp9hcM,8233
6
+ flatmem-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
7
+ flatmem-0.1.0.dist-info/top_level.txt,sha256=oCLoEO6W8cFdSD_u3gMaud6RGlKCbrwhyAmV5NrztnE,8
8
+ flatmem-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Prince Siddhpara — Mura ALife Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ flatmem