hrr-memory 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/memory.js CHANGED
@@ -1,234 +1,275 @@
1
- /**
2
- * HRRMemory — auto-sharded holographic memory store.
3
- *
4
- * Stores (subject, relation, object) triples in sharded buckets.
5
- * Each bucket holds max 25 facts. When full, a new overflow bucket
6
- * is created automatically. Queries scan all buckets for a subject.
7
- *
8
- * Symbol vectors are shared across all buckets.
9
- * All values are lowercased on store. If you need case-sensitive values,
10
- * normalize them yourself before calling store().
11
- *
12
- * @typedef {{ match: string|null, score: number, confident: boolean, bucket: string|null }} QueryResult
13
- * @typedef {{ relation: string, object: string }} Fact
14
- * @typedef {{ subject: string, relation: string, object: string }} Triple
15
- * @typedef {{ type: 'direct', match: string, score: number, confident: boolean, subject: string, relation: string, bucket: string|null }} DirectAskResult
16
- * @typedef {{ type: 'subject', subject: string, facts: Fact[] }} SubjectAskResult
17
- * @typedef {{ type: 'search', term: string, results: Triple[] }} SearchAskResult
18
- * @typedef {{ type: 'miss', query: string }} MissAskResult
19
- * @typedef {DirectAskResult | SubjectAskResult | SearchAskResult | MissAskResult} AskResult
20
- * @typedef {{ dimensions: number, maxBucketSize: number, symbols: number, buckets: number, subjects: number, totalFacts: number, ramBytes: number, ramMB: number, perBucket: Array<{name: string, facts: number, full: boolean}> }} Stats
21
- */
22
-
23
- import { readFileSync, writeFileSync, existsSync } from 'fs';
24
- import { bind, unbind, similarity } from './ops.js';
25
- import { SymbolTable } from './symbols.js';
26
- import { Bucket, MAX_BUCKET_SIZE } from './bucket.js';
27
-
28
// Filler words that ask() removes from a question before it pairs up the
// remaining tokens as (subject, relation) candidates.
const STOP_WORDS = new Set([
  'what', 'is', 'the', 'a', 'an', 'does', 'do', 'where', 'who', 'how',
  'which', 'of', 'for', 'in', 'at', 'to', 'my', 'your', 'their', 'has',
  'have', 'was', 'were', 'are', 'been',
]);

/**
 * Auto-sharded holographic store for (subject, relation, object) triples.
 *
 * Each subject owns an ordered list of bucket ids in `routing`; index 0 is the
 * subject's primary bucket and later entries are overflow buckets that are
 * appended whenever the last bucket reports `isFull`. Subjects, relations and
 * objects are lowercased and trimmed before they are stored or compared.
 */
export class HRRMemory {
  /**
   * @param {number} [d=2048] Vector dimensionality passed to the symbol table
   *   and to every bucket this memory creates.
   */
  constructor(d = 2048) {
    this.d = d;
    this.symbols = new SymbolTable(d);
    this.buckets = new Map(); // bucket id → Bucket
    this.routing = new Map(); // subject → ordered bucket ids (index 0 = primary)
  }

  /**
   * Create a bucket for `key`, register it and append its id to `ids`.
   *
   * The preferred id is the bare key for a primary bucket and `key#<n>` for an
   * overflow bucket. The suffix is bumped until the id is unused, because
   * forget() can shrink `ids` (making `key#<ids.length>` point at a live
   * bucket) and a subject such as "a#1" can collide with an overflow bucket of
   * "a". Reusing a taken id would silently overwrite stored facts.
   *
   * @param {string} key Normalised subject.
   * @param {string[]} ids The subject's routing list (mutated).
   * @returns {Bucket} The newly created bucket.
   */
  _createBucket(key, ids) {
    let suffix = ids.length;
    let id = suffix === 0 ? key : `${key}#${suffix}`;
    while (this.buckets.has(id)) {
      suffix += 1;
      id = `${key}#${suffix}`;
    }
    const bucket = new Bucket(id, this.d);
    this.buckets.set(id, bucket);
    ids.push(id);
    return bucket;
  }

  /**
   * Return the bucket the next fact for `subject` should be written to,
   * creating the primary bucket or a new overflow bucket when needed.
   *
   * @param {string} subject
   * @returns {Bucket}
   */
  _activeBucket(subject) {
    const key = subject.toLowerCase().trim();
    let ids = this.routing.get(key);
    if (!ids) {
      ids = [];
      this.routing.set(key, ids);
    }
    if (ids.length > 0) {
      const last = this.buckets.get(ids[ids.length - 1]);
      // A missing bucket (dangling routing entry) is treated like a full one.
      if (last && !last.isFull) return last;
    }
    return this._createBucket(key, ids);
  }

  /**
   * All existing buckets routed to `subject`, in routing order.
   *
   * @param {string} subject
   * @returns {Bucket[]}
   */
  _subjectBuckets(subject) {
    const ids = this.routing.get(subject.toLowerCase().trim()) || [];
    return ids.map(id => this.buckets.get(id)).filter(Boolean);
  }

  /**
   * Store one fact.
   *
   * @param {string} subject
   * @param {string} relation
   * @param {string} object
   * @returns {boolean} `false` when the identical (normalised) triple is
   *   already stored for this subject, `true` when it was added.
   */
  store(subject, relation, object) {
    const triple = {
      subject: subject.toLowerCase().trim(),
      relation: relation.toLowerCase().trim(),
      object: object.toLowerCase().trim(),
    };
    const isDuplicate = this._subjectBuckets(triple.subject).some(b =>
      b.triples.some(t =>
        t.subject === triple.subject &&
        t.relation === triple.relation &&
        t.object === triple.object));
    if (isDuplicate) return false;

    // Symbols are looked up by the normalised names so the vector written here
    // uses the same keys as the stored triple (which rebuild() and query()
    // read back later).
    const association = bind(
      bind(this.symbols.get(triple.subject), this.symbols.get(triple.relation)),
      this.symbols.get(triple.object),
    );
    this._activeBucket(triple.subject).storeVector(association, triple);
    return true;
  }

  /**
   * Remove one fact and rebuild the memory vector of the bucket that held it.
   *
   * @param {string} subject
   * @param {string} relation
   * @param {string} object
   * @returns {boolean} `true` if the fact existed and was removed.
   */
  forget(subject, relation, object) {
    const s = subject.toLowerCase().trim();
    const r = relation.toLowerCase().trim();
    const o = object.toLowerCase().trim();
    const ids = this.routing.get(s);
    if (!ids) return false;

    for (let i = 0; i < ids.length; i++) {
      const id = ids[i];
      const bucket = this.buckets.get(id);
      if (!bucket) continue; // dangling routing entry, nothing to search
      const idx = bucket.triples.findIndex(t =>
        t.subject === s && t.relation === r && t.object === o
      );
      if (idx === -1) continue;

      bucket.triples.splice(idx, 1);
      bucket.rebuild(this.symbols);

      // Only overflow buckets (position > 0) are dropped when they become
      // empty. Deciding by position instead of "id contains '#'" keeps the
      // primary bucket of subjects such as "c#" alive, so the routing list
      // never becomes empty.
      if (i > 0 && bucket.count === 0) {
        this.buckets.delete(id);
        ids.splice(i, 1);
      }
      return true;
    }
    return false;
  }

  /**
   * Retrieve the best-matching object for (subject, relation).
   *
   * Every non-empty bucket of the subject is probed; each stored object in the
   * bucket is scored against the unbound result and the highest score wins.
   *
   * @param {string} subject
   * @param {string} relation
   * @returns {QueryResult} `confident` is true when the best score exceeds 0.1.
   *   A miss (unknown subject, or nothing stored) is reported as
   *   `{ match: null, score: 0, confident: false, bucket: null }`.
   */
  query(subject, relation) {
    const s = subject.toLowerCase().trim();
    const r = relation.toLowerCase().trim();
    const miss = { match: null, score: 0, confident: false, bucket: null };

    const buckets = this._subjectBuckets(s).filter(b => b.count > 0);
    if (buckets.length === 0) return miss;

    const probe = bind(this.symbols.get(s), this.symbols.get(r));
    let bestName = null;
    let bestScore = -1;
    let bestBucket = null;
    for (const bucket of buckets) {
      const result = unbind(probe, bucket.memory);
      for (const t of bucket.triples) {
        const score = similarity(result, this.symbols.get(t.object));
        if (score > bestScore) {
          bestScore = score;
          bestName = t.object;
          bestBucket = bucket.name;
        }
      }
    }
    // Nothing scored above the -1 sentinel: report a regular miss rather than
    // leaking the sentinel as a score.
    if (bestName === null) return miss;
    return {
      match: bestName,
      score: Math.round(bestScore * 1000) / 1000,
      confident: bestScore > 0.1,
      bucket: bestBucket,
    };
  }

  /**
   * List every stored fact of a subject.
   *
   * @param {string} subject
   * @returns {Fact[]}
   */
  querySubject(subject) {
    const key = subject.toLowerCase().trim();
    const facts = [];
    for (const bucket of this._subjectBuckets(key)) {
      for (const t of bucket.triples) {
        if (t.subject === key) facts.push({ relation: t.relation, object: t.object });
      }
    }
    return facts;
  }

  /**
   * Scan all buckets for triples matching a relation and/or an object.
   * A falsy argument acts as a wildcard.
   *
   * @param {string|null} relation
   * @param {string|null} object
   * @returns {Triple[]} The stored triple objects themselves (not copies).
   */
  search(relation, object) {
    const rel = relation ? relation.toLowerCase().trim() : null;
    const obj = object ? object.toLowerCase().trim() : null;
    const results = [];
    for (const bucket of this.buckets.values()) {
      for (const t of bucket.triples) {
        if (rel && t.relation !== rel) continue;
        if (obj && t.object !== obj) continue;
        results.push(t);
      }
    }
    return results;
  }

  /**
   * Answer a free-form question.
   *
   * The question is lowercased, stripped of `?.,!` and possessive `'s`,
   * hyphens become underscores, and stop words are dropped. Then, in order:
   * adjacent token pairs are tried as (subject, relation) queries, each token
   * is tried as a subject, and each token is tried as an object.
   *
   * @param {string} question
   * @returns {AskResult}
   */
  ask(question) {
    const parts = question.toLowerCase().trim()
      .replace(/[?.,!]/g, '')
      .replace(/'s\b/g, '')
      .replace(/-/g, '_')
      .split(/\s+/)
      .filter(w => !STOP_WORDS.has(w) && w.length > 0);

    for (let i = 0; i < parts.length - 1; i++) {
      const result = this.query(parts[i], parts[i + 1]);
      if (result.confident) {
        return { type: 'direct', ...result, subject: parts[i], relation: parts[i + 1] };
      }
    }
    for (const word of parts) {
      const facts = this.querySubject(word);
      if (facts.length > 0) return { type: 'subject', subject: word, facts };
    }
    for (const word of parts) {
      const results = this.search(null, word);
      if (results.length > 0) return { type: 'search', term: word, results };
    }
    return { type: 'miss', query: question };
  }

  /**
   * Summarise the store.
   *
   * `ramBytes` is an estimate: 4 bytes per vector component, one vector per
   * symbol and one per bucket.
   *
   * @returns {Stats}
   */
  stats() {
    let totalFacts = 0;
    const perBucket = [];
    for (const b of this.buckets.values()) {
      totalFacts += b.count;
      perBucket.push({ name: b.name, facts: b.count, full: b.isFull });
    }
    const ramBytes = (this.symbols.size + this.buckets.size) * this.d * 4;
    return {
      dimensions: this.d,
      maxBucketSize: MAX_BUCKET_SIZE,
      symbols: this.symbols.size,
      buckets: this.buckets.size,
      subjects: this.routing.size,
      totalFacts,
      ramBytes,
      ramMB: Math.round(ramBytes / 1024 / 1024 * 10) / 10,
      perBucket,
    };
  }

  /**
   * Serialise to a plain object (format version 3).
   *
   * Object.fromEntries defines every key as an own property, so a subject or
   * bucket literally named "__proto__" is serialised instead of being
   * swallowed by the prototype setter.
   *
   * @returns {{version: number, d: number, symbols: object, buckets: object, routing: object}}
   */
  toJSON() {
    const buckets = Object.fromEntries(
      [...this.buckets].map(([id, bucket]) => [id, bucket.toJSON()]),
    );
    const routing = Object.fromEntries(
      [...this.routing].map(([subject, ids]) => [subject, [...ids]]),
    );
    return { version: 3, d: this.d, symbols: this.symbols.toJSON(), buckets, routing };
  }

  /**
   * Rebuild a memory from the output of toJSON().
   * A routing value that is not an array is wrapped into a one-element list.
   *
   * @param {object} data
   * @returns {HRRMemory}
   */
  static fromJSON(data) {
    const d = data.d || 2048;
    const mem = new HRRMemory(d);
    mem.symbols = SymbolTable.fromJSON(data.symbols || {}, d);
    for (const [id, raw] of Object.entries(data.buckets || {})) {
      mem.buckets.set(id, Bucket.fromJSON(raw, d));
    }
    for (const [subject, ids] of Object.entries(data.routing || {})) {
      mem.routing.set(subject, Array.isArray(ids) ? [...ids] : [ids]);
    }
    return mem;
  }

  /**
   * Write the serialised memory to `filePath` as JSON.
   *
   * @param {string} filePath
   */
  save(filePath) {
    writeFileSync(filePath, JSON.stringify(this.toJSON()));
  }

  /**
   * Load a memory from `filePath`.
   *
   * Best-effort by design: a missing file, an unreadable file or invalid JSON
   * all yield a fresh, empty memory. `d` is only used for that fresh memory;
   * a successfully loaded file supplies its own dimensionality.
   *
   * @param {string} filePath
   * @param {number} [d=2048]
   * @returns {HRRMemory}
   */
  static load(filePath, d = 2048) {
    if (!existsSync(filePath)) return new HRRMemory(d);
    try { return HRRMemory.fromJSON(JSON.parse(readFileSync(filePath, 'utf8'))); }
    catch { return new HRRMemory(d); }
  }
}
1
+ /**
2
+ * HRRMemory — auto-sharded holographic memory store.
3
+ *
4
+ * Stores (subject, relation, object) triples in sharded buckets.
5
+ * Each bucket holds max 25 facts. When full, a new overflow bucket
6
+ * is created automatically. Queries scan all buckets for a subject.
7
+ *
8
+ * Symbol vectors are shared across all buckets.
9
+ * All values are lowercased on store. If you need case-sensitive values,
10
+ * normalize them yourself before calling store().
11
+ *
12
+ * @typedef {{ match: string|null, score: number, confident: boolean, bucket: string|null }} QueryResult
13
+ * @typedef {{ relation: string, object: string }} Fact
14
+ * @typedef {{ subject: string, relation: string, object: string }} Triple
15
+ * @typedef {{ type: 'direct', match: string, score: number, confident: boolean, subject: string, relation: string, bucket: string|null }} DirectAskResult
16
+ * @typedef {{ type: 'subject', subject: string, facts: Fact[] }} SubjectAskResult
17
+ * @typedef {{ type: 'search', term: string, results: Triple[] }} SearchAskResult
18
+ * @typedef {{ type: 'miss', query: string }} MissAskResult
19
+ * @typedef {DirectAskResult | SubjectAskResult | SearchAskResult | MissAskResult} AskResult
20
+ * @typedef {{ dimensions: number, maxBucketSize: number, symbols: number, buckets: number, subjects: number, totalFacts: number, ramBytes: number, ramMB: number, perBucket: Array<{name: string, facts: number, full: boolean}> }} Stats
21
+ */
22
+
23
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
24
+ import { bind, unbind, similarity } from './ops.js';
25
+ import { SymbolTable } from './symbols.js';
26
+ import { Bucket, MAX_BUCKET_SIZE } from './bucket.js';
27
+
28
// Filler words that ask() removes from a question before it pairs up the
// remaining tokens as (subject, relation) candidates.
const STOP_WORDS = new Set([
  'what', 'is', 'the', 'a', 'an', 'does', 'do', 'where', 'who', 'how',
  'which', 'of', 'for', 'in', 'at', 'to', 'my', 'your', 'their', 'has',
  'have', 'was', 'were', 'are', 'been',
]);

/**
 * Auto-sharded holographic store for (subject, relation, object) triples.
 *
 * Each subject owns an ordered list of bucket ids in `routing`; index 0 is the
 * subject's primary bucket and later entries are overflow buckets that are
 * appended whenever the last bucket reports `isFull`. A secondary index,
 * `_relIndex`, maps "subject\0relation" to the ids of the buckets that hold at
 * least one such fact so query() can skip unrelated shards. Subjects,
 * relations and objects are lowercased and trimmed before they are stored or
 * compared.
 */
export class HRRMemory {
  /**
   * @param {number} [d=2048] Vector dimensionality passed to the symbol table
   *   and to every bucket this memory creates.
   */
  constructor(d = 2048) {
    this.d = d;
    this.symbols = new SymbolTable(d);
    this.buckets = new Map(); // bucket id → Bucket
    this.routing = new Map(); // subject → ordered bucket ids (index 0 = primary)
    this._relIndex = new Map(); // "subject\0relation" → Set<bucket id>
  }

  /**
   * Create a bucket for `key`, register it and append its id to `ids`.
   *
   * The preferred id is the bare key for a primary bucket and `key#<n>` for an
   * overflow bucket. The suffix is bumped until the id is unused, because
   * forget() can shrink `ids` (making `key#<ids.length>` point at a live
   * bucket) and a subject such as "a#1" can collide with an overflow bucket of
   * "a". Reusing a taken id would silently overwrite stored facts.
   *
   * @param {string} key Normalised subject.
   * @param {string[]} ids The subject's routing list (mutated).
   * @returns {Bucket} The newly created bucket.
   */
  _createBucket(key, ids) {
    let suffix = ids.length;
    let id = suffix === 0 ? key : `${key}#${suffix}`;
    while (this.buckets.has(id)) {
      suffix += 1;
      id = `${key}#${suffix}`;
    }
    const bucket = new Bucket(id, this.d);
    this.buckets.set(id, bucket);
    ids.push(id);
    return bucket;
  }

  /**
   * Return the bucket the next fact for `subject` should be written to,
   * creating the primary bucket or a new overflow bucket when needed.
   *
   * @param {string} subject
   * @returns {Bucket}
   */
  _activeBucket(subject) {
    const key = subject.toLowerCase().trim();
    let ids = this.routing.get(key);
    if (!ids) {
      ids = [];
      this.routing.set(key, ids);
    }
    if (ids.length > 0) {
      const last = this.buckets.get(ids[ids.length - 1]);
      // A missing bucket (dangling routing entry) is treated like a full one.
      if (last && !last.isFull) return last;
    }
    return this._createBucket(key, ids);
  }

  /**
   * All existing buckets routed to `subject`, in routing order.
   *
   * @param {string} subject
   * @returns {Bucket[]}
   */
  _subjectBuckets(subject) {
    const ids = this.routing.get(subject.toLowerCase().trim()) || [];
    return ids.map(id => this.buckets.get(id)).filter(Boolean);
  }

  /**
   * Record in the relation index that `bucketId` holds a (subject, relation) fact.
   *
   * @param {string} subject Normalised subject.
   * @param {string} relation Normalised relation.
   * @param {string} bucketId
   */
  _indexAdd(subject, relation, bucketId) {
    const indexKey = subject + '\0' + relation;
    let shardIds = this._relIndex.get(indexKey);
    if (!shardIds) {
      shardIds = new Set();
      this._relIndex.set(indexKey, shardIds);
    }
    shardIds.add(bucketId);
  }

  /**
   * Store one fact.
   *
   * @param {string} subject
   * @param {string} relation
   * @param {string} object
   * @returns {boolean} `false` when the identical (normalised) triple is
   *   already stored for this subject, `true` when it was added.
   */
  store(subject, relation, object) {
    const triple = {
      subject: subject.toLowerCase().trim(),
      relation: relation.toLowerCase().trim(),
      object: object.toLowerCase().trim(),
    };
    const isDuplicate = this._subjectBuckets(triple.subject).some(b =>
      b.triples.some(t =>
        t.subject === triple.subject &&
        t.relation === triple.relation &&
        t.object === triple.object));
    if (isDuplicate) return false;

    // Symbols are looked up by the normalised names so the vector written here
    // uses the same keys as the stored triple (which rebuild() and query()
    // read back later).
    const association = bind(
      bind(this.symbols.get(triple.subject), this.symbols.get(triple.relation)),
      this.symbols.get(triple.object),
    );
    const bucket = this._activeBucket(triple.subject);
    bucket.storeVector(association, triple);
    this._indexAdd(triple.subject, triple.relation, bucket.name);
    return true;
  }

  /**
   * Remove one fact, rebuild the memory vector of the bucket that held it and
   * keep the relation index in sync.
   *
   * @param {string} subject
   * @param {string} relation
   * @param {string} object
   * @returns {boolean} `true` if the fact existed and was removed.
   */
  forget(subject, relation, object) {
    const s = subject.toLowerCase().trim();
    const r = relation.toLowerCase().trim();
    const o = object.toLowerCase().trim();
    const ids = this.routing.get(s);
    if (!ids) return false;

    for (let i = 0; i < ids.length; i++) {
      const id = ids[i];
      const bucket = this.buckets.get(id);
      if (!bucket) continue; // dangling routing entry, nothing to search
      const idx = bucket.triples.findIndex(t =>
        t.subject === s && t.relation === r && t.object === o
      );
      if (idx === -1) continue;

      bucket.triples.splice(idx, 1);
      bucket.rebuild(this.symbols);

      // Drop this bucket from the index entry once it holds no other fact for
      // the same (subject, relation) pair.
      const indexKey = s + '\0' + r;
      const shardIds = this._relIndex.get(indexKey);
      if (shardIds && !bucket.triples.some(t => t.subject === s && t.relation === r)) {
        shardIds.delete(id);
        if (shardIds.size === 0) this._relIndex.delete(indexKey);
      }

      // Only overflow buckets (position > 0) are dropped when they become
      // empty. Deciding by position instead of "id contains '#'" keeps the
      // primary bucket of subjects such as "c#" alive, so the routing list
      // never becomes empty.
      if (i > 0 && bucket.count === 0) {
        this.buckets.delete(id);
        ids.splice(i, 1);
      }
      return true;
    }
    return false;
  }

  /**
   * Retrieve the best-matching object for (subject, relation).
   *
   * When the relation index has an entry for the pair, only the buckets it
   * lists are probed; otherwise every bucket of the subject is probed. Each
   * stored object in a probed bucket is scored against the unbound result and
   * the highest score wins.
   *
   * @param {string} subject
   * @param {string} relation
   * @returns {QueryResult} `confident` is true when the best score exceeds 0.1.
   *   A miss (unknown subject, or nothing stored) is reported as
   *   `{ match: null, score: 0, confident: false, bucket: null }`.
   */
  query(subject, relation) {
    const s = subject.toLowerCase().trim();
    const r = relation.toLowerCase().trim();
    const miss = { match: null, score: 0, confident: false, bucket: null };

    const indexed = this._relIndex.get(s + '\0' + r);
    const candidates = indexed
      ? [...indexed].map(id => this.buckets.get(id)).filter(Boolean)
      : this._subjectBuckets(s); // no index entry for this pair
    const buckets = candidates.filter(b => b.count > 0);
    if (buckets.length === 0) return miss;

    const probe = bind(this.symbols.get(s), this.symbols.get(r));
    let bestName = null;
    let bestScore = -1;
    let bestBucket = null;
    for (const bucket of buckets) {
      const result = unbind(probe, bucket.memory);
      for (const t of bucket.triples) {
        const score = similarity(result, this.symbols.get(t.object));
        if (score > bestScore) {
          bestScore = score;
          bestName = t.object;
          bestBucket = bucket.name;
        }
      }
    }
    // Nothing scored above the -1 sentinel: report a regular miss rather than
    // leaking the sentinel as a score.
    if (bestName === null) return miss;
    return {
      match: bestName,
      score: Math.round(bestScore * 1000) / 1000,
      confident: bestScore > 0.1,
      bucket: bestBucket,
    };
  }

  /**
   * List every stored fact of a subject.
   *
   * @param {string} subject
   * @returns {Fact[]}
   */
  querySubject(subject) {
    const key = subject.toLowerCase().trim();
    const facts = [];
    for (const bucket of this._subjectBuckets(key)) {
      for (const t of bucket.triples) {
        if (t.subject === key) facts.push({ relation: t.relation, object: t.object });
      }
    }
    return facts;
  }

  /**
   * Scan all buckets for triples matching a relation and/or an object.
   * A falsy argument acts as a wildcard.
   *
   * @param {string|null} relation
   * @param {string|null} object
   * @returns {Triple[]} The stored triple objects themselves (not copies).
   */
  search(relation, object) {
    const rel = relation ? relation.toLowerCase().trim() : null;
    const obj = object ? object.toLowerCase().trim() : null;
    const results = [];
    for (const bucket of this.buckets.values()) {
      for (const t of bucket.triples) {
        if (rel && t.relation !== rel) continue;
        if (obj && t.object !== obj) continue;
        results.push(t);
      }
    }
    return results;
  }

  /**
   * Answer a free-form question.
   *
   * The question is lowercased, stripped of `?.,!` and possessive `'s`,
   * hyphens become underscores, and stop words are dropped. Then, in order:
   * adjacent token pairs are tried as (subject, relation) queries, each token
   * is tried as a subject, and each token is tried as an object.
   *
   * @param {string} question
   * @returns {AskResult}
   */
  ask(question) {
    const parts = question.toLowerCase().trim()
      .replace(/[?.,!]/g, '')
      .replace(/'s\b/g, '')
      .replace(/-/g, '_')
      .split(/\s+/)
      .filter(w => !STOP_WORDS.has(w) && w.length > 0);

    for (let i = 0; i < parts.length - 1; i++) {
      const result = this.query(parts[i], parts[i + 1]);
      if (result.confident) {
        return { type: 'direct', ...result, subject: parts[i], relation: parts[i + 1] };
      }
    }
    for (const word of parts) {
      const facts = this.querySubject(word);
      if (facts.length > 0) return { type: 'subject', subject: word, facts };
    }
    for (const word of parts) {
      const results = this.search(null, word);
      if (results.length > 0) return { type: 'search', term: word, results };
    }
    return { type: 'miss', query: question };
  }

  /**
   * Summarise the store.
   *
   * `ramBytes` is an estimate: 4 bytes per vector component, one vector per
   * symbol and one per bucket.
   *
   * @returns {Stats}
   */
  stats() {
    let totalFacts = 0;
    const perBucket = [];
    for (const b of this.buckets.values()) {
      totalFacts += b.count;
      perBucket.push({ name: b.name, facts: b.count, full: b.isFull });
    }
    const ramBytes = (this.symbols.size + this.buckets.size) * this.d * 4;
    return {
      dimensions: this.d,
      maxBucketSize: MAX_BUCKET_SIZE,
      symbols: this.symbols.size,
      buckets: this.buckets.size,
      subjects: this.routing.size,
      totalFacts,
      ramBytes,
      ramMB: Math.round(ramBytes / 1024 / 1024 * 10) / 10,
      perBucket,
    };
  }

  /**
   * Serialise to a plain object (format version 3). The relation index is not
   * written; fromJSON() derives it from the bucket triples.
   *
   * Object.fromEntries defines every key as an own property, so a subject or
   * bucket literally named "__proto__" is serialised instead of being
   * swallowed by the prototype setter.
   *
   * @returns {{version: number, d: number, symbols: object, buckets: object, routing: object}}
   */
  toJSON() {
    const buckets = Object.fromEntries(
      [...this.buckets].map(([id, bucket]) => [id, bucket.toJSON()]),
    );
    const routing = Object.fromEntries(
      [...this.routing].map(([subject, ids]) => [subject, [...ids]]),
    );
    return { version: 3, d: this.d, symbols: this.symbols.toJSON(), buckets, routing };
  }

  /**
   * Rebuild a memory from the output of toJSON(), including the relation
   * index. A routing value that is not an array is wrapped into a one-element
   * list.
   *
   * @param {object} data
   * @returns {HRRMemory}
   */
  static fromJSON(data) {
    const d = data.d || 2048;
    const mem = new HRRMemory(d);
    mem.symbols = SymbolTable.fromJSON(data.symbols || {}, d);
    for (const [id, raw] of Object.entries(data.buckets || {})) {
      mem.buckets.set(id, Bucket.fromJSON(raw, d));
    }
    for (const [subject, ids] of Object.entries(data.routing || {})) {
      mem.routing.set(subject, Array.isArray(ids) ? [...ids] : [ids]);
    }
    // The index is derived data: recompute it from the deserialised triples.
    for (const [bucketId, bucket] of mem.buckets) {
      for (const t of bucket.triples) {
        mem._indexAdd(t.subject, t.relation, bucketId);
      }
    }
    return mem;
  }

  /**
   * Write the serialised memory to `filePath` as JSON.
   *
   * @param {string} filePath
   */
  save(filePath) {
    writeFileSync(filePath, JSON.stringify(this.toJSON()));
  }

  /**
   * Load a memory from `filePath`.
   *
   * Best-effort by design: a missing file, an unreadable file or invalid JSON
   * all yield a fresh, empty memory. `d` is only used for that fresh memory;
   * a successfully loaded file supplies its own dimensionality.
   *
   * @param {string} filePath
   * @param {number} [d=2048]
   * @returns {HRRMemory}
   */
  static load(filePath, d = 2048) {
    if (!existsSync(filePath)) return new HRRMemory(d);
    try { return HRRMemory.fromJSON(JSON.parse(readFileSync(filePath, 'utf8'))); }
    catch { return new HRRMemory(d); }
  }
}