hrr-memory 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -179
- package/package.json +56 -56
- package/src/bucket.js +55 -55
- package/src/index.js +25 -25
- package/src/memory.js +275 -234
- package/src/ops.js +107 -107
- package/src/symbols.js +49 -49
- package/types/index.d.ts +257 -257
package/src/memory.js
CHANGED
|
@@ -1,234 +1,275 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* HRRMemory — auto-sharded holographic memory store.
|
|
3
|
-
*
|
|
4
|
-
* Stores (subject, relation, object) triples in sharded buckets.
|
|
5
|
-
* Each bucket holds max 25 facts. When full, a new overflow bucket
|
|
6
|
-
* is created automatically. Queries scan all buckets for a subject.
|
|
7
|
-
*
|
|
8
|
-
* Symbol vectors are shared across all buckets.
|
|
9
|
-
* All values are lowercased on store. If you need case-sensitive values,
|
|
10
|
-
* normalize them yourself before calling store().
|
|
11
|
-
*
|
|
12
|
-
* @typedef {{ match: string|null, score: number, confident: boolean, bucket: string|null }} QueryResult
|
|
13
|
-
* @typedef {{ relation: string, object: string }} Fact
|
|
14
|
-
* @typedef {{ subject: string, relation: string, object: string }} Triple
|
|
15
|
-
* @typedef {{ type: 'direct', match: string, score: number, confident: boolean, subject: string, relation: string, bucket: string|null }} DirectAskResult
|
|
16
|
-
* @typedef {{ type: 'subject', subject: string, facts: Fact[] }} SubjectAskResult
|
|
17
|
-
* @typedef {{ type: 'search', term: string, results: Triple[] }} SearchAskResult
|
|
18
|
-
* @typedef {{ type: 'miss', query: string }} MissAskResult
|
|
19
|
-
* @typedef {DirectAskResult | SubjectAskResult | SearchAskResult | MissAskResult} AskResult
|
|
20
|
-
* @typedef {{ dimensions: number, maxBucketSize: number, symbols: number, buckets: number, subjects: number, totalFacts: number, ramBytes: number, ramMB: number, perBucket: Array<{name: string, facts: number, full: boolean}> }} Stats
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
24
|
-
import { bind, unbind, similarity } from './ops.js';
|
|
25
|
-
import { SymbolTable } from './symbols.js';
|
|
26
|
-
import { Bucket, MAX_BUCKET_SIZE } from './bucket.js';
|
|
27
|
-
|
|
28
|
-
const STOP_WORDS = new Set([
|
|
29
|
-
'what', 'is', 'the', 'a', 'an', 'does', 'do', 'where', 'who', 'how',
|
|
30
|
-
'which', 'of', 'for', 'in', 'at', 'to', 'my', 'your', 'their', 'has',
|
|
31
|
-
'have', 'was', 'were', 'are', 'been',
|
|
32
|
-
]);
|
|
33
|
-
|
|
34
|
-
export class HRRMemory {
|
|
35
|
-
constructor(d = 2048) {
|
|
36
|
-
this.d = d;
|
|
37
|
-
this.symbols = new SymbolTable(d);
|
|
38
|
-
this.buckets = new Map();
|
|
39
|
-
this.routing = new Map();
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
const
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
const
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
this.
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
t.
|
|
76
|
-
t.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
const
|
|
81
|
-
const
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
const
|
|
137
|
-
const
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
return
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
const
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
return {
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
const
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
1
|
+
/**
|
|
2
|
+
* HRRMemory — auto-sharded holographic memory store.
|
|
3
|
+
*
|
|
4
|
+
* Stores (subject, relation, object) triples in sharded buckets.
|
|
5
|
+
* Each bucket holds max 25 facts. When full, a new overflow bucket
|
|
6
|
+
* is created automatically. Queries scan all buckets for a subject.
|
|
7
|
+
*
|
|
8
|
+
* Symbol vectors are shared across all buckets.
|
|
9
|
+
* All values are lowercased on store. If you need case-sensitive values,
|
|
10
|
+
* normalize them yourself before calling store().
|
|
11
|
+
*
|
|
12
|
+
* @typedef {{ match: string|null, score: number, confident: boolean, bucket: string|null }} QueryResult
|
|
13
|
+
* @typedef {{ relation: string, object: string }} Fact
|
|
14
|
+
* @typedef {{ subject: string, relation: string, object: string }} Triple
|
|
15
|
+
* @typedef {{ type: 'direct', match: string, score: number, confident: boolean, subject: string, relation: string, bucket: string|null }} DirectAskResult
|
|
16
|
+
* @typedef {{ type: 'subject', subject: string, facts: Fact[] }} SubjectAskResult
|
|
17
|
+
* @typedef {{ type: 'search', term: string, results: Triple[] }} SearchAskResult
|
|
18
|
+
* @typedef {{ type: 'miss', query: string }} MissAskResult
|
|
19
|
+
* @typedef {DirectAskResult | SubjectAskResult | SearchAskResult | MissAskResult} AskResult
|
|
20
|
+
* @typedef {{ dimensions: number, maxBucketSize: number, symbols: number, buckets: number, subjects: number, totalFacts: number, ramBytes: number, ramMB: number, perBucket: Array<{name: string, facts: number, full: boolean}> }} Stats
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { readFileSync, writeFileSync, existsSync } from 'fs';
|
|
24
|
+
import { bind, unbind, similarity } from './ops.js';
|
|
25
|
+
import { SymbolTable } from './symbols.js';
|
|
26
|
+
import { Bucket, MAX_BUCKET_SIZE } from './bucket.js';
|
|
27
|
+
|
|
28
|
+
// Words dropped from a natural-language question before ask() tries to
// interpret the remaining words as subject / relation / object terms.
const STOP_WORDS = new Set([
  'what', 'is', 'the', 'a', 'an', 'does', 'do', 'where', 'who', 'how',
  'which', 'of', 'for', 'in', 'at', 'to', 'my', 'your', 'their', 'has',
  'have', 'was', 'were', 'are', 'been',
]);

/**
 * Canonical form used for every subject, relation and object:
 * lowercased with surrounding whitespace removed.
 * @param {string} value
 * @returns {string}
 */
const normalizeTerm = (value) => value.toLowerCase().trim();

/**
 * Key of the relation index: "subject\0relation".
 * @param {string} subject normalized subject
 * @param {string} relation normalized relation
 * @returns {string}
 */
const relationKey = (subject, relation) => subject + '\0' + relation;

export class HRRMemory {
  /**
   * @param {number} [d=2048] dimension handed to SymbolTable and to every Bucket
   */
  constructor(d = 2048) {
    this.d = d;
    this.symbols = new SymbolTable(d);
    this.buckets = new Map();   // bucket id → Bucket
    this.routing = new Map();   // subject → ordered bucket ids (first bucket first)
    this._relIndex = new Map(); // "subject\0relation" → Set<bucket_id>
  }

  /**
   * Create and register a new bucket for `key`, appending its id to `ids`.
   *
   * The first bucket of a subject is named after the subject and overflow
   * buckets are named `key#n`. The id is always checked against the live
   * bucket map: forget() can remove an overflow bucket from the middle of
   * `ids`, so `ids.length` alone may name a bucket that still exists, and
   * reusing that id would silently overwrite its facts.
   *
   * @param {string} key normalized subject
   * @param {string[]} ids routing entry of the subject (mutated)
   * @returns {Bucket} the newly created bucket
   */
  _createBucket(key, ids) {
    let id = key;
    if (ids.length > 0 || this.buckets.has(id)) {
      let n = Math.max(ids.length, 1);
      id = `${key}#${n}`;
      while (this.buckets.has(id)) {
        n += 1;
        id = `${key}#${n}`;
      }
    }
    const bucket = new Bucket(id, this.d);
    this.buckets.set(id, bucket);
    ids.push(id);
    return bucket;
  }

  /**
   * Return the bucket that should receive the next fact for `subject`:
   * the last routed bucket if it is not full, otherwise a fresh one.
   *
   * @param {string} subject
   * @returns {Bucket}
   */
  _activeBucket(subject) {
    const key = normalizeTerm(subject);
    let ids = this.routing.get(key);
    if (!ids) {
      ids = [];
      this.routing.set(key, ids);
    }
    // A routing entry may be empty or point at a missing bucket (e.g. a
    // hand-edited save file); treat both like "needs a new bucket".
    const last = ids.length > 0 ? this.buckets.get(ids[ids.length - 1]) : undefined;
    if (last && !last.isFull) return last;
    return this._createBucket(key, ids);
  }

  /**
   * All existing buckets routed to `subject`, in routing order.
   *
   * @param {string} subject
   * @returns {Bucket[]}
   */
  _subjectBuckets(subject) {
    const ids = this.routing.get(normalizeTerm(subject)) || [];
    return ids.map((id) => this.buckets.get(id)).filter(Boolean);
  }

  /**
   * Record in the relation index that `bucketId` holds a fact for
   * (subject, relation).
   *
   * @param {string} subject normalized subject
   * @param {string} relation normalized relation
   * @param {string} bucketId
   */
  _indexRelation(subject, relation, bucketId) {
    const key = relationKey(subject, relation);
    let shards = this._relIndex.get(key);
    if (!shards) {
      shards = new Set();
      this._relIndex.set(key, shards);
    }
    shards.add(bucketId);
  }

  /**
   * Store a (subject, relation, object) triple. All three terms are
   * normalized (lowercase + trim) first.
   *
   * @param {string} subject
   * @param {string} relation
   * @param {string} object
   * @returns {boolean} false if the identical triple is already stored, true otherwise
   */
  store(subject, relation, object) {
    const triple = {
      subject: normalizeTerm(subject),
      relation: normalizeTerm(relation),
      object: normalizeTerm(object),
    };

    const alreadyStored = this._subjectBuckets(triple.subject).some((bucket) =>
      bucket.triples.some((t) =>
        t.subject === triple.subject &&
        t.relation === triple.relation &&
        t.object === triple.object
      )
    );
    if (alreadyStored) return false;

    // Symbols are looked up by the normalized terms so they are the same
    // ones query() later resolves from the stored triples.
    const s = this.symbols.get(triple.subject);
    const r = this.symbols.get(triple.relation);
    const o = this.symbols.get(triple.object);
    const association = bind(bind(s, r), o);

    const bucket = this._activeBucket(triple.subject);
    bucket.storeVector(association, triple);

    // Update relation → shard index
    this._indexRelation(triple.subject, triple.relation, bucket.name);

    return true;
  }

  /**
   * Remove a stored triple and rebuild the bucket that held it.
   *
   * @param {string} subject
   * @param {string} relation
   * @param {string} object
   * @returns {boolean} true if the triple was found and removed
   */
  forget(subject, relation, object) {
    const s = normalizeTerm(subject);
    const r = normalizeTerm(relation);
    const o = normalizeTerm(object);

    const ids = this.routing.get(s);
    if (!ids) return false;

    for (let i = 0; i < ids.length; i++) {
      const id = ids[i];
      const bucket = this.buckets.get(id);
      if (!bucket) continue; // dangling routing entry

      const idx = bucket.triples.findIndex((t) =>
        t.subject === s && t.relation === r && t.object === o
      );
      if (idx === -1) continue;

      bucket.triples.splice(idx, 1);
      bucket.rebuild(this.symbols);

      // Update relation index: remove bucket if it no longer has this relation
      const indexKey = relationKey(s, r);
      const shards = this._relIndex.get(indexKey);
      if (shards && !bucket.triples.some((t) => t.subject === s && t.relation === r)) {
        shards.delete(id);
        if (shards.size === 0) this._relIndex.delete(indexKey);
      }

      // Drop emptied overflow buckets but always keep the subject's first
      // bucket. Position is used rather than looking for '#' in the id,
      // because a subject name may itself contain '#'.
      if (bucket.count === 0 && i > 0) {
        this.buckets.delete(id);
        ids.splice(i, 1);
      }
      return true;
    }
    return false;
  }

  /**
   * Look up the object stored for (subject, relation).
   *
   * Unbinds the subject/relation probe from each candidate bucket and
   * scores every object stored in that bucket against the result; the
   * highest-scoring object wins.
   *
   * @param {string} subject
   * @param {string} relation
   * @returns {QueryResult} `match` and `bucket` are null and `score` is 0 when nothing is found
   */
  query(subject, relation) {
    const s = normalizeTerm(subject);
    const r = normalizeTerm(relation);
    const miss = { match: null, score: 0, confident: false, bucket: null };

    // Fast-path: use relation index to scan only relevant shards
    const indexed = this._relIndex.get(relationKey(s, r));
    const candidates = indexed
      ? [...indexed].map((id) => this.buckets.get(id)).filter(Boolean)
      : this._subjectBuckets(s); // fallback for pre-index data
    const buckets = candidates.filter((bucket) => bucket.count > 0);
    if (buckets.length === 0) return miss;

    const probe = bind(this.symbols.get(s), this.symbols.get(r));
    let bestName = null;
    let bestScore = -Infinity;
    let bestBucket = null;
    for (const bucket of buckets) {
      const result = unbind(probe, bucket.memory);
      for (const t of bucket.triples) {
        const score = similarity(result, this.symbols.get(t.object));
        if (score > bestScore) {
          bestScore = score;
          bestName = t.object;
          bestBucket = bucket.name;
        }
      }
    }
    // No candidate produced a comparable score: report the same miss shape
    // as the "no buckets" case instead of leaking the initial sentinel.
    if (bestName === null) return miss;

    return {
      match: bestName,
      score: Math.round(bestScore * 1000) / 1000, // three decimals
      confident: bestScore > 0.1,
      bucket: bestBucket,
    };
  }

  /**
   * List every (relation, object) pair stored for `subject`.
   *
   * @param {string} subject
   * @returns {Fact[]}
   */
  querySubject(subject) {
    const key = normalizeTerm(subject);
    const facts = [];
    for (const bucket of this._subjectBuckets(key)) {
      for (const t of bucket.triples) {
        if (t.subject === key) facts.push({ relation: t.relation, object: t.object });
      }
    }
    return facts;
  }

  /**
   * Scan all buckets for triples matching the given relation and/or object.
   * A falsy `relation` or `object` acts as a wildcard.
   *
   * @param {string|null} relation
   * @param {string|null} object
   * @returns {Triple[]}
   */
  search(relation, object) {
    const rel = relation ? normalizeTerm(relation) : null;
    const obj = object ? normalizeTerm(object) : null;
    const results = [];
    for (const bucket of this.buckets.values()) {
      for (const t of bucket.triples) {
        if (rel && t.relation !== rel) continue;
        if (obj && t.object !== obj) continue;
        results.push(t);
      }
    }
    return results;
  }

  /**
   * Answer a free-form question. After stripping punctuation, possessive
   * "'s" and stop words, it tries in order:
   *   1. each adjacent word pair as (subject, relation) → 'direct'
   *   2. each word as a subject                         → 'subject'
   *   3. each word as an object                         → 'search'
   * and returns 'miss' if none of them yields anything.
   *
   * @param {string} question
   * @returns {AskResult}
   */
  ask(question) {
    const parts = question.toLowerCase().trim()
      .replace(/[?.,!]/g, '')
      .replace(/'s\b/g, '')
      .replace(/-/g, '_')
      .split(/\s+/)
      .filter((w) => !STOP_WORDS.has(w) && w.length > 0);

    for (let i = 0; i < parts.length - 1; i++) {
      const result = this.query(parts[i], parts[i + 1]);
      if (result.confident) {
        return { type: 'direct', ...result, subject: parts[i], relation: parts[i + 1] };
      }
    }
    for (const word of parts) {
      const facts = this.querySubject(word);
      if (facts.length > 0) return { type: 'subject', subject: word, facts };
    }
    for (const word of parts) {
      const results = this.search(null, word);
      if (results.length > 0) return { type: 'search', term: word, results };
    }
    return { type: 'miss', query: question };
  }

  /**
   * Summary counters for the store.
   *
   * The RAM figures count `d * 4` bytes per symbol and per bucket —
   * presumably 4-byte float vector elements; confirm against ops.js.
   *
   * @returns {Stats}
   */
  stats() {
    let totalFacts = 0;
    const bucketInfo = [];
    for (const b of this.buckets.values()) {
      totalFacts += b.count;
      bucketInfo.push({ name: b.name, facts: b.count, full: b.isFull });
    }
    const symBytes = this.symbols.size * this.d * 4;
    const bktBytes = this.buckets.size * this.d * 4;
    return {
      dimensions: this.d,
      maxBucketSize: MAX_BUCKET_SIZE,
      symbols: this.symbols.size,
      buckets: this.buckets.size,
      subjects: this.routing.size,
      totalFacts,
      ramBytes: symBytes + bktBytes,
      ramMB: Math.round((symBytes + bktBytes) / 1024 / 1024 * 10) / 10,
      perBucket: bucketInfo,
    };
  }

  /**
   * Plain-object snapshot suitable for JSON.stringify. The relation index
   * is not serialized; fromJSON() rebuilds it from the bucket triples.
   *
   * Object.fromEntries defines own properties, so ids such as "__proto__"
   * survive serialization instead of being swallowed by the prototype
   * setter that plain `obj[key] = value` assignment would trigger.
   *
   * @returns {{ version: number, d: number, symbols: object, buckets: object, routing: object }}
   */
  toJSON() {
    const buckets = Object.fromEntries(
      [...this.buckets].map(([id, bucket]) => [id, bucket.toJSON()])
    );
    const routing = Object.fromEntries(this.routing);
    return { version: 3, d: this.d, symbols: this.symbols.toJSON(), buckets, routing };
  }

  /**
   * Rebuild a memory from a toJSON() snapshot.
   *
   * @param {object} data snapshot; missing `d` defaults to 2048, and a
   *   routing value that is not an array is wrapped into a one-element array
   * @returns {HRRMemory}
   */
  static fromJSON(data) {
    const d = data.d || 2048;
    const mem = new HRRMemory(d);
    mem.symbols = SymbolTable.fromJSON(data.symbols || {}, d);
    for (const [id, raw] of Object.entries(data.buckets || {})) {
      mem.buckets.set(id, Bucket.fromJSON(raw, d));
    }
    for (const [subject, ids] of Object.entries(data.routing || {})) {
      mem.routing.set(subject, Array.isArray(ids) ? ids : [ids]);
    }

    // Rebuild relation index from deserialized triples
    for (const [bucketId, bucket] of mem.buckets) {
      for (const t of bucket.triples) {
        mem._indexRelation(t.subject, t.relation, bucketId);
      }
    }

    return mem;
  }

  /**
   * Write the JSON snapshot to `filePath` (synchronous).
   *
   * @param {string} filePath
   */
  save(filePath) {
    writeFileSync(filePath, JSON.stringify(this.toJSON()));
  }

  /**
   * Load a memory from `filePath`.
   *
   * Best-effort by design: a missing, unreadable or unparsable file yields
   * a new empty memory of dimension `d` rather than an error. `d` is only
   * used in that case; a successfully loaded file supplies its own
   * dimension.
   *
   * @param {string} filePath
   * @param {number} [d=2048]
   * @returns {HRRMemory}
   */
  static load(filePath, d = 2048) {
    if (!existsSync(filePath)) return new HRRMemory(d);
    try {
      return HRRMemory.fromJSON(JSON.parse(readFileSync(filePath, 'utf8')));
    } catch {
      return new HRRMemory(d);
    }
  }
}
|