@aleph-ai/tinyaleph 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +278 -0
- package/backends/cryptographic/index.js +196 -0
- package/backends/index.js +15 -0
- package/backends/interface.js +89 -0
- package/backends/scientific/index.js +272 -0
- package/backends/semantic/index.js +527 -0
- package/backends/semantic/surface.js +393 -0
- package/backends/semantic/two-layer.js +375 -0
- package/core/fano.js +127 -0
- package/core/hilbert.js +564 -0
- package/core/hypercomplex.js +141 -0
- package/core/index.js +133 -0
- package/core/llm.js +132 -0
- package/core/prime.js +184 -0
- package/core/resonance.js +695 -0
- package/core/rformer-tf.js +1086 -0
- package/core/rformer.js +806 -0
- package/core/sieve.js +350 -0
- package/data.json +8163 -0
- package/docs/EXAMPLES_PLAN.md +293 -0
- package/docs/README.md +159 -0
- package/docs/design/ALEPH_CHAT_ARCHITECTURE.md +499 -0
- package/docs/guide/01-quickstart.md +298 -0
- package/docs/guide/02-semantic-computing.md +409 -0
- package/docs/guide/03-cryptographic.md +420 -0
- package/docs/guide/04-scientific.md +494 -0
- package/docs/guide/05-llm-integration.md +568 -0
- package/docs/guide/06-advanced.md +996 -0
- package/docs/guide/README.md +188 -0
- package/docs/reference/01-core.md +695 -0
- package/docs/reference/02-physics.md +601 -0
- package/docs/reference/03-backends.md +892 -0
- package/docs/reference/04-engine.md +632 -0
- package/docs/reference/README.md +252 -0
- package/docs/theory/01-prime-semantics.md +327 -0
- package/docs/theory/02-hypercomplex-algebra.md +421 -0
- package/docs/theory/03-phase-synchronization.md +364 -0
- package/docs/theory/04-entropy-reasoning.md +348 -0
- package/docs/theory/05-non-commutativity.md +402 -0
- package/docs/theory/06-two-layer-meaning.md +414 -0
- package/docs/theory/07-resonant-field-interface.md +419 -0
- package/docs/theory/08-semantic-sieve.md +520 -0
- package/docs/theory/09-temporal-emergence.md +298 -0
- package/docs/theory/10-quaternionic-memory.md +415 -0
- package/docs/theory/README.md +162 -0
- package/engine/aleph.js +418 -0
- package/engine/index.js +7 -0
- package/index.js +23 -0
- package/modular.js +254 -0
- package/package.json +99 -0
- package/physics/collapse.js +95 -0
- package/physics/entropy.js +88 -0
- package/physics/index.js +65 -0
- package/physics/kuramoto.js +91 -0
- package/physics/lyapunov.js +80 -0
- package/physics/oscillator.js +95 -0
- package/types/index.d.ts +575 -0
package/core/sieve.js
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The Semantic Sieve
|
|
3
|
+
* Implements the "Sieve of Distinction" algorithm, which enforces the Prime Uniqueness Invariant (no two vocabulary words share the same set of primes).
|
|
4
|
+
*
|
|
5
|
+
* See: docs/theory/08-semantic-sieve.md
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
const fs = require('fs');
const path = require('path');
// `isPrime` from '../modular' is bound under an alias: this file declares its
// own `function isPrime`, and two same-named bindings in one scope is a
// SyntaxError that prevents the module from loading.
const { createEngine, SemanticBackend, isPrime: modularIsPrime, LLM } = require('../modular');
|
|
11
|
+
|
|
12
|
+
// Location of the shared data.json, one directory above this module.
// Sieve#save() writes the updated state back to this path.
const DATA_FILE = path.resolve(__dirname, '..', 'data.json');
|
|
13
|
+
|
|
14
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
15
|
+
// Prime Registry & Utilities
|
|
16
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
17
|
+
|
|
18
|
+
/**
 * Trial-division primality test.
 *
 * NOTE(review): this file also reads an `isPrime` from '../modular'; the two
 * must not be bound under the same name in one scope (a duplicate declaration
 * is a SyntaxError).
 *
 * @param {number} num - Value to test.
 * @returns {boolean} `true` only when `num` is an integer >= 2 that has no
 *   divisor in the range [2, sqrt(num)].
 */
function isPrime(num) {
  // NaN, +/-Infinity and fractional values are never prime. Without this guard
  // NaN and fractions fall through every check and are reported as prime, and
  // Infinity never leaves the divisor loop.
  if (!Number.isInteger(num) || num < 2) return false;

  // 2 is the only even prime; after this only odd divisors need testing.
  if (num % 2 === 0) return num === 2;

  for (let divisor = 3, limit = Math.sqrt(num); divisor <= limit; divisor += 2) {
    if (num % divisor === 0) return false;
  }
  return true;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Tracks which primes are already assigned and hands out unused ones.
 *
 * `used` holds every value passed in plus every prime minted so far;
 * `max` is the high-water mark from which the next search starts.
 */
class PrimeRegistry {
  /**
   * @param {number[]} [existingPrimes=[]] - Values that are already taken.
   */
  constructor(existingPrimes = []) {
    this.used = new Set(existingPrimes);

    // Plain loop instead of Math.max(...existingPrimes): spreading a very
    // large array into call arguments can throw a RangeError, and a single
    // NaN entry would turn the whole maximum into NaN.
    let highest = 1;
    for (const value of existingPrimes) {
      const n = Number(value);
      if (Number.isFinite(n) && n > highest) highest = n;
    }
    this.max = highest;
  }

  /**
   * Find the smallest prime above `max` that is not in `used`, record it as
   * used, advance `max` to it and return it.
   *
   * @returns {number} The newly reserved prime.
   */
  next() {
    // Floor so a fractional high-water mark still produces integer candidates.
    let candidate = Math.floor(this.max) + 1;
    while (!isPrime(candidate) || this.used.has(candidate)) {
      candidate++;
    }
    this.used.add(candidate);
    this.max = candidate;
    return candidate;
  }
}
|
|
44
|
+
|
|
45
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
46
|
+
// The Semantic Sieve Engine
|
|
47
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
48
|
+
|
|
49
|
+
/**
 * The Semantic Sieve engine.
 *
 * Loads the shared data file, finds groups of vocabulary words whose prime
 * lists are identical ("collisions"), and asks the LLM for extra concepts to
 * tell the words apart. Each concept is backed by a prime, minted on demand.
 *
 * State:
 *  - `data`           parsed contents of DATA_FILE (`primes`, `ontology`, `vocabulary`)
 *  - `vocabulary`     Map of word -> array of primes, built from `data.vocabulary`
 *  - `primes`         PrimeRegistry seeded with every prime found in `data`
 *  - `conceptToPrime` Map of normalised concept name -> prime
 *  - `stats`          counters reported at the end of `run()`
 */
class Sieve {
  constructor() {
    // Load from DATA_FILE so that reading and save() use the same path.
    this.data = require(DATA_FILE);

    // Working copy of the vocabulary. The arrays are shared with
    // `data.vocabulary`; save() rebuilds the plain object from this Map.
    this.vocabulary = new Map(Object.entries(this.data.vocabulary));

    // Seed the registry with every prime that appears anywhere in the data.
    const usedPrimes = [
      ...this.data.primes,
      ...Object.keys(this.data.ontology).map(Number),
      ...Object.values(this.data.vocabulary).flat(),
    ];
    this.primes = new PrimeRegistry(usedPrimes);

    // Concept name -> prime. Keys are normalised the same way as in
    // getOrMintPrime() so lookups and stored labels agree.
    this.conceptToPrime = new Map();
    for (const [prime, label] of Object.entries(this.data.ontology)) {
      this.conceptToPrime.set(label.toLowerCase().trim(), Number(prime));
    }

    this.stats = {
      collisionsResolved: 0,
      conceptsCreated: 0,
      primesMinted: 0,
    };
  }

  /**
   * Pull a `{ categories: {...} }` object out of free-form LLM output.
   *
   * Tries, in order: (1) the widest `{ ... "categories": { ... } }` span,
   * re-parsed with missing closing braces appended if the first parse fails;
   * (2) just the object that follows the `"categories":` key, delimited by
   * brace counting (braces inside JSON strings are not special-cased).
   *
   * @param {string} text - Raw response text.
   * @returns {{categories: Object}|null} Parsed object, or null if neither
   *   pattern yields an object with a `categories` object.
   */
  static extractCategories(text) {
    const isObject = (value) => value !== null && typeof value === 'object';
    const tryParse = (json) => {
      try {
        return JSON.parse(json);
      } catch (err) {
        return undefined;
      }
    };

    // Pattern 1: full object match
    const whole = text.match(/\{[\s\S]*"categories"\s*:\s*\{[\s\S]*\}\s*\}/);
    if (whole) {
      let parsed = tryParse(whole[0]);
      if (parsed === undefined) {
        // Truncated output: close whatever braces are still open and retry.
        const opened = (whole[0].match(/\{/g) || []).length;
        const closed = (whole[0].match(/\}/g) || []).length;
        parsed = tryParse(whole[0] + '}'.repeat(Math.max(0, opened - closed)));
      }
      // Only accept a result that really carries `categories`; otherwise
      // fall through so pattern 2 still gets a chance.
      if (isObject(parsed) && isObject(parsed.categories)) return parsed;
    }

    // Pattern 2: the object directly following the "categories" key
    const tail = text.match(/"categories"\s*:\s*(\{[^]*)/);
    if (tail) {
      const body = tail[1];
      let depth = 0;
      let end = 0;
      for (let i = 0; i < body.length; i++) {
        if (body[i] === '{') {
          depth++;
        } else if (body[i] === '}') {
          depth--;
          if (depth === 0) {
            end = i + 1;
            break;
          }
        }
      }
      if (end > 0) {
        const categories = tryParse(body.slice(0, end));
        if (isObject(categories)) return { categories };
      }
    }

    return null;
  }

  /**
   * @returns {string} All ontology labels joined with ", ".
   */
  getOntologyString() {
    return Object.values(this.data.ontology).map(String).join(', ');
  }

  /**
   * Copy the working vocabulary back into `data` and write `data` to
   * DATA_FILE as pretty-printed JSON. The ontology and prime list need no
   * extra work here: getOrMintPrime() updates them on `data` directly.
   */
  save() {
    this.data.vocabulary = Object.fromEntries(this.vocabulary);
    fs.writeFileSync(DATA_FILE, JSON.stringify(this.data, null, 2));
    console.log('💾 System state saved.');
  }

  /**
   * Return the prime for a concept, minting and registering a new one when
   * the concept is not known yet. Lookup is case-insensitive and ignores
   * surrounding whitespace.
   *
   * @param {string} concept - Concept name.
   * @returns {number} The prime assigned to the concept.
   * @throws {TypeError} If `concept` is not a non-blank string.
   */
  getOrMintPrime(concept) {
    if (typeof concept !== 'string' || concept.trim() === '') {
      throw new TypeError('Concept name must be a non-empty string');
    }

    const label = concept.trim();
    const key = label.toLowerCase();
    const known = this.conceptToPrime.get(key);
    if (known !== undefined) return known;

    const newPrime = this.primes.next();
    this.conceptToPrime.set(key, newPrime);
    // Store the trimmed label so the key derived from it on the next load
    // matches the key used here.
    this.data.ontology[newPrime] = label;
    if (!this.data.primes.includes(newPrime)) {
      this.data.primes.push(newPrime);
    }

    console.log(` ✨ Minted Prime ${newPrime} for concept "${label}"`);
    this.stats.primesMinted++;
    this.stats.conceptsCreated++;
    return newPrime;
  }

  /**
   * Group words by prime signature and return the groups with more than one
   * word, largest first.
   *
   * The signature is the word's primes sorted ascending and joined with ",",
   * so the order of primes within a word is ignored for collision detection.
   *
   * @returns {Array<[string, string[]]>} `[signature, words]` pairs.
   */
  analyzeCollisions() {
    const bySignature = new Map();
    for (const [word, primes] of this.vocabulary) {
      // Sort a copy; the stored prime order must stay untouched.
      const signature = [...primes].sort((a, b) => a - b).join(',');
      const bucket = bySignature.get(signature);
      if (bucket) {
        bucket.push(word);
      } else {
        bySignature.set(signature, [word]);
      }
    }

    return [...bySignature.entries()]
      .filter(([, clusterWords]) => clusterWords.length > 1)
      .sort((a, b) => b[1].length - a[1].length); // cluster size DESC
  }

  /**
   * Try to break up one collision cluster. Clusters with more than 10 words
   * are split into LLM-proposed categories; smaller ones get a single
   * discriminating concept for their first word. Failures are logged, not
   * thrown.
   *
   * @param {string} signature - Comma-joined primes shared by the cluster.
   * @param {string[]} words - Vocabulary keys in the cluster.
   * @returns {Promise<void>}
   */
  async resolveCluster(signature, words) {
    const currentPrimes = signature ? signature.split(',').map(Number) : [];
    const existingConcepts = currentPrimes
      .map((p) => this.data.ontology[p] || `P${p}`)
      .join(', ');

    console.log(`\n🔍 Resolving Cluster [${existingConcepts}]: ${words.length} words`);
    console.log(` Words: ${words.slice(0, 10).join(', ')}${words.length > 10 ? '...' : ''}`);

    if (words.length > 10) {
      // Strategy A: Macro (> 10 words)
      await this.resolveByCategories(existingConcepts, words);
    } else {
      // Strategy B: Micro (<= 10 words)
      await this.resolveByDiscriminator(existingConcepts, words);
    }
  }

  /**
   * Strategy A: ask the LLM to sort up to 50 of the cluster's words into
   * sub-categories and add each category's prime to the words assigned to it.
   *
   * @param {string} existingConcepts - Labels the cluster already shares.
   * @param {string[]} words - Vocabulary keys in the cluster.
   * @returns {Promise<void>}
   */
  async resolveByCategories(existingConcepts, words) {
    console.log(' 👉 Strategy A: Macro Categorization');

    // Limit to 50 words to avoid context window issues and improve focus
    const batchWords = words.slice(0, 50);
    const remaining = words.length - batchWords.length;
    console.log(` Processing batch of ${batchWords.length} words (${remaining} remaining)...`);

    // Words are sent with an index and answered by index, so the LLM cannot
    // truncate or respell them.
    const numberedWords = batchWords.map((w, i) => `${i}:${w}`).join(', ');

    const sys = `You are a semantic ontologist. Categorize words by their INDEX NUMBERS only.

RULES:
1. Categorize ALL word indices (0 to ${batchWords.length - 1})
2. Create exactly 4 distinct sub-categories
3. Category names must be DIFFERENT from: ${existingConcepts}
4. Use single-word category names: Nature, Motion, Emotion, Abstract, Physical, Temporal, Spiritual, etc.
5. Return word INDICES (numbers), not the words themselves`;

    const user = `Words to categorize:
${numberedWords}

Return JSON with indices: {"categories":{"CategoryName":[0,1,2],"OtherCategory":[3,4,5],...}}
Every index from 0 to ${batchWords.length - 1} must appear exactly once.`;

    try {
      const res = await LLM.chat([
        { role: 'system', content: sys },
        { role: 'user', content: user },
      ], {
        temperature: 0,
        maxTokens: 65535,
      });

      const text = typeof res.content === 'string' ? res.content : JSON.stringify(res.content);
      const result = Sieve.extractCategories(text);
      if (!result) {
        throw new Error('No valid categories JSON found in response');
      }

      // Convert indices back to words. Only in-range integers are kept: a
      // fractional or out-of-range index has no word behind it.
      const categoriesWithWords = {};
      for (const [catName, indices] of Object.entries(result.categories)) {
        if (!Array.isArray(indices)) continue;
        categoriesWithWords[catName] = indices
          .filter((i) => Number.isInteger(i) && i >= 0 && i < batchWords.length)
          .map((i) => batchWords[i]);
      }

      const totalAssigned = Object.values(categoriesWithWords).flat().length;
      console.log(` LLM assigned ${totalAssigned}/${batchWords.length} words to ${Object.keys(categoriesWithWords).length} categories`);

      let totalApplied = 0;
      for (const [catName, wordList] of Object.entries(categoriesWithWords)) {
        if (wordList.length === 0 || catName.trim() === '') continue;
        const p = this.getOrMintPrime(catName);

        let appliedCount = 0;
        for (const word of wordList) {
          // `word` is an exact vocabulary key taken from the cluster, so it
          // is looked up as-is rather than re-normalised.
          const ps = this.vocabulary.get(word);
          if (ps && !ps.includes(p)) {
            ps.push(p);
            appliedCount++;
          }
        }
        totalApplied += appliedCount;
        console.log(` Applied concept "${catName}" (${p}) to ${appliedCount} words.`);
      }

      // Count the cluster only if at least one word actually changed.
      if (totalApplied > 0) {
        this.stats.collisionsResolved++;
      }
    } catch (e) {
      console.error(' ❌ Macro Strategy failed:', e.message);
    }
  }

  /**
   * Strategy B: ask the LLM for one concept that holds for the cluster's
   * first word but not its second, and add that concept's prime to the first
   * word only.
   *
   * @param {string} existingConcepts - Labels the cluster already shares.
   * @param {string[]} words - Vocabulary keys in the cluster (at least two).
   * @returns {Promise<void>}
   */
  async resolveByDiscriminator(existingConcepts, words) {
    console.log(' 👉 Strategy B: Discriminator');
    // Pick the first two words to differentiate
    const [wordA, wordB] = words;

    const sys = `Compare the words "${wordA}" and "${wordB}".
They currently share the concepts: [${existingConcepts}].
The current Ontology contains: ${this.getOntologyString()}

Provide ONE single semantic concept that is TRUE for "${wordA}" but FALSE for "${wordB}".
- Prefer using an existing concept from the Ontology if applicable.
- If not, define a new one.

Return JSON: { "concept": "string", "reasoning": "string" }`;

    try {
      const res = await LLM.chat([{ role: 'system', content: sys }], {
        temperature: 0.2,
        jsonSchema: {
          type: 'object',
          properties: {
            concept: { type: 'string' },
            reasoning: { type: 'string' },
          },
          required: ['concept'],
        },
      });

      const result = typeof res.content === 'string' ? JSON.parse(res.content) : res.content;
      const p = this.getOrMintPrime(result.concept);

      // Apply ONLY to Word A
      const ps = this.vocabulary.get(wordA);
      if (!ps.includes(p)) {
        ps.push(p);
        console.log(` ✅ Differentiated "${wordA}" from "${wordB}" with concept "${result.concept}" (${p})`);
        this.stats.collisionsResolved++;
      } else {
        // The suggested concept is one the word already has, so the cluster
        // is unchanged; say so instead of failing silently.
        console.log(` ⚠️ Concept "${result.concept}" (${p}) is already on "${wordA}"; cluster unchanged.`);
      }
    } catch (e) {
      console.error(' ❌ Micro Strategy failed:', e.message);
    }
  }

  /**
   * Repeatedly resolve the largest collision cluster, saving after each
   * pass, until no collisions remain or `maxIterations` passes have run.
   *
   * @param {number} [maxIterations=25] - Upper bound on passes.
   * @returns {Promise<void>}
   */
  async run(maxIterations = 25) {
    console.log('🕸️  Semantic Sieve Initialized');
    console.log('----------------------------');

    for (let i = 0; i < maxIterations; i++) {
      const collisions = this.analyzeCollisions();

      if (collisions.length === 0) {
        console.log('\n🎉 Prime Uniqueness Invariant Satisfied! No collisions detected.');
        break;
      }

      console.log(`\nPass ${i + 1}/${maxIterations}: ${collisions.length} clusters detected.`);

      // Pick largest cluster (analyzeCollisions sorts by size, descending)
      const [sig, cluster] = collisions[0];

      await this.resolveCluster(sig, cluster);
      this.save();

      // Small pause to avoid rate limits
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }

    console.log('\n📊 Sieve Session Complete');
    console.log(` Collisions Resolved: ${this.stats.collisionsResolved}`);
    console.log(` New Concepts: ${this.stats.conceptsCreated}`);
  }
}
|
|
344
|
+
|
|
345
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
346
|
+
// Execution
|
|
347
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
348
|
+
|
|
349
|
+
// Build the sieve and start a run with the default pass limit as soon as this
// file is loaded; a rejected run is reported through console.error.
const sieve = new Sieve();
sieve.run().catch((err) => console.error(err));
|