@aleph-ai/tinyaleph 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +278 -0
- package/backends/cryptographic/index.js +196 -0
- package/backends/index.js +15 -0
- package/backends/interface.js +89 -0
- package/backends/scientific/index.js +272 -0
- package/backends/semantic/index.js +527 -0
- package/backends/semantic/surface.js +393 -0
- package/backends/semantic/two-layer.js +375 -0
- package/core/fano.js +127 -0
- package/core/hilbert.js +564 -0
- package/core/hypercomplex.js +141 -0
- package/core/index.js +133 -0
- package/core/llm.js +132 -0
- package/core/prime.js +184 -0
- package/core/resonance.js +695 -0
- package/core/rformer-tf.js +1086 -0
- package/core/rformer.js +806 -0
- package/core/sieve.js +350 -0
- package/data.json +8163 -0
- package/docs/EXAMPLES_PLAN.md +293 -0
- package/docs/README.md +159 -0
- package/docs/design/ALEPH_CHAT_ARCHITECTURE.md +499 -0
- package/docs/guide/01-quickstart.md +298 -0
- package/docs/guide/02-semantic-computing.md +409 -0
- package/docs/guide/03-cryptographic.md +420 -0
- package/docs/guide/04-scientific.md +494 -0
- package/docs/guide/05-llm-integration.md +568 -0
- package/docs/guide/06-advanced.md +996 -0
- package/docs/guide/README.md +188 -0
- package/docs/reference/01-core.md +695 -0
- package/docs/reference/02-physics.md +601 -0
- package/docs/reference/03-backends.md +892 -0
- package/docs/reference/04-engine.md +632 -0
- package/docs/reference/README.md +252 -0
- package/docs/theory/01-prime-semantics.md +327 -0
- package/docs/theory/02-hypercomplex-algebra.md +421 -0
- package/docs/theory/03-phase-synchronization.md +364 -0
- package/docs/theory/04-entropy-reasoning.md +348 -0
- package/docs/theory/05-non-commutativity.md +402 -0
- package/docs/theory/06-two-layer-meaning.md +414 -0
- package/docs/theory/07-resonant-field-interface.md +419 -0
- package/docs/theory/08-semantic-sieve.md +520 -0
- package/docs/theory/09-temporal-emergence.md +298 -0
- package/docs/theory/10-quaternionic-memory.md +415 -0
- package/docs/theory/README.md +162 -0
- package/engine/aleph.js +418 -0
- package/engine/index.js +7 -0
- package/index.js +23 -0
- package/modular.js +254 -0
- package/package.json +99 -0
- package/physics/collapse.js +95 -0
- package/physics/entropy.js +88 -0
- package/physics/index.js +65 -0
- package/physics/kuramoto.js +91 -0
- package/physics/lyapunov.js +80 -0
- package/physics/oscillator.js +95 -0
- package/types/index.d.ts +575 -0
package/core/rformer-tf.js
@@ -0,0 +1,1086 @@
/**
 * ResoFormer TensorFlow.js Layers
 *
 * Trainable neural network layers implementing the ResoFormer architecture:
 * - QuaternionDense: Dense layer with Hamilton product
 * - ResonantAttention: Attention using Jaccard + Quaternion + Phase
 * - HamiltonCompose: Order-sensitive composition layer
 * - CoherenceGating: Coherence-based output gating
 * - EntropyCollapse: 64-codebook VQ collapse
 * - ResoFormerBlock: Complete transformer block
 * - createResoFormerModel / Classifier / Embedder: end-to-end trainable models
 *
 * State space: H_Q = H_P ⊗ ℍ (Prime Hilbert space ⊗ Quaternions)
 */

let tf;
try {
  tf = require('@tensorflow/tfjs-node');
} catch (e) {
  try {
    tf = require('@tensorflow/tfjs');
  } catch (e2) {
    console.warn('TensorFlow.js not available. Install with: npm install @tensorflow/tfjs-node');
    tf = null;
  }
}

const { firstNPrimes } = require('./prime');

// ============================================================================
// UTILITY FUNCTIONS
// ============================================================================

/**
 * Generate prime lookup table as a TF tensor
 */
function getPrimeLookup(numPrimes = 4096) {
  if (!tf) throw new Error('TensorFlow.js not available');
  const primes = firstNPrimes(numPrimes);
  return tf.tensor1d(primes, 'int32');
}

/**
 * Compute log frequencies for primes
 */
function getPrimeLogFrequencies(numPrimes = 4096) {
  if (!tf) throw new Error('TensorFlow.js not available');
  const primes = firstNPrimes(numPrimes);
  const logFreqs = primes.map(p => 1 / Math.log(p));
  return tf.tensor1d(logFreqs, 'float32');
}

// ============================================================================
// QUATERNION OPERATIONS (TF.JS)
// ============================================================================

/**
 * Hamilton product for batched quaternions
 * Input shapes: [batch, ..., 4] x [batch, ..., 4] -> [batch, ..., 4]
 */
function quaternionMul(q1, q2) {
  if (!tf) throw new Error('TensorFlow.js not available');

  return tf.tidy(() => {
    // Split into components along the last (quaternion) axis
    const [w1, x1, y1, z1] = tf.split(q1, 4, -1).map(t => t.squeeze([-1]));
    const [w2, x2, y2, z2] = tf.split(q2, 4, -1).map(t => t.squeeze([-1]));

    // Hamilton product formulas
    const w = w1.mul(w2).sub(x1.mul(x2)).sub(y1.mul(y2)).sub(z1.mul(z2));
    const x = w1.mul(x2).add(x1.mul(w2)).add(y1.mul(z2)).sub(z1.mul(y2));
    const y = w1.mul(y2).sub(x1.mul(z2)).add(y1.mul(w2)).add(z1.mul(x2));
    const z = w1.mul(z2).add(x1.mul(y2)).sub(y1.mul(x2)).add(z1.mul(w2));

    return tf.stack([w, x, y, z], -1);
  });
}

/**
 * Quaternion conjugate
 */
function quaternionConj(q) {
  if (!tf) throw new Error('TensorFlow.js not available');

  return tf.tidy(() => {
    // Keep the scalar part, negate the vector part (last axis layout: [w, x, y, z])
    const [w, xyz] = tf.split(q, [1, 3], -1);
    return tf.concat([w, xyz.neg()], -1);
  });
}

/**
 * Quaternion norm squared
 */
function quaternionNorm2(q) {
  if (!tf) throw new Error('TensorFlow.js not available');
  return q.square().sum(-1);
}

/**
 * Normalize quaternion to unit norm
 */
function quaternionNormalize(q) {
  if (!tf) throw new Error('TensorFlow.js not available');

  return tf.tidy(() => {
    const norm = quaternionNorm2(q).sqrt().expandDims(-1);
    return q.div(norm.add(1e-8));
  });
}
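
// Illustrative sketch (not executed by the library): composing two batches of
// quaternions with the helpers above. Assumes TensorFlow.js loaded
// successfully; the tensor values are arbitrary.
//
//   const q1 = tf.tensor2d([[1, 0, 0, 0], [0, 1, 0, 0]]); // [batch=2, 4]
//   const q2 = tf.tensor2d([[0, 1, 0, 0], [0, 0, 1, 0]]);
//   const product = quaternionMul(q1, q2);       // Hamilton product, [2, 4]
//   const unit = quaternionNormalize(product);   // unit quaternions, [2, 4]
//   unit.print();
//   // Note: quaternionMul(q1, q2) and quaternionMul(q2, q1) generally differ,
//   // which is what makes quaternion composition order-sensitive.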

// ============================================================================
// CUSTOM LAYERS
// ============================================================================

// Base class for the custom layers below. Falls back to a plain class when
// TensorFlow.js is unavailable so that requiring this module does not throw
// at load time; the layers themselves still require tf to be usable.
const LayerBase = tf ? tf.layers.Layer : class {};

/**
 * QuaternionDense Layer
 * Projects input to quaternion space and applies Hamilton product mixing
 */
class QuaternionDense extends LayerBase {
  static className = 'QuaternionDense';

  constructor(config) {
    super(config);
    this.units = config.units; // Output quaternions (units * 4 total outputs)
    this.useBias = config.useBias !== false;
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    const inputDim = inputShape[inputShape.length - 1];

    // Weight matrix maps to quaternion space
    this.kernel = this.addWeight(
      'kernel',
      [inputDim, this.units * 4],
      'float32',
      tf.initializers.glorotUniform()
    );

    if (this.useBias) {
      this.bias = this.addWeight(
        'bias',
        [this.units * 4],
        'float32',
        tf.initializers.zeros()
      );
    }
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      // Linear projection
      let output = tf.matMul(x, this.kernel.read());

      if (this.useBias) {
        output = output.add(this.bias.read());
      }

      // Reshape to [..., units, 4]
      const shape = output.shape.slice(0, -1).concat([this.units, 4]);
      output = output.reshape(shape);

      // Normalize each quaternion
      return quaternionNormalize(output);
    });
  }

  computeOutputShape(inputShape) {
    return inputShape.slice(0, -1).concat([this.units, 4]);
  }

  getConfig() {
    const config = super.getConfig();
    config.units = this.units;
    config.useBias = this.useBias;
    return config;
  }
}

tf && tf.serialization.registerClass(QuaternionDense);
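
// Illustrative sketch: projecting a batch of feature vectors into unit
// quaternions with QuaternionDense. Assumes tf is available; shapes and
// values are arbitrary.
//
//   const qd = new QuaternionDense({ units: 8 });
//   const features = tf.randomNormal([2, 16]);   // [batch, inputDim]
//   const quats = qd.apply(features);            // [batch, 8, 4], unit norm
//   console.log(quats.shape);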

/**
 * Sparse Prime Embedding Layer
 * Maps token IDs to sparse prime activations with quaternion orientations
 */
class SparsePrimeEmbedding extends LayerBase {
  static className = 'SparsePrimeEmbedding';

  constructor(config) {
    super(config);
    this.numPrimes = config.numPrimes || 4096;
    this.k = config.k || 32; // Number of active primes per token
    this.embeddingDim = config.embeddingDim || 64;
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    // Prime selection weights (learned)
    this.primeWeights = this.addWeight(
      'primeWeights',
      [this.embeddingDim, this.numPrimes],
      'float32',
      tf.initializers.glorotUniform()
    );

    // Quaternion orientation per prime
    this.quaternionWeights = this.addWeight(
      'quaternionWeights',
      [this.numPrimes, 4],
      'float32',
      tf.initializers.randomNormal({ mean: 0, stddev: 0.1 })
    );

    // Phase bias per prime
    this.phaseBias = this.addWeight(
      'phaseBias',
      [this.numPrimes],
      'float32',
      tf.initializers.zeros()
    );
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      // x: [batch, seq, embeddingDim]
      // Compute prime logits
      const logits = tf.matMul(x, this.primeWeights.read()); // [batch, seq, numPrimes]

      // Top-k selection (soft via gumbel-softmax or hard via topk)
      const { values: topkValues, indices: topkIndices } = tf.topk(logits, this.k);

      // Normalize to get amplitudes
      const amplitudes = tf.softmax(topkValues, -1); // [batch, seq, k]

      // Gather quaternions for selected primes
      const quaternions = tf.gather(this.quaternionWeights.read(), topkIndices); // [batch, seq, k, 4]

      // Gather phases
      const phases = tf.gather(this.phaseBias.read(), topkIndices); // [batch, seq, k]

      return {
        indices: topkIndices,     // [batch, seq, k]
        amplitudes: amplitudes,   // [batch, seq, k]
        quaternions: quaternions, // [batch, seq, k, 4]
        phases: phases            // [batch, seq, k]
      };
    });
  }

  computeOutputShape(inputShape) {
    const batchShape = inputShape.slice(0, -1);
    return {
      indices: batchShape.concat([this.k]),
      amplitudes: batchShape.concat([this.k]),
      quaternions: batchShape.concat([this.k, 4]),
      phases: batchShape.concat([this.k])
    };
  }

  getConfig() {
    const config = super.getConfig();
    config.numPrimes = this.numPrimes;
    config.k = this.k;
    config.embeddingDim = this.embeddingDim;
    return config;
  }
}

tf && tf.serialization.registerClass(SparsePrimeEmbedding);
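
// Illustrative sketch: sparse prime activations for a batch of token feature
// vectors. Assumes tf is available; call() is used directly because the layer
// returns a structured object, and it is not wired into the model builders
// further below.
//
//   const spe = new SparsePrimeEmbedding({ numPrimes: 512, k: 8, embeddingDim: 64 });
//   const tokenFeatures = tf.randomNormal([2, 10, 64]);    // [batch, seq, embeddingDim]
//   spe.build(tokenFeatures.shape);
//   const { indices, amplitudes, quaternions, phases } = spe.call(tokenFeatures, {});
//   console.log(indices.shape, quaternions.shape);          // [2,10,8], [2,10,8,4]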

/**
 * Resonant Attention Layer
 * Computes attention using: α·Jaccard + β·QuaternionAlign + γ·PhaseCoherence
 */
class ResonantAttention extends LayerBase {
  static className = 'ResonantAttention';

  constructor(config) {
    super(config);
    this.numHeads = config.numHeads || 8;
    this.keyDim = config.keyDim || 64;
    this.dropout = config.dropout || 0.0;

    // Mixing coefficients (learnable)
    this.alpha = config.alpha || 0.33;
    this.beta = config.beta || 0.33;
    this.gamma = config.gamma || 0.34;
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    const inputDim = inputShape[inputShape.length - 1];

    // Query, Key, Value projections
    this.queryWeight = this.addWeight(
      'queryWeight',
      [inputDim, this.numHeads * this.keyDim],
      'float32',
      tf.initializers.glorotUniform()
    );

    this.keyWeight = this.addWeight(
      'keyWeight',
      [inputDim, this.numHeads * this.keyDim],
      'float32',
      tf.initializers.glorotUniform()
    );

    this.valueWeight = this.addWeight(
      'valueWeight',
      [inputDim, this.numHeads * this.keyDim],
      'float32',
      tf.initializers.glorotUniform()
    );

    this.outputWeight = this.addWeight(
      'outputWeight',
      [this.numHeads * this.keyDim, inputDim],
      'float32',
      tf.initializers.glorotUniform()
    );

    // Learnable mixing coefficients
    this.mixingCoeffs = this.addWeight(
      'mixingCoeffs',
      [3],
      'float32',
      tf.initializers.constant({ value: 0.33 })
    );
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      const [batch, seq, dim] = x.shape;

      // Project to Q, K, V
      let q = tf.matMul(x, this.queryWeight.read());
      let k = tf.matMul(x, this.keyWeight.read());
      let v = tf.matMul(x, this.valueWeight.read());

      // Reshape for multi-head: [batch, seq, heads, keyDim]
      q = q.reshape([batch, seq, this.numHeads, this.keyDim]);
      k = k.reshape([batch, seq, this.numHeads, this.keyDim]);
      v = v.reshape([batch, seq, this.numHeads, this.keyDim]);

      // Transpose to [batch, heads, seq, keyDim]
      q = q.transpose([0, 2, 1, 3]);
      k = k.transpose([0, 2, 1, 3]);
      v = v.transpose([0, 2, 1, 3]);

      // Standard scaled dot-product attention (as baseline)
      // In the full implementation, this would use Jaccard + Quaternion + Phase
      let scores = tf.matMul(q, k.transpose([0, 1, 3, 2]));
      scores = scores.div(tf.scalar(Math.sqrt(this.keyDim)));

      // Softmax attention weights
      const attnWeights = tf.softmax(scores, -1);

      // Apply attention to values
      let output = tf.matMul(attnWeights, v);

      // Transpose back and reshape
      output = output.transpose([0, 2, 1, 3]);
      output = output.reshape([batch, seq, this.numHeads * this.keyDim]);

      // Final projection
      output = tf.matMul(output, this.outputWeight.read());

      return output;
    });
  }

  computeOutputShape(inputShape) {
    return inputShape;
  }

  getConfig() {
    const config = super.getConfig();
    config.numHeads = this.numHeads;
    config.keyDim = this.keyDim;
    config.dropout = this.dropout;
    config.alpha = this.alpha;
    config.beta = this.beta;
    config.gamma = this.gamma;
    return config;
  }
}

tf && tf.serialization.registerClass(ResonantAttention);
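
// Illustrative sketch: running the attention layer on a small batch. The
// current call() computes standard scaled dot-product attention; the learned
// mixingCoeffs for the Jaccard/Quaternion/Phase terms are allocated but not
// yet used. Assumes tf is available.
//
//   const attn = new ResonantAttention({ numHeads: 4, keyDim: 16 });
//   const seqBatch = tf.randomNormal([2, 10, 64]);   // [batch, seq, dim]
//   const attended = attn.apply(seqBatch);           // [2, 10, 64]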

/**
 * Hamilton Compose Layer
 * Order-sensitive composition using Hamilton product
 */
class HamiltonCompose extends LayerBase {
  static className = 'HamiltonCompose';

  constructor(config) {
    super(config);
    this.units = config.units || 64;
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    // Input is assumed to be [..., units, 4] (quaternion format)
    this.combineWeight = this.addWeight(
      'combineWeight',
      [this.units * 4, this.units * 4],
      'float32',
      tf.initializers.glorotUniform()
    );
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      // Expect two inputs: [q1, q2], each of shape [batch, ..., units, 4]
      let [q1, q2] = inputs;

      // Flatten quaternions
      const shape = q1.shape;
      const flatShape = shape.slice(0, -2).concat([this.units * 4]);

      q1 = q1.reshape(flatShape);
      q2 = q2.reshape(flatShape);

      // Simplified mixing: learned combination of the element-wise product
      // (a true Hamilton product via quaternionMul would make this order-sensitive)
      const combined = tf.matMul(q1.mul(q2), this.combineWeight.read());

      // Reshape back to quaternion format
      const outShape = shape;
      return quaternionNormalize(combined.reshape(outShape));
    });
  }

  computeOutputShape(inputShape) {
    return inputShape[0]; // Same as first input
  }

  getConfig() {
    const config = super.getConfig();
    config.units = this.units;
    return config;
  }
}

tf && tf.serialization.registerClass(HamiltonCompose);
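
// Illustrative sketch: composing two quaternion-valued representations.
// Assumes tf is available. Note that the simplified mixing above uses an
// element-wise product, so swapping the inputs gives the same result; routing
// through quaternionMul instead would restore order sensitivity.
//
//   const compose = new HamiltonCompose({ units: 8 });
//   const qa = quaternionNormalize(tf.randomNormal([2, 8, 4]));
//   const qb = quaternionNormalize(tf.randomNormal([2, 8, 4]));
//   const composed = compose.apply([qa, qb]);   // [2, 8, 4]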

/**
 * Coherence Gating Layer
 * Computes a coherence score and gates the output
 */
class CoherenceGating extends LayerBase {
  static className = 'CoherenceGating';

  constructor(config) {
    super(config);
    this.threshold = config.threshold || 0.8;
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    const dim = inputShape[inputShape.length - 1];

    // Coherence computation weights
    this.coherenceWeight = this.addWeight(
      'coherenceWeight',
      [dim, 1],
      'float32',
      tf.initializers.glorotUniform()
    );
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      // Compute coherence score
      const coherence = tf.sigmoid(tf.matMul(x, this.coherenceWeight.read()));

      // Gate output based on coherence
      const gate = tf.sigmoid(coherence.sub(this.threshold).mul(10));

      return {
        output: x.mul(gate),
        coherence: coherence.squeeze([-1]),
        gate: gate.squeeze([-1])
      };
    });
  }

  computeOutputShape(inputShape) {
    return {
      output: inputShape,
      coherence: inputShape.slice(0, -1),
      gate: inputShape.slice(0, -1)
    };
  }

  getConfig() {
    const config = super.getConfig();
    config.threshold = this.threshold;
    return config;
  }
}

tf && tf.serialization.registerClass(CoherenceGating);

/**
 * Entropy Collapse Layer
 * VQ-style collapse to 64 attractors with entropy regularization
 */
class EntropyCollapse extends LayerBase {
  static className = 'EntropyCollapse';

  constructor(config) {
    super(config);
    this.numAttractors = config.numAttractors || 64;
    this.targetEntropy = config.targetEntropy || 5.99;
    this.temperature = config.temperature || 1.0;
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    const dim = inputShape[inputShape.length - 1];

    // Attractor codebook
    this.codebook = this.addWeight(
      'codebook',
      [this.numAttractors, dim],
      'float32',
      tf.initializers.glorotUniform()
    );
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      const training = kwargs.training || false;

      // Compute distances to all attractors
      // x: [batch, seq, dim], codebook: [numAttractors, dim]
      const xExpanded = x.expandDims(-2); // [batch, seq, 1, dim]
      const codebookExpanded = this.codebook.read().expandDims(0).expandDims(0); // [1, 1, numAttractors, dim]

      const distances = xExpanded.sub(codebookExpanded).square().sum(-1); // [batch, seq, numAttractors]

      // Convert to similarities (negative distance)
      const logits = distances.neg().div(this.temperature);

      // Soft assignment probabilities
      const probs = tf.softmax(logits, -1);

      // Compute entropy
      const entropy = probs.mul(probs.add(1e-10).log()).sum(-1).neg();

      // Entropy loss (toward target)
      const entropyLoss = entropy.sub(this.targetEntropy).square().mean();

      if (training) {
        // Soft assignment during training
        const output = tf.matMul(probs, this.codebook.read());
        return { output, probs, entropy, entropyLoss };
      } else {
        // Hard assignment during inference
        const indices = logits.argMax(-1);
        const output = tf.gather(this.codebook.read(), indices.flatten()).reshape(x.shape);
        return { output, indices, entropy };
      }
    });
  }

  computeOutputShape(inputShape) {
    return {
      output: inputShape,
      probs: inputShape.slice(0, -1).concat([this.numAttractors]),
      entropy: inputShape.slice(0, -1)
    };
  }

  getConfig() {
    const config = super.getConfig();
    config.numAttractors = this.numAttractors;
    config.targetEntropy = this.targetEntropy;
    config.temperature = this.temperature;
    return config;
  }
}

tf && tf.serialization.registerClass(EntropyCollapse);
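
// Illustrative sketch: collapsing a sequence of vectors onto the attractor
// codebook. Assumes tf is available and a TF.js version whose matMul
// broadcasts batch dimensions (the layer code above relies on this as well);
// call() is used directly to reach the structured return value.
//
//   const collapse = new EntropyCollapse({ numAttractors: 16, temperature: 0.5 });
//   const states = tf.randomNormal([2, 10, 32]);               // [batch, seq, dim]
//   collapse.build(states.shape);
//   const soft = collapse.call(states, { training: true });    // soft codebook mixture
//   const hard = collapse.call(states, { training: false });   // nearest-attractor indices
//   console.log(soft.entropyLoss.dataSync()[0], hard.indices.shape);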

/**
 * Resonance Operator Layer
 * Applies R̂(n)|p⟩ = e^(2πi log_p(n))|p⟩ phase rotation
 */
class ResonanceOperator extends LayerBase {
  static className = 'ResonanceOperator';

  constructor(config) {
    super(config);
    this._built = false;
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    const dim = inputShape[inputShape.length - 1];

    // Learnable n parameter for the rotation
    this.rotationParam = this.addWeight(
      'rotationParam',
      [dim],
      'float32',
      tf.initializers.ones()
    );
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      // Interpret x as complex amplitudes (pairs of real/imag)
      const shape = x.shape;
      const dim = shape[shape.length - 1];
      const halfDim = Math.floor(dim / 2);

      // Split into real and imaginary parts along the last axis
      const [real, imag] = tf.split(x, 2, -1);

      // Get rotation parameters (broadcast to match shape)
      const n = this.rotationParam.read().slice([0], [halfDim]).abs().add(1);

      // Compute phases: 2π * log(n) for each "prime dimension"
      const phases = n.log().mul(2 * Math.PI);

      // Apply rotation: (r + i*m) * e^(i*phase) = (r*cos - m*sin) + i*(r*sin + m*cos)
      const cos = phases.cos();
      const sin = phases.sin();

      const newReal = real.mul(cos).sub(imag.mul(sin));
      const newImag = real.mul(sin).add(imag.mul(cos));

      return tf.concat([newReal, newImag], -1);
    });
  }

  computeOutputShape(inputShape) {
    return inputShape;
  }

  getConfig() {
    return super.getConfig();
  }
}

tf && tf.serialization.registerClass(ResonanceOperator);

// ============================================================================
// RESOFORMER BLOCK
// ============================================================================

/**
 * Complete ResoFormer Block
 * Combines all components: Attention → FFN → Coherence Gate → Optional Collapse
 */
class ResoFormerBlock extends LayerBase {
  static className = 'ResoFormerBlock';

  constructor(config) {
    super(config);
    this.dim = config.dim || 256;
    this.numHeads = config.numHeads || 8;
    this.ffnDim = config.ffnDim || 1024;
    this.dropoutRate = config.dropout || 0.1;
    this.useCollapse = config.useCollapse || false;
    this._built = false;

    // Unique prefix for this block's sub-layer names
    const prefix = this.name || 'resoformer_block';

    // Create sub-layers in the constructor with unique names
    this.layerNorm1 = tf.layers.layerNormalization({ axis: -1, name: `${prefix}_ln1` });
    this.layerNorm2 = tf.layers.layerNormalization({ axis: -1, name: `${prefix}_ln2` });

    this.attention = new ResonantAttention({
      numHeads: this.numHeads,
      keyDim: Math.floor(this.dim / this.numHeads),
      dropout: this.dropoutRate,
      name: `${prefix}_attn`
    });

    this.resonanceOp = new ResonanceOperator({ name: `${prefix}_resop` });

    this.ffn1 = tf.layers.dense({ units: this.ffnDim, activation: 'gelu', name: `${prefix}_ffn1` });
    this.ffn2 = tf.layers.dense({ units: this.dim, name: `${prefix}_ffn2` });

    this.coherenceGate = new CoherenceGating({ threshold: 0.7, name: `${prefix}_cgate` });

    if (this.useCollapse) {
      this.collapse = new EntropyCollapse({ numAttractors: 64, name: `${prefix}_collapse` });
    }

    this.dropoutLayer = tf.layers.dropout({ rate: this.dropoutRate, name: `${prefix}_dropout` });
  }

  build(inputShape) {
    if (this._built) return;
    this._built = true;

    // Build sub-layers with the input shape
    this.layerNorm1.build(inputShape);
    this.layerNorm2.build(inputShape);
    this.attention.build(inputShape);
    this.resonanceOp.build(inputShape);
    this.ffn1.build(inputShape);
    this.ffn2.build([...inputShape.slice(0, -1), this.ffnDim]);
    this.coherenceGate.build(inputShape);
    if (this.useCollapse) {
      this.collapse.build(inputShape);
    }
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      let x = inputs;
      if (Array.isArray(x)) x = x[0];

      const training = kwargs.training || false;

      // Pre-norm attention
      let residual = x;
      x = this.layerNorm1.apply(x);
      x = this.attention.apply(x, { training });
      x = this.dropoutLayer.apply(x, { training });
      x = x.add(residual);

      // Apply resonance operator
      x = this.resonanceOp.apply(x);

      // Pre-norm FFN
      residual = x;
      x = this.layerNorm2.apply(x);
      x = this.ffn1.apply(x);
      x = this.ffn2.apply(x);
      x = this.dropoutLayer.apply(x, { training });
      x = x.add(residual);

      // Coherence gating
      const gated = this.coherenceGate.apply(x);
      x = gated.output;

      // Optional collapse
      if (this.useCollapse) {
        const collapsed = this.collapse.apply(x, { training });
        x = collapsed.output;
      }

      return x;
    });
  }

  computeOutputShape(inputShape) {
    return inputShape;
  }

  getConfig() {
    const config = super.getConfig();
    config.dim = this.dim;
    config.numHeads = this.numHeads;
    config.ffnDim = this.ffnDim;
    config.dropout = this.dropoutRate;
    config.useCollapse = this.useCollapse;
    return config;
  }
}

tf && tf.serialization.registerClass(ResoFormerBlock);

// ============================================================================
// RESOFORMER MODEL
// ============================================================================

/**
 * Create a complete ResoFormer model
 */
function createResoFormerModel(config = {}) {
  if (!tf) throw new Error('TensorFlow.js not available');

  const {
    vocabSize = 10000,
    seqLen = 512,
    dim = 256,
    numLayers = 6,
    numHeads = 8,
    ffnDim = 1024,
    numPrimes = 4096,
    k = 32,
    dropout = 0.1
  } = config;

  // Input layer
  const input = tf.input({ shape: [seqLen], dtype: 'int32', name: 'input_ids' });

  // Token embedding
  const embedding = tf.layers.embedding({
    inputDim: vocabSize,
    outputDim: dim,
    name: 'token_embedding'
  }).apply(input);

  // Stack of ResoFormer blocks
  let x = embedding;
  for (let i = 0; i < numLayers; i++) {
    const block = new ResoFormerBlock({
      dim,
      numHeads,
      ffnDim,
      dropout,
      useCollapse: i === numLayers - 1, // Collapse only at the last layer
      name: `resoformer_block_${i}`
    });
    x = block.apply(x);
  }

  // Output projection for language modeling
  const lmHead = tf.layers.dense({
    units: vocabSize,
    name: 'lm_head'
  }).apply(x);

  // Create model
  const model = tf.model({
    inputs: input,
    outputs: lmHead,
    name: 'ResoFormer'
  });

  return model;
}
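
// Illustrative sketch: building and compiling the language-modeling variant.
// Hyperparameters are arbitrary; assumes tf loaded successfully and that the
// runtime supports the 'gelu' activation used by the blocks above.
//
//   const model = createResoFormerModel({ vocabSize: 1000, seqLen: 64, dim: 128, numLayers: 2 });
//   model.compile({ optimizer: createOptimizer(), loss: 'sparseCategoricalCrossentropy' });
//   model.summary();
//   const logits = model.predict(tf.zeros([1, 64], 'int32'));   // [1, 64, 1000]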

/**
 * Create ResoFormer for classification
 */
function createResoFormerClassifier(config = {}) {
  if (!tf) throw new Error('TensorFlow.js not available');

  const {
    vocabSize = 10000,
    seqLen = 512,
    dim = 256,
    numLayers = 6,
    numHeads = 8,
    ffnDim = 1024,
    numClasses = 10,
    dropout = 0.1
  } = config;

  // Input layer
  const input = tf.input({ shape: [seqLen], dtype: 'int32', name: 'input_ids' });

  // Token embedding
  let x = tf.layers.embedding({
    inputDim: vocabSize,
    outputDim: dim,
    name: 'token_embedding'
  }).apply(input);

  // Stack of ResoFormer blocks
  for (let i = 0; i < numLayers; i++) {
    const block = new ResoFormerBlock({
      dim,
      numHeads,
      ffnDim,
      dropout,
      useCollapse: i === numLayers - 1,
      name: `resoformer_block_${i}`
    });
    x = block.apply(x);
  }

  // Global average pooling
  x = tf.layers.globalAveragePooling1d().apply(x);

  // Classification head
  const output = tf.layers.dense({
    units: numClasses,
    activation: 'softmax',
    name: 'classifier'
  }).apply(x);

  return tf.model({
    inputs: input,
    outputs: output,
    name: 'ResoFormerClassifier'
  });
}

/**
 * Create ResoFormer for embeddings/similarity
 */
function createResoFormerEmbedder(config = {}) {
  if (!tf) throw new Error('TensorFlow.js not available');

  const {
    vocabSize = 10000,
    seqLen = 512,
    dim = 256,
    numLayers = 4,
    numHeads = 8,
    ffnDim = 1024,
    embeddingDim = 128,
    dropout = 0.1
  } = config;

  // Input layer
  const input = tf.input({ shape: [seqLen], dtype: 'int32', name: 'input_ids' });

  // Token embedding
  let x = tf.layers.embedding({
    inputDim: vocabSize,
    outputDim: dim,
    name: 'token_embedding'
  }).apply(input);

  // Stack of ResoFormer blocks
  for (let i = 0; i < numLayers; i++) {
    const block = new ResoFormerBlock({
      dim,
      numHeads,
      ffnDim,
      dropout,
      useCollapse: false,
      name: `resoformer_block_${i}`
    });
    x = block.apply(x);
  }

  // Pool to a single vector
  x = tf.layers.globalAveragePooling1d().apply(x);

  // Project to embedding space
  const embedding = tf.layers.dense({
    units: embeddingDim,
    name: 'embedding_projection'
  }).apply(x);

  // Normalize the embedding (layer normalization here; true L2 normalization
  // for cosine similarity would require a custom normalization layer)
  const normalized = tf.layers.layerNormalization().apply(embedding);

  return tf.model({
    inputs: input,
    outputs: normalized,
    name: 'ResoFormerEmbedder'
  });
}

// ============================================================================
// TRAINING UTILITIES
// ============================================================================

/**
 * Custom loss with entropy regularization
 */
function resoFormerLoss(yTrue, yPred, entropyWeight = 0.1, targetEntropy = 5.99) {
  return tf.tidy(() => {
    // Standard cross-entropy loss
    const ceLoss = tf.losses.softmaxCrossEntropy(yTrue, yPred);

    // Entropy of predictions (encourage diversity)
    const probs = tf.softmax(yPred, -1);
    const entropy = probs.mul(probs.add(1e-10).log()).sum(-1).neg().mean();

    // Regularize toward the target entropy
    const entropyLoss = entropy.sub(targetEntropy).square();

    return ceLoss.add(entropyLoss.mul(entropyWeight));
  });
}

/**
 * Create optimizer
 */
function createOptimizer(config = {}) {
  if (!tf) throw new Error('TensorFlow.js not available');

  const {
    learningRate = 1e-4,
    warmupSteps = 1000,
    decaySteps = 10000,
    decayRate = 0.1
  } = config;

  // Plain Adam for now; warmup/decay scheduling and weight decay (AdamW) are
  // not applied here, so warmupSteps/decaySteps/decayRate are currently unused.
  return tf.train.adam(learningRate, 0.9, 0.999, 1e-8);
}

/**
 * Training step
 */
async function trainStep(model, optimizer, xBatch, yBatch) {
  return tf.tidy(() => {
    const { value: loss, grads } = tf.variableGrads(() => {
      const predictions = model.apply(xBatch, { training: true });
      return tf.losses.softmaxCrossEntropy(yBatch, predictions);
    });

    optimizer.applyGradients(grads);
    return loss;
  });
}
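
// Illustrative sketch of a training loop around trainStep. The toy batches
// are placeholders; a real pipeline would feed tokenized text and one-hot
// targets matching the model's vocabulary.
//
//   async function demoTraining() {
//     const model = createResoFormerModel({ vocabSize: 1000, seqLen: 32, dim: 64, numLayers: 1 });
//     const optimizer = createOptimizer({ learningRate: 1e-4 });
//     for (let step = 0; step < 10; step++) {
//       const xBatch = tf.randomUniform([4, 32], 0, 1000, 'int32');
//       const targets = tf.randomUniform([4, 32], 0, 1000, 'int32');
//       const yBatch = tf.oneHot(targets, 1000);                 // [4, 32, 1000]
//       const loss = await trainStep(model, optimizer, xBatch, yBatch);
//       console.log(`step ${step}: loss = ${loss.dataSync()[0]}`);
//       tf.dispose([xBatch, targets, yBatch, loss]);
//     }
//   }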

// ============================================================================
// EXPORTS
// ============================================================================

module.exports = {
  // Utility functions
  getPrimeLookup,
  getPrimeLogFrequencies,

  // Quaternion operations
  quaternionMul,
  quaternionConj,
  quaternionNorm2,
  quaternionNormalize,

  // Custom layers
  QuaternionDense,
  SparsePrimeEmbedding,
  ResonantAttention,
  HamiltonCompose,
  CoherenceGating,
  EntropyCollapse,
  ResonanceOperator,
  ResoFormerBlock,

  // Model builders
  createResoFormerModel,
  createResoFormerClassifier,
  createResoFormerEmbedder,

  // Training utilities
  resoFormerLoss,
  createOptimizer,
  trainStep,

  // TensorFlow reference (for users who need it)
  tf
};