@sparkleideas/neural 3.5.2-patch.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/README.md +260 -0
  2. package/__tests__/README.md +235 -0
  3. package/__tests__/algorithms.test.ts +582 -0
  4. package/__tests__/patterns.test.ts +549 -0
  5. package/__tests__/sona.test.ts +445 -0
  6. package/docs/SONA_INTEGRATION.md +460 -0
  7. package/docs/SONA_QUICKSTART.md +168 -0
  8. package/examples/sona-usage.ts +318 -0
  9. package/package.json +23 -0
  10. package/src/algorithms/a2c.d.ts +86 -0
  11. package/src/algorithms/a2c.d.ts.map +1 -0
  12. package/src/algorithms/a2c.js +361 -0
  13. package/src/algorithms/a2c.js.map +1 -0
  14. package/src/algorithms/a2c.ts +478 -0
  15. package/src/algorithms/curiosity.d.ts +82 -0
  16. package/src/algorithms/curiosity.d.ts.map +1 -0
  17. package/src/algorithms/curiosity.js +392 -0
  18. package/src/algorithms/curiosity.js.map +1 -0
  19. package/src/algorithms/curiosity.ts +509 -0
  20. package/src/algorithms/decision-transformer.d.ts +82 -0
  21. package/src/algorithms/decision-transformer.d.ts.map +1 -0
  22. package/src/algorithms/decision-transformer.js +415 -0
  23. package/src/algorithms/decision-transformer.js.map +1 -0
  24. package/src/algorithms/decision-transformer.ts +521 -0
  25. package/src/algorithms/dqn.d.ts +72 -0
  26. package/src/algorithms/dqn.d.ts.map +1 -0
  27. package/src/algorithms/dqn.js +303 -0
  28. package/src/algorithms/dqn.js.map +1 -0
  29. package/src/algorithms/dqn.ts +382 -0
  30. package/src/algorithms/index.d.ts +32 -0
  31. package/src/algorithms/index.d.ts.map +1 -0
  32. package/src/algorithms/index.js +74 -0
  33. package/src/algorithms/index.js.map +1 -0
  34. package/src/algorithms/index.ts +122 -0
  35. package/src/algorithms/ppo.d.ts +72 -0
  36. package/src/algorithms/ppo.d.ts.map +1 -0
  37. package/src/algorithms/ppo.js +331 -0
  38. package/src/algorithms/ppo.js.map +1 -0
  39. package/src/algorithms/ppo.ts +429 -0
  40. package/src/algorithms/q-learning.d.ts +77 -0
  41. package/src/algorithms/q-learning.d.ts.map +1 -0
  42. package/src/algorithms/q-learning.js +259 -0
  43. package/src/algorithms/q-learning.js.map +1 -0
  44. package/src/algorithms/q-learning.ts +333 -0
  45. package/src/algorithms/sarsa.d.ts +82 -0
  46. package/src/algorithms/sarsa.d.ts.map +1 -0
  47. package/src/algorithms/sarsa.js +297 -0
  48. package/src/algorithms/sarsa.js.map +1 -0
  49. package/src/algorithms/sarsa.ts +383 -0
  50. package/src/algorithms/tmp.json +0 -0
  51. package/src/application/index.ts +11 -0
  52. package/src/application/services/neural-application-service.ts +217 -0
  53. package/src/domain/entities/pattern.ts +169 -0
  54. package/src/domain/index.ts +18 -0
  55. package/src/domain/services/learning-service.ts +256 -0
  56. package/src/index.d.ts +118 -0
  57. package/src/index.d.ts.map +1 -0
  58. package/src/index.js +201 -0
  59. package/src/index.js.map +1 -0
  60. package/src/index.ts +363 -0
  61. package/src/modes/balanced.d.ts +60 -0
  62. package/src/modes/balanced.d.ts.map +1 -0
  63. package/src/modes/balanced.js +234 -0
  64. package/src/modes/balanced.js.map +1 -0
  65. package/src/modes/balanced.ts +299 -0
  66. package/src/modes/base.ts +163 -0
  67. package/src/modes/batch.d.ts +82 -0
  68. package/src/modes/batch.d.ts.map +1 -0
  69. package/src/modes/batch.js +316 -0
  70. package/src/modes/batch.js.map +1 -0
  71. package/src/modes/batch.ts +434 -0
  72. package/src/modes/edge.d.ts +85 -0
  73. package/src/modes/edge.d.ts.map +1 -0
  74. package/src/modes/edge.js +310 -0
  75. package/src/modes/edge.js.map +1 -0
  76. package/src/modes/edge.ts +409 -0
  77. package/src/modes/index.d.ts +55 -0
  78. package/src/modes/index.d.ts.map +1 -0
  79. package/src/modes/index.js +83 -0
  80. package/src/modes/index.js.map +1 -0
  81. package/src/modes/index.ts +16 -0
  82. package/src/modes/real-time.d.ts +58 -0
  83. package/src/modes/real-time.d.ts.map +1 -0
  84. package/src/modes/real-time.js +196 -0
  85. package/src/modes/real-time.js.map +1 -0
  86. package/src/modes/real-time.ts +257 -0
  87. package/src/modes/research.d.ts +79 -0
  88. package/src/modes/research.d.ts.map +1 -0
  89. package/src/modes/research.js +389 -0
  90. package/src/modes/research.js.map +1 -0
  91. package/src/modes/research.ts +486 -0
  92. package/src/modes/tmp.json +0 -0
  93. package/src/pattern-learner.d.ts +117 -0
  94. package/src/pattern-learner.d.ts.map +1 -0
  95. package/src/pattern-learner.js +603 -0
  96. package/src/pattern-learner.js.map +1 -0
  97. package/src/pattern-learner.ts +757 -0
  98. package/src/reasoning-bank.d.ts +259 -0
  99. package/src/reasoning-bank.d.ts.map +1 -0
  100. package/src/reasoning-bank.js +993 -0
  101. package/src/reasoning-bank.js.map +1 -0
  102. package/src/reasoning-bank.ts +1279 -0
  103. package/src/reasoningbank-adapter.ts +697 -0
  104. package/src/sona-integration.d.ts +168 -0
  105. package/src/sona-integration.d.ts.map +1 -0
  106. package/src/sona-integration.js +316 -0
  107. package/src/sona-integration.js.map +1 -0
  108. package/src/sona-integration.ts +432 -0
  109. package/src/sona-manager.d.ts +147 -0
  110. package/src/sona-manager.d.ts.map +1 -0
  111. package/src/sona-manager.js +695 -0
  112. package/src/sona-manager.js.map +1 -0
  113. package/src/sona-manager.ts +835 -0
  114. package/src/tmp.json +0 -0
  115. package/src/types.d.ts +431 -0
  116. package/src/types.d.ts.map +1 -0
  117. package/src/types.js +11 -0
  118. package/src/types.js.map +1 -0
  119. package/src/types.ts +590 -0
  120. package/tmp.json +0 -0
  121. package/tsconfig.json +9 -0
  122. package/vitest.config.ts +19 -0
@@ -0,0 +1,509 @@
1
+ /**
2
+ * Curiosity-Driven Exploration
3
+ *
4
+ * Implements intrinsic motivation for exploration:
5
+ * - Intrinsic Curiosity Module (ICM)
6
+ * - Random Network Distillation (RND)
7
+ * - Forward and inverse dynamics models
8
+ * - Exploration bonus generation
9
+ *
10
+ * Performance Target: <5ms per forward pass
11
+ */
12
+
13
+ import type { CuriosityConfig, Trajectory, TrajectoryStep } from '../types.js';
14
+
15
+ /**
16
+ * Default Curiosity configuration
17
+ */
18
+ export const DEFAULT_CURIOSITY_CONFIG: CuriosityConfig = {
19
+ algorithm: 'curiosity',
20
+ learningRate: 0.0001,
21
+ gamma: 0.99,
22
+ entropyCoef: 0.01,
23
+ valueLossCoef: 0.5,
24
+ maxGradNorm: 0.5,
25
+ epochs: 1,
26
+ miniBatchSize: 32,
27
+ intrinsicCoef: 0.1,
28
+ forwardLR: 0.001,
29
+ inverseLR: 0.001,
30
+ featureDim: 64,
31
+ useRND: false,
32
+ };
33
+
34
+ /**
35
+ * Curiosity-Driven Exploration Module
36
+ */
37
+ export class CuriosityModule {
38
+ private config: CuriosityConfig;
39
+
40
+ // Feature encoder
41
+ private featureEncoder: Float32Array;
42
+
43
+ // Forward dynamics model: predicts next feature from current feature + action
44
+ private forwardModel: Float32Array;
45
+
46
+ // Inverse dynamics model: predicts action from current and next features
47
+ private inverseModel: Float32Array;
48
+
49
+ // RND target and predictor networks
50
+ private rndTarget: Float32Array;
51
+ private rndPredictor: Float32Array;
52
+
53
+ // Optimizer state
54
+ private forwardMomentum: Float32Array;
55
+ private inverseMomentum: Float32Array;
56
+ private rndMomentum: Float32Array;
57
+
58
+ // Dimensions
59
+ private stateDim = 768;
60
+ private numActions = 4;
61
+
62
+ // Running statistics for normalization
63
+ private intrinsicMean = 0;
64
+ private intrinsicVar = 1;
65
+ private updateCount = 0;
66
+
67
+ // Statistics
68
+ private avgForwardLoss = 0;
69
+ private avgInverseLoss = 0;
70
+ private avgIntrinsicReward = 0;
71
+
72
+ constructor(config: Partial<CuriosityConfig> = {}) {
73
+ this.config = { ...DEFAULT_CURIOSITY_CONFIG, ...config };
74
+
75
+ const featureDim = this.config.featureDim;
76
+
77
+ // Initialize feature encoder: state_dim -> feature_dim
78
+ this.featureEncoder = this.initWeight(this.stateDim, featureDim);
79
+
80
+ // Forward model: (feature_dim + num_actions) -> feature_dim
81
+ this.forwardModel = this.initWeight(featureDim + this.numActions, featureDim);
82
+
83
+ // Inverse model: (2 * feature_dim) -> num_actions
84
+ this.inverseModel = this.initWeight(2 * featureDim, this.numActions);
85
+
86
+ // RND networks
87
+ this.rndTarget = this.initWeight(this.stateDim, featureDim);
88
+ this.rndPredictor = this.initWeight(this.stateDim, featureDim);
89
+
90
+ // Momentum buffers
91
+ this.forwardMomentum = new Float32Array(this.forwardModel.length);
92
+ this.inverseMomentum = new Float32Array(this.inverseModel.length);
93
+ this.rndMomentum = new Float32Array(this.rndPredictor.length);
94
+ }
95
+
96
+ /**
97
+ * Compute intrinsic reward for a transition
98
+ */
99
+ computeIntrinsicReward(
100
+ state: Float32Array,
101
+ action: string,
102
+ nextState: Float32Array
103
+ ): number {
104
+ if (this.config.useRND) {
105
+ return this.computeRNDReward(nextState);
106
+ } else {
107
+ return this.computeICMReward(state, action, nextState);
108
+ }
109
+ }
110
+
111
+ /**
112
+ * Compute ICM-based intrinsic reward (prediction error)
113
+ */
114
+ computeICMReward(
115
+ state: Float32Array,
116
+ action: string,
117
+ nextState: Float32Array
118
+ ): number {
119
+ const startTime = performance.now();
120
+
121
+ // Encode states to features
122
+ const stateFeature = this.encodeState(state);
123
+ const nextStateFeature = this.encodeState(nextState);
124
+
125
+ // Predict next state feature
126
+ const actionIdx = this.hashAction(action);
127
+ const predictedFeature = this.forwardPredict(stateFeature, actionIdx);
128
+
129
+ // Compute prediction error as intrinsic reward
130
+ let error = 0;
131
+ for (let i = 0; i < this.config.featureDim; i++) {
132
+ error += (predictedFeature[i] - nextStateFeature[i]) ** 2;
133
+ }
134
+
135
+ // Normalize intrinsic reward
136
+ const intrinsic = this.normalizeIntrinsic(error);
137
+
138
+ const elapsed = performance.now() - startTime;
139
+ if (elapsed > 5) {
140
+ console.warn(`ICM reward exceeded target: ${elapsed.toFixed(2)}ms > 5ms`);
141
+ }
142
+
143
+ return intrinsic * this.config.intrinsicCoef;
144
+ }
145
+
146
+ /**
147
+ * Compute RND-based intrinsic reward
148
+ */
149
+ computeRNDReward(state: Float32Array): number {
150
+ const startTime = performance.now();
151
+
152
+ // Target network output (fixed random features)
153
+ const targetOutput = this.rndForward(state, this.rndTarget);
154
+
155
+ // Predictor network output (trained to match target)
156
+ const predictorOutput = this.rndForward(state, this.rndPredictor);
157
+
158
+ // Compute prediction error
159
+ let error = 0;
160
+ for (let i = 0; i < this.config.featureDim; i++) {
161
+ error += (predictorOutput[i] - targetOutput[i]) ** 2;
162
+ }
163
+
164
+ // Normalize
165
+ const intrinsic = this.normalizeIntrinsic(error);
166
+
167
+ const elapsed = performance.now() - startTime;
168
+ if (elapsed > 5) {
169
+ console.warn(`RND reward exceeded target: ${elapsed.toFixed(2)}ms > 5ms`);
170
+ }
171
+
172
+ return intrinsic * this.config.intrinsicCoef;
173
+ }
174
+
175
+ /**
176
+ * Update curiosity models from trajectory
177
+ */
178
+ update(trajectory: Trajectory): { forwardLoss: number; inverseLoss: number } {
179
+ const startTime = performance.now();
180
+
181
+ if (trajectory.steps.length < 2) {
182
+ return { forwardLoss: 0, inverseLoss: 0 };
183
+ }
184
+
185
+ let totalForwardLoss = 0;
186
+ let totalInverseLoss = 0;
187
+ let count = 0;
188
+
189
+ for (let i = 0; i < trajectory.steps.length - 1; i++) {
190
+ const step = trajectory.steps[i];
191
+ const nextStep = trajectory.steps[i + 1];
192
+
193
+ const stateFeature = this.encodeState(step.stateAfter);
194
+ const nextStateFeature = this.encodeState(nextStep.stateAfter);
195
+ const actionIdx = this.hashAction(step.action);
196
+
197
+ // Update forward model
198
+ const forwardLoss = this.updateForwardModel(stateFeature, actionIdx, nextStateFeature);
199
+ totalForwardLoss += forwardLoss;
200
+
201
+ // Update inverse model
202
+ const inverseLoss = this.updateInverseModel(stateFeature, nextStateFeature, actionIdx);
203
+ totalInverseLoss += inverseLoss;
204
+
205
+ // Update RND predictor if using RND
206
+ if (this.config.useRND) {
207
+ this.updateRNDPredictor(nextStep.stateAfter);
208
+ }
209
+
210
+ count++;
211
+ }
212
+
213
+ this.updateCount++;
214
+ this.avgForwardLoss = count > 0 ? totalForwardLoss / count : 0;
215
+ this.avgInverseLoss = count > 0 ? totalInverseLoss / count : 0;
216
+
217
+ const elapsed = performance.now() - startTime;
218
+ if (elapsed > 10) {
219
+ console.warn(`Curiosity update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
220
+ }
221
+
222
+ return {
223
+ forwardLoss: this.avgForwardLoss,
224
+ inverseLoss: this.avgInverseLoss,
225
+ };
226
+ }
227
+
228
+ /**
229
+ * Add intrinsic rewards to trajectory
230
+ */
231
+ augmentTrajectory(trajectory: Trajectory): Trajectory {
232
+ const augmented = { ...trajectory, steps: [...trajectory.steps] };
233
+
234
+ for (let i = 0; i < augmented.steps.length - 1; i++) {
235
+ const step = augmented.steps[i];
236
+ const nextStep = augmented.steps[i + 1];
237
+
238
+ const intrinsic = this.computeIntrinsicReward(
239
+ step.stateAfter,
240
+ step.action,
241
+ nextStep.stateAfter
242
+ );
243
+
244
+ // Augment reward
245
+ augmented.steps[i] = {
246
+ ...step,
247
+ reward: step.reward + intrinsic,
248
+ };
249
+ }
250
+
251
+ return augmented;
252
+ }
253
+
254
+ /**
255
+ * Get statistics
256
+ */
257
+ getStats(): Record<string, number> {
258
+ return {
259
+ updateCount: this.updateCount,
260
+ avgForwardLoss: this.avgForwardLoss,
261
+ avgInverseLoss: this.avgInverseLoss,
262
+ avgIntrinsicReward: this.avgIntrinsicReward,
263
+ intrinsicMean: this.intrinsicMean,
264
+ intrinsicStd: Math.sqrt(this.intrinsicVar),
265
+ };
266
+ }
267
+
268
+ // ==========================================================================
269
+ // Private Methods
270
+ // ==========================================================================
271
+
272
+ private initWeight(inputDim: number, outputDim: number): Float32Array {
273
+ const weight = new Float32Array(inputDim * outputDim);
274
+ const scale = Math.sqrt(2 / inputDim);
275
+ for (let i = 0; i < weight.length; i++) {
276
+ weight[i] = (Math.random() - 0.5) * scale;
277
+ }
278
+ return weight;
279
+ }
280
+
281
+ private encodeState(state: Float32Array): Float32Array {
282
+ const featureDim = this.config.featureDim;
283
+ const feature = new Float32Array(featureDim);
284
+
285
+ for (let f = 0; f < featureDim; f++) {
286
+ let sum = 0;
287
+ for (let s = 0; s < Math.min(state.length, this.stateDim); s++) {
288
+ sum += state[s] * this.featureEncoder[s * featureDim + f];
289
+ }
290
+ feature[f] = Math.max(0, sum); // ReLU
291
+ }
292
+
293
+ return feature;
294
+ }
295
+
296
+ private forwardPredict(stateFeature: Float32Array, action: number): Float32Array {
297
+ const featureDim = this.config.featureDim;
298
+ const inputDim = featureDim + this.numActions;
299
+ const predicted = new Float32Array(featureDim);
300
+
301
+ // Concatenate feature and one-hot action
302
+ const input = new Float32Array(inputDim);
303
+ input.set(stateFeature);
304
+ input[featureDim + action] = 1;
305
+
306
+ // Forward pass
307
+ for (let f = 0; f < featureDim; f++) {
308
+ let sum = 0;
309
+ for (let i = 0; i < inputDim; i++) {
310
+ sum += input[i] * this.forwardModel[i * featureDim + f];
311
+ }
312
+ predicted[f] = sum;
313
+ }
314
+
315
+ return predicted;
316
+ }
317
+
318
+ private inversePredict(
319
+ stateFeature: Float32Array,
320
+ nextStateFeature: Float32Array
321
+ ): Float32Array {
322
+ const featureDim = this.config.featureDim;
323
+ const logits = new Float32Array(this.numActions);
324
+
325
+ // Concatenate features
326
+ const input = new Float32Array(2 * featureDim);
327
+ input.set(stateFeature);
328
+ input.set(nextStateFeature, featureDim);
329
+
330
+ // Forward pass
331
+ for (let a = 0; a < this.numActions; a++) {
332
+ let sum = 0;
333
+ for (let i = 0; i < 2 * featureDim; i++) {
334
+ sum += input[i] * this.inverseModel[i * this.numActions + a];
335
+ }
336
+ logits[a] = sum;
337
+ }
338
+
339
+ return this.softmax(logits);
340
+ }
341
+
342
+ private rndForward(state: Float32Array, weights: Float32Array): Float32Array {
343
+ const featureDim = this.config.featureDim;
344
+ const output = new Float32Array(featureDim);
345
+
346
+ for (let f = 0; f < featureDim; f++) {
347
+ let sum = 0;
348
+ for (let s = 0; s < Math.min(state.length, this.stateDim); s++) {
349
+ sum += state[s] * weights[s * featureDim + f];
350
+ }
351
+ output[f] = Math.max(0, sum); // ReLU
352
+ }
353
+
354
+ return output;
355
+ }
356
+
357
+ private updateForwardModel(
358
+ stateFeature: Float32Array,
359
+ action: number,
360
+ targetFeature: Float32Array
361
+ ): number {
362
+ const featureDim = this.config.featureDim;
363
+ const inputDim = featureDim + this.numActions;
364
+ const lr = this.config.forwardLR;
365
+ const beta = 0.9;
366
+
367
+ // Forward pass
368
+ const predicted = this.forwardPredict(stateFeature, action);
369
+
370
+ // Compute loss and gradient
371
+ let loss = 0;
372
+ const grad = new Float32Array(predicted.length);
373
+
374
+ for (let f = 0; f < featureDim; f++) {
375
+ const diff = predicted[f] - targetFeature[f];
376
+ loss += diff * diff;
377
+ grad[f] = 2 * diff;
378
+ }
379
+
380
+ // Backprop to weights
381
+ const input = new Float32Array(inputDim);
382
+ input.set(stateFeature);
383
+ input[featureDim + action] = 1;
384
+
385
+ for (let i = 0; i < inputDim; i++) {
386
+ for (let f = 0; f < featureDim; f++) {
387
+ const weightGrad = input[i] * grad[f];
388
+ const idx = i * featureDim + f;
389
+ this.forwardMomentum[idx] = beta * this.forwardMomentum[idx] + (1 - beta) * weightGrad;
390
+ this.forwardModel[idx] -= lr * this.forwardMomentum[idx];
391
+ }
392
+ }
393
+
394
+ return loss;
395
+ }
396
+
397
+ private updateInverseModel(
398
+ stateFeature: Float32Array,
399
+ nextStateFeature: Float32Array,
400
+ targetAction: number
401
+ ): number {
402
+ const featureDim = this.config.featureDim;
403
+ const lr = this.config.inverseLR;
404
+ const beta = 0.9;
405
+
406
+ // Forward pass
407
+ const probs = this.inversePredict(stateFeature, nextStateFeature);
408
+
409
+ // Cross-entropy loss
410
+ const loss = -Math.log(probs[targetAction] + 1e-8);
411
+
412
+ // Gradient
413
+ const grad = new Float32Array(this.numActions);
414
+ for (let a = 0; a < this.numActions; a++) {
415
+ grad[a] = probs[a] - (a === targetAction ? 1 : 0);
416
+ }
417
+
418
+ // Backprop to weights
419
+ const input = new Float32Array(2 * featureDim);
420
+ input.set(stateFeature);
421
+ input.set(nextStateFeature, featureDim);
422
+
423
+ for (let i = 0; i < 2 * featureDim; i++) {
424
+ for (let a = 0; a < this.numActions; a++) {
425
+ const weightGrad = input[i] * grad[a];
426
+ const idx = i * this.numActions + a;
427
+ this.inverseMomentum[idx] = beta * this.inverseMomentum[idx] + (1 - beta) * weightGrad;
428
+ this.inverseModel[idx] -= lr * this.inverseMomentum[idx];
429
+ }
430
+ }
431
+
432
+ return loss;
433
+ }
434
+
435
+ private updateRNDPredictor(state: Float32Array): void {
436
+ const featureDim = this.config.featureDim;
437
+ const lr = this.config.learningRate;
438
+ const beta = 0.9;
439
+
440
+ // Target output (fixed)
441
+ const targetOutput = this.rndForward(state, this.rndTarget);
442
+
443
+ // Predictor output
444
+ const predictorOutput = this.rndForward(state, this.rndPredictor);
445
+
446
+ // Gradient
447
+ const grad = new Float32Array(featureDim);
448
+ for (let f = 0; f < featureDim; f++) {
449
+ grad[f] = 2 * (predictorOutput[f] - targetOutput[f]);
450
+ }
451
+
452
+ // Update predictor weights
453
+ for (let s = 0; s < Math.min(state.length, this.stateDim); s++) {
454
+ for (let f = 0; f < featureDim; f++) {
455
+ if (predictorOutput[f] > 0) { // ReLU gradient
456
+ const weightGrad = state[s] * grad[f];
457
+ const idx = s * featureDim + f;
458
+ this.rndMomentum[idx] = beta * this.rndMomentum[idx] + (1 - beta) * weightGrad;
459
+ this.rndPredictor[idx] -= lr * this.rndMomentum[idx];
460
+ }
461
+ }
462
+ }
463
+ }
464
+
465
+ private normalizeIntrinsic(raw: number): number {
466
+ // Update running statistics
467
+ const alpha = 0.01;
468
+ this.intrinsicMean = (1 - alpha) * this.intrinsicMean + alpha * raw;
469
+ this.intrinsicVar = (1 - alpha) * this.intrinsicVar + alpha * (raw - this.intrinsicMean) ** 2;
470
+
471
+ // Normalize
472
+ const normalized = (raw - this.intrinsicMean) / (Math.sqrt(this.intrinsicVar) + 1e-8);
473
+
474
+ // Clip to reasonable range
475
+ return Math.max(-5, Math.min(5, normalized));
476
+ }
477
+
478
+ private softmax(logits: Float32Array): Float32Array {
479
+ const max = Math.max(...logits);
480
+ const exps = new Float32Array(logits.length);
481
+ let sum = 0;
482
+
483
+ for (let i = 0; i < logits.length; i++) {
484
+ exps[i] = Math.exp(logits[i] - max);
485
+ sum += exps[i];
486
+ }
487
+
488
+ for (let i = 0; i < exps.length; i++) {
489
+ exps[i] /= sum;
490
+ }
491
+
492
+ return exps;
493
+ }
494
+
495
+ private hashAction(action: string): number {
496
+ let hash = 0;
497
+ for (let i = 0; i < action.length; i++) {
498
+ hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
499
+ }
500
+ return hash;
501
+ }
502
+ }
503
+
504
+ /**
505
+ * Factory function
506
+ */
507
+ export function createCuriosity(config?: Partial<CuriosityConfig>): CuriosityModule {
508
+ return new CuriosityModule(config);
509
+ }
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Decision Transformer
3
+ *
4
+ * Implements sequence modeling approach for RL:
5
+ * - Trajectory as sequence: (s, a, R, s, a, R, ...)
6
+ * - Return-conditioned generation
7
+ * - Causal transformer attention
8
+ * - Offline RL from trajectories
9
+ *
10
+ * Performance Target: <10ms per forward pass
11
+ */
12
+ import type { DecisionTransformerConfig, Trajectory } from '../types.js';
13
+ /**
14
+ * Default Decision Transformer configuration
15
+ */
16
+ export declare const DEFAULT_DT_CONFIG: DecisionTransformerConfig;
17
+ /**
18
+ * Sequence entry for transformer
19
+ */
20
+ interface SequenceEntry {
21
+ returnToGo: number;
22
+ state: Float32Array;
23
+ action: number;
24
+ timestep: number;
25
+ }
26
+ /**
27
+ * Decision Transformer Implementation
28
+ */
29
+ export declare class DecisionTransformer {
30
+ private config;
31
+ private stateEmbed;
32
+ private actionEmbed;
33
+ private returnEmbed;
34
+ private posEmbed;
35
+ private attentionWeights;
36
+ private ffnWeights;
37
+ private actionHead;
38
+ private trajectoryBuffer;
39
+ private stateDim;
40
+ private numActions;
41
+ private updateCount;
42
+ private avgLoss;
43
+ constructor(config?: Partial<DecisionTransformerConfig>);
44
+ /**
45
+ * Add trajectory for training
46
+ */
47
+ addTrajectory(trajectory: Trajectory): void;
48
+ /**
49
+ * Train on buffered trajectories
50
+ * Target: <10ms per batch
51
+ */
52
+ train(): {
53
+ loss: number;
54
+ accuracy: number;
55
+ };
56
+ /**
57
+ * Get action conditioned on target return
58
+ */
59
+ getAction(states: Float32Array[], actions: number[], targetReturn: number): number;
60
+ /**
61
+ * Forward pass through transformer
62
+ */
63
+ forward(sequence: SequenceEntry[]): Float32Array;
64
+ /**
65
+ * Get statistics
66
+ */
67
+ getStats(): Record<string, number>;
68
+ private initEmbedding;
69
+ private initWeight;
70
+ private createSequence;
71
+ private transformerLayer;
72
+ private updateWeights;
73
+ private softmax;
74
+ private argmax;
75
+ private hashAction;
76
+ }
77
+ /**
78
+ * Factory function
79
+ */
80
+ export declare function createDecisionTransformer(config?: Partial<DecisionTransformerConfig>): DecisionTransformer;
81
+ export {};
82
+ //# sourceMappingURL=decision-transformer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decision-transformer.d.ts","sourceRoot":"","sources":["decision-transformer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EACV,yBAAyB,EACzB,UAAU,EAEX,MAAM,aAAa,CAAC;AAErB;;GAEG;AACH,eAAO,MAAM,iBAAiB,EAAE,yBAe/B,CAAC;AAEF;;GAEG;AACH,UAAU,aAAa;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,YAAY,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,mBAAmB;IAC9B,OAAO,CAAC,MAAM,CAA4B;IAG1C,OAAO,CAAC,UAAU,CAAe;IACjC,OAAO,CAAC,WAAW,CAAe;IAClC,OAAO,CAAC,WAAW,CAAe;IAClC,OAAO,CAAC,QAAQ,CAAe;IAG/B,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,UAAU,CAAmB;IAGrC,OAAO,CAAC,UAAU,CAAe;IAGjC,OAAO,CAAC,gBAAgB,CAAoB;IAG5C,OAAO,CAAC,QAAQ,CAAO;IACvB,OAAO,CAAC,UAAU,CAAK;IAGvB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,OAAO,CAAK;gBAER,MAAM,GAAE,OAAO,CAAC,yBAAyB,CAAM;IAiC3D;;OAEG;IACH,aAAa,CAAC,UAAU,EAAE,UAAU,GAAG,IAAI;IAW3C;;;OAGG;IACH,KAAK,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE;IAgE3C;;OAEG;IACH,SAAS,CACP,MAAM,EAAE,YAAY,EAAE,EACtB,OAAO,EAAE,MAAM,EAAE,EACjB,YAAY,EAAE,MAAM,GACnB,MAAM;IAwBT;;OAEG;IACH,OAAO,CAAC,QAAQ,EAAE,aAAa,EAAE,GAAG,YAAY;IA2DhD;;OAEG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAclC,OAAO,CAAC,aAAa;IASrB,OAAO,CAAC,UAAU;IASlB,OAAO,CAAC,cAAc;IA0BtB,OAAO,CAAC,gBAAgB;IAyGxB,OAAO,CAAC,aAAa;IAuBrB,OAAO,CAAC,OAAO;IAiBf,OAAO,CAAC,MAAM;IAYd,OAAO,CAAC,UAAU;CAOnB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,MAAM,CAAC,EAAE,OAAO,CAAC,yBAAyB,CAAC,GAC1C,mBAAmB,CAErB"}