@sparkleideas/neural 3.5.2-patch.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/README.md +260 -0
  2. package/__tests__/README.md +235 -0
  3. package/__tests__/algorithms.test.ts +582 -0
  4. package/__tests__/patterns.test.ts +549 -0
  5. package/__tests__/sona.test.ts +445 -0
  6. package/docs/SONA_INTEGRATION.md +460 -0
  7. package/docs/SONA_QUICKSTART.md +168 -0
  8. package/examples/sona-usage.ts +318 -0
  9. package/package.json +23 -0
  10. package/src/algorithms/a2c.d.ts +86 -0
  11. package/src/algorithms/a2c.d.ts.map +1 -0
  12. package/src/algorithms/a2c.js +361 -0
  13. package/src/algorithms/a2c.js.map +1 -0
  14. package/src/algorithms/a2c.ts +478 -0
  15. package/src/algorithms/curiosity.d.ts +82 -0
  16. package/src/algorithms/curiosity.d.ts.map +1 -0
  17. package/src/algorithms/curiosity.js +392 -0
  18. package/src/algorithms/curiosity.js.map +1 -0
  19. package/src/algorithms/curiosity.ts +509 -0
  20. package/src/algorithms/decision-transformer.d.ts +82 -0
  21. package/src/algorithms/decision-transformer.d.ts.map +1 -0
  22. package/src/algorithms/decision-transformer.js +415 -0
  23. package/src/algorithms/decision-transformer.js.map +1 -0
  24. package/src/algorithms/decision-transformer.ts +521 -0
  25. package/src/algorithms/dqn.d.ts +72 -0
  26. package/src/algorithms/dqn.d.ts.map +1 -0
  27. package/src/algorithms/dqn.js +303 -0
  28. package/src/algorithms/dqn.js.map +1 -0
  29. package/src/algorithms/dqn.ts +382 -0
  30. package/src/algorithms/index.d.ts +32 -0
  31. package/src/algorithms/index.d.ts.map +1 -0
  32. package/src/algorithms/index.js +74 -0
  33. package/src/algorithms/index.js.map +1 -0
  34. package/src/algorithms/index.ts +122 -0
  35. package/src/algorithms/ppo.d.ts +72 -0
  36. package/src/algorithms/ppo.d.ts.map +1 -0
  37. package/src/algorithms/ppo.js +331 -0
  38. package/src/algorithms/ppo.js.map +1 -0
  39. package/src/algorithms/ppo.ts +429 -0
  40. package/src/algorithms/q-learning.d.ts +77 -0
  41. package/src/algorithms/q-learning.d.ts.map +1 -0
  42. package/src/algorithms/q-learning.js +259 -0
  43. package/src/algorithms/q-learning.js.map +1 -0
  44. package/src/algorithms/q-learning.ts +333 -0
  45. package/src/algorithms/sarsa.d.ts +82 -0
  46. package/src/algorithms/sarsa.d.ts.map +1 -0
  47. package/src/algorithms/sarsa.js +297 -0
  48. package/src/algorithms/sarsa.js.map +1 -0
  49. package/src/algorithms/sarsa.ts +383 -0
  50. package/src/algorithms/tmp.json +0 -0
  51. package/src/application/index.ts +11 -0
  52. package/src/application/services/neural-application-service.ts +217 -0
  53. package/src/domain/entities/pattern.ts +169 -0
  54. package/src/domain/index.ts +18 -0
  55. package/src/domain/services/learning-service.ts +256 -0
  56. package/src/index.d.ts +118 -0
  57. package/src/index.d.ts.map +1 -0
  58. package/src/index.js +201 -0
  59. package/src/index.js.map +1 -0
  60. package/src/index.ts +363 -0
  61. package/src/modes/balanced.d.ts +60 -0
  62. package/src/modes/balanced.d.ts.map +1 -0
  63. package/src/modes/balanced.js +234 -0
  64. package/src/modes/balanced.js.map +1 -0
  65. package/src/modes/balanced.ts +299 -0
  66. package/src/modes/base.ts +163 -0
  67. package/src/modes/batch.d.ts +82 -0
  68. package/src/modes/batch.d.ts.map +1 -0
  69. package/src/modes/batch.js +316 -0
  70. package/src/modes/batch.js.map +1 -0
  71. package/src/modes/batch.ts +434 -0
  72. package/src/modes/edge.d.ts +85 -0
  73. package/src/modes/edge.d.ts.map +1 -0
  74. package/src/modes/edge.js +310 -0
  75. package/src/modes/edge.js.map +1 -0
  76. package/src/modes/edge.ts +409 -0
  77. package/src/modes/index.d.ts +55 -0
  78. package/src/modes/index.d.ts.map +1 -0
  79. package/src/modes/index.js +83 -0
  80. package/src/modes/index.js.map +1 -0
  81. package/src/modes/index.ts +16 -0
  82. package/src/modes/real-time.d.ts +58 -0
  83. package/src/modes/real-time.d.ts.map +1 -0
  84. package/src/modes/real-time.js +196 -0
  85. package/src/modes/real-time.js.map +1 -0
  86. package/src/modes/real-time.ts +257 -0
  87. package/src/modes/research.d.ts +79 -0
  88. package/src/modes/research.d.ts.map +1 -0
  89. package/src/modes/research.js +389 -0
  90. package/src/modes/research.js.map +1 -0
  91. package/src/modes/research.ts +486 -0
  92. package/src/modes/tmp.json +0 -0
  93. package/src/pattern-learner.d.ts +117 -0
  94. package/src/pattern-learner.d.ts.map +1 -0
  95. package/src/pattern-learner.js +603 -0
  96. package/src/pattern-learner.js.map +1 -0
  97. package/src/pattern-learner.ts +757 -0
  98. package/src/reasoning-bank.d.ts +259 -0
  99. package/src/reasoning-bank.d.ts.map +1 -0
  100. package/src/reasoning-bank.js +993 -0
  101. package/src/reasoning-bank.js.map +1 -0
  102. package/src/reasoning-bank.ts +1279 -0
  103. package/src/reasoningbank-adapter.ts +697 -0
  104. package/src/sona-integration.d.ts +168 -0
  105. package/src/sona-integration.d.ts.map +1 -0
  106. package/src/sona-integration.js +316 -0
  107. package/src/sona-integration.js.map +1 -0
  108. package/src/sona-integration.ts +432 -0
  109. package/src/sona-manager.d.ts +147 -0
  110. package/src/sona-manager.d.ts.map +1 -0
  111. package/src/sona-manager.js +695 -0
  112. package/src/sona-manager.js.map +1 -0
  113. package/src/sona-manager.ts +835 -0
  114. package/src/tmp.json +0 -0
  115. package/src/types.d.ts +431 -0
  116. package/src/types.d.ts.map +1 -0
  117. package/src/types.js +11 -0
  118. package/src/types.js.map +1 -0
  119. package/src/types.ts +590 -0
  120. package/tmp.json +0 -0
  121. package/tsconfig.json +9 -0
  122. package/vitest.config.ts +19 -0
package/src/algorithms/a2c.ts
@@ -0,0 +1,478 @@
+ /**
+  * Advantage Actor-Critic (A2C)
+  *
+  * Implements the synchronous A2C algorithm with:
+  * - Shared actor-critic network
+  * - N-step returns
+  * - Entropy regularization
+  * - Advantage normalization
+  *
+  * Performance Target: <10ms per update step
+  */
+
+ import type {
+   RLConfig,
+   Trajectory,
+   TrajectoryStep,
+ } from '../types.js';
+
+ /**
+  * A2C configuration
+  */
+ export interface A2CConfig extends RLConfig {
+   algorithm: 'a2c';
+   nSteps: number;
+   useGAE: boolean;
+   gaeLambda: number;
+ }
+
+ /**
+  * Default A2C configuration
+  */
+ export const DEFAULT_A2C_CONFIG: A2CConfig = {
+   algorithm: 'a2c',
+   learningRate: 0.0007,
+   gamma: 0.99,
+   entropyCoef: 0.01,
+   valueLossCoef: 0.5,
+   maxGradNorm: 0.5,
+   epochs: 1,
+   miniBatchSize: 32,
+   nSteps: 5,
+   useGAE: true,
+   gaeLambda: 0.95,
+ };
+
+ /**
+  * A2C experience entry
+  */
+ interface A2CExperience {
+   state: Float32Array;
+   action: number;
+   reward: number;
+   value: number;
+   logProb: number;
+   entropy: number;
+ }
+
+ /**
+  * A2C Algorithm Implementation
+  */
+ export class A2CAlgorithm {
+   private config: A2CConfig;
+
+   // Shared network weights
+   private sharedWeights: Float32Array;
+   private policyHead: Float32Array;
+   private valueHead: Float32Array;
+
+   // Optimizer state
+   private sharedMomentum: Float32Array;
+   private policyMomentum: Float32Array;
+   private valueMomentum: Float32Array;
+
+   // Experience buffer for n-step returns
+   private buffer: A2CExperience[] = [];
+
+   // Dimensions
+   private inputDim = 768;
+   private hiddenDim = 64;
+   private numActions = 4;
+
+   // Statistics
+   private updateCount = 0;
+   private avgPolicyLoss = 0;
+   private avgValueLoss = 0;
+   private avgEntropy = 0;
+
+   constructor(config: Partial<A2CConfig> = {}) {
+     this.config = { ...DEFAULT_A2C_CONFIG, ...config };
+
+     // Initialize network (He-style scaling for the shared ReLU layer)
+     const scale = Math.sqrt(2 / this.inputDim);
+     this.sharedWeights = new Float32Array(this.inputDim * this.hiddenDim);
+     this.policyHead = new Float32Array(this.hiddenDim * this.numActions);
+     this.valueHead = new Float32Array(this.hiddenDim);
+
+     for (let i = 0; i < this.sharedWeights.length; i++) {
+       this.sharedWeights[i] = (Math.random() - 0.5) * scale;
+     }
+     for (let i = 0; i < this.policyHead.length; i++) {
+       this.policyHead[i] = (Math.random() - 0.5) * 0.1;
+     }
+     for (let i = 0; i < this.valueHead.length; i++) {
+       this.valueHead[i] = (Math.random() - 0.5) * 0.1;
+     }
+
+     // Initialize momentum buffers
+     this.sharedMomentum = new Float32Array(this.sharedWeights.length);
+     this.policyMomentum = new Float32Array(this.policyHead.length);
+     this.valueMomentum = new Float32Array(this.valueHead.length);
+   }
+
+   /**
+    * Add experience from a trajectory
+    */
+   addExperience(trajectory: Trajectory): void {
+     for (const step of trajectory.steps) {
+       const { probs, value, entropy } = this.evaluate(step.stateAfter);
+       const action = this.hashAction(step.action);
+
+       this.buffer.push({
+         state: step.stateAfter,
+         action,
+         reward: step.reward,
+         value,
+         logProb: Math.log(probs[action] + 1e-8),
+         entropy,
+       });
+     }
+   }
+
+   /**
+    * Perform an A2C update
+    * Target: <10ms
+    */
+   update(): { policyLoss: number; valueLoss: number; entropy: number } {
+     const startTime = performance.now();
+
+     if (this.buffer.length < this.config.nSteps) {
+       return { policyLoss: 0, valueLoss: 0, entropy: 0 };
+     }
+
+     // Compute returns and advantages
+     const returns = this.computeReturns();
+     const advantages = this.computeAdvantages(returns);
+
+     // Initialize gradient accumulators
+     const sharedGrad = new Float32Array(this.sharedWeights.length);
+     const policyGrad = new Float32Array(this.policyHead.length);
+     const valueGrad = new Float32Array(this.valueHead.length);
+
+     let totalPolicyLoss = 0;
+     let totalValueLoss = 0;
+     let totalEntropy = 0;
+
+     // Process all experiences
+     for (let i = 0; i < this.buffer.length; i++) {
+       const exp = this.buffer[i];
+       const advantage = advantages[i];
+       const return_ = returns[i];
+
+       // Get current policy and value
+       const { probs, value, hidden } = this.forwardWithHidden(exp.state);
+       const logProb = Math.log(probs[exp.action] + 1e-8);
+
+       // Policy loss
+       const policyLoss = -logProb * advantage;
+       totalPolicyLoss += policyLoss;
+
+       // Value loss
+       const valueLoss = (value - return_) ** 2;
+       totalValueLoss += valueLoss;
+
+       // Entropy
+       let entropy = 0;
+       for (const p of probs) {
+         if (p > 0) entropy -= p * Math.log(p);
+       }
+       totalEntropy += entropy;
+
+       // Accumulate gradients
+       this.accumulateGradients(
+         sharedGrad, policyGrad, valueGrad,
+         exp.state, hidden, exp.action,
+         advantage, value - return_
+       );
+     }
+
+     // Add entropy bonus to policy gradient (uniform approximation)
+     for (let i = 0; i < policyGrad.length; i++) {
+       policyGrad[i] -= this.config.entropyCoef * totalEntropy / this.buffer.length;
+     }
+
+     // Apply gradients
+     this.applyGradients(sharedGrad, policyGrad, valueGrad, this.buffer.length);
+
+     // Record batch averages, then clear the buffer
+     const batchSize = this.buffer.length;
+     this.avgPolicyLoss = totalPolicyLoss / batchSize;
+     this.avgValueLoss = totalValueLoss / batchSize;
+     this.avgEntropy = totalEntropy / batchSize;
+     this.buffer = [];
+     this.updateCount++;
+
+     const elapsed = performance.now() - startTime;
+     if (elapsed > 10) {
+       console.warn(`A2C update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
+     }
+
+     return {
+       policyLoss: this.avgPolicyLoss,
+       valueLoss: this.avgValueLoss,
+       entropy: this.avgEntropy,
+     };
+   }
+
+   /**
+    * Sample an action from the current policy
+    */
+   getAction(state: Float32Array): { action: number; value: number } {
+     const { probs, value } = this.evaluate(state);
+     const action = this.sampleAction(probs);
+     return { action, value };
+   }
+
+   /**
+    * Get statistics
+    */
+   getStats(): Record<string, number> {
+     return {
+       updateCount: this.updateCount,
+       bufferSize: this.buffer.length,
+       avgPolicyLoss: this.avgPolicyLoss,
+       avgValueLoss: this.avgValueLoss,
+       avgEntropy: this.avgEntropy,
+     };
+   }
+
+   // ==========================================================================
+   // Private Methods
+   // ==========================================================================
+
+   private evaluate(state: Float32Array): { probs: Float32Array; value: number; entropy: number } {
+     const { probs, value } = this.forward(state);
+
+     let entropy = 0;
+     for (const p of probs) {
+       if (p > 0) entropy -= p * Math.log(p);
+     }
+
+     return { probs, value, entropy };
+   }
+
+   private forward(state: Float32Array): { probs: Float32Array; value: number } {
+     // Shared hidden layer
+     const hidden = new Float32Array(this.hiddenDim);
+     for (let h = 0; h < this.hiddenDim; h++) {
+       let sum = 0;
+       for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+         sum += state[i] * this.sharedWeights[i * this.hiddenDim + h];
+       }
+       hidden[h] = Math.max(0, sum); // ReLU
+     }
+
+     // Policy head
+     const logits = new Float32Array(this.numActions);
+     for (let a = 0; a < this.numActions; a++) {
+       let sum = 0;
+       for (let h = 0; h < this.hiddenDim; h++) {
+         sum += hidden[h] * this.policyHead[h * this.numActions + a];
+       }
+       logits[a] = sum;
+     }
+     const probs = this.softmax(logits);
+
+     // Value head
+     let value = 0;
+     for (let h = 0; h < this.hiddenDim; h++) {
+       value += hidden[h] * this.valueHead[h];
+     }
+
+     return { probs, value };
+   }
+
+   private forwardWithHidden(state: Float32Array): { probs: Float32Array; value: number; hidden: Float32Array } {
+     const hidden = new Float32Array(this.hiddenDim);
+     for (let h = 0; h < this.hiddenDim; h++) {
+       let sum = 0;
+       for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+         sum += state[i] * this.sharedWeights[i * this.hiddenDim + h];
+       }
+       hidden[h] = Math.max(0, sum);
+     }
+
+     const logits = new Float32Array(this.numActions);
+     for (let a = 0; a < this.numActions; a++) {
+       let sum = 0;
+       for (let h = 0; h < this.hiddenDim; h++) {
+         sum += hidden[h] * this.policyHead[h * this.numActions + a];
+       }
+       logits[a] = sum;
+     }
+     const probs = this.softmax(logits);
+
+     let value = 0;
+     for (let h = 0; h < this.hiddenDim; h++) {
+       value += hidden[h] * this.valueHead[h];
+     }
+
+     return { probs, value, hidden };
+   }
+
+   private computeReturns(): number[] {
+     const returns = new Array(this.buffer.length).fill(0);
+     let cumReturn = 0;
+
+     // Bootstrap from the last state's value estimate if not terminal
+     if (this.buffer.length > 0) {
+       cumReturn = this.buffer[this.buffer.length - 1].value;
+     }
+
+     for (let t = this.buffer.length - 1; t >= 0; t--) {
+       cumReturn = this.buffer[t].reward + this.config.gamma * cumReturn;
+       returns[t] = cumReturn;
+     }
+
+     return returns;
+   }
+
+   private computeAdvantages(returns: number[]): number[] {
+     if (this.config.useGAE) {
+       return this.computeGAE();
+     }
+
+     // Simple advantage: return - value
+     const advantages = new Array(this.buffer.length).fill(0);
+     for (let i = 0; i < this.buffer.length; i++) {
+       advantages[i] = returns[i] - this.buffer[i].value;
+     }
+
+     // Normalize to zero mean, unit variance
+     const mean = advantages.reduce((a, b) => a + b, 0) / advantages.length;
+     const std = Math.sqrt(
+       advantages.reduce((a, b) => a + (b - mean) ** 2, 0) / advantages.length
+     ) + 1e-8;
+
+     return advantages.map(a => (a - mean) / std);
+   }
+
+   private computeGAE(): number[] {
+     const advantages = new Array(this.buffer.length).fill(0);
+     let lastGae = 0;
+
+     for (let t = this.buffer.length - 1; t >= 0; t--) {
+       const nextValue = t < this.buffer.length - 1
+         ? this.buffer[t + 1].value
+         : 0; // last step treated as terminal
+       const delta = this.buffer[t].reward + this.config.gamma * nextValue - this.buffer[t].value; // TD error delta_t
+       lastGae = delta + this.config.gamma * this.config.gaeLambda * lastGae; // A_t = delta_t + gamma*lambda*A_{t+1}
+       advantages[t] = lastGae;
+     }
+
+     // Normalize to zero mean, unit variance
+     const mean = advantages.reduce((a, b) => a + b, 0) / advantages.length;
+     const std = Math.sqrt(
+       advantages.reduce((a, b) => a + (b - mean) ** 2, 0) / advantages.length
+     ) + 1e-8;
+
+     return advantages.map(a => (a - mean) / std);
+   }
+
+   private accumulateGradients(
+     sharedGrad: Float32Array,
+     policyGrad: Float32Array,
+     valueGrad: Float32Array,
+     state: Float32Array,
+     hidden: Float32Array,
+     action: number,
+     advantage: number,
+     valueError: number
+   ): void {
+     // Policy gradient
+     for (let h = 0; h < this.hiddenDim; h++) {
+       policyGrad[h * this.numActions + action] += hidden[h] * advantage;
+     }
+
+     // Value gradient
+     for (let h = 0; h < this.hiddenDim; h++) {
+       valueGrad[h] += hidden[h] * valueError * this.config.valueLossCoef;
+     }
+
+     // Shared-layer gradient (backprop through both heads)
+     for (let h = 0; h < this.hiddenDim; h++) {
+       if (hidden[h] > 0) { // ReLU gradient
+         const policySignal = advantage * this.policyHead[h * this.numActions + action];
+         const valueSignal = valueError * this.valueHead[h] * this.config.valueLossCoef;
+         const totalSignal = policySignal + valueSignal;
+
+         for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+           sharedGrad[i * this.hiddenDim + h] += state[i] * totalSignal;
+         }
+       }
+     }
+   }
+
+   private applyGradients(
+     sharedGrad: Float32Array,
+     policyGrad: Float32Array,
+     valueGrad: Float32Array,
+     batchSize: number
+   ): void { // SGD with momentum; maxGradNorm is applied as a per-element clip value
+     const lr = this.config.learningRate / batchSize;
+     const beta = 0.9;
+
+     // Apply to shared weights
+     for (let i = 0; i < this.sharedWeights.length; i++) {
+       const grad = Math.max(Math.min(sharedGrad[i], this.config.maxGradNorm), -this.config.maxGradNorm);
+       this.sharedMomentum[i] = beta * this.sharedMomentum[i] + (1 - beta) * grad;
+       this.sharedWeights[i] -= lr * this.sharedMomentum[i];
+     }
+
+     // Apply to policy head
+     for (let i = 0; i < this.policyHead.length; i++) {
+       const grad = Math.max(Math.min(policyGrad[i], this.config.maxGradNorm), -this.config.maxGradNorm);
+       this.policyMomentum[i] = beta * this.policyMomentum[i] + (1 - beta) * grad;
+       this.policyHead[i] -= lr * this.policyMomentum[i];
+     }
+
+     // Apply to value head
+     for (let i = 0; i < this.valueHead.length; i++) {
+       const grad = Math.max(Math.min(valueGrad[i], this.config.maxGradNorm), -this.config.maxGradNorm);
+       this.valueMomentum[i] = beta * this.valueMomentum[i] + (1 - beta) * grad;
+       this.valueHead[i] -= lr * this.valueMomentum[i];
+     }
+   }
+
+   private softmax(logits: Float32Array): Float32Array {
+     const max = Math.max(...logits); // subtract max for numerical stability
+     const exps = new Float32Array(logits.length);
+     let sum = 0;
+
+     for (let i = 0; i < logits.length; i++) {
+       exps[i] = Math.exp(logits[i] - max);
+       sum += exps[i];
+     }
+
+     for (let i = 0; i < exps.length; i++) {
+       exps[i] /= sum;
+     }
+
+     return exps;
+   }
+
+   private sampleAction(probs: Float32Array): number {
+     const r = Math.random(); // inverse-CDF sampling over the action distribution
+     let cumSum = 0;
+     for (let i = 0; i < probs.length; i++) {
+       cumSum += probs[i];
+       if (r < cumSum) return i;
+     }
+     return probs.length - 1;
+   }
+
+   private hashAction(action: string): number {
+     let hash = 0; // maps an action label to a discrete action index
+     for (let i = 0; i < action.length; i++) {
+       hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
+     }
+     return hash;
+   }
+ }
+
+ /**
+  * Factory function
+  */
+ export function createA2C(config?: Partial<A2CConfig>): A2CAlgorithm {
+   return new A2CAlgorithm(config);
+ }
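
To show how the pieces above fit together, here is a minimal usage sketch. The trajectory literal is hypothetical: it only mirrors the fields addExperience() actually reads (steps[], each carrying stateAfter, action, reward); the real Trajectory type is declared in package/src/types.ts.

import { createA2C } from './src/algorithms/a2c.js';

// Hypothetical trajectory; only the fields addExperience() reads are shown.
const trajectory = {
  steps: [
    { stateAfter: new Float32Array(768), action: 'retry', reward: 1.0 },
    { stateAfter: new Float32Array(768), action: 'skip', reward: -0.5 },
  ],
} as any; // stand-in for the Trajectory type from src/types.ts

const a2c = createA2C({ nSteps: 2 }); // update() is a no-op until nSteps experiences exist
a2c.addExperience(trajectory);

const { policyLoss, valueLoss, entropy } = a2c.update();
const { action, value } = a2c.getAction(new Float32Array(768));
console.log(a2c.getStats(), { policyLoss, valueLoss, entropy, action, value });
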
package/src/algorithms/curiosity.d.ts
@@ -0,0 +1,82 @@
+ /**
+  * Curiosity-Driven Exploration
+  *
+  * Implements intrinsic motivation for exploration:
+  * - Intrinsic Curiosity Module (ICM)
+  * - Random Network Distillation (RND)
+  * - Forward and inverse dynamics models
+  * - Exploration bonus generation
+  *
+  * Performance Target: <5ms per forward pass
+  */
+ import type { CuriosityConfig, Trajectory } from '../types.js';
+ /**
+  * Default Curiosity configuration
+  */
+ export declare const DEFAULT_CURIOSITY_CONFIG: CuriosityConfig;
+ /**
+  * Curiosity-Driven Exploration Module
+  */
+ export declare class CuriosityModule {
+     private config;
+     private featureEncoder;
+     private forwardModel;
+     private inverseModel;
+     private rndTarget;
+     private rndPredictor;
+     private forwardMomentum;
+     private inverseMomentum;
+     private rndMomentum;
+     private stateDim;
+     private numActions;
+     private intrinsicMean;
+     private intrinsicVar;
+     private updateCount;
+     private avgForwardLoss;
+     private avgInverseLoss;
+     private avgIntrinsicReward;
+     constructor(config?: Partial<CuriosityConfig>);
+     /**
+      * Compute intrinsic reward for a transition
+      */
+     computeIntrinsicReward(state: Float32Array, action: string, nextState: Float32Array): number;
+     /**
+      * Compute ICM-based intrinsic reward (prediction error)
+      */
+     computeICMReward(state: Float32Array, action: string, nextState: Float32Array): number;
+     /**
+      * Compute RND-based intrinsic reward
+      */
+     computeRNDReward(state: Float32Array): number;
+     /**
+      * Update curiosity models from trajectory
+      */
+     update(trajectory: Trajectory): {
+         forwardLoss: number;
+         inverseLoss: number;
+     };
+     /**
+      * Add intrinsic rewards to trajectory
+      */
+     augmentTrajectory(trajectory: Trajectory): Trajectory;
+     /**
+      * Get statistics
+      */
+     getStats(): Record<string, number>;
+     private initWeight;
+     private encodeState;
+     private forwardPredict;
+     private inversePredict;
+     private rndForward;
+     private updateForwardModel;
+     private updateInverseModel;
+     private updateRNDPredictor;
+     private normalizeIntrinsic;
+     private softmax;
+     private hashAction;
+ }
+ /**
+  * Factory function
+  */
+ export declare function createCuriosity(config?: Partial<CuriosityConfig>): CuriosityModule;
+ //# sourceMappingURL=curiosity.d.ts.map
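
A sketch of the declared surface, assuming states use the same 768-element embeddings as the A2C code above (an assumption; stateDim is private in this declaration file):

import { createCuriosity } from './src/algorithms/curiosity.js';

const curiosity = createCuriosity(); // uses DEFAULT_CURIOSITY_CONFIG

// Per-transition exploration bonus (ICM prediction error and/or RND, per config)
const state = new Float32Array(768);     // 768 is assumed here
const nextState = new Float32Array(768);
nextState[0] = 1;
const bonus = curiosity.computeIntrinsicReward(state, 'explore', nextState);

// augmentTrajectory(trajectory) adds intrinsic rewards to a Trajectory
// before it is fed to an RL learner such as the A2C class above.
console.log(bonus, curiosity.getStats());
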
package/src/algorithms/curiosity.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"curiosity.d.ts","sourceRoot":"","sources":["curiosity.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAkB,MAAM,aAAa,CAAC;AAE/E;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,eActC,CAAC;AAEF;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAkB;IAGhC,OAAO,CAAC,cAAc,CAAe;IAGrC,OAAO,CAAC,YAAY,CAAe;IAGnC,OAAO,CAAC,YAAY,CAAe;IAGnC,OAAO,CAAC,SAAS,CAAe;IAChC,OAAO,CAAC,YAAY,CAAe;IAGnC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,WAAW,CAAe;IAGlC,OAAO,CAAC,QAAQ,CAAO;IACvB,OAAO,CAAC,UAAU,CAAK;IAGvB,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,WAAW,CAAK;IAGxB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,kBAAkB,CAAK;gBAEnB,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAwBjD;;OAEG;IACH,sBAAsB,CACpB,KAAK,EAAE,YAAY,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,YAAY,GACtB,MAAM;IAQT;;OAEG;IACH,gBAAgB,CACd,KAAK,EAAE,YAAY,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,YAAY,GACtB,MAAM;IA4BT;;OAEG;IACH,gBAAgB,CAAC,KAAK,EAAE,YAAY,GAAG,MAAM;IA0B7C;;OAEG;IACH,MAAM,CAAC,UAAU,EAAE,UAAU,GAAG;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE;IAkD5E;;OAEG;IACH,iBAAiB,CAAC,UAAU,EAAE,UAAU,GAAG,UAAU;IAuBrD;;OAEG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAelC,OAAO,CAAC,UAAU;IASlB,OAAO,CAAC,WAAW;IAenB,OAAO,CAAC,cAAc;IAsBtB,OAAO,CAAC,cAAc;IAwBtB,OAAO,CAAC,UAAU;IAelB,OAAO,CAAC,kBAAkB;IAwC1B,OAAO,CAAC,kBAAkB;IAsC1B,OAAO,CAAC,kBAAkB;IA8B1B,OAAO,CAAC,kBAAkB;IAa1B,OAAO,CAAC,OAAO;IAiBf,OAAO,CAAC,UAAU;CAOnB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,GAAG,eAAe,CAElF"}