@arcanea/guardian-evolution 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/dist/algorithms/a2c.d.ts +86 -0
  2. package/dist/algorithms/a2c.d.ts.map +1 -0
  3. package/dist/algorithms/a2c.js +361 -0
  4. package/dist/algorithms/a2c.js.map +1 -0
  5. package/dist/algorithms/curiosity.d.ts +82 -0
  6. package/dist/algorithms/curiosity.d.ts.map +1 -0
  7. package/dist/algorithms/curiosity.js +392 -0
  8. package/dist/algorithms/curiosity.js.map +1 -0
  9. package/dist/algorithms/decision-transformer.d.ts +82 -0
  10. package/dist/algorithms/decision-transformer.d.ts.map +1 -0
  11. package/dist/algorithms/decision-transformer.js +415 -0
  12. package/dist/algorithms/decision-transformer.js.map +1 -0
  13. package/dist/algorithms/dqn.d.ts +72 -0
  14. package/dist/algorithms/dqn.d.ts.map +1 -0
  15. package/dist/algorithms/dqn.js +303 -0
  16. package/dist/algorithms/dqn.js.map +1 -0
  17. package/dist/algorithms/index.d.ts +32 -0
  18. package/dist/algorithms/index.d.ts.map +1 -0
  19. package/dist/algorithms/index.js +74 -0
  20. package/dist/algorithms/index.js.map +1 -0
  21. package/dist/algorithms/ppo.d.ts +72 -0
  22. package/dist/algorithms/ppo.d.ts.map +1 -0
  23. package/dist/algorithms/ppo.js +331 -0
  24. package/dist/algorithms/ppo.js.map +1 -0
  25. package/dist/algorithms/q-learning.d.ts +77 -0
  26. package/dist/algorithms/q-learning.d.ts.map +1 -0
  27. package/dist/algorithms/q-learning.js +259 -0
  28. package/dist/algorithms/q-learning.js.map +1 -0
  29. package/dist/algorithms/sarsa.d.ts +82 -0
  30. package/dist/algorithms/sarsa.d.ts.map +1 -0
  31. package/dist/algorithms/sarsa.js +297 -0
  32. package/dist/algorithms/sarsa.js.map +1 -0
  33. package/dist/index.d.ts +118 -0
  34. package/dist/index.d.ts.map +1 -0
  35. package/dist/index.js +201 -0
  36. package/dist/index.js.map +1 -0
  37. package/dist/modes/balanced.d.ts +60 -0
  38. package/dist/modes/balanced.d.ts.map +1 -0
  39. package/dist/modes/balanced.js +234 -0
  40. package/dist/modes/balanced.js.map +1 -0
  41. package/dist/modes/batch.d.ts +82 -0
  42. package/dist/modes/batch.d.ts.map +1 -0
  43. package/dist/modes/batch.js +316 -0
  44. package/dist/modes/batch.js.map +1 -0
  45. package/dist/modes/edge.d.ts +85 -0
  46. package/dist/modes/edge.d.ts.map +1 -0
  47. package/dist/modes/edge.js +310 -0
  48. package/dist/modes/edge.js.map +1 -0
  49. package/dist/modes/index.d.ts +55 -0
  50. package/dist/modes/index.d.ts.map +1 -0
  51. package/dist/modes/index.js +83 -0
  52. package/dist/modes/index.js.map +1 -0
  53. package/dist/modes/real-time.d.ts +58 -0
  54. package/dist/modes/real-time.d.ts.map +1 -0
  55. package/dist/modes/real-time.js +196 -0
  56. package/dist/modes/real-time.js.map +1 -0
  57. package/dist/modes/research.d.ts +79 -0
  58. package/dist/modes/research.d.ts.map +1 -0
  59. package/dist/modes/research.js +389 -0
  60. package/dist/modes/research.js.map +1 -0
  61. package/dist/pattern-learner.d.ts +117 -0
  62. package/dist/pattern-learner.d.ts.map +1 -0
  63. package/dist/pattern-learner.js +603 -0
  64. package/dist/pattern-learner.js.map +1 -0
  65. package/dist/reasoning-bank.d.ts +259 -0
  66. package/dist/reasoning-bank.d.ts.map +1 -0
  67. package/dist/reasoning-bank.js +993 -0
  68. package/dist/reasoning-bank.js.map +1 -0
  69. package/dist/reasoningbank-adapter.d.ts +168 -0
  70. package/dist/reasoningbank-adapter.d.ts.map +1 -0
  71. package/dist/reasoningbank-adapter.js +463 -0
  72. package/dist/reasoningbank-adapter.js.map +1 -0
  73. package/dist/sona-integration.d.ts +168 -0
  74. package/dist/sona-integration.d.ts.map +1 -0
  75. package/dist/sona-integration.js +316 -0
  76. package/dist/sona-integration.js.map +1 -0
  77. package/dist/sona-manager.d.ts +147 -0
  78. package/dist/sona-manager.d.ts.map +1 -0
  79. package/dist/sona-manager.js +695 -0
  80. package/dist/sona-manager.js.map +1 -0
  81. package/dist/types.d.ts +431 -0
  82. package/dist/types.d.ts.map +1 -0
  83. package/dist/types.js +11 -0
  84. package/dist/types.js.map +1 -0
  85. package/package.json +47 -0
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Advantage Actor-Critic (A2C)
3
+ *
4
+ * Implements synchronous A2C algorithm with:
5
+ * - Shared actor-critic network
6
+ * - N-step returns
7
+ * - Entropy regularization
8
+ * - Advantage normalization
9
+ *
10
+ * Performance Target: <10ms per update step
11
+ */
12
+ import type { RLConfig, Trajectory } from '../types.js';
13
/**
 * Configuration accepted by the A2C algorithm.
 *
 * Extends the shared `RLConfig` options with the A2C-specific fields below.
 */
export interface A2CConfig extends RLConfig {
    /** Literal tag marking this configuration as belonging to A2C. */
    algorithm: 'a2c';
    /** Minimum number of buffered experiences required before `update()` trains. */
    nSteps: number;
    /** When true, advantages come from Generalized Advantage Estimation instead of `return - value`. */
    useGAE: boolean;
    /** Lambda factor multiplied with `gamma` when accumulating GAE advantages. */
    gaeLambda: number;
}
22
/**
 * Baseline A2C settings.
 *
 * The `A2CAlgorithm` constructor spreads caller-supplied overrides on top of this object.
 */
export declare const DEFAULT_A2C_CONFIG: A2CConfig;
26
/**
 * Advantage Actor-Critic learner.
 *
 * One shared hidden layer feeds a policy head (action probabilities) and a
 * value head (scalar state value). Experiences are buffered with
 * `addExperience` and consumed in a single batch by `update`.
 */
export declare class A2CAlgorithm {
    /** Effective configuration (defaults merged with constructor overrides). */
    private config;
    /** Weights of the shared hidden layer. */
    private sharedWeights;
    /** Weights mapping the hidden layer to action logits. */
    private policyHead;
    /** Weights mapping the hidden layer to the state value. */
    private valueHead;
    /** Momentum buffers, one per weight array. */
    private sharedMomentum;
    private policyMomentum;
    private valueMomentum;
    /** Experiences collected since the last update. */
    private buffer;
    /** Network dimensions. */
    private inputDim;
    private hiddenDim;
    private numActions;
    /** Running statistics exposed through `getStats`. */
    private updateCount;
    private avgPolicyLoss;
    private avgValueLoss;
    private avgEntropy;
    /**
     * @param config Optional overrides applied on top of `DEFAULT_A2C_CONFIG`.
     */
    constructor(config?: Partial<A2CConfig>);
    /**
     * Append every step of a trajectory to the experience buffer.
     */
    addExperience(trajectory: Trajectory): void;
    /**
     * Run one A2C update over the buffered experiences and clear the buffer.
     * Performance target: under 10ms.
     *
     * @returns Mean policy loss, value loss and entropy of the processed batch.
     */
    update(): {
        policyLoss: number;
        valueLoss: number;
        entropy: number;
    };
    /**
     * Sample an action for a state from the current policy.
     *
     * @returns The sampled action index and the value estimate for the state.
     */
    getAction(state: Float32Array): {
        action: number;
        value: number;
    };
    /**
     * Snapshot of the update counter, buffer size and latest averages.
     */
    getStats(): Record<string, number>;
    private evaluate;
    private forward;
    private forwardWithHidden;
    private computeReturns;
    private computeAdvantages;
    private computeGAE;
    private accumulateGradients;
    private applyGradients;
    private softmax;
    private sampleAction;
    private hashAction;
}
82
/**
 * Build an `A2CAlgorithm` instance.
 *
 * @param config Optional overrides forwarded to the `A2CAlgorithm` constructor.
 * @returns A freshly constructed algorithm instance.
 */
export declare function createA2C(config?: Partial<A2CConfig>): A2CAlgorithm;
86
+ //# sourceMappingURL=a2c.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"a2c.d.ts","sourceRoot":"","sources":["../../src/algorithms/a2c.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EACV,QAAQ,EACR,UAAU,EAEX,MAAM,aAAa,CAAC;AAErB;;GAEG;AACH,MAAM,WAAW,SAAU,SAAQ,QAAQ;IACzC,SAAS,EAAE,KAAK,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,OAAO,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,kBAAkB,EAAE,SAYhC,CAAC;AAcF;;GAEG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAY;IAG1B,OAAO,CAAC,aAAa,CAAe;IACpC,OAAO,CAAC,UAAU,CAAe;IACjC,OAAO,CAAC,SAAS,CAAe;IAGhC,OAAO,CAAC,cAAc,CAAe;IACrC,OAAO,CAAC,cAAc,CAAe;IACrC,OAAO,CAAC,aAAa,CAAe;IAGpC,OAAO,CAAC,MAAM,CAAuB;IAGrC,OAAO,CAAC,QAAQ,CAAO;IACvB,OAAO,CAAC,SAAS,CAAM;IACvB,OAAO,CAAC,UAAU,CAAK;IAGvB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,UAAU,CAAK;gBAEX,MAAM,GAAE,OAAO,CAAC,SAAS,CAAM;IAyB3C;;OAEG;IACH,aAAa,CAAC,UAAU,EAAE,UAAU,GAAG,IAAI;IAgB3C;;;OAGG;IACH,MAAM,IAAI;QAAE,UAAU,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE;IAiFpE;;OAEG;IACH,SAAS,CAAC,KAAK,EAAE,YAAY,GAAG;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE;IAMjE;;OAEG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAclC,OAAO,CAAC,QAAQ;IAWhB,OAAO,CAAC,OAAO;IA+Bf,OAAO,CAAC,iBAAiB;IA4BzB,OAAO,CAAC,cAAc;IAiBtB,OAAO,CAAC,iBAAiB;IAoBzB,OAAO,CAAC,UAAU;IAsBlB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,cAAc;IA+BtB,OAAO,CAAC,OAAO;IAiBf,OAAO,CAAC,YAAY;IAUpB,OAAO,CAAC,UAAU;CAOnB;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,SAAS,CAAC,GAAG,YAAY,CAEnE"}
@@ -0,0 +1,361 @@
1
+ /**
2
+ * Advantage Actor-Critic (A2C)
3
+ *
4
+ * Implements synchronous A2C algorithm with:
5
+ * - Shared actor-critic network
6
+ * - N-step returns
7
+ * - Entropy regularization
8
+ * - Advantage normalization
9
+ *
10
+ * Performance Target: <10ms per update step
11
+ */
12
/**
 * Default A2C configuration.
 *
 * Overrides passed to the A2CAlgorithm constructor are spread on top of these values.
 */
export const DEFAULT_A2C_CONFIG = {
    algorithm: 'a2c',
    // Base step size; applyGradients divides it by the batch size.
    learningRate: 0.0007,
    // Discount factor used for returns and GAE deltas.
    gamma: 0.99,
    // Weight of the entropy bonus in the policy objective.
    entropyCoef: 0.01,
    // Weight of the value-error signal in the value and shared-layer gradients.
    valueLossCoef: 0.5,
    // Per-element clamp applied to each accumulated gradient entry.
    maxGradNorm: 0.5,
    // NOTE(review): epochs and miniBatchSize are not read by this class.
    epochs: 1,
    miniBatchSize: 32,
    // Minimum number of buffered experiences before update() trains.
    nSteps: 5,
    // Use Generalized Advantage Estimation instead of `return - value`.
    useGAE: true,
    // Lambda factor for GAE.
    gaeLambda: 0.95,
};

/**
 * Shannon entropy (natural log) of a probability vector; zero entries are skipped.
 */
function shannonEntropy(probs) {
    let entropy = 0;
    for (const p of probs) {
        if (p > 0)
            entropy -= p * Math.log(p);
    }
    return entropy;
}

/**
 * Shift to zero mean and scale to unit (population) standard deviation.
 * The 1e-8 added to the deviation prevents division by zero.
 */
function normalizeAdvantages(advantages) {
    const count = advantages.length;
    const mean = advantages.reduce((a, b) => a + b, 0) / count;
    const std = Math.sqrt(advantages.reduce((a, b) => a + (b - mean) ** 2, 0) / count) + 1e-8;
    return advantages.map(a => (a - mean) / std);
}

/**
 * A2C Algorithm Implementation
 *
 * A single ReLU hidden layer is shared by a softmax policy head and a linear
 * value head. Experiences are buffered by addExperience() and consumed in one
 * batch by update(), which minimises
 *   -advantage * log pi(action) + valueLossCoef/2 * (value - return)^2 - entropyCoef * H(pi)
 * with a momentum step.
 */
export class A2CAlgorithm {
    config;
    // Shared network weights
    sharedWeights;
    policyHead;
    valueHead;
    // Optimizer state
    sharedMomentum;
    policyMomentum;
    valueMomentum;
    // Experience buffer for n-step
    buffer = [];
    // Dimensions
    inputDim = 768;
    hiddenDim = 64;
    numActions = 4;
    // Statistics
    updateCount = 0;
    avgPolicyLoss = 0;
    avgValueLoss = 0;
    avgEntropy = 0;

    /**
     * @param config Optional overrides merged over DEFAULT_A2C_CONFIG.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_A2C_CONFIG, ...config };
        // Shared layer uses a scale derived from the input width; heads use a small fixed range.
        const scale = Math.sqrt(2 / this.inputDim);
        this.sharedWeights = new Float32Array(this.inputDim * this.hiddenDim);
        this.policyHead = new Float32Array(this.hiddenDim * this.numActions);
        this.valueHead = new Float32Array(this.hiddenDim);
        for (let i = 0; i < this.sharedWeights.length; i++) {
            this.sharedWeights[i] = (Math.random() - 0.5) * scale;
        }
        for (let i = 0; i < this.policyHead.length; i++) {
            this.policyHead[i] = (Math.random() - 0.5) * 0.1;
        }
        for (let i = 0; i < this.valueHead.length; i++) {
            this.valueHead[i] = (Math.random() - 0.5) * 0.1;
        }
        // Momentum buffers start at zero.
        this.sharedMomentum = new Float32Array(this.sharedWeights.length);
        this.policyMomentum = new Float32Array(this.policyHead.length);
        this.valueMomentum = new Float32Array(this.valueHead.length);
    }

    /**
     * Add experience from trajectory.
     *
     * Each step is stored with its `stateAfter`, the action index derived from
     * hashing `step.action`, the reward, and the current value / log-probability /
     * entropy estimates for that state.
     * NOTE(review): both the stored state and its value come from `stateAfter`;
     * confirm against the Trajectory schema that this is the intended state.
     *
     * @param trajectory Object exposing a `steps` iterable.
     */
    addExperience(trajectory) {
        for (const step of trajectory.steps) {
            const { probs, value, entropy } = this.evaluate(step.stateAfter);
            const action = this.hashAction(step.action);
            this.buffer.push({
                state: step.stateAfter,
                action,
                reward: step.reward,
                value,
                logProb: Math.log(probs[action] + 1e-8),
                entropy,
            });
        }
    }

    /**
     * Perform A2C update
     * Target: <10ms
     *
     * Does nothing (returns zeros) while the buffer is empty or holds fewer
     * than `config.nSteps` experiences. Otherwise processes the whole buffer,
     * applies one gradient step, clears the buffer and records the batch means.
     *
     * @returns Mean policy loss, value loss and entropy over the processed batch.
     */
    update() {
        const startTime = performance.now();
        // Capture the size now: the buffer is cleared before the averages are computed.
        const batchSize = this.buffer.length;
        // An empty batch would make the step size learningRate / 0 and turn weights into NaN.
        if (batchSize === 0 || batchSize < this.config.nSteps) {
            return { policyLoss: 0, valueLoss: 0, entropy: 0 };
        }
        // Compute returns and advantages
        const returns = this.computeReturns();
        const advantages = this.computeAdvantages(returns);
        // Initialize gradients
        const sharedGrad = new Float32Array(this.sharedWeights.length);
        const policyGrad = new Float32Array(this.policyHead.length);
        const valueGrad = new Float32Array(this.valueHead.length);
        let totalPolicyLoss = 0;
        let totalValueLoss = 0;
        let totalEntropy = 0;
        for (let i = 0; i < batchSize; i++) {
            const exp = this.buffer[i];
            const advantage = advantages[i];
            const target = returns[i];
            // Re-evaluate with the current weights so gradients match the current policy.
            const { probs, value, hidden } = this.forwardWithHidden(exp.state);
            const valueError = value - target;
            totalPolicyLoss += -Math.log(probs[exp.action] + 1e-8) * advantage;
            totalValueLoss += valueError ** 2;
            totalEntropy += shannonEntropy(probs);
            this.accumulateGradients(sharedGrad, policyGrad, valueGrad, exp.state, hidden, exp.action, advantage, valueError, probs);
        }
        this.applyGradients(sharedGrad, policyGrad, valueGrad, batchSize);
        // Clear buffer
        this.buffer = [];
        this.updateCount++;
        this.avgPolicyLoss = totalPolicyLoss / batchSize;
        this.avgValueLoss = totalValueLoss / batchSize;
        this.avgEntropy = totalEntropy / batchSize;
        const elapsed = performance.now() - startTime;
        if (elapsed > 10) {
            console.warn(`A2C update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
        }
        return {
            policyLoss: this.avgPolicyLoss,
            valueLoss: this.avgValueLoss,
            entropy: this.avgEntropy,
        };
    }

    /**
     * Get action from policy
     *
     * @param state Input features; only the first `inputDim` entries are read.
     * @returns Sampled action index and the value estimate for the state.
     */
    getAction(state) {
        const { probs, value } = this.evaluate(state);
        const action = this.sampleAction(probs);
        return { action, value };
    }

    /**
     * Get statistics
     *
     * @returns Update count, current buffer size and the means from the last update.
     */
    getStats() {
        return {
            updateCount: this.updateCount,
            bufferSize: this.buffer.length,
            avgPolicyLoss: this.avgPolicyLoss,
            avgValueLoss: this.avgValueLoss,
            avgEntropy: this.avgEntropy,
        };
    }

    // ==========================================================================
    // Private Methods
    // ==========================================================================

    /** Forward pass plus the entropy of the resulting action distribution. */
    evaluate(state) {
        const { probs, value } = this.forward(state);
        return { probs, value, entropy: shannonEntropy(probs) };
    }

    /** Forward pass returning action probabilities and the state value. */
    forward(state) {
        const { probs, value } = this.forwardWithHidden(state);
        return { probs, value };
    }

    /**
     * Forward pass that also returns the hidden activations needed for backprop.
     * Inputs longer than `inputDim` are truncated; shorter inputs use only the
     * weights for the entries that are present.
     */
    forwardWithHidden(state) {
        const inputs = Math.min(state.length, this.inputDim);
        // Shared hidden layer
        const hidden = new Float32Array(this.hiddenDim);
        for (let h = 0; h < this.hiddenDim; h++) {
            let sum = 0;
            for (let i = 0; i < inputs; i++) {
                sum += state[i] * this.sharedWeights[i * this.hiddenDim + h];
            }
            hidden[h] = Math.max(0, sum); // ReLU
        }
        // Policy head
        const logits = new Float32Array(this.numActions);
        for (let a = 0; a < this.numActions; a++) {
            let sum = 0;
            for (let h = 0; h < this.hiddenDim; h++) {
                sum += hidden[h] * this.policyHead[h * this.numActions + a];
            }
            logits[a] = sum;
        }
        const probs = this.softmax(logits);
        // Value head
        let value = 0;
        for (let h = 0; h < this.hiddenDim; h++) {
            value += hidden[h] * this.valueHead[h];
        }
        return { probs, value, hidden };
    }

    /**
     * Discounted returns for every buffered step, accumulated backwards and
     * bootstrapped from the stored value of the last buffered experience.
     */
    computeReturns() {
        const returns = new Array(this.buffer.length).fill(0);
        let cumReturn = 0;
        if (this.buffer.length > 0) {
            cumReturn = this.buffer[this.buffer.length - 1].value;
        }
        for (let t = this.buffer.length - 1; t >= 0; t--) {
            cumReturn = this.buffer[t].reward + this.config.gamma * cumReturn;
            returns[t] = cumReturn;
        }
        return returns;
    }

    /** Normalised advantages: GAE when `config.useGAE` is set, otherwise `return - value`. */
    computeAdvantages(returns) {
        if (this.config.useGAE) {
            return this.computeGAE();
        }
        const advantages = new Array(this.buffer.length).fill(0);
        for (let i = 0; i < this.buffer.length; i++) {
            advantages[i] = returns[i] - this.buffer[i].value;
        }
        return normalizeAdvantages(advantages);
    }

    /**
     * Normalised Generalized Advantage Estimates over the buffer.
     * The step after the last buffered experience is given a value of 0.
     */
    computeGAE() {
        const advantages = new Array(this.buffer.length).fill(0);
        let lastGae = 0;
        for (let t = this.buffer.length - 1; t >= 0; t--) {
            const nextValue = t < this.buffer.length - 1
                ? this.buffer[t + 1].value
                : 0;
            const delta = this.buffer[t].reward + this.config.gamma * nextValue - this.buffer[t].value;
            lastGae = delta + this.config.gamma * this.config.gaeLambda * lastGae;
            advantages[t] = lastGae;
        }
        return normalizeAdvantages(advantages);
    }

    /**
     * Add one sample's loss gradient to the three accumulators.
     *
     * The gradient with respect to logit j is
     *   advantage * (p_j - [j == action]) + entropyCoef * p_j * (log p_j + H)
     * i.e. the softmax policy-gradient term plus the derivative of the
     * `-entropyCoef * H` bonus. Stepping against it raises the probability of
     * actions with positive advantage.
     */
    accumulateGradients(sharedGrad, policyGrad, valueGrad, state, hidden, action, advantage, valueError, probs) {
        const actions = this.numActions;
        const entropy = shannonEntropy(probs);
        const logitGrad = new Float64Array(actions);
        for (let a = 0; a < actions; a++) {
            const p = probs[a];
            let grad = advantage * (p - (a === action ? 1 : 0));
            if (p > 0) {
                grad += this.config.entropyCoef * p * (Math.log(p) + entropy);
            }
            logitGrad[a] = grad;
        }
        const valueSignalScale = valueError * this.config.valueLossCoef;
        const inputs = Math.min(state.length, this.inputDim);
        for (let h = 0; h < this.hiddenDim; h++) {
            const activation = hidden[h];
            // Head gradients
            for (let a = 0; a < actions; a++) {
                policyGrad[h * actions + a] += activation * logitGrad[a];
            }
            valueGrad[h] += activation * valueSignalScale;
            // Shared layer gradient (backprop through both heads); ReLU blocks inactive units.
            if (activation > 0) {
                let totalSignal = valueSignalScale * this.valueHead[h];
                for (let a = 0; a < actions; a++) {
                    totalSignal += logitGrad[a] * this.policyHead[h * actions + a];
                }
                for (let i = 0; i < inputs; i++) {
                    sharedGrad[i * this.hiddenDim + h] += state[i] * totalSignal;
                }
            }
        }
    }

    /**
     * Momentum step on all three weight arrays.
     * Each accumulated gradient entry is clamped to +/- `maxGradNorm` before the
     * momentum update; the step size is `learningRate / batchSize`.
     */
    applyGradients(sharedGrad, policyGrad, valueGrad, batchSize) {
        const lr = this.config.learningRate / batchSize;
        const beta = 0.9;
        const clip = this.config.maxGradNorm;
        const step = (weights, momentum, grads) => {
            for (let i = 0; i < weights.length; i++) {
                const grad = Math.max(Math.min(grads[i], clip), -clip);
                momentum[i] = beta * momentum[i] + (1 - beta) * grad;
                weights[i] -= lr * momentum[i];
            }
        };
        step(this.sharedWeights, this.sharedMomentum, sharedGrad);
        step(this.policyHead, this.policyMomentum, policyGrad);
        step(this.valueHead, this.valueMomentum, valueGrad);
    }

    /** Softmax with the maximum logit subtracted before exponentiation. */
    softmax(logits) {
        let max = -Infinity;
        for (let i = 0; i < logits.length; i++) {
            if (logits[i] > max)
                max = logits[i];
        }
        const exps = new Float32Array(logits.length);
        let sum = 0;
        for (let i = 0; i < logits.length; i++) {
            exps[i] = Math.exp(logits[i] - max);
            sum += exps[i];
        }
        for (let i = 0; i < exps.length; i++) {
            exps[i] /= sum;
        }
        return exps;
    }

    /** Draw an index from `probs`; falls back to the last index if rounding leaves a gap. */
    sampleAction(probs) {
        const r = Math.random();
        let cumSum = 0;
        for (let i = 0; i < probs.length; i++) {
            cumSum += probs[i];
            if (r < cumSum)
                return i;
        }
        return probs.length - 1;
    }

    /** Map an action string onto an index in [0, numActions) with a base-31 rolling hash. */
    hashAction(action) {
        let hash = 0;
        for (let i = 0; i < action.length; i++) {
            hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
        }
        return hash;
    }
}
355
/**
 * Factory function: builds an A2CAlgorithm.
 *
 * @param config Optional overrides handed straight to the A2CAlgorithm constructor.
 * @returns The newly constructed algorithm instance.
 */
export function createA2C(config) {
    const algorithm = new A2CAlgorithm(config);
    return algorithm;
}
361
+ //# sourceMappingURL=a2c.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"a2c.js","sourceRoot":"","sources":["../../src/algorithms/a2c.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAkBH;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAc;IAC3C,SAAS,EAAE,KAAK;IAChB,YAAY,EAAE,MAAM;IACpB,KAAK,EAAE,IAAI;IACX,WAAW,EAAE,IAAI;IACjB,aAAa,EAAE,GAAG;IAClB,WAAW,EAAE,GAAG;IAChB,MAAM,EAAE,CAAC;IACT,aAAa,EAAE,EAAE;IACjB,MAAM,EAAE,CAAC;IACT,MAAM,EAAE,IAAI;IACZ,SAAS,EAAE,IAAI;CAChB,CAAC;AAcF;;GAEG;AACH,MAAM,OAAO,YAAY;IACf,MAAM,CAAY;IAE1B,yBAAyB;IACjB,aAAa,CAAe;IAC5B,UAAU,CAAe;IACzB,SAAS,CAAe;IAEhC,kBAAkB;IACV,cAAc,CAAe;IAC7B,cAAc,CAAe;IAC7B,aAAa,CAAe;IAEpC,+BAA+B;IACvB,MAAM,GAAoB,EAAE,CAAC;IAErC,aAAa;IACL,QAAQ,GAAG,GAAG,CAAC;IACf,SAAS,GAAG,EAAE,CAAC;IACf,UAAU,GAAG,CAAC,CAAC;IAEvB,aAAa;IACL,WAAW,GAAG,CAAC,CAAC;IAChB,aAAa,GAAG,CAAC,CAAC;IAClB,YAAY,GAAG,CAAC,CAAC;IACjB,UAAU,GAAG,CAAC,CAAC;IAEvB,YAAY,SAA6B,EAAE;QACzC,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,kBAAkB,EAAE,GAAG,MAAM,EAAE,CAAC;QAEnD,qBAAqB;QACrB,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,aAAa,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC;QACtE,IAAI,CAAC,UAAU,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC;QACrE,IAAI,CAAC,SAAS,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAElD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,KAAK,CAAC;QACxD,CAAC;QACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;QACnD,CAAC;QACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;QAClD,CAAC;QAED,sBAAsB;QACtB,IAAI,CAAC,cAAc,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QAClE,IAAI,CAAC,cAAc,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CA
AC,CAAC;QAC/D,IAAI,CAAC,aAAa,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAC/D,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,UAAsB;QAClC,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,KAAK,EAAE,CAAC;YACpC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACjE,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAE5C,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC;gBACf,KAAK,EAAE,IAAI,CAAC,UAAU;gBACtB,MAAM;gBACN,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK;gBACL,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;gBACvC,OAAO;aACR,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,MAAM;QACJ,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAC5C,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,CAAC;QACrD,CAAC;QAED,iCAAiC;QACjC,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;QACtC,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAEnD,uBAAuB;QACvB,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;QAC/D,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAE1D,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,IAAI,cAAc,GAAG,CAAC,CAAC;QACvB,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,0BAA0B;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAE3B,+BAA+B;YAC/B,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YACnE,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC;YAEnD,cAAc;YACd,MAAM,UAAU,GAAG,CAAC,OAAO,GAAG,SAAS,CAAC;YACxC,eAAe,IAAI,UAAU,CAAC;YAE9B,aAAa;YACb,MAAM,SAAS,GAAG,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;YACzC,cAAc,IAAI,SAAS,CAAC;YAE5B,UAAU;YACV,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,IAAI,CAAC,GAAG,CAAC;oBA
AE,OAAO,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACxC,CAAC;YACD,YAAY,IAAI,OAAO,CAAC;YAExB,uBAAuB;YACvB,IAAI,CAAC,mBAAmB,CACtB,UAAU,EAAE,UAAU,EAAE,SAAS,EACjC,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAC7B,SAAS,EAAE,KAAK,GAAG,OAAO,CAC3B,CAAC;QACJ,CAAC;QAED,uCAAuC;QACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,UAAU,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,GAAG,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;QAC/E,CAAC;QAED,kBAAkB;QAClB,IAAI,CAAC,cAAc,CAAC,UAAU,EAAE,UAAU,EAAE,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAE3E,eAAe;QACf,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,WAAW,EAAE,CAAC;QAEnB,IAAI,CAAC,aAAa,GAAG,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC;QAC/D,IAAI,CAAC,YAAY,GAAG,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC;QAC7D,IAAI,CAAC,UAAU,GAAG,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC;QAEzD,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,EAAE,CAAC;YACjB,OAAO,CAAC,IAAI,CAAC,+BAA+B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;QAC7E,CAAC;QAED,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,aAAa;YAC9B,SAAS,EAAE,IAAI,CAAC,YAAY;YAC5B,OAAO,EAAE,IAAI,CAAC,UAAU;SACzB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,KAAmB;QAC3B,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;QACxC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO;YACL,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;YAC9B,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC;IACJ,CAAC;IAED,6EAA6E;IAC7E,kBAAkB;IAClB,6EAA6E;IAErE,QAAQ,CAAC,KAAmB;QAClC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAE7C,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACxC,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;IACnC,CAAC;IAEO,OAAO,CAAC,
KAAmB;QACjC,sBAAsB;QACtB,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,IAAI,GAAG,GAAG,CAAC,CAAC;YACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/D,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,OAAO;QACvC,CAAC;QAED,cAAc;QACd,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,IAAI,GAAG,GAAG,CAAC,CAAC;YACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;YAC9D,CAAC;YACD,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QAClB,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAEnC,aAAa;QACb,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,KAAK,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;IAC1B,CAAC;IAEO,iBAAiB,CAAC,KAAmB;QAC3C,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,IAAI,GAAG,GAAG,CAAC,CAAC;YACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/D,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YAC/D,CAAC;YACD,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,IAAI,GAAG,GA
AG,CAAC,CAAC;YACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;YAC9D,CAAC;YACD,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC;QAClB,CAAC;QACD,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAEnC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,KAAK,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAClC,CAAC;IAEO,cAAc;QACpB,MAAM,OAAO,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtD,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,4CAA4C;QAC5C,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3B,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC;QACxD,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,SAAS,CAAC;YAClE,OAAO,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;QACzB,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,iBAAiB,CAAC,OAAiB;QACzC,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC;QAC3B,CAAC;QAED,mCAAmC;QACnC,MAAM,UAAU,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QACpD,CAAC;QAED,YAAY;QACZ,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;QACvE,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CACnB,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CACxE,GAAG,IAAI,CAAC;QAET,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CA
AC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;IAC/C,CAAC;IAEO,UAAU;QAChB,MAAM,UAAU,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzD,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC;gBAC1C,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK;gBAC1B,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;YAC3F,OAAO,GAAG,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,OAAO,CAAC;YACtE,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC;QAC1B,CAAC;QAED,YAAY;QACZ,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;QACvE,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CACnB,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CACxE,GAAG,IAAI,CAAC;QAET,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;IAC/C,CAAC;IAEO,mBAAmB,CACzB,UAAwB,EACxB,UAAwB,EACxB,SAAuB,EACvB,KAAmB,EACnB,MAAoB,EACpB,MAAc,EACd,SAAiB,EACjB,UAAkB;QAElB,kBAAkB;QAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC;QACpE,CAAC;QAED,iBAAiB;QACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,SAAS,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;QACrE,CAAC;QAED,sDAAsD;QACtD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,gBAAgB;gBACnC,MAAM,YAAY,GAAG,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,CAAC;gBAC/E,MAAM,WAAW,GAAG
,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;gBAC/E,MAAM,WAAW,GAAG,YAAY,GAAG,WAAW,CAAC;gBAE/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC/D,UAAU,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC;gBAC/D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAEO,cAAc,CACpB,UAAwB,EACxB,UAAwB,EACxB,SAAuB,EACvB,SAAiB;QAEjB,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,GAAG,SAAS,CAAC;QAChD,MAAM,IAAI,GAAG,GAAG,CAAC;QAEjB,0BAA0B;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YAClG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YAC3E,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC;QAED,uBAAuB;QACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YAClG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YAC3E,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;QACpD,CAAC;QAED,sBAAsB;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YACjG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YACzE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,
CAAC;QAClD,CAAC;IACH,CAAC;IAEO,OAAO,CAAC,MAAoB;QAClC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;QAChC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAC7C,IAAI,GAAG,GAAG,CAAC,CAAC;QAEZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC;YACpC,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;QACjB,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,YAAY,CAAC,KAAmB;QACtC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACxB,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,GAAG,MAAM;gBAAE,OAAO,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;IAC1B,CAAC;IAEO,UAAU,CAAC,MAAc;QAC/B,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,IAAI,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC;QAC9D,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,MAA2B;IACnD,OAAO,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;AAClC,CAAC"}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Curiosity-Driven Exploration
3
+ *
4
+ * Implements intrinsic motivation for exploration:
5
+ * - Intrinsic Curiosity Module (ICM)
6
+ * - Random Network Distillation (RND)
7
+ * - Forward and inverse dynamics models
8
+ * - Exploration bonus generation
9
+ *
10
+ * Performance Target: <5ms per forward pass
11
+ */
12
+ import type { CuriosityConfig, Trajectory } from '../types.js';
13
+ /**
14
+ * Default Curiosity configuration
15
+ */
16
+ export declare const DEFAULT_CURIOSITY_CONFIG: CuriosityConfig;
17
+ /**
18
+ * Curiosity-Driven Exploration Module
19
+ */
20
+ export declare class CuriosityModule {
21
+ private config;
22
+ private featureEncoder;
23
+ private forwardModel;
24
+ private inverseModel;
25
+ private rndTarget;
26
+ private rndPredictor;
27
+ private forwardMomentum;
28
+ private inverseMomentum;
29
+ private rndMomentum;
30
+ private stateDim;
31
+ private numActions;
32
+ private intrinsicMean;
33
+ private intrinsicVar;
34
+ private updateCount;
35
+ private avgForwardLoss;
36
+ private avgInverseLoss;
37
+ private avgIntrinsicReward;
38
+ constructor(config?: Partial<CuriosityConfig>);
39
+ /**
40
+ * Compute intrinsic reward for a transition
41
+ */
42
+ computeIntrinsicReward(state: Float32Array, action: string, nextState: Float32Array): number;
43
+ /**
44
+ * Compute ICM-based intrinsic reward (prediction error)
45
+ */
46
+ computeICMReward(state: Float32Array, action: string, nextState: Float32Array): number;
47
+ /**
48
+ * Compute RND-based intrinsic reward
49
+ */
50
+ computeRNDReward(state: Float32Array): number;
51
+ /**
52
+ * Update curiosity models from trajectory
53
+ */
54
+ update(trajectory: Trajectory): {
55
+ forwardLoss: number;
56
+ inverseLoss: number;
57
+ };
58
+ /**
59
+ * Add intrinsic rewards to trajectory
60
+ */
61
+ augmentTrajectory(trajectory: Trajectory): Trajectory;
62
+ /**
63
+ * Get statistics
64
+ */
65
+ getStats(): Record<string, number>;
66
+ private initWeight;
67
+ private encodeState;
68
+ private forwardPredict;
69
+ private inversePredict;
70
+ private rndForward;
71
+ private updateForwardModel;
72
+ private updateInverseModel;
73
+ private updateRNDPredictor;
74
+ private normalizeIntrinsic;
75
+ private softmax;
76
+ private hashAction;
77
+ }
78
+ /**
79
+ * Factory function
80
+ */
81
+ export declare function createCuriosity(config?: Partial<CuriosityConfig>): CuriosityModule;
82
+ //# sourceMappingURL=curiosity.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"curiosity.d.ts","sourceRoot":"","sources":["../../src/algorithms/curiosity.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAkB,MAAM,aAAa,CAAC;AAE/E;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,eActC,CAAC;AAEF;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAkB;IAGhC,OAAO,CAAC,cAAc,CAAe;IAGrC,OAAO,CAAC,YAAY,CAAe;IAGnC,OAAO,CAAC,YAAY,CAAe;IAGnC,OAAO,CAAC,SAAS,CAAe;IAChC,OAAO,CAAC,YAAY,CAAe;IAGnC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,WAAW,CAAe;IAGlC,OAAO,CAAC,QAAQ,CAAO;IACvB,OAAO,CAAC,UAAU,CAAK;IAGvB,OAAO,CAAC,aAAa,CAAK;IAC1B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,WAAW,CAAK;IAGxB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,kBAAkB,CAAK;gBAEnB,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAwBjD;;OAEG;IACH,sBAAsB,CACpB,KAAK,EAAE,YAAY,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,YAAY,GACtB,MAAM;IAQT;;OAEG;IACH,gBAAgB,CACd,KAAK,EAAE,YAAY,EACnB,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,YAAY,GACtB,MAAM;IA4BT;;OAEG;IACH,gBAAgB,CAAC,KAAK,EAAE,YAAY,GAAG,MAAM;IA0B7C;;OAEG;IACH,MAAM,CAAC,UAAU,EAAE,UAAU,GAAG;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAE;IAkD5E;;OAEG;IACH,iBAAiB,CAAC,UAAU,EAAE,UAAU,GAAG,UAAU;IAuBrD;;OAEG;IACH,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;IAelC,OAAO,CAAC,UAAU;IASlB,OAAO,CAAC,WAAW;IAenB,OAAO,CAAC,cAAc;IAsBtB,OAAO,CAAC,cAAc;IAwBtB,OAAO,CAAC,UAAU;IAelB,OAAO,CAAC,kBAAkB;IAwC1B,OAAO,CAAC,kBAAkB;IAsC1B,OAAO,CAAC,kBAAkB;IA8B1B,OAAO,CAAC,kBAAkB;IAa1B,OAAO,CAAC,OAAO;IAiBf,OAAO,CAAC,UAAU;CAOnB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,GAAG,eAAe,CAElF"}