agentic-qe 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CHANGELOG.md +123 -0
  2. package/README.md +1 -1
  3. package/dist/agents/index.d.ts.map +1 -1
  4. package/dist/agents/index.js +5 -1
  5. package/dist/agents/index.js.map +1 -1
  6. package/dist/core/di/AgentDependencies.d.ts +127 -0
  7. package/dist/core/di/AgentDependencies.d.ts.map +1 -0
  8. package/dist/core/di/AgentDependencies.js +251 -0
  9. package/dist/core/di/AgentDependencies.js.map +1 -0
  10. package/dist/core/di/DIContainer.d.ts +149 -0
  11. package/dist/core/di/DIContainer.d.ts.map +1 -0
  12. package/dist/core/di/DIContainer.js +333 -0
  13. package/dist/core/di/DIContainer.js.map +1 -0
  14. package/dist/core/di/index.d.ts +11 -0
  15. package/dist/core/di/index.d.ts.map +1 -0
  16. package/dist/core/di/index.js +22 -0
  17. package/dist/core/di/index.js.map +1 -0
  18. package/dist/core/index.d.ts +1 -0
  19. package/dist/core/index.d.ts.map +1 -1
  20. package/dist/core/index.js +11 -1
  21. package/dist/core/index.js.map +1 -1
  22. package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
  23. package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
  24. package/dist/learning/ExperienceSharingProtocol.js +538 -0
  25. package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
  26. package/dist/learning/LearningEngine.d.ts +101 -1
  27. package/dist/learning/LearningEngine.d.ts.map +1 -1
  28. package/dist/learning/LearningEngine.js +330 -3
  29. package/dist/learning/LearningEngine.js.map +1 -1
  30. package/dist/learning/QLearning.d.ts +38 -125
  31. package/dist/learning/QLearning.d.ts.map +1 -1
  32. package/dist/learning/QLearning.js +46 -267
  33. package/dist/learning/QLearning.js.map +1 -1
  34. package/dist/learning/QLearningLegacy.d.ts +154 -0
  35. package/dist/learning/QLearningLegacy.d.ts.map +1 -0
  36. package/dist/learning/QLearningLegacy.js +337 -0
  37. package/dist/learning/QLearningLegacy.js.map +1 -0
  38. package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
  39. package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
  40. package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
  41. package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
  42. package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
  43. package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
  44. package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
  45. package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
  46. package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
  47. package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
  48. package/dist/learning/algorithms/PPOLearner.js +490 -0
  49. package/dist/learning/algorithms/PPOLearner.js.map +1 -0
  50. package/dist/learning/algorithms/QLearning.d.ts +68 -0
  51. package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
  52. package/dist/learning/algorithms/QLearning.js +116 -0
  53. package/dist/learning/algorithms/QLearning.js.map +1 -0
  54. package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
  55. package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
  56. package/dist/learning/algorithms/SARSALearner.js +252 -0
  57. package/dist/learning/algorithms/SARSALearner.js.map +1 -0
  58. package/dist/learning/algorithms/index.d.ts +29 -0
  59. package/dist/learning/algorithms/index.d.ts.map +1 -0
  60. package/dist/learning/algorithms/index.js +44 -0
  61. package/dist/learning/algorithms/index.js.map +1 -0
  62. package/dist/learning/index.d.ts +3 -0
  63. package/dist/learning/index.d.ts.map +1 -1
  64. package/dist/learning/index.js +15 -1
  65. package/dist/learning/index.js.map +1 -1
  66. package/dist/learning/types.d.ts +2 -0
  67. package/dist/learning/types.d.ts.map +1 -1
  68. package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
  69. package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
  70. package/dist/memory/DistributedPatternLibrary.js +370 -0
  71. package/dist/memory/DistributedPatternLibrary.js.map +1 -0
  72. package/dist/memory/PatternQualityScorer.d.ts +169 -0
  73. package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
  74. package/dist/memory/PatternQualityScorer.js +327 -0
  75. package/dist/memory/PatternQualityScorer.js.map +1 -0
  76. package/dist/memory/PatternReplicationService.d.ts +187 -0
  77. package/dist/memory/PatternReplicationService.d.ts.map +1 -0
  78. package/dist/memory/PatternReplicationService.js +392 -0
  79. package/dist/memory/PatternReplicationService.js.map +1 -0
  80. package/dist/providers/ClaudeProvider.d.ts +98 -0
  81. package/dist/providers/ClaudeProvider.d.ts.map +1 -0
  82. package/dist/providers/ClaudeProvider.js +418 -0
  83. package/dist/providers/ClaudeProvider.js.map +1 -0
  84. package/dist/providers/ILLMProvider.d.ts +287 -0
  85. package/dist/providers/ILLMProvider.d.ts.map +1 -0
  86. package/dist/providers/ILLMProvider.js +33 -0
  87. package/dist/providers/ILLMProvider.js.map +1 -0
  88. package/dist/providers/LLMProviderFactory.d.ts +154 -0
  89. package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
  90. package/dist/providers/LLMProviderFactory.js +426 -0
  91. package/dist/providers/LLMProviderFactory.js.map +1 -0
  92. package/dist/providers/RuvllmProvider.d.ts +107 -0
  93. package/dist/providers/RuvllmProvider.d.ts.map +1 -0
  94. package/dist/providers/RuvllmProvider.js +417 -0
  95. package/dist/providers/RuvllmProvider.js.map +1 -0
  96. package/dist/providers/index.d.ts +31 -0
  97. package/dist/providers/index.d.ts.map +1 -0
  98. package/dist/providers/index.js +69 -0
  99. package/dist/providers/index.js.map +1 -0
  100. package/package.json +1 -1
package/dist/learning/QLearning.d.ts

@@ -1,154 +1,67 @@
  /**
- * QLearning - Phase 2 (Milestone 2.2)
+ * QLearning - Off-policy TD(0) Reinforcement Learning
  *
  * Implements standard Q-learning algorithm for reinforcement learning.
- * Provides epsilon-greedy policy, Q-table updates, and value function estimation.
+ * Key differences from SARSA:
+ * - Off-policy: learns optimal Q-values regardless of policy being followed
+ * - Uses max Q-value for next state, not actual next action
+ * - Update rule: Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ * - More aggressive than SARSA, finds optimal policy faster
  */
- import { TaskState, AgentAction, TaskExperience } from './types';
+ import { AbstractRLLearner, RLConfig } from './algorithms/AbstractRLLearner';
+ import { TaskExperience, AgentAction } from './types';
  /**
- * Q-learning algorithm configuration
+ * Q-learning configuration (same as base RL config)
  */
- export interface QLearningConfig {
-     learningRate: number;
-     discountFactor: number;
-     explorationRate: number;
-     explorationDecay: number;
-     minExplorationRate: number;
-     useExperienceReplay: boolean;
-     replayBufferSize: number;
-     batchSize: number;
- }
- /**
-  * Q-learning action-value pair
-  */
- interface QValue {
-     state: string;
-     action: string;
-     value: number;
-     updateCount: number;
-     lastUpdated: number;
- }
+ export type QLearningConfig = RLConfig;
  /**
  * QLearning - Standard Q-learning implementation
  *
  * Implements the classic Q-learning algorithm with:
  * - Epsilon-greedy exploration policy
- * - Temporal difference (TD) learning
+ * - Off-policy temporal difference (TD) learning
  * - Q-table for state-action values
  * - Optional experience replay for stability
+ *
+ * Update Rule:
+ * Q(s,a) ← Q(s,a) + α[r + γ·max_a'(Q(s',a')) - Q(s,a)]
+ *
+ * Key characteristics:
+ * - Off-policy: learns about optimal policy while following exploration policy
+ * - Uses max Q-value (greedy) for bootstrapping
+ * - Converges to optimal Q* under certain conditions
+ * - More sample-efficient than on-policy methods
  */
- export declare class QLearning {
-     private readonly logger;
-     private config;
-     private qTable;
-     private replayBuffer?;
-     private stepCount;
-     private episodeCount;
-     constructor(config?: Partial<QLearningConfig>);
-     /**
-      * Select action using epsilon-greedy policy
-      * With probability ε, select random action (exploration)
-      * Otherwise, select action with highest Q-value (exploitation)
-      */
-     selectAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
-     /**
-      * Get best action based on current Q-values
-      */
-     getBestAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
+ export declare class QLearning extends AbstractRLLearner {
+     private readonly defaultConfig;
+     constructor(config?: Partial<RLConfig>);
      /**
       * Update Q-value using Q-learning update rule
       * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+      *
+      * @param experience The transition experience (s, a, r, s')
+      * @param nextAction Ignored in Q-learning (uses max Q-value instead)
       */
-     update(experience: TaskExperience): void;
+     update(experience: TaskExperience, nextAction?: AgentAction): void;
      /**
-      * Perform batch update using experience replay
-      * Samples random batch from replay buffer and updates Q-values
+      * Get the default exploration rate for this algorithm
       */
-     batchUpdate(): void;
+     protected getDefaultExplorationRate(): number;
      /**
-      * Get Q-value for a state-action pair
+      * Get algorithm name
       */
-     getQValue(state: TaskState, action: AgentAction): number;
+     getAlgorithmName(): string;
      /**
-      * Get all Q-values for a state
+      * Get algorithm type (off-policy)
       */
-     getStateValues(state: TaskState): Map<string, number>;
+     getAlgorithmType(): 'on-policy' | 'off-policy';
      /**
-      * Get value of a state (max Q-value over all actions)
-      * V(s) = max_a Q(s,a)
+      * Get detailed statistics including Q-learning-specific metrics
       */
-     getStateValue(state: TaskState): number;
-     /**
-      * Decay exploration rate (epsilon)
-      * Called after each episode to gradually reduce exploration
-      */
-     decayExploration(): void;
-     /**
-      * Mark end of episode
-      */
-     endEpisode(): void;
-     /**
-      * Encode state to string key for Q-table
-      */
-     private encodeState;
-     /**
-      * Encode action to string key for Q-table
-      */
-     private encodeAction;
-     /**
-      * Get current exploration rate (epsilon)
-      */
-     getExplorationRate(): number;
-     /**
-      * Get total number of learning steps
-      */
-     getStepCount(): number;
-     /**
-      * Get total number of episodes
-      */
-     getEpisodeCount(): number;
-     /**
-      * Get Q-table size (number of state-action pairs)
-      */
-     getTableSize(): number;
-     /**
-      * Get statistics about learning progress
-      */
-     getStatistics(): {
-         steps: number;
-         episodes: number;
-         tableSize: number;
-         explorationRate: number;
-         avgQValue: number;
-         maxQValue: number;
-         minQValue: number;
+     getDetailedStatistics(): {
+         algorithm: string;
+         type: 'on-policy' | 'off-policy';
+         stats: ReturnType<AbstractRLLearner['getStatistics']>;
      };
-     /**
-      * Reset Q-table and learning state
-      */
-     reset(): void;
-     /**
-      * Export Q-table and state for persistence
-      */
-     export(): {
-         qTable: Record<string, Record<string, QValue>>;
-         config: QLearningConfig;
-         stepCount: number;
-         episodeCount: number;
-     };
-     /**
-      * Import Q-table and state from persistence
-      */
-     import(state: {
-         qTable: Record<string, Record<string, QValue>>;
-         config: QLearningConfig;
-         stepCount: number;
-         episodeCount: number;
-     }): void;
-     /**
-      * Get memory usage estimate in bytes
-      */
-     getMemoryUsage(): number;
  }
- export {};
  //# sourceMappingURL=QLearning.d.ts.map
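For orientation, a minimal usage sketch of the refactored API follows. It is illustrative only: the TaskState/AgentAction field shapes are inferred from the encodeState()/encodeAction() implementations in the QLearning.js diff below, the deep import path is taken from this release's file list, and selectAction()/endEpisode() are assumed to have moved onto AbstractRLLearner (their declarations are not shown in this diff).

// Minimal sketch under the assumptions stated above.
import { QLearning } from 'agentic-qe/dist/learning/QLearning';

const learner = new QLearning({ learningRate: 0.1, discountFactor: 0.95 });

// Field shapes inferred from encodeState()/encodeAction() in the JS diff.
const state = {
  taskComplexity: 0.7,
  requiredCapabilities: ['unit-test', 'coverage'],
  previousAttempts: 1,
  availableResources: 0.5,
  timeConstraint: 120000 // ms
};
const actions = [
  { strategy: 'parallel', parallelization: 0.8, retryPolicy: 'retry' },
  { strategy: 'serial', parallelization: 0.0, retryPolicy: 'none' }
];

// ε-greedy selection and episode bookkeeping are assumed to be inherited
// from AbstractRLLearner; update() ignores its optional second argument.
const action = learner.selectAction(state, actions);
learner.update({ state, action, reward: 1.0, nextState: state });
learner.endEpisode();

learner.getDetailedStatistics();
// → { algorithm: 'Q-Learning', type: 'off-policy', stats: { ... } }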
package/dist/learning/QLearning.d.ts.map

@@ -1 +1 @@
- (single-line generated sourcemap for the old QLearning.d.ts; mappings omitted)
+ (single-line generated sourcemap for the refactored QLearning.d.ts; mappings omitted)
package/dist/learning/QLearning.js

@@ -1,14 +1,17 @@
  "use strict";
  /**
- * QLearning - Phase 2 (Milestone 2.2)
+ * QLearning - Off-policy TD(0) Reinforcement Learning
  *
  * Implements standard Q-learning algorithm for reinforcement learning.
- * Provides epsilon-greedy policy, Q-table updates, and value function estimation.
+ * Key differences from SARSA:
+ * - Off-policy: learns optimal Q-values regardless of policy being followed
+ * - Uses max Q-value for next state, not actual next action
+ * - Update rule: Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ * - More aggressive than SARSA, finds optimal policy faster
  */
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.QLearning = void 0;
- const Logger_1 = require("../utils/Logger");
- const ExperienceReplayBuffer_1 = require("./ExperienceReplayBuffer");
+ const AbstractRLLearner_1 = require("./algorithms/AbstractRLLearner");
  /**
  * Default Q-learning configuration
  */
@@ -27,85 +30,42 @@ const DEFAULT_CONFIG = {
  *
  * Implements the classic Q-learning algorithm with:
  * - Epsilon-greedy exploration policy
- * - Temporal difference (TD) learning
+ * - Off-policy temporal difference (TD) learning
  * - Q-table for state-action values
  * - Optional experience replay for stability
+ *
+ * Update Rule:
+ * Q(s,a) ← Q(s,a) + α[r + γ·max_a'(Q(s',a')) - Q(s,a)]
+ *
+ * Key characteristics:
+ * - Off-policy: learns about optimal policy while following exploration policy
+ * - Uses max Q-value (greedy) for bootstrapping
+ * - Converges to optimal Q* under certain conditions
+ * - More sample-efficient than on-policy methods
  */
- class QLearning {
+ class QLearning extends AbstractRLLearner_1.AbstractRLLearner {
      constructor(config = {}) {
-         this.logger = Logger_1.Logger.getInstance();
-         this.config = { ...DEFAULT_CONFIG, ...config };
-         this.qTable = new Map();
-         this.stepCount = 0;
-         this.episodeCount = 0;
-         // Initialize experience replay buffer if enabled
-         if (this.config.useExperienceReplay) {
-             this.replayBuffer = new ExperienceReplayBuffer_1.ExperienceReplayBuffer({
-                 maxSize: this.config.replayBufferSize,
-                 minSize: this.config.batchSize,
-                 prioritized: false
-             });
-         }
-         this.logger.info('QLearning initialized', { config: this.config });
-     }
-     /**
-      * Select action using epsilon-greedy policy
-      * With probability ε, select random action (exploration)
-      * Otherwise, select action with highest Q-value (exploitation)
-      */
-     selectAction(state, availableActions) {
-         if (availableActions.length === 0) {
-             throw new Error('No available actions to select from');
-         }
-         // Exploration: random action
-         if (Math.random() < this.config.explorationRate) {
-             const randomIndex = Math.floor(Math.random() * availableActions.length);
-             return availableActions[randomIndex];
-         }
-         // Exploitation: best action based on Q-values
-         return this.getBestAction(state, availableActions);
-     }
-     /**
-      * Get best action based on current Q-values
-      */
-     getBestAction(state, availableActions) {
-         const stateKey = this.encodeState(state);
-         const stateActions = this.qTable.get(stateKey);
-         if (!stateActions || stateActions.size === 0) {
-             // No Q-values yet, return random action
-             const randomIndex = Math.floor(Math.random() * availableActions.length);
-             return availableActions[randomIndex];
-         }
-         // Find action with highest Q-value
-         let bestAction = availableActions[0];
-         let bestValue = -Infinity;
-         for (const action of availableActions) {
-             const actionKey = this.encodeAction(action);
-             const qValue = stateActions.get(actionKey);
-             if (qValue && qValue.value > bestValue) {
-                 bestValue = qValue.value;
-                 bestAction = action;
-             }
-         }
-         return bestAction;
+         const fullConfig = { ...DEFAULT_CONFIG, ...config };
+         super(fullConfig);
+         this.defaultConfig = fullConfig;
+         this.logger.info('QLearning initialized with off-policy TD(0)', { config: fullConfig });
      }
      /**
       * Update Q-value using Q-learning update rule
       * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+      *
+      * @param experience The transition experience (s, a, r, s')
+      * @param nextAction Ignored in Q-learning (uses max Q-value instead)
       */
-     update(experience) {
+     update(experience, nextAction) {
          const stateKey = this.encodeState(experience.state);
          const actionKey = this.encodeAction(experience.action);
          const nextStateKey = this.encodeState(experience.nextState);
-         // Get or initialize state-action map
-         if (!this.qTable.has(stateKey)) {
-             this.qTable.set(stateKey, new Map());
-         }
+         // Get current Q-value Q(s,a)
          const stateActions = this.qTable.get(stateKey);
-         // Get current Q-value
-         const currentQValue = stateActions.get(actionKey);
-         const currentQ = currentQValue?.value ?? 0;
-         // Get max Q-value for next state (for all possible actions)
+         const currentQ = stateActions?.get(actionKey)?.value ?? 0;
+         // Q-Learning: Get max Q-value for next state (greedy)
+         // This is the key difference from SARSA (which uses actual next action)
          const nextStateActions = this.qTable.get(nextStateKey);
          const maxNextQ = nextStateActions && nextStateActions.size > 0
              ? Math.max(...Array.from(nextStateActions.values()).map(qv => qv.value))
@@ -116,222 +76,41 @@ class QLearning {
          const tdError = tdTarget - currentQ;
          const newQ = currentQ + this.config.learningRate * tdError;
          // Update Q-value
-         stateActions.set(actionKey, {
-             state: stateKey,
-             action: actionKey,
-             value: newQ,
-             updateCount: (currentQValue?.updateCount ?? 0) + 1,
-             lastUpdated: Date.now()
-         });
-         // Add to experience replay buffer
+         this.setQValue(stateKey, actionKey, newQ);
+         // Add to experience replay buffer if enabled
          if (this.replayBuffer) {
             this.replayBuffer.add(experience, Math.abs(tdError)); // Priority based on TD error
         }
         this.stepCount++;
     }
     /**
-      * Perform batch update using experience replay
-      * Samples random batch from replay buffer and updates Q-values
-      */
-     batchUpdate() {
-         if (!this.replayBuffer || !this.replayBuffer.canSample(this.config.batchSize)) {
-             return;
-         }
-         const batch = this.replayBuffer.sample(this.config.batchSize);
-         for (const experience of batch) {
-             this.update(experience);
-         }
-         this.logger.debug(`Performed batch update with ${batch.length} experiences`);
-     }
-     /**
-      * Get Q-value for a state-action pair
-      */
-     getQValue(state, action) {
-         const stateKey = this.encodeState(state);
-         const actionKey = this.encodeAction(action);
-         const stateActions = this.qTable.get(stateKey);
-         if (!stateActions) {
-             return 0;
-         }
-         const qValue = stateActions.get(actionKey);
-         return qValue?.value ?? 0;
-     }
-     /**
-      * Get all Q-values for a state
-      */
-     getStateValues(state) {
-         const stateKey = this.encodeState(state);
-         const stateActions = this.qTable.get(stateKey);
-         if (!stateActions) {
-             return new Map();
-         }
-         const values = new Map();
-         for (const [actionKey, qValue] of stateActions.entries()) {
-             values.set(actionKey, qValue.value);
-         }
-         return values;
-     }
-     /**
-      * Get value of a state (max Q-value over all actions)
-      * V(s) = max_a Q(s,a)
-      */
-     getStateValue(state) {
-         const stateKey = this.encodeState(state);
-         const stateActions = this.qTable.get(stateKey);
-         if (!stateActions || stateActions.size === 0) {
-             return 0;
-         }
-         return Math.max(...Array.from(stateActions.values()).map(qv => qv.value));
-     }
-     /**
-      * Decay exploration rate (epsilon)
-      * Called after each episode to gradually reduce exploration
+      * Get the default exploration rate for this algorithm
       */
-     decayExploration() {
-         this.config.explorationRate = Math.max(this.config.minExplorationRate, this.config.explorationRate * this.config.explorationDecay);
+     getDefaultExplorationRate() {
+         return this.defaultConfig.explorationRate;
      }
     /**
-      * Mark end of episode
+      * Get algorithm name
       */
-     endEpisode() {
-         this.episodeCount++;
-         this.decayExploration();
-         // Perform batch update if using experience replay
-         if (this.config.useExperienceReplay) {
-             this.batchUpdate();
-         }
-     }
-     /**
-      * Encode state to string key for Q-table
-      */
-     encodeState(state) {
-         // Create normalized feature vector
-         const features = [
-             state.taskComplexity,
-             state.requiredCapabilities.length / 10, // normalize
-             state.previousAttempts / 5, // normalize
-             state.availableResources,
-             state.timeConstraint ? Math.min(state.timeConstraint / 300000, 1) : 1 // normalize to 5 min
-         ];
-         // Round to reduce state space (discretization)
-         return features.map(f => Math.round(f * 10) / 10).join(',');
-     }
-     /**
-      * Encode action to string key for Q-table
-      */
-     encodeAction(action) {
-         return `${action.strategy}:${action.parallelization.toFixed(1)}:${action.retryPolicy}`;
-     }
-     /**
-      * Get current exploration rate (epsilon)
-      */
-     getExplorationRate() {
-         return this.config.explorationRate;
-     }
-     /**
-      * Get total number of learning steps
-      */
-     getStepCount() {
-         return this.stepCount;
-     }
-     /**
-      * Get total number of episodes
-      */
-     getEpisodeCount() {
-         return this.episodeCount;
-     }
-     /**
-      * Get Q-table size (number of state-action pairs)
-      */
-     getTableSize() {
-         let size = 0;
-         for (const stateActions of this.qTable.values()) {
-             size += stateActions.size;
-         }
-         return size;
+     getAlgorithmName() {
+         return 'Q-Learning';
      }
     /**
-      * Get statistics about learning progress
+      * Get algorithm type (off-policy)
       */
-     getStatistics() {
-         let totalQValue = 0;
-         let count = 0;
-         let maxQ = -Infinity;
-         let minQ = Infinity;
-         for (const stateActions of this.qTable.values()) {
-             for (const qValue of stateActions.values()) {
-                 totalQValue += qValue.value;
-                 maxQ = Math.max(maxQ, qValue.value);
-                 minQ = Math.min(minQ, qValue.value);
-                 count++;
-             }
-         }
-         return {
-             steps: this.stepCount,
-             episodes: this.episodeCount,
-             tableSize: count,
-             explorationRate: this.config.explorationRate,
-             avgQValue: count > 0 ? totalQValue / count : 0,
-             maxQValue: count > 0 ? maxQ : 0,
-             minQValue: count > 0 ? minQ : 0
-         };
-     }
-     /**
-      * Reset Q-table and learning state
-      */
-     reset() {
-         this.qTable.clear();
-         this.stepCount = 0;
-         this.episodeCount = 0;
-         this.config.explorationRate = DEFAULT_CONFIG.explorationRate;
-         if (this.replayBuffer) {
-             this.replayBuffer.clear();
-         }
-         this.logger.info('QLearning reset to initial state');
+     getAlgorithmType() {
+         return 'off-policy';
      }
     /**
-      * Export Q-table and state for persistence
+      * Get detailed statistics including Q-learning-specific metrics
       */
-     export() {
-         const serializedQTable = {};
-         for (const [state, actions] of this.qTable.entries()) {
-             serializedQTable[state] = {};
-             for (const [action, qValue] of actions.entries()) {
-                 serializedQTable[state][action] = qValue;
-             }
-         }
+     getDetailedStatistics() {
         return {
-             qTable: serializedQTable,
-             config: { ...this.config },
-             stepCount: this.stepCount,
-             episodeCount: this.episodeCount
+             algorithm: this.getAlgorithmName(),
+             type: this.getAlgorithmType(),
+             stats: this.getStatistics()
         };
     }
-     /**
-      * Import Q-table and state from persistence
-      */
-     import(state) {
-         this.qTable.clear();
-         for (const [stateKey, actions] of Object.entries(state.qTable)) {
-             const actionMap = new Map();
-             for (const [actionKey, qValue] of Object.entries(actions)) {
-                 actionMap.set(actionKey, qValue);
-             }
-             this.qTable.set(stateKey, actionMap);
-         }
-         this.config = { ...state.config };
-         this.stepCount = state.stepCount;
-         this.episodeCount = state.episodeCount;
-         this.logger.info(`Imported Q-table with ${this.getTableSize()} state-action pairs`);
-     }
-     /**
-      * Get memory usage estimate in bytes
-      */
-     getMemoryUsage() {
-         const qTableSize = JSON.stringify(this.export().qTable).length;
-         const bufferSize = this.replayBuffer?.getMemoryUsage() ?? 0;
-         return qTableSize + bufferSize;
-     }
  }
  exports.QLearning = QLearning;
  //# sourceMappingURL=QLearning.js.map
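The comments above stress a single behavioral difference from SARSA: the bootstrap term of the TD target. A minimal standalone sketch of the two targets, using plain numbers rather than the package's Q-table types (function names here are illustrative, not package APIs):

// Q-learning (off-policy): bootstrap from the greedy max over Q(s', ·).
function qLearningTarget(r: number, gamma: number, qNext: number[]): number {
  return r + gamma * Math.max(...qNext);
}

// SARSA (on-policy): bootstrap from the action actually taken in s'.
function sarsaTarget(r: number, gamma: number, qNext: number[], aTaken: number): number {
  return r + gamma * qNext[aTaken];
}

const qNext = [0.2, 0.9, 0.4]; // Q-values of the three actions in s'
qLearningTarget(1.0, 0.95, qNext); // 1 + 0.95 * 0.9 = 1.855
sarsaTarget(1.0, 0.95, qNext, 0); // 1 + 0.95 * 0.2 = 1.19
// Either target then feeds the same step: Q(s,a) ← Q(s,a) + α · (target − Q(s,a))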
package/dist/learning/QLearning.js.map

@@ -1 +1 @@
- (single-line generated sourcemap for the old QLearning.js; mappings omitted)
+ (single-line generated sourcemap for the refactored QLearning.js; mappings omitted)