agentic-qe 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. package/CHANGELOG.md +213 -0
  2. package/README.md +37 -11
  3. package/dist/agents/index.d.ts.map +1 -1
  4. package/dist/agents/index.js +5 -1
  5. package/dist/agents/index.js.map +1 -1
  6. package/dist/core/MemoryManager.d.ts.map +1 -1
  7. package/dist/core/MemoryManager.js +4 -0
  8. package/dist/core/MemoryManager.js.map +1 -1
  9. package/dist/core/di/AgentDependencies.d.ts +127 -0
  10. package/dist/core/di/AgentDependencies.d.ts.map +1 -0
  11. package/dist/core/di/AgentDependencies.js +251 -0
  12. package/dist/core/di/AgentDependencies.js.map +1 -0
  13. package/dist/core/di/DIContainer.d.ts +149 -0
  14. package/dist/core/di/DIContainer.d.ts.map +1 -0
  15. package/dist/core/di/DIContainer.js +333 -0
  16. package/dist/core/di/DIContainer.js.map +1 -0
  17. package/dist/core/di/index.d.ts +11 -0
  18. package/dist/core/di/index.d.ts.map +1 -0
  19. package/dist/core/di/index.js +22 -0
  20. package/dist/core/di/index.js.map +1 -0
  21. package/dist/core/index.d.ts +1 -0
  22. package/dist/core/index.d.ts.map +1 -1
  23. package/dist/core/index.js +11 -1
  24. package/dist/core/index.js.map +1 -1
  25. package/dist/core/memory/RuVectorPatternStore.d.ts +5 -1
  26. package/dist/core/memory/RuVectorPatternStore.d.ts.map +1 -1
  27. package/dist/core/memory/RuVectorPatternStore.js +43 -1
  28. package/dist/core/memory/RuVectorPatternStore.js.map +1 -1
  29. package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
  30. package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
  31. package/dist/learning/ExperienceSharingProtocol.js +538 -0
  32. package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
  33. package/dist/learning/LearningEngine.d.ts +101 -1
  34. package/dist/learning/LearningEngine.d.ts.map +1 -1
  35. package/dist/learning/LearningEngine.js +330 -3
  36. package/dist/learning/LearningEngine.js.map +1 -1
  37. package/dist/learning/QLearning.d.ts +38 -125
  38. package/dist/learning/QLearning.d.ts.map +1 -1
  39. package/dist/learning/QLearning.js +46 -267
  40. package/dist/learning/QLearning.js.map +1 -1
  41. package/dist/learning/QLearningLegacy.d.ts +154 -0
  42. package/dist/learning/QLearningLegacy.d.ts.map +1 -0
  43. package/dist/learning/QLearningLegacy.js +337 -0
  44. package/dist/learning/QLearningLegacy.js.map +1 -0
  45. package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
  46. package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
  47. package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
  48. package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
  49. package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
  50. package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
  51. package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
  52. package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
  53. package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
  54. package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
  55. package/dist/learning/algorithms/PPOLearner.js +490 -0
  56. package/dist/learning/algorithms/PPOLearner.js.map +1 -0
  57. package/dist/learning/algorithms/QLearning.d.ts +68 -0
  58. package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
  59. package/dist/learning/algorithms/QLearning.js +116 -0
  60. package/dist/learning/algorithms/QLearning.js.map +1 -0
  61. package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
  62. package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
  63. package/dist/learning/algorithms/SARSALearner.js +252 -0
  64. package/dist/learning/algorithms/SARSALearner.js.map +1 -0
  65. package/dist/learning/algorithms/index.d.ts +29 -0
  66. package/dist/learning/algorithms/index.d.ts.map +1 -0
  67. package/dist/learning/algorithms/index.js +44 -0
  68. package/dist/learning/algorithms/index.js.map +1 -0
  69. package/dist/learning/index.d.ts +3 -0
  70. package/dist/learning/index.d.ts.map +1 -1
  71. package/dist/learning/index.js +15 -1
  72. package/dist/learning/index.js.map +1 -1
  73. package/dist/learning/types.d.ts +2 -0
  74. package/dist/learning/types.d.ts.map +1 -1
  75. package/dist/mcp/handlers/advanced/index.d.ts +3 -2
  76. package/dist/mcp/handlers/advanced/index.d.ts.map +1 -1
  77. package/dist/mcp/handlers/advanced/index.js +4 -5
  78. package/dist/mcp/handlers/advanced/index.js.map +1 -1
  79. package/dist/mcp/handlers/analysis/coverageAnalyzeSublinear.d.ts +1 -0
  80. package/dist/mcp/handlers/analysis/coverageAnalyzeSublinear.d.ts.map +1 -1
  81. package/dist/mcp/handlers/analysis/coverageAnalyzeSublinear.js +3 -0
  82. package/dist/mcp/handlers/analysis/coverageAnalyzeSublinear.js.map +1 -1
  83. package/dist/mcp/handlers/analysis/coverageGapsDetect.d.ts +1 -0
  84. package/dist/mcp/handlers/analysis/coverageGapsDetect.d.ts.map +1 -1
  85. package/dist/mcp/handlers/analysis/coverageGapsDetect.js +3 -0
  86. package/dist/mcp/handlers/analysis/coverageGapsDetect.js.map +1 -1
  87. package/dist/mcp/handlers/analysis/index.d.ts +0 -2
  88. package/dist/mcp/handlers/analysis/index.d.ts.map +1 -1
  89. package/dist/mcp/handlers/analysis/index.js +2 -3
  90. package/dist/mcp/handlers/analysis/index.js.map +1 -1
  91. package/dist/mcp/handlers/analysis/performanceMonitorRealtime.d.ts +1 -0
  92. package/dist/mcp/handlers/analysis/performanceMonitorRealtime.d.ts.map +1 -1
  93. package/dist/mcp/handlers/analysis/performanceMonitorRealtime.js +3 -0
  94. package/dist/mcp/handlers/analysis/performanceMonitorRealtime.js.map +1 -1
  95. package/dist/mcp/handlers/prediction/flaky-test-detect.d.ts.map +1 -1
  96. package/dist/mcp/handlers/prediction/flaky-test-detect.js +2 -0
  97. package/dist/mcp/handlers/prediction/flaky-test-detect.js.map +1 -1
  98. package/dist/mcp/handlers/prediction/index.d.ts +0 -2
  99. package/dist/mcp/handlers/prediction/index.d.ts.map +1 -1
  100. package/dist/mcp/handlers/prediction/index.js +2 -3
  101. package/dist/mcp/handlers/prediction/index.js.map +1 -1
  102. package/dist/mcp/handlers/security/index.d.ts +0 -30
  103. package/dist/mcp/handlers/security/index.d.ts.map +1 -1
  104. package/dist/mcp/handlers/security/index.js +5 -31
  105. package/dist/mcp/handlers/security/index.js.map +1 -1
  106. package/dist/mcp/lazy-loader.d.ts +156 -0
  107. package/dist/mcp/lazy-loader.d.ts.map +1 -0
  108. package/dist/mcp/lazy-loader.js +327 -0
  109. package/dist/mcp/lazy-loader.js.map +1 -0
  110. package/dist/mcp/server-instructions.d.ts +18 -0
  111. package/dist/mcp/server-instructions.d.ts.map +1 -0
  112. package/dist/mcp/server-instructions.js +133 -0
  113. package/dist/mcp/server-instructions.js.map +1 -0
  114. package/dist/mcp/server.d.ts.map +1 -1
  115. package/dist/mcp/server.js +236 -49
  116. package/dist/mcp/server.js.map +1 -1
  117. package/dist/mcp/tool-categories.d.ts +105 -0
  118. package/dist/mcp/tool-categories.d.ts.map +1 -0
  119. package/dist/mcp/tool-categories.js +463 -0
  120. package/dist/mcp/tool-categories.js.map +1 -0
  121. package/dist/mcp/tools.d.ts +3 -19
  122. package/dist/mcp/tools.d.ts.map +1 -1
  123. package/dist/mcp/tools.js +260 -647
  124. package/dist/mcp/tools.js.map +1 -1
  125. package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
  126. package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
  127. package/dist/memory/DistributedPatternLibrary.js +370 -0
  128. package/dist/memory/DistributedPatternLibrary.js.map +1 -0
  129. package/dist/memory/PatternQualityScorer.d.ts +169 -0
  130. package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
  131. package/dist/memory/PatternQualityScorer.js +327 -0
  132. package/dist/memory/PatternQualityScorer.js.map +1 -0
  133. package/dist/memory/PatternReplicationService.d.ts +187 -0
  134. package/dist/memory/PatternReplicationService.d.ts.map +1 -0
  135. package/dist/memory/PatternReplicationService.js +392 -0
  136. package/dist/memory/PatternReplicationService.js.map +1 -0
  137. package/dist/providers/ClaudeProvider.d.ts +98 -0
  138. package/dist/providers/ClaudeProvider.d.ts.map +1 -0
  139. package/dist/providers/ClaudeProvider.js +418 -0
  140. package/dist/providers/ClaudeProvider.js.map +1 -0
  141. package/dist/providers/ILLMProvider.d.ts +287 -0
  142. package/dist/providers/ILLMProvider.d.ts.map +1 -0
  143. package/dist/providers/ILLMProvider.js +33 -0
  144. package/dist/providers/ILLMProvider.js.map +1 -0
  145. package/dist/providers/LLMProviderFactory.d.ts +154 -0
  146. package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
  147. package/dist/providers/LLMProviderFactory.js +426 -0
  148. package/dist/providers/LLMProviderFactory.js.map +1 -0
  149. package/dist/providers/RuvllmProvider.d.ts +107 -0
  150. package/dist/providers/RuvllmProvider.d.ts.map +1 -0
  151. package/dist/providers/RuvllmProvider.js +417 -0
  152. package/dist/providers/RuvllmProvider.js.map +1 -0
  153. package/dist/providers/index.d.ts +31 -0
  154. package/dist/providers/index.d.ts.map +1 -0
  155. package/dist/providers/index.js +69 -0
  156. package/dist/providers/index.js.map +1 -0
  157. package/dist/utils/IntervalRegistry.d.ts +110 -0
  158. package/dist/utils/IntervalRegistry.d.ts.map +1 -0
  159. package/dist/utils/IntervalRegistry.js +190 -0
  160. package/dist/utils/IntervalRegistry.js.map +1 -0
  161. package/dist/utils/index.d.ts +1 -0
  162. package/dist/utils/index.d.ts.map +1 -1
  163. package/dist/utils/index.js +5 -1
  164. package/dist/utils/index.js.map +1 -1
  165. package/docs/reference/agents.md +33 -0
  166. package/docs/reference/usage.md +60 -0
  167. package/package.json +2 -2
package/dist/learning/QLearningLegacy.d.ts
@@ -0,0 +1,154 @@
+ /**
+ * QLearning - Phase 2 (Milestone 2.2)
+ *
+ * Implements standard Q-learning algorithm for reinforcement learning.
+ * Provides epsilon-greedy policy, Q-table updates, and value function estimation.
+ */
+ import { TaskState, AgentAction, TaskExperience } from './types';
+ /**
+ * Q-learning algorithm configuration
+ */
+ export interface QLearningConfig {
+ learningRate: number;
+ discountFactor: number;
+ explorationRate: number;
+ explorationDecay: number;
+ minExplorationRate: number;
+ useExperienceReplay: boolean;
+ replayBufferSize: number;
+ batchSize: number;
+ }
+ /**
+ * Q-learning action-value pair
+ */
+ interface QValue {
+ state: string;
+ action: string;
+ value: number;
+ updateCount: number;
+ lastUpdated: number;
+ }
+ /**
+ * QLearning - Standard Q-learning implementation
+ *
+ * Implements the classic Q-learning algorithm with:
+ * - Epsilon-greedy exploration policy
+ * - Temporal difference (TD) learning
+ * - Q-table for state-action values
+ * - Optional experience replay for stability
+ */
+ export declare class QLearning {
+ private readonly logger;
+ private config;
+ private qTable;
+ private replayBuffer?;
+ private stepCount;
+ private episodeCount;
+ constructor(config?: Partial<QLearningConfig>);
+ /**
+ * Select action using epsilon-greedy policy
+ * With probability ε, select random action (exploration)
+ * Otherwise, select action with highest Q-value (exploitation)
+ */
+ selectAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
+ /**
+ * Get best action based on current Q-values
+ */
+ getBestAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
+ /**
+ * Update Q-value using Q-learning update rule
+ * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ */
+ update(experience: TaskExperience): void;
+ /**
+ * Perform batch update using experience replay
+ * Samples random batch from replay buffer and updates Q-values
+ */
+ batchUpdate(): void;
+ /**
+ * Get Q-value for a state-action pair
+ */
+ getQValue(state: TaskState, action: AgentAction): number;
+ /**
+ * Get all Q-values for a state
+ */
+ getStateValues(state: TaskState): Map<string, number>;
+ /**
+ * Get value of a state (max Q-value over all actions)
+ * V(s) = max_a Q(s,a)
+ */
+ getStateValue(state: TaskState): number;
+ /**
+ * Decay exploration rate (epsilon)
+ * Called after each episode to gradually reduce exploration
+ */
+ decayExploration(): void;
+ /**
+ * Mark end of episode
+ */
+ endEpisode(): void;
+ /**
+ * Encode state to string key for Q-table
+ */
+ private encodeState;
+ /**
+ * Encode action to string key for Q-table
+ */
+ private encodeAction;
+ /**
+ * Get current exploration rate (epsilon)
+ */
+ getExplorationRate(): number;
+ /**
+ * Get total number of learning steps
+ */
+ getStepCount(): number;
+ /**
+ * Get total number of episodes
+ */
+ getEpisodeCount(): number;
+ /**
+ * Get Q-table size (number of state-action pairs)
+ */
+ getTableSize(): number;
+ /**
+ * Get statistics about learning progress
+ */
+ getStatistics(): {
+ steps: number;
+ episodes: number;
+ tableSize: number;
+ explorationRate: number;
+ avgQValue: number;
+ maxQValue: number;
+ minQValue: number;
+ };
+ /**
+ * Reset Q-table and learning state
+ */
+ reset(): void;
+ /**
+ * Export Q-table and state for persistence
+ */
+ export(): {
+ qTable: Record<string, Record<string, QValue>>;
+ config: QLearningConfig;
+ stepCount: number;
+ episodeCount: number;
+ };
+ /**
+ * Import Q-table and state from persistence
+ */
+ import(state: {
+ qTable: Record<string, Record<string, QValue>>;
+ config: QLearningConfig;
+ stepCount: number;
+ episodeCount: number;
+ }): void;
+ /**
+ * Get memory usage estimate in bytes
+ */
+ getMemoryUsage(): number;
+ }
+ export {};
+ //# sourceMappingURL=QLearningLegacy.d.ts.map
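The declaration above is the public surface of the (now legacy) tabular Q-learner. As a quick orientation, here is a minimal usage sketch against that surface. It is illustrative only: the `TaskState`/`AgentAction` field shapes are assumptions inferred from the `encodeState()`/`encodeAction()` bodies in the implementation further below, and `TaskExperience` is assumed to carry at least `{ state, action, reward, nextState }`, which is exactly what that implementation reads.

```typescript
import { QLearning } from './QLearningLegacy';

// Hypothetical literals; typed as `any` because the real TaskState/AgentAction
// shapes live in ./types. Fields mirror what encodeState()/encodeAction() read.
const state: any = {
  taskComplexity: 0.6,
  requiredCapabilities: ['unit-test', 'coverage'],
  previousAttempts: 1,
  availableResources: 0.8,
  timeConstraint: 120000 // ms; normalized against a 5-minute ceiling internally
};
const actions: any[] = [
  { strategy: 'parallel', parallelization: 0.8, retryPolicy: 'exponential' },
  { strategy: 'sequential', parallelization: 0.0, retryPolicy: 'none' }
];

const learner = new QLearning({ learningRate: 0.1, discountFactor: 0.95 });

const action = learner.selectAction(state, actions); // epsilon-greedy pick
// ...execute the task, observe the reward and the resulting state...
learner.update({ state, action, reward: 1.0, nextState: state });
learner.endEpisode(); // decays epsilon and, with replay on, runs a batch update
console.log(learner.getStatistics()); // steps, episodes, tableSize, avgQValue...
```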
package/dist/learning/QLearningLegacy.js
@@ -0,0 +1,337 @@
+ "use strict";
+ /**
+ * QLearning - Phase 2 (Milestone 2.2)
+ *
+ * Implements standard Q-learning algorithm for reinforcement learning.
+ * Provides epsilon-greedy policy, Q-table updates, and value function estimation.
+ */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.QLearning = void 0;
+ const Logger_1 = require("../utils/Logger");
+ const ExperienceReplayBuffer_1 = require("./ExperienceReplayBuffer");
+ /**
+ * Default Q-learning configuration
+ */
+ const DEFAULT_CONFIG = {
+ learningRate: 0.1,
+ discountFactor: 0.95,
+ explorationRate: 0.3,
+ explorationDecay: 0.995,
+ minExplorationRate: 0.01,
+ useExperienceReplay: true,
+ replayBufferSize: 10000,
+ batchSize: 32
+ };
+ /**
+ * QLearning - Standard Q-learning implementation
+ *
+ * Implements the classic Q-learning algorithm with:
+ * - Epsilon-greedy exploration policy
+ * - Temporal difference (TD) learning
+ * - Q-table for state-action values
+ * - Optional experience replay for stability
+ */
+ class QLearning {
+ constructor(config = {}) {
+ this.logger = Logger_1.Logger.getInstance();
+ this.config = { ...DEFAULT_CONFIG, ...config };
+ this.qTable = new Map();
+ this.stepCount = 0;
+ this.episodeCount = 0;
+ // Initialize experience replay buffer if enabled
+ if (this.config.useExperienceReplay) {
+ this.replayBuffer = new ExperienceReplayBuffer_1.ExperienceReplayBuffer({
+ maxSize: this.config.replayBufferSize,
+ minSize: this.config.batchSize,
+ prioritized: false
+ });
+ }
+ this.logger.info('QLearning initialized', { config: this.config });
+ }
+ /**
+ * Select action using epsilon-greedy policy
+ * With probability ε, select random action (exploration)
+ * Otherwise, select action with highest Q-value (exploitation)
+ */
+ selectAction(state, availableActions) {
+ if (availableActions.length === 0) {
+ throw new Error('No available actions to select from');
+ }
+ // Exploration: random action
+ if (Math.random() < this.config.explorationRate) {
+ const randomIndex = Math.floor(Math.random() * availableActions.length);
+ return availableActions[randomIndex];
+ }
+ // Exploitation: best action based on Q-values
+ return this.getBestAction(state, availableActions);
+ }
+ /**
+ * Get best action based on current Q-values
+ */
+ getBestAction(state, availableActions) {
+ const stateKey = this.encodeState(state);
+ const stateActions = this.qTable.get(stateKey);
+ if (!stateActions || stateActions.size === 0) {
+ // No Q-values yet, return random action
+ const randomIndex = Math.floor(Math.random() * availableActions.length);
+ return availableActions[randomIndex];
+ }
+ // Find action with highest Q-value
+ let bestAction = availableActions[0];
+ let bestValue = -Infinity;
+ for (const action of availableActions) {
+ const actionKey = this.encodeAction(action);
+ const qValue = stateActions.get(actionKey);
+ if (qValue && qValue.value > bestValue) {
+ bestValue = qValue.value;
+ bestAction = action;
+ }
+ }
+ return bestAction;
+ }
+ /**
+ * Update Q-value using Q-learning update rule
+ * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ */
+ update(experience) {
+ const stateKey = this.encodeState(experience.state);
+ const actionKey = this.encodeAction(experience.action);
+ const nextStateKey = this.encodeState(experience.nextState);
+ // Get or initialize state-action map
+ if (!this.qTable.has(stateKey)) {
+ this.qTable.set(stateKey, new Map());
+ }
+ const stateActions = this.qTable.get(stateKey);
+ // Get current Q-value
+ const currentQValue = stateActions.get(actionKey);
+ const currentQ = currentQValue?.value ?? 0;
+ // Get max Q-value for next state (for all possible actions)
+ const nextStateActions = this.qTable.get(nextStateKey);
+ const maxNextQ = nextStateActions && nextStateActions.size > 0
+ ? Math.max(...Array.from(nextStateActions.values()).map(qv => qv.value))
+ : 0;
+ // Q-learning update rule
+ // Q(s,a) = Q(s,a) + α * [r + γ * max(Q(s',a')) - Q(s,a)]
+ const tdTarget = experience.reward + this.config.discountFactor * maxNextQ;
+ const tdError = tdTarget - currentQ;
+ const newQ = currentQ + this.config.learningRate * tdError;
+ // Update Q-value
+ stateActions.set(actionKey, {
+ state: stateKey,
+ action: actionKey,
+ value: newQ,
+ updateCount: (currentQValue?.updateCount ?? 0) + 1,
+ lastUpdated: Date.now()
+ });
+ // Add to experience replay buffer
+ if (this.replayBuffer) {
+ this.replayBuffer.add(experience, Math.abs(tdError)); // Priority based on TD error
+ }
+ this.stepCount++;
+ }
+ /**
+ * Perform batch update using experience replay
+ * Samples random batch from replay buffer and updates Q-values
+ */
+ batchUpdate() {
+ if (!this.replayBuffer || !this.replayBuffer.canSample(this.config.batchSize)) {
+ return;
+ }
+ const batch = this.replayBuffer.sample(this.config.batchSize);
+ for (const experience of batch) {
+ this.update(experience);
+ }
+ this.logger.debug(`Performed batch update with ${batch.length} experiences`);
+ }
+ /**
+ * Get Q-value for a state-action pair
+ */
+ getQValue(state, action) {
+ const stateKey = this.encodeState(state);
+ const actionKey = this.encodeAction(action);
+ const stateActions = this.qTable.get(stateKey);
+ if (!stateActions) {
+ return 0;
+ }
+ const qValue = stateActions.get(actionKey);
+ return qValue?.value ?? 0;
+ }
+ /**
+ * Get all Q-values for a state
+ */
+ getStateValues(state) {
+ const stateKey = this.encodeState(state);
+ const stateActions = this.qTable.get(stateKey);
+ if (!stateActions) {
+ return new Map();
+ }
+ const values = new Map();
+ for (const [actionKey, qValue] of stateActions.entries()) {
+ values.set(actionKey, qValue.value);
+ }
+ return values;
+ }
+ /**
+ * Get value of a state (max Q-value over all actions)
+ * V(s) = max_a Q(s,a)
+ */
+ getStateValue(state) {
+ const stateKey = this.encodeState(state);
+ const stateActions = this.qTable.get(stateKey);
+ if (!stateActions || stateActions.size === 0) {
+ return 0;
+ }
+ return Math.max(...Array.from(stateActions.values()).map(qv => qv.value));
+ }
+ /**
+ * Decay exploration rate (epsilon)
+ * Called after each episode to gradually reduce exploration
+ */
+ decayExploration() {
+ this.config.explorationRate = Math.max(this.config.minExplorationRate, this.config.explorationRate * this.config.explorationDecay);
+ }
+ /**
+ * Mark end of episode
+ */
+ endEpisode() {
+ this.episodeCount++;
+ this.decayExploration();
+ // Perform batch update if using experience replay
+ if (this.config.useExperienceReplay) {
+ this.batchUpdate();
+ }
+ }
+ /**
+ * Encode state to string key for Q-table
+ */
+ encodeState(state) {
+ // Create normalized feature vector
+ const features = [
+ state.taskComplexity,
+ state.requiredCapabilities.length / 10, // normalize
+ state.previousAttempts / 5, // normalize
+ state.availableResources,
+ state.timeConstraint ? Math.min(state.timeConstraint / 300000, 1) : 1 // normalize to 5 min
+ ];
+ // Round to reduce state space (discretization)
+ return features.map(f => Math.round(f * 10) / 10).join(',');
+ }
+ /**
+ * Encode action to string key for Q-table
+ */
+ encodeAction(action) {
+ return `${action.strategy}:${action.parallelization.toFixed(1)}:${action.retryPolicy}`;
+ }
+ /**
+ * Get current exploration rate (epsilon)
+ */
+ getExplorationRate() {
+ return this.config.explorationRate;
+ }
+ /**
+ * Get total number of learning steps
+ */
+ getStepCount() {
+ return this.stepCount;
+ }
+ /**
+ * Get total number of episodes
+ */
+ getEpisodeCount() {
+ return this.episodeCount;
+ }
+ /**
+ * Get Q-table size (number of state-action pairs)
+ */
+ getTableSize() {
+ let size = 0;
+ for (const stateActions of this.qTable.values()) {
+ size += stateActions.size;
+ }
+ return size;
+ }
+ /**
+ * Get statistics about learning progress
+ */
+ getStatistics() {
+ let totalQValue = 0;
+ let count = 0;
+ let maxQ = -Infinity;
+ let minQ = Infinity;
+ for (const stateActions of this.qTable.values()) {
+ for (const qValue of stateActions.values()) {
+ totalQValue += qValue.value;
+ maxQ = Math.max(maxQ, qValue.value);
+ minQ = Math.min(minQ, qValue.value);
+ count++;
+ }
+ }
+ return {
+ steps: this.stepCount,
+ episodes: this.episodeCount,
+ tableSize: count,
+ explorationRate: this.config.explorationRate,
+ avgQValue: count > 0 ? totalQValue / count : 0,
+ maxQValue: count > 0 ? maxQ : 0,
+ minQValue: count > 0 ? minQ : 0
+ };
+ }
+ /**
+ * Reset Q-table and learning state
+ */
+ reset() {
+ this.qTable.clear();
+ this.stepCount = 0;
+ this.episodeCount = 0;
+ this.config.explorationRate = DEFAULT_CONFIG.explorationRate;
+ if (this.replayBuffer) {
+ this.replayBuffer.clear();
+ }
+ this.logger.info('QLearning reset to initial state');
+ }
+ /**
+ * Export Q-table and state for persistence
+ */
+ export() {
+ const serializedQTable = {};
+ for (const [state, actions] of this.qTable.entries()) {
+ serializedQTable[state] = {};
+ for (const [action, qValue] of actions.entries()) {
+ serializedQTable[state][action] = qValue;
+ }
+ }
+ return {
+ qTable: serializedQTable,
+ config: { ...this.config },
+ stepCount: this.stepCount,
+ episodeCount: this.episodeCount
+ };
+ }
+ /**
+ * Import Q-table and state from persistence
+ */
+ import(state) {
+ this.qTable.clear();
+ for (const [stateKey, actions] of Object.entries(state.qTable)) {
+ const actionMap = new Map();
+ for (const [actionKey, qValue] of Object.entries(actions)) {
+ actionMap.set(actionKey, qValue);
+ }
+ this.qTable.set(stateKey, actionMap);
+ }
+ this.config = { ...state.config };
+ this.stepCount = state.stepCount;
+ this.episodeCount = state.episodeCount;
+ this.logger.info(`Imported Q-table with ${this.getTableSize()} state-action pairs`);
+ }
+ /**
+ * Get memory usage estimate in bytes
+ */
+ getMemoryUsage() {
+ const qTableSize = JSON.stringify(this.export().qTable).length;
+ const bufferSize = this.replayBuffer?.getMemoryUsage() ?? 0;
+ return qTableSize + bufferSize;
+ }
+ }
+ exports.QLearning = QLearning;
+ //# sourceMappingURL=QLearningLegacy.js.map
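Since the release replaces this class with the new `algorithms/` hierarchy, the core math is worth pinning down. The following standalone sketch reproduces the arithmetic of `update()` on a single transition, using the `DEFAULT_CONFIG` values shown above (α = 0.1, γ = 0.95); the sample reward and next-state value are invented for illustration.

```typescript
// One tabular Q-learning step:
// Q(s,a) ← Q(s,a) + α · [r + γ · max_a' Q(s',a') − Q(s,a)]
function tdUpdate(
  currentQ: number,  // Q(s,a) before the update
  reward: number,    // r observed after taking a in s
  maxNextQ: number,  // max over a' of Q(s',a'); 0 for unseen next states
  alpha = 0.1,       // learningRate (DEFAULT_CONFIG)
  gamma = 0.95       // discountFactor (DEFAULT_CONFIG)
): number {
  const tdTarget = reward + gamma * maxNextQ;
  const tdError = tdTarget - currentQ; // also used as the replay priority above
  return currentQ + alpha * tdError;
}

// Fresh state-action pair (Q = 0), reward 1, best next-state value 0.5:
// tdTarget = 1 + 0.95 · 0.5 = 1.475 → newQ = 0 + 0.1 · 1.475 ≈ 0.1475
console.log(tdUpdate(0, 1, 0.5));
```

Note also that `encodeState()` rounds each feature to one decimal place, so nearby states collapse onto the same Q-table row; this keeps the table small at the cost of coarser value estimates.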
package/dist/learning/algorithms/AbstractRLLearner.d.ts
@@ -0,0 +1,162 @@
+ /**
+ * AbstractRLLearner - Base class for Reinforcement Learning algorithms
+ *
+ * Provides common functionality for all RL algorithms including:
+ * - Epsilon-greedy exploration policy
+ * - State/action encoding
+ * - Q-table management
+ * - Experience replay integration
+ * - Statistics tracking
+ */
+ import { Logger } from '../../utils/Logger';
+ import { TaskState, AgentAction, TaskExperience } from '../types';
+ import { ExperienceReplayBuffer } from '../ExperienceReplayBuffer';
+ /**
+ * Base configuration for RL algorithms
+ */
+ export interface RLConfig {
+ learningRate: number;
+ discountFactor: number;
+ explorationRate: number;
+ explorationDecay: number;
+ minExplorationRate: number;
+ useExperienceReplay: boolean;
+ replayBufferSize: number;
+ batchSize: number;
+ }
+ /**
+ * Q-value with metadata
+ */
+ export interface QValue {
+ state: string;
+ action: string;
+ value: number;
+ updateCount: number;
+ lastUpdated: number;
+ }
+ /**
+ * Abstract base class for RL algorithms
+ */
+ export declare abstract class AbstractRLLearner {
+ protected readonly logger: Logger;
+ protected config: RLConfig;
+ protected qTable: Map<string, Map<string, QValue>>;
+ protected replayBuffer?: ExperienceReplayBuffer;
+ protected stepCount: number;
+ protected episodeCount: number;
+ constructor(config: RLConfig);
+ /**
+ * Select action using epsilon-greedy policy
+ * With probability ε, select random action (exploration)
+ * Otherwise, select action with highest Q-value (exploitation)
+ */
+ selectAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
+ /**
+ * Get best action based on current Q-values (greedy policy)
+ */
+ getBestAction(state: TaskState, availableActions: AgentAction[]): AgentAction;
+ /**
+ * Abstract method: Update Q-value with algorithm-specific rule
+ * Must be implemented by subclasses (Q-Learning, SARSA, etc.)
+ */
+ abstract update(experience: TaskExperience, nextAction?: AgentAction): void;
+ /**
+ * Get Q-value for a state-action pair
+ */
+ getQValue(state: TaskState, action: AgentAction): number;
+ /**
+ * Set Q-value for a state-action pair (protected for subclass use)
+ */
+ protected setQValue(stateKey: string, actionKey: string, value: number): void;
+ /**
+ * Get all Q-values for a state
+ */
+ getStateValues(state: TaskState): Map<string, number>;
+ /**
+ * Get value of a state (max Q-value over all actions)
+ * V(s) = max_a Q(s,a)
+ */
+ getStateValue(state: TaskState): number;
+ /**
+ * Perform batch update using experience replay
+ * Samples random batch from replay buffer and updates Q-values
+ */
+ batchUpdate(): void;
+ /**
+ * Decay exploration rate (epsilon)
+ * Called after each episode to gradually reduce exploration
+ */
+ decayExploration(): void;
+ /**
+ * Mark end of episode
+ */
+ endEpisode(): void;
+ /**
+ * Encode state to string key for Q-table
+ * Creates normalized feature vector and discretizes for generalization
+ */
+ protected encodeState(state: TaskState): string;
+ /**
+ * Encode action to string key for Q-table
+ */
+ protected encodeAction(action: AgentAction): string;
+ /**
+ * Get current exploration rate (epsilon)
+ */
+ getExplorationRate(): number;
+ /**
+ * Get total number of learning steps
+ */
+ getStepCount(): number;
+ /**
+ * Get total number of episodes
+ */
+ getEpisodeCount(): number;
+ /**
+ * Get Q-table size (number of state-action pairs)
+ */
+ getTableSize(): number;
+ /**
+ * Get statistics about learning progress
+ */
+ getStatistics(): {
+ steps: number;
+ episodes: number;
+ tableSize: number;
+ explorationRate: number;
+ avgQValue: number;
+ maxQValue: number;
+ minQValue: number;
+ };
+ /**
+ * Reset Q-table and learning state
+ */
+ reset(): void;
+ /**
+ * Get default exploration rate (for reset)
+ */
+ protected abstract getDefaultExplorationRate(): number;
+ /**
+ * Export Q-table and state for persistence
+ */
+ export(): {
+ qTable: Record<string, Record<string, QValue>>;
+ config: RLConfig;
+ stepCount: number;
+ episodeCount: number;
+ };
+ /**
+ * Import Q-table and state from persistence
+ */
+ import(state: {
+ qTable: Record<string, Record<string, QValue>>;
+ config: RLConfig;
+ stepCount: number;
+ episodeCount: number;
+ }): void;
+ /**
+ * Get memory usage estimate in bytes
+ */
+ getMemoryUsage(): number;
+ }
+ //# sourceMappingURL=AbstractRLLearner.d.ts.map
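`AbstractRLLearner` factors the epsilon-greedy policy, the state/action encoders, the replay buffer, and the bookkeeping out of the individual algorithms, leaving subclasses to supply only the bootstrapping rule via the abstract `update()` (plus `getDefaultExplorationRate()` for `reset()`). As a sketch of that extension point, and not the package's actual `SARSALearner`, a minimal on-policy learner could look like the following; it assumes the same `TaskExperience` fields the legacy implementation above reads.

```typescript
import { AbstractRLLearner } from './AbstractRLLearner';
import { AgentAction, TaskExperience } from '../types';

// Hypothetical subclass: SARSA bootstraps from the action actually taken in
// the next state (on-policy), where Q-learning uses the max over all actions.
class MiniSarsaLearner extends AbstractRLLearner {
  update(experience: TaskExperience, nextAction?: AgentAction): void {
    const stateKey = this.encodeState(experience.state);
    const actionKey = this.encodeAction(experience.action);
    const currentQ = this.getQValue(experience.state, experience.action);
    // Terminal transition (no next action): bootstrap from 0.
    const nextQ = nextAction
      ? this.getQValue(experience.nextState, nextAction)
      : 0;
    // Q(s,a) ← Q(s,a) + α · [r + γ · Q(s',a') − Q(s,a)]
    const tdTarget = experience.reward + this.config.discountFactor * nextQ;
    const newQ = currentQ + this.config.learningRate * (tdTarget - currentQ);
    this.setQValue(stateKey, actionKey, newQ);
    this.stepCount++;
  }

  protected getDefaultExplorationRate(): number {
    return 0.3; // restored by reset(); mirrors the legacy default above
  }
}
```

Action selection, epsilon decay, persistence (`export()`/`import()`), and statistics all come from the base class, so a subclass like this stays focused on its update rule.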