agentic-qe 2.1.2 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/.claude/skills/agentic-quality-engineering/SKILL.md +4 -4
  2. package/.claude/skills/cicd-pipeline-qe-orchestrator/README.md +14 -11
  3. package/.claude/skills/skills-manifest.json +2 -2
  4. package/CHANGELOG.md +138 -0
  5. package/README.md +92 -214
  6. package/dist/agents/BaseAgent.d.ts +5 -1
  7. package/dist/agents/BaseAgent.d.ts.map +1 -1
  8. package/dist/agents/BaseAgent.js +32 -17
  9. package/dist/agents/BaseAgent.js.map +1 -1
  10. package/dist/agents/index.d.ts.map +1 -1
  11. package/dist/agents/index.js +5 -1
  12. package/dist/agents/index.js.map +1 -1
  13. package/dist/cli/commands/improve/index.d.ts +8 -1
  14. package/dist/cli/commands/improve/index.d.ts.map +1 -1
  15. package/dist/cli/commands/improve/index.js +18 -16
  16. package/dist/cli/commands/improve/index.js.map +1 -1
  17. package/dist/cli/commands/learn/index.d.ts +10 -2
  18. package/dist/cli/commands/learn/index.d.ts.map +1 -1
  19. package/dist/cli/commands/learn/index.js +99 -63
  20. package/dist/cli/commands/learn/index.js.map +1 -1
  21. package/dist/cli/commands/patterns/index.d.ts +8 -1
  22. package/dist/cli/commands/patterns/index.d.ts.map +1 -1
  23. package/dist/cli/commands/patterns/index.js +79 -45
  24. package/dist/cli/commands/patterns/index.js.map +1 -1
  25. package/dist/cli/commands/routing/index.d.ts +5 -0
  26. package/dist/cli/commands/routing/index.d.ts.map +1 -1
  27. package/dist/cli/commands/routing/index.js +11 -10
  28. package/dist/cli/commands/routing/index.js.map +1 -1
  29. package/dist/cli/init/agents.d.ts +1 -1
  30. package/dist/cli/init/agents.js +2 -2
  31. package/dist/cli/init/database-init.d.ts +7 -0
  32. package/dist/cli/init/database-init.d.ts.map +1 -1
  33. package/dist/cli/init/database-init.js +29 -48
  34. package/dist/cli/init/database-init.js.map +1 -1
  35. package/dist/core/di/AgentDependencies.d.ts +127 -0
  36. package/dist/core/di/AgentDependencies.d.ts.map +1 -0
  37. package/dist/core/di/AgentDependencies.js +251 -0
  38. package/dist/core/di/AgentDependencies.js.map +1 -0
  39. package/dist/core/di/DIContainer.d.ts +149 -0
  40. package/dist/core/di/DIContainer.d.ts.map +1 -0
  41. package/dist/core/di/DIContainer.js +333 -0
  42. package/dist/core/di/DIContainer.js.map +1 -0
  43. package/dist/core/di/index.d.ts +11 -0
  44. package/dist/core/di/index.d.ts.map +1 -0
  45. package/dist/core/di/index.js +22 -0
  46. package/dist/core/di/index.js.map +1 -0
  47. package/dist/core/index.d.ts +1 -0
  48. package/dist/core/index.d.ts.map +1 -1
  49. package/dist/core/index.js +11 -1
  50. package/dist/core/index.js.map +1 -1
  51. package/dist/core/memory/HNSWVectorMemory.d.ts +261 -0
  52. package/dist/core/memory/HNSWVectorMemory.d.ts.map +1 -0
  53. package/dist/core/memory/HNSWVectorMemory.js +647 -0
  54. package/dist/core/memory/HNSWVectorMemory.js.map +1 -0
  55. package/dist/core/memory/SwarmMemoryManager.d.ts +7 -0
  56. package/dist/core/memory/SwarmMemoryManager.d.ts.map +1 -1
  57. package/dist/core/memory/SwarmMemoryManager.js +9 -0
  58. package/dist/core/memory/SwarmMemoryManager.js.map +1 -1
  59. package/dist/core/memory/index.d.ts +2 -0
  60. package/dist/core/memory/index.d.ts.map +1 -1
  61. package/dist/core/memory/index.js +11 -1
  62. package/dist/core/memory/index.js.map +1 -1
  63. package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
  64. package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
  65. package/dist/learning/ExperienceSharingProtocol.js +538 -0
  66. package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
  67. package/dist/learning/ExplainableLearning.d.ts +191 -0
  68. package/dist/learning/ExplainableLearning.d.ts.map +1 -0
  69. package/dist/learning/ExplainableLearning.js +441 -0
  70. package/dist/learning/ExplainableLearning.js.map +1 -0
  71. package/dist/learning/GossipPatternSharingProtocol.d.ts +228 -0
  72. package/dist/learning/GossipPatternSharingProtocol.d.ts.map +1 -0
  73. package/dist/learning/GossipPatternSharingProtocol.js +590 -0
  74. package/dist/learning/GossipPatternSharingProtocol.js.map +1 -0
  75. package/dist/learning/LearningEngine.d.ts +104 -4
  76. package/dist/learning/LearningEngine.d.ts.map +1 -1
  77. package/dist/learning/LearningEngine.js +350 -16
  78. package/dist/learning/LearningEngine.js.map +1 -1
  79. package/dist/learning/PerformanceOptimizer.d.ts +268 -0
  80. package/dist/learning/PerformanceOptimizer.d.ts.map +1 -0
  81. package/dist/learning/PerformanceOptimizer.js +552 -0
  82. package/dist/learning/PerformanceOptimizer.js.map +1 -0
  83. package/dist/learning/PrivacyManager.d.ts +197 -0
  84. package/dist/learning/PrivacyManager.d.ts.map +1 -0
  85. package/dist/learning/PrivacyManager.js +551 -0
  86. package/dist/learning/PrivacyManager.js.map +1 -0
  87. package/dist/learning/QLearning.d.ts +38 -125
  88. package/dist/learning/QLearning.d.ts.map +1 -1
  89. package/dist/learning/QLearning.js +46 -267
  90. package/dist/learning/QLearning.js.map +1 -1
  91. package/dist/learning/QLearningLegacy.d.ts +154 -0
  92. package/dist/learning/QLearningLegacy.d.ts.map +1 -0
  93. package/dist/learning/QLearningLegacy.js +337 -0
  94. package/dist/learning/QLearningLegacy.js.map +1 -0
  95. package/dist/learning/TransferLearningManager.d.ts +212 -0
  96. package/dist/learning/TransferLearningManager.d.ts.map +1 -0
  97. package/dist/learning/TransferLearningManager.js +497 -0
  98. package/dist/learning/TransferLearningManager.js.map +1 -0
  99. package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
  100. package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
  101. package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
  102. package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
  103. package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
  104. package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
  105. package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
  106. package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
  107. package/dist/learning/algorithms/MAMLMetaLearner.d.ts +218 -0
  108. package/dist/learning/algorithms/MAMLMetaLearner.d.ts.map +1 -0
  109. package/dist/learning/algorithms/MAMLMetaLearner.js +532 -0
  110. package/dist/learning/algorithms/MAMLMetaLearner.js.map +1 -0
  111. package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
  112. package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
  113. package/dist/learning/algorithms/PPOLearner.js +490 -0
  114. package/dist/learning/algorithms/PPOLearner.js.map +1 -0
  115. package/dist/learning/algorithms/QLearning.d.ts +68 -0
  116. package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
  117. package/dist/learning/algorithms/QLearning.js +116 -0
  118. package/dist/learning/algorithms/QLearning.js.map +1 -0
  119. package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
  120. package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
  121. package/dist/learning/algorithms/SARSALearner.js +252 -0
  122. package/dist/learning/algorithms/SARSALearner.js.map +1 -0
  123. package/dist/learning/algorithms/index.d.ts +32 -0
  124. package/dist/learning/algorithms/index.d.ts.map +1 -0
  125. package/dist/learning/algorithms/index.js +50 -0
  126. package/dist/learning/algorithms/index.js.map +1 -0
  127. package/dist/learning/index.d.ts +11 -0
  128. package/dist/learning/index.d.ts.map +1 -1
  129. package/dist/learning/index.js +31 -1
  130. package/dist/learning/index.js.map +1 -1
  131. package/dist/learning/types.d.ts +2 -0
  132. package/dist/learning/types.d.ts.map +1 -1
  133. package/dist/mcp/server-instructions.d.ts +1 -1
  134. package/dist/mcp/server-instructions.js +1 -1
  135. package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
  136. package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
  137. package/dist/memory/DistributedPatternLibrary.js +370 -0
  138. package/dist/memory/DistributedPatternLibrary.js.map +1 -0
  139. package/dist/memory/PatternQualityScorer.d.ts +169 -0
  140. package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
  141. package/dist/memory/PatternQualityScorer.js +327 -0
  142. package/dist/memory/PatternQualityScorer.js.map +1 -0
  143. package/dist/memory/PatternReplicationService.d.ts +187 -0
  144. package/dist/memory/PatternReplicationService.d.ts.map +1 -0
  145. package/dist/memory/PatternReplicationService.js +392 -0
  146. package/dist/memory/PatternReplicationService.js.map +1 -0
  147. package/dist/providers/ClaudeProvider.d.ts +98 -0
  148. package/dist/providers/ClaudeProvider.d.ts.map +1 -0
  149. package/dist/providers/ClaudeProvider.js +418 -0
  150. package/dist/providers/ClaudeProvider.js.map +1 -0
  151. package/dist/providers/HybridRouter.d.ts +217 -0
  152. package/dist/providers/HybridRouter.d.ts.map +1 -0
  153. package/dist/providers/HybridRouter.js +679 -0
  154. package/dist/providers/HybridRouter.js.map +1 -0
  155. package/dist/providers/ILLMProvider.d.ts +287 -0
  156. package/dist/providers/ILLMProvider.d.ts.map +1 -0
  157. package/dist/providers/ILLMProvider.js +33 -0
  158. package/dist/providers/ILLMProvider.js.map +1 -0
  159. package/dist/providers/LLMProviderFactory.d.ts +154 -0
  160. package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
  161. package/dist/providers/LLMProviderFactory.js +426 -0
  162. package/dist/providers/LLMProviderFactory.js.map +1 -0
  163. package/dist/providers/RuvllmProvider.d.ts +107 -0
  164. package/dist/providers/RuvllmProvider.d.ts.map +1 -0
  165. package/dist/providers/RuvllmProvider.js +417 -0
  166. package/dist/providers/RuvllmProvider.js.map +1 -0
  167. package/dist/providers/index.d.ts +32 -0
  168. package/dist/providers/index.d.ts.map +1 -0
  169. package/dist/providers/index.js +75 -0
  170. package/dist/providers/index.js.map +1 -0
  171. package/dist/telemetry/LearningTelemetry.d.ts +190 -0
  172. package/dist/telemetry/LearningTelemetry.d.ts.map +1 -0
  173. package/dist/telemetry/LearningTelemetry.js +403 -0
  174. package/dist/telemetry/LearningTelemetry.js.map +1 -0
  175. package/dist/telemetry/index.d.ts +1 -0
  176. package/dist/telemetry/index.d.ts.map +1 -1
  177. package/dist/telemetry/index.js +20 -2
  178. package/dist/telemetry/index.js.map +1 -1
  179. package/dist/telemetry/instrumentation/agent.d.ts +1 -1
  180. package/dist/telemetry/instrumentation/agent.js +1 -1
  181. package/dist/telemetry/instrumentation/index.d.ts +1 -1
  182. package/dist/telemetry/instrumentation/index.js +1 -1
  183. package/dist/utils/math.d.ts +11 -0
  184. package/dist/utils/math.d.ts.map +1 -0
  185. package/dist/utils/math.js +16 -0
  186. package/dist/utils/math.js.map +1 -0
  187. package/docs/reference/agents.md +1 -1
  188. package/docs/reference/skills.md +3 -3
  189. package/docs/reference/usage.md +4 -4
  190. package/package.json +1 -1
@@ -0,0 +1 @@
+ {"version":3,"file":"PPOLearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/PPOLearner.ts"],"names":[],"mappings":"..."}
@@ -0,0 +1,68 @@
+ /**
+  * QLearning - Off-policy TD(0) Reinforcement Learning
+  *
+  * Implements standard Q-learning algorithm for reinforcement learning.
+  * Key differences from SARSA:
+  * - Off-policy: learns optimal Q-values regardless of policy being followed
+  * - Uses max Q-value for next state, not actual next action
+  * - Update rule: Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+  * - More aggressive than SARSA, finds optimal policy faster
+  */
+ import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
+ import { TaskExperience, AgentAction } from '../types';
+ /**
+  * Q-learning configuration (extends base RL config)
+  */
+ export interface QLearningConfig extends RLConfig {
+ }
+ /**
+  * QLearning - Standard Q-learning implementation
+  *
+  * Implements the classic Q-learning algorithm with:
+  * - Epsilon-greedy exploration policy
+  * - Off-policy temporal difference (TD) learning
+  * - Q-table for state-action values
+  * - Optional experience replay for stability
+  *
+  * Update Rule:
+  * Q(s,a) ← Q(s,a) + α[r + γ·max_a'(Q(s',a')) - Q(s,a)]
+  *
+  * Key characteristics:
+  * - Off-policy: learns about optimal policy while following exploration policy
+  * - Uses max Q-value (greedy) for bootstrapping
+  * - Converges to optimal Q* under certain conditions
+  * - More sample-efficient than on-policy methods
+  */
+ export declare class QLearning extends AbstractRLLearner {
+     private readonly defaultConfig;
+     constructor(config?: Partial<QLearningConfig>);
+     /**
+      * Update Q-value using Q-learning update rule
+      * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+      *
+      * @param experience The transition experience (s, a, r, s')
+      * @param nextAction Ignored in Q-learning (uses max Q-value instead)
+      */
+     update(experience: TaskExperience, nextAction?: AgentAction): void;
+     /**
+      * Get the default exploration rate for this algorithm
+      */
+     protected getDefaultExplorationRate(): number;
+     /**
+      * Get algorithm name
+      */
+     getAlgorithmName(): string;
+     /**
+      * Get algorithm type (off-policy)
+      */
+     getAlgorithmType(): 'on-policy' | 'off-policy';
+     /**
+      * Get detailed statistics including Q-learning-specific metrics
+      */
+     getDetailedStatistics(): {
+         algorithm: string;
+         type: 'on-policy' | 'off-policy';
+         stats: ReturnType<AbstractRLLearner['getStatistics']>;
+     };
+ }
+ //# sourceMappingURL=QLearning.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"QLearning.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/QLearning.ts"],"names":[],"mappings":"..."}
@@ -0,0 +1,116 @@
+ "use strict";
+ /**
+  * QLearning - Off-policy TD(0) Reinforcement Learning
+  *
+  * Implements standard Q-learning algorithm for reinforcement learning.
+  * Key differences from SARSA:
+  * - Off-policy: learns optimal Q-values regardless of policy being followed
+  * - Uses max Q-value for next state, not actual next action
+  * - Update rule: Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+  * - More aggressive than SARSA, finds optimal policy faster
+  */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.QLearning = void 0;
+ const AbstractRLLearner_1 = require("./AbstractRLLearner");
+ /**
+  * Default Q-learning configuration
+  */
+ const DEFAULT_CONFIG = {
+     learningRate: 0.1,
+     discountFactor: 0.95,
+     explorationRate: 0.3,
+     explorationDecay: 0.995,
+     minExplorationRate: 0.01,
+     useExperienceReplay: true,
+     replayBufferSize: 10000,
+     batchSize: 32
+ };
+ /**
+  * QLearning - Standard Q-learning implementation
+  *
+  * Implements the classic Q-learning algorithm with:
+  * - Epsilon-greedy exploration policy
+  * - Off-policy temporal difference (TD) learning
+  * - Q-table for state-action values
+  * - Optional experience replay for stability
+  *
+  * Update Rule:
+  * Q(s,a) ← Q(s,a) + α[r + γ·max_a'(Q(s',a')) - Q(s,a)]
+  *
+  * Key characteristics:
+  * - Off-policy: learns about optimal policy while following exploration policy
+  * - Uses max Q-value (greedy) for bootstrapping
+  * - Converges to optimal Q* under certain conditions
+  * - More sample-efficient than on-policy methods
+  */
+ class QLearning extends AbstractRLLearner_1.AbstractRLLearner {
+     constructor(config = {}) {
+         const fullConfig = { ...DEFAULT_CONFIG, ...config };
+         super(fullConfig);
+         this.defaultConfig = fullConfig;
+         this.logger.info('QLearning initialized with off-policy TD(0)', { config: fullConfig });
+     }
+     /**
+      * Update Q-value using Q-learning update rule
+      * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+      *
+      * @param experience The transition experience (s, a, r, s')
+      * @param nextAction Ignored in Q-learning (uses max Q-value instead)
+      */
+     update(experience, nextAction) {
+         const stateKey = this.encodeState(experience.state);
+         const actionKey = this.encodeAction(experience.action);
+         const nextStateKey = this.encodeState(experience.nextState);
+         // Get current Q-value Q(s,a)
+         const stateActions = this.qTable.get(stateKey);
+         const currentQ = stateActions?.get(actionKey)?.value ?? 0;
+         // Q-Learning: Get max Q-value for next state (greedy)
+         // This is the key difference from SARSA (which uses actual next action)
+         const nextStateActions = this.qTable.get(nextStateKey);
+         const maxNextQ = nextStateActions && nextStateActions.size > 0
+             ? Math.max(...Array.from(nextStateActions.values()).map(qv => qv.value))
+             : 0;
+         // Q-learning update rule
+         // Q(s,a) = Q(s,a) + α * [r + γ * max(Q(s',a')) - Q(s,a)]
+         const tdTarget = experience.reward + this.config.discountFactor * maxNextQ;
+         const tdError = tdTarget - currentQ;
+         const newQ = currentQ + this.config.learningRate * tdError;
+         // Update Q-value
+         this.setQValue(stateKey, actionKey, newQ);
+         // Add to experience replay buffer if enabled
+         if (this.replayBuffer) {
+             this.replayBuffer.add(experience, Math.abs(tdError)); // Priority based on TD error
+         }
+         this.stepCount++;
+     }
+     /**
+      * Get the default exploration rate for this algorithm
+      */
+     getDefaultExplorationRate() {
+         return this.defaultConfig.explorationRate;
+     }
+     /**
+      * Get algorithm name
+      */
+     getAlgorithmName() {
+         return 'Q-Learning';
+     }
+     /**
+      * Get algorithm type (off-policy)
+      */
+     getAlgorithmType() {
+         return 'off-policy';
+     }
+     /**
+      * Get detailed statistics including Q-learning-specific metrics
+      */
+     getDetailedStatistics() {
+         return {
+             algorithm: this.getAlgorithmName(),
+             type: this.getAlgorithmType(),
+             stats: this.getStatistics()
+         };
+     }
+ }
+ exports.QLearning = QLearning;
+ //# sourceMappingURL=QLearning.js.map
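The update method above applies the tabular Q-learning rule Q(s,a) ← Q(s,a) + α[r + γ·max_a' Q(s',a') − Q(s,a)]. Below is a minimal standalone sketch of that rule with one worked number; the Map-based table, helper name, and default values (α = 0.1, γ = 0.95, taken from DEFAULT_CONFIG above) are illustrative and not part of the package's API.

// Standalone sketch of the off-policy TD(0) update used by QLearning above.
type QTable = Map<string, Map<string, number>>;

function qLearningUpdate(
    qTable: QTable,
    state: string,
    action: string,
    reward: number,
    nextState: string,
    alpha = 0.1,  // learningRate default shown in DEFAULT_CONFIG
    gamma = 0.95  // discountFactor default shown in DEFAULT_CONFIG
): number {
    const current = qTable.get(state)?.get(action) ?? 0;
    // Off-policy: bootstrap from the best next-state action, regardless of what is taken next.
    const nextValues = [...(qTable.get(nextState)?.values() ?? [])];
    const maxNextQ = nextValues.length > 0 ? Math.max(...nextValues) : 0;
    const tdTarget = reward + gamma * maxNextQ;          // r + γ·max_a' Q(s',a')
    const newQ = current + alpha * (tdTarget - current); // Q(s,a) + α·TD-error
    if (!qTable.has(state)) qTable.set(state, new Map());
    qTable.get(state)!.set(action, newQ);
    return newQ;
}

// Worked example: Q(s,a) = 0, reward 1, best next-state value 0.5
// → target = 1 + 0.95·0.5 = 1.475, new Q = 0 + 0.1·(1.475 − 0) = 0.1475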
@@ -0,0 +1 @@
+ {"version":3,"file":"QLearning.js","sourceRoot":"","sources":["../../../src/learning/algorithms/QLearning.ts"],"names":[],"mappings":"..."}
@@ -0,0 +1,107 @@
+ /**
+  * SARSALearner - On-policy TD(0) Reinforcement Learning
+  *
+  * Implements SARSA (State-Action-Reward-State-Action) algorithm.
+  * Key differences from Q-Learning:
+  * - On-policy: learns Q-values for the policy being followed (epsilon-greedy)
+  * - Uses actual next action taken, not the max Q-value
+  * - Update rule: Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+  * - More conservative than Q-Learning, safer for exploration
+  */
+ import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
+ import { TaskExperience, AgentAction, TaskState } from '../types';
+ /**
+  * SARSA configuration (same as base RL config)
+  */
+ export type SARSAConfig = RLConfig;
+ /**
+  * SARSALearner - On-policy Temporal Difference Learning
+  *
+  * SARSA is an on-policy TD control algorithm that learns the Q-values
+  * for the policy being followed (typically epsilon-greedy).
+  *
+  * Key Characteristics:
+  * - Updates based on (State, Action, Reward, next State, next Action)
+  * - Learns Q-values for the actual policy (including exploration)
+  * - More conservative than Q-Learning
+  * - Better for tasks where exploration is risky
+  * - Converges to optimal policy under certain conditions
+  *
+  * Update Rule:
+  * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+  * where a' is the action actually taken in state s' (not necessarily greedy)
+  */
+ export declare class SARSALearner extends AbstractRLLearner {
+     private readonly defaultConfig;
+     private lastStateAction?;
+     constructor(config?: Partial<RLConfig>);
+     /**
+      * Update Q-value using SARSA on-policy update rule
+      * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+      *
+      * @param experience The transition experience (s, a, r, s')
+      * @param nextAction The actual action taken in next state (SARSA requires this!)
+      *                   If not provided, selects action using current policy (epsilon-greedy)
+      */
+     update(experience: TaskExperience, nextAction?: AgentAction): void;
+     /**
+      * Calculate expected value for next state under current epsilon-greedy policy
+      * This is used when we don't have the actual next action (e.g., in batch updates)
+      *
+      * Expected SARSA: E[Q(s',a')] = ε/|A| * Σ Q(s',a) + (1-ε) * max Q(s',a)
+      */
+     private getExpectedValue;
+     /**
+      * Select next action and update with SARSA
+      * This is the typical SARSA flow: select action, observe reward, select next action, update
+      *
+      * @param currentState Current state
+      * @param currentAction Action taken in current state
+      * @param reward Reward received
+      * @param nextState Next state observed
+      * @param availableActions Actions available in next state
+      * @returns Next action selected (for continued learning)
+      */
+     selectAndUpdate(currentState: TaskState, currentAction: AgentAction, reward: number, nextState: TaskState, availableActions: AgentAction[]): AgentAction;
+     /**
+      * Learn from a complete episode trajectory
+      * Updates all state-action pairs in the trajectory using SARSA
+      *
+      * @param trajectory Array of (state, action, reward) tuples
+      */
+     learnFromEpisode(trajectory: Array<{
+         state: TaskState;
+         action: AgentAction;
+         reward: number;
+     }>): void;
+     /**
+      * Get the default exploration rate for this algorithm
+      */
+     protected getDefaultExplorationRate(): number;
+     /**
+      * Get algorithm name
+      */
+     getAlgorithmName(): string;
+     /**
+      * Get algorithm type (on-policy)
+      */
+     getAlgorithmType(): 'on-policy' | 'off-policy';
+     /**
+      * Get detailed statistics including SARSA-specific metrics
+      */
+     getDetailedStatistics(): {
+         algorithm: string;
+         type: 'on-policy' | 'off-policy';
+         stats: ReturnType<AbstractRLLearner['getStatistics']>;
+     };
+     /**
+      * Compare performance with expected convergence
+      * SARSA typically converges slower but more safely than Q-Learning
+      */
+     getConvergenceMetrics(): {
+         isConverging: boolean;
+         convergenceRate: number;
+         stability: number;
+     };
+ }
+ //# sourceMappingURL=SARSALearner.d.ts.map
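selectAndUpdate is documented above as the typical SARSA flow: select an action, observe the reward and next state, select the next action with the same policy, update, then carry the next action forward. A minimal standalone sketch of that control loop follows; the environment and policy stubs are hypothetical and independent of the package's TaskState/AgentAction types.

// Standalone sketch of the on-policy control loop that selectAndUpdate supports.
type Step = { nextState: string; reward: number; done: boolean };

function runSarsaEpisode(
    start: string,
    act: (state: string) => string,                 // ε-greedy policy (stub)
    step: (state: string, action: string) => Step,  // environment (stub)
    update: (s: string, a: string, r: number, s2: string, a2: string) => void
): void {
    let state = start;
    let action = act(state);
    while (true) {
        const { nextState, reward, done } = step(state, action);
        const nextAction = act(nextState);
        update(state, action, reward, nextState, nextAction); // SARSA uses (s, a, r, s', a')
        if (done) break;
        state = nextState;
        action = nextAction;
    }
}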
@@ -0,0 +1 @@
+ {"version":3,"file":"SARSALearner.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/SARSALearner.ts"],"names":[],"mappings":"..."}
@@ -0,0 +1,252 @@
+ "use strict";
+ /**
+  * SARSALearner - On-policy TD(0) Reinforcement Learning
+  *
+  * Implements SARSA (State-Action-Reward-State-Action) algorithm.
+  * Key differences from Q-Learning:
+  * - On-policy: learns Q-values for the policy being followed (epsilon-greedy)
+  * - Uses actual next action taken, not the max Q-value
+  * - Update rule: Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+  * - More conservative than Q-Learning, safer for exploration
+  */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.SARSALearner = void 0;
+ const AbstractRLLearner_1 = require("./AbstractRLLearner");
+ /**
+  * Default SARSA configuration
+  */
+ const DEFAULT_SARSA_CONFIG = {
+     learningRate: 0.1,
+     discountFactor: 0.95,
+     explorationRate: 0.3,
+     explorationDecay: 0.995,
+     minExplorationRate: 0.01,
+     useExperienceReplay: true,
+     replayBufferSize: 10000,
+     batchSize: 32
+ };
+ /**
+  * SARSALearner - On-policy Temporal Difference Learning
+  *
+  * SARSA is an on-policy TD control algorithm that learns the Q-values
+  * for the policy being followed (typically epsilon-greedy).
+  *
+  * Key Characteristics:
+  * - Updates based on (State, Action, Reward, next State, next Action)
+  * - Learns Q-values for the actual policy (including exploration)
+  * - More conservative than Q-Learning
+  * - Better for tasks where exploration is risky
+  * - Converges to optimal policy under certain conditions
+  *
+  * Update Rule:
+  * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+  * where a' is the action actually taken in state s' (not necessarily greedy)
+  */
+ class SARSALearner extends AbstractRLLearner_1.AbstractRLLearner {
+     constructor(config = {}) {
+         const fullConfig = { ...DEFAULT_SARSA_CONFIG, ...config };
+         super(fullConfig);
+         this.defaultConfig = fullConfig;
+         this.logger.info('SARSALearner initialized with on-policy TD(0)', { config: fullConfig });
+     }
+     /**
+      * Update Q-value using SARSA on-policy update rule
+      * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+      *
+      * @param experience The transition experience (s, a, r, s')
+      * @param nextAction The actual action taken in next state (SARSA requires this!)
+      *                   If not provided, selects action using current policy (epsilon-greedy)
+      */
+     update(experience, nextAction) {
+         const stateKey = this.encodeState(experience.state);
+         const actionKey = this.encodeAction(experience.action);
+         const nextStateKey = this.encodeState(experience.nextState);
+         // Get current Q-value Q(s,a)
+         const stateActions = this.qTable.get(stateKey);
+         const currentQ = stateActions?.get(actionKey)?.value ?? 0;
+         // SARSA: Get Q-value for next action that will actually be taken
+         // This is the key difference from Q-Learning (which uses max Q-value)
+         let nextQ = 0;
+         if (nextAction) {
+             // Use provided next action (typical in online learning)
+             const nextActionKey = this.encodeAction(nextAction);
+             const nextStateActions = this.qTable.get(nextStateKey);
+             nextQ = nextStateActions?.get(nextActionKey)?.value ?? 0;
+         }
+         else {
+             // If no next action provided, we need to select one using epsilon-greedy
+             // This happens in batch updates from experience replay
+             // We approximate by using a greedy action (conservative estimate)
+             const nextStateActions = this.qTable.get(nextStateKey);
+             if (nextStateActions && nextStateActions.size > 0) {
+                 // Use expected SARSA approximation: average over all actions weighted by policy
+                 nextQ = this.getExpectedValue(experience.nextState, nextStateActions);
+             }
+         }
+         // SARSA update rule
+         // Q(s,a) = Q(s,a) + α * [r + γ * Q(s',a') - Q(s,a)]
+         const tdTarget = experience.reward + this.config.discountFactor * nextQ;
+         const tdError = tdTarget - currentQ;
+         const newQ = currentQ + this.config.learningRate * tdError;
+         // Update Q-value
+         this.setQValue(stateKey, actionKey, newQ);
+         // Add to experience replay buffer if enabled
+         if (this.replayBuffer) {
+             this.replayBuffer.add(experience, Math.abs(tdError)); // Priority based on TD error
+         }
+         this.stepCount++;
+     }
+     /**
+      * Calculate expected value for next state under current epsilon-greedy policy
+      * This is used when we don't have the actual next action (e.g., in batch updates)
+      *
+      * Expected SARSA: E[Q(s',a')] = ε/|A| * Σ Q(s',a) + (1-ε) * max Q(s',a)
+      */
+     getExpectedValue(nextState, nextStateActions) {
+         if (nextStateActions.size === 0) {
+             return 0;
+         }
+         const epsilon = this.config.explorationRate;
+         const numActions = nextStateActions.size;
+         // Calculate average Q-value (for random exploration)
+         let sumQ = 0;
+         let maxQ = -Infinity;
+         for (const qValue of nextStateActions.values()) {
+             sumQ += qValue.value;
+             maxQ = Math.max(maxQ, qValue.value);
+         }
+         const avgQ = sumQ / numActions;
+         // Expected value under epsilon-greedy policy
+         // ε * (average of all actions) + (1-ε) * (max action)
+         return epsilon * avgQ + (1 - epsilon) * maxQ;
+     }
+     /**
+      * Select next action and update with SARSA
+      * This is the typical SARSA flow: select action, observe reward, select next action, update
+      *
+      * @param currentState Current state
+      * @param currentAction Action taken in current state
+      * @param reward Reward received
+      * @param nextState Next state observed
+      * @param availableActions Actions available in next state
+      * @returns Next action selected (for continued learning)
+      */
+     selectAndUpdate(currentState, currentAction, reward, nextState, availableActions) {
+         // Select next action using epsilon-greedy policy
+         const nextAction = this.selectAction(nextState, availableActions);
+         // Create experience
+         const experience = {
+             taskId: `sarsa-${Date.now()}`,
+             taskType: 'online-learning',
+             state: currentState,
+             action: currentAction,
+             reward,
+             nextState,
+             timestamp: new Date(),
+             agentId: 'sarsa-learner'
+         };
+         // Update Q-value using SARSA rule with actual next action
+         this.update(experience, nextAction);
+         return nextAction;
+     }
+     /**
+      * Learn from a complete episode trajectory
+      * Updates all state-action pairs in the trajectory using SARSA
+      *
+      * @param trajectory Array of (state, action, reward) tuples
+      */
+     learnFromEpisode(trajectory) {
+         // SARSA updates each transition with the next action in the trajectory
+         for (let i = 0; i < trajectory.length - 1; i++) {
+             const current = trajectory[i];
+             const next = trajectory[i + 1];
+             const experience = {
+                 taskId: `episode-${Date.now()}-${i}`,
+                 taskType: 'episode-learning',
+                 state: current.state,
+                 action: current.action,
+                 reward: current.reward,
+                 nextState: next.state,
+                 timestamp: new Date(),
+                 agentId: 'sarsa-learner'
+             };
+             // Update with the actual next action from trajectory
+             this.update(experience, next.action);
+         }
+         // Handle terminal state (last transition)
+         if (trajectory.length > 0) {
+             const last = trajectory[trajectory.length - 1];
+             const terminalExperience = {
+                 taskId: `episode-${Date.now()}-terminal`,
+                 taskType: 'episode-learning',
+                 state: last.state,
+                 action: last.action,
+                 reward: last.reward,
+                 nextState: last.state, // Terminal state transitions to itself
+                 timestamp: new Date(),
+                 agentId: 'sarsa-learner'
+             };
+             // Terminal state has no next action, Q(terminal, any) = 0
+             this.update(terminalExperience);
+         }
+         this.endEpisode();
+     }
+     /**
+      * Get the default exploration rate for this algorithm
+      */
+     getDefaultExplorationRate() {
+         return this.defaultConfig.explorationRate;
+     }
+     /**
+      * Get algorithm name
+      */
+     getAlgorithmName() {
+         return 'SARSA';
+     }
+     /**
+      * Get algorithm type (on-policy)
+      */
+     getAlgorithmType() {
+         return 'on-policy';
+     }
+     /**
+      * Get detailed statistics including SARSA-specific metrics
+      */
+     getDetailedStatistics() {
+         return {
+             algorithm: this.getAlgorithmName(),
+             type: this.getAlgorithmType(),
+             stats: this.getStatistics()
+         };
+     }
+     /**
+      * Compare performance with expected convergence
+      * SARSA typically converges slower but more safely than Q-Learning
+      */
+     getConvergenceMetrics() {
+         const stats = this.getStatistics();
+         // Check if Q-values are stabilizing
+         const avgQValue = stats.avgQValue;
+         const qValueRange = stats.maxQValue - stats.minQValue;
+         // Convergence indicators:
+         // 1. Low exploration rate (mostly exploiting)
+         // 2. Reasonable Q-value range (not diverging)
+         // 3. Sufficient episodes for learning
+         const isConverging = stats.explorationRate < 0.1 && // Low exploration
+             qValueRange < 10 && // Bounded Q-values
+             stats.episodes > 20; // Sufficient training
+         const convergenceRate = stats.episodes > 0
+             ? Math.min(1.0, stats.episodes / 100)
+             : 0;
+         const stability = qValueRange > 0
+             ? 1.0 - Math.min(1.0, qValueRange / 20)
+             : 0.5;
+         return {
+             isConverging,
+             convergenceRate,
+             stability
+         };
+     }
+ }
+ exports.SARSALearner = SARSALearner;
+ //# sourceMappingURL=SARSALearner.js.map
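The contrast with QLearning above is the bootstrap target: SARSA uses the Q-value of the action actually taken in s', and falls back to the Expected SARSA estimate ε·mean + (1−ε)·max when no next action is supplied. A standalone sketch with a worked number follows; the function names and value array are illustrative, not the package's API.

// On-policy target: bootstrap from the action actually taken in s'.
function sarsaTarget(reward: number, gamma: number, nextQ: number): number {
    return reward + gamma * nextQ; // r + γ·Q(s',a')
}

// Expected SARSA fallback used above when the next action is unknown:
// E[Q(s',a')] under ε-greedy = ε·mean(Q(s',·)) + (1−ε)·max(Q(s',·))
function expectedSarsaValue(nextActionValues: number[], epsilon: number): number {
    if (nextActionValues.length === 0) return 0;
    const mean = nextActionValues.reduce((s, v) => s + v, 0) / nextActionValues.length;
    const max = Math.max(...nextActionValues);
    return epsilon * mean + (1 - epsilon) * max;
}

// With next-state values [0.2, 0.8] and ε = 0.3:
// expected value = 0.3·0.5 + 0.7·0.8 = 0.71, whereas Q-learning would bootstrap from 0.8.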
@@ -0,0 +1 @@
+ {"version":3,"file":"SARSALearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/SARSALearner.ts"],"names":[],"mappings":"..."}
@@ -0,0 +1,32 @@
+ /**
+  * Learning Algorithms - Reinforcement Learning Implementations
+  *
+  * This module provides various RL algorithms for agent learning:
+  * - AbstractRLLearner: Base class for all RL algorithms
+  * - QLearning: Off-policy TD(0) algorithm
+  * - SARSALearner: On-policy TD(0) algorithm
+  * - ActorCriticLearner: Advantage Actor-Critic (A2C) algorithm
+  * - PPOLearner: Proximal Policy Optimization (PPO-Clip) algorithm
+  * - MAMLMetaLearner: Model-Agnostic Meta-Learning for fast adaptation
+  */
+ import { AbstractRLLearner, RLConfig, QValue } from './AbstractRLLearner';
+ import { QLearning, QLearningConfig } from '../QLearning';
+ import { SARSALearner, SARSAConfig } from './SARSALearner';
+ import { ActorCriticLearner, ActorCriticConfig, createDefaultActorCriticConfig } from './ActorCriticLearner';
+ import { PPOLearner, PPOConfig, createDefaultPPOConfig } from './PPOLearner';
+ import { MAMLMetaLearner, MAMLConfig, createDefaultMAMLConfig } from './MAMLMetaLearner';
+ export { AbstractRLLearner, RLConfig, QValue };
+ export { QLearning, QLearningConfig };
+ export { SARSALearner, SARSAConfig };
+ export { ActorCriticLearner, ActorCriticConfig, createDefaultActorCriticConfig };
+ export { PPOLearner, PPOConfig, createDefaultPPOConfig };
+ export { MAMLMetaLearner, MAMLConfig, createDefaultMAMLConfig };
+ /**
+  * Supported RL algorithm types
+  */
+ export type RLAlgorithmType = 'q-learning' | 'sarsa' | 'actor-critic' | 'ppo' | 'maml' | 'legacy';
+ /**
+  * Factory function to create RL algorithm instances
+  */
+ export declare function createRLAlgorithm(type: RLAlgorithmType, config?: any): AbstractRLLearner;
+ //# sourceMappingURL=index.d.ts.map
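createRLAlgorithm is declared as a factory over the RLAlgorithmType values listed above. A hedged usage sketch follows; the deep import path is an assumption (the package's public entry points are not shown in this diff), the option names mirror the DEFAULT_CONFIG values shown earlier, and config is typed as any, so the accepted fields may differ.

// Hypothetical usage sketch; import path and config fields are assumptions.
import { createRLAlgorithm } from 'agentic-qe/dist/learning/algorithms';

const learner = createRLAlgorithm('sarsa', {
    learningRate: 0.05,
    discountFactor: 0.99,
    explorationRate: 0.2
});
// getStatistics is declared on AbstractRLLearner per the declarations above.
const stats = learner.getStatistics();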
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/index.ts"],"names":[],"mappings":"..."}