agentic-qe 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CHANGELOG.md +123 -0
  2. package/README.md +1 -1
  3. package/dist/agents/index.d.ts.map +1 -1
  4. package/dist/agents/index.js +5 -1
  5. package/dist/agents/index.js.map +1 -1
  6. package/dist/core/di/AgentDependencies.d.ts +127 -0
  7. package/dist/core/di/AgentDependencies.d.ts.map +1 -0
  8. package/dist/core/di/AgentDependencies.js +251 -0
  9. package/dist/core/di/AgentDependencies.js.map +1 -0
  10. package/dist/core/di/DIContainer.d.ts +149 -0
  11. package/dist/core/di/DIContainer.d.ts.map +1 -0
  12. package/dist/core/di/DIContainer.js +333 -0
  13. package/dist/core/di/DIContainer.js.map +1 -0
  14. package/dist/core/di/index.d.ts +11 -0
  15. package/dist/core/di/index.d.ts.map +1 -0
  16. package/dist/core/di/index.js +22 -0
  17. package/dist/core/di/index.js.map +1 -0
  18. package/dist/core/index.d.ts +1 -0
  19. package/dist/core/index.d.ts.map +1 -1
  20. package/dist/core/index.js +11 -1
  21. package/dist/core/index.js.map +1 -1
  22. package/dist/learning/ExperienceSharingProtocol.d.ts +243 -0
  23. package/dist/learning/ExperienceSharingProtocol.d.ts.map +1 -0
  24. package/dist/learning/ExperienceSharingProtocol.js +538 -0
  25. package/dist/learning/ExperienceSharingProtocol.js.map +1 -0
  26. package/dist/learning/LearningEngine.d.ts +101 -1
  27. package/dist/learning/LearningEngine.d.ts.map +1 -1
  28. package/dist/learning/LearningEngine.js +330 -3
  29. package/dist/learning/LearningEngine.js.map +1 -1
  30. package/dist/learning/QLearning.d.ts +38 -125
  31. package/dist/learning/QLearning.d.ts.map +1 -1
  32. package/dist/learning/QLearning.js +46 -267
  33. package/dist/learning/QLearning.js.map +1 -1
  34. package/dist/learning/QLearningLegacy.d.ts +154 -0
  35. package/dist/learning/QLearningLegacy.d.ts.map +1 -0
  36. package/dist/learning/QLearningLegacy.js +337 -0
  37. package/dist/learning/QLearningLegacy.js.map +1 -0
  38. package/dist/learning/algorithms/AbstractRLLearner.d.ts +162 -0
  39. package/dist/learning/algorithms/AbstractRLLearner.d.ts.map +1 -0
  40. package/dist/learning/algorithms/AbstractRLLearner.js +300 -0
  41. package/dist/learning/algorithms/AbstractRLLearner.js.map +1 -0
  42. package/dist/learning/algorithms/ActorCriticLearner.d.ts +201 -0
  43. package/dist/learning/algorithms/ActorCriticLearner.d.ts.map +1 -0
  44. package/dist/learning/algorithms/ActorCriticLearner.js +447 -0
  45. package/dist/learning/algorithms/ActorCriticLearner.js.map +1 -0
  46. package/dist/learning/algorithms/PPOLearner.d.ts +207 -0
  47. package/dist/learning/algorithms/PPOLearner.d.ts.map +1 -0
  48. package/dist/learning/algorithms/PPOLearner.js +490 -0
  49. package/dist/learning/algorithms/PPOLearner.js.map +1 -0
  50. package/dist/learning/algorithms/QLearning.d.ts +68 -0
  51. package/dist/learning/algorithms/QLearning.d.ts.map +1 -0
  52. package/dist/learning/algorithms/QLearning.js +116 -0
  53. package/dist/learning/algorithms/QLearning.js.map +1 -0
  54. package/dist/learning/algorithms/SARSALearner.d.ts +107 -0
  55. package/dist/learning/algorithms/SARSALearner.d.ts.map +1 -0
  56. package/dist/learning/algorithms/SARSALearner.js +252 -0
  57. package/dist/learning/algorithms/SARSALearner.js.map +1 -0
  58. package/dist/learning/algorithms/index.d.ts +29 -0
  59. package/dist/learning/algorithms/index.d.ts.map +1 -0
  60. package/dist/learning/algorithms/index.js +44 -0
  61. package/dist/learning/algorithms/index.js.map +1 -0
  62. package/dist/learning/index.d.ts +3 -0
  63. package/dist/learning/index.d.ts.map +1 -1
  64. package/dist/learning/index.js +15 -1
  65. package/dist/learning/index.js.map +1 -1
  66. package/dist/learning/types.d.ts +2 -0
  67. package/dist/learning/types.d.ts.map +1 -1
  68. package/dist/memory/DistributedPatternLibrary.d.ts +159 -0
  69. package/dist/memory/DistributedPatternLibrary.d.ts.map +1 -0
  70. package/dist/memory/DistributedPatternLibrary.js +370 -0
  71. package/dist/memory/DistributedPatternLibrary.js.map +1 -0
  72. package/dist/memory/PatternQualityScorer.d.ts +169 -0
  73. package/dist/memory/PatternQualityScorer.d.ts.map +1 -0
  74. package/dist/memory/PatternQualityScorer.js +327 -0
  75. package/dist/memory/PatternQualityScorer.js.map +1 -0
  76. package/dist/memory/PatternReplicationService.d.ts +187 -0
  77. package/dist/memory/PatternReplicationService.d.ts.map +1 -0
  78. package/dist/memory/PatternReplicationService.js +392 -0
  79. package/dist/memory/PatternReplicationService.js.map +1 -0
  80. package/dist/providers/ClaudeProvider.d.ts +98 -0
  81. package/dist/providers/ClaudeProvider.d.ts.map +1 -0
  82. package/dist/providers/ClaudeProvider.js +418 -0
  83. package/dist/providers/ClaudeProvider.js.map +1 -0
  84. package/dist/providers/ILLMProvider.d.ts +287 -0
  85. package/dist/providers/ILLMProvider.d.ts.map +1 -0
  86. package/dist/providers/ILLMProvider.js +33 -0
  87. package/dist/providers/ILLMProvider.js.map +1 -0
  88. package/dist/providers/LLMProviderFactory.d.ts +154 -0
  89. package/dist/providers/LLMProviderFactory.d.ts.map +1 -0
  90. package/dist/providers/LLMProviderFactory.js +426 -0
  91. package/dist/providers/LLMProviderFactory.js.map +1 -0
  92. package/dist/providers/RuvllmProvider.d.ts +107 -0
  93. package/dist/providers/RuvllmProvider.d.ts.map +1 -0
  94. package/dist/providers/RuvllmProvider.js +417 -0
  95. package/dist/providers/RuvllmProvider.js.map +1 -0
  96. package/dist/providers/index.d.ts +31 -0
  97. package/dist/providers/index.d.ts.map +1 -0
  98. package/dist/providers/index.js +69 -0
  99. package/dist/providers/index.js.map +1 -0
  100. package/package.json +1 -1
@@ -0,0 +1,116 @@
+ "use strict";
+ /**
+ * QLearning - Off-policy TD(0) Reinforcement Learning
+ *
+ * Implements standard Q-learning algorithm for reinforcement learning.
+ * Key differences from SARSA:
+ * - Off-policy: learns optimal Q-values regardless of policy being followed
+ * - Uses max Q-value for next state, not actual next action
+ * - Update rule: Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ * - More aggressive than SARSA, finds optimal policy faster
+ */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.QLearning = void 0;
+ const AbstractRLLearner_1 = require("./AbstractRLLearner");
+ /**
+ * Default Q-learning configuration
+ */
+ const DEFAULT_CONFIG = {
+ learningRate: 0.1,
+ discountFactor: 0.95,
+ explorationRate: 0.3,
+ explorationDecay: 0.995,
+ minExplorationRate: 0.01,
+ useExperienceReplay: true,
+ replayBufferSize: 10000,
+ batchSize: 32
+ };
+ /**
+ * QLearning - Standard Q-learning implementation
+ *
+ * Implements the classic Q-learning algorithm with:
+ * - Epsilon-greedy exploration policy
+ * - Off-policy temporal difference (TD) learning
+ * - Q-table for state-action values
+ * - Optional experience replay for stability
+ *
+ * Update Rule:
+ * Q(s,a) ← Q(s,a) + α[r + γ·max_a'(Q(s',a')) - Q(s,a)]
+ *
+ * Key characteristics:
+ * - Off-policy: learns about optimal policy while following exploration policy
+ * - Uses max Q-value (greedy) for bootstrapping
+ * - Converges to optimal Q* under certain conditions
+ * - More sample-efficient than on-policy methods
+ */
+ class QLearning extends AbstractRLLearner_1.AbstractRLLearner {
+ constructor(config = {}) {
+ const fullConfig = { ...DEFAULT_CONFIG, ...config };
+ super(fullConfig);
+ this.defaultConfig = fullConfig;
+ this.logger.info('QLearning initialized with off-policy TD(0)', { config: fullConfig });
+ }
+ /**
+ * Update Q-value using Q-learning update rule
+ * Q(s,a) ← Q(s,a) + α[r + γ·max(Q(s',a')) - Q(s,a)]
+ *
+ * @param experience The transition experience (s, a, r, s')
+ * @param nextAction Ignored in Q-learning (uses max Q-value instead)
+ */
+ update(experience, nextAction) {
+ const stateKey = this.encodeState(experience.state);
+ const actionKey = this.encodeAction(experience.action);
+ const nextStateKey = this.encodeState(experience.nextState);
+ // Get current Q-value Q(s,a)
+ const stateActions = this.qTable.get(stateKey);
+ const currentQ = stateActions?.get(actionKey)?.value ?? 0;
+ // Q-Learning: Get max Q-value for next state (greedy)
+ // This is the key difference from SARSA (which uses actual next action)
+ const nextStateActions = this.qTable.get(nextStateKey);
+ const maxNextQ = nextStateActions && nextStateActions.size > 0
+ ? Math.max(...Array.from(nextStateActions.values()).map(qv => qv.value))
+ : 0;
+ // Q-learning update rule
+ // Q(s,a) = Q(s,a) + α * [r + γ * max(Q(s',a')) - Q(s,a)]
+ const tdTarget = experience.reward + this.config.discountFactor * maxNextQ;
+ const tdError = tdTarget - currentQ;
+ const newQ = currentQ + this.config.learningRate * tdError;
+ // Update Q-value
+ this.setQValue(stateKey, actionKey, newQ);
+ // Add to experience replay buffer if enabled
+ if (this.replayBuffer) {
+ this.replayBuffer.add(experience, Math.abs(tdError)); // Priority based on TD error
+ }
+ this.stepCount++;
+ }
+ /**
+ * Get the default exploration rate for this algorithm
+ */
+ getDefaultExplorationRate() {
+ return this.defaultConfig.explorationRate;
+ }
+ /**
+ * Get algorithm name
+ */
+ getAlgorithmName() {
+ return 'Q-Learning';
+ }
+ /**
+ * Get algorithm type (off-policy)
+ */
+ getAlgorithmType() {
+ return 'off-policy';
+ }
+ /**
+ * Get detailed statistics including Q-learning-specific metrics
+ */
+ getDetailedStatistics() {
+ return {
+ algorithm: this.getAlgorithmName(),
+ type: this.getAlgorithmType(),
+ stats: this.getStatistics()
+ };
+ }
+ }
+ exports.QLearning = QLearning;
+ //# sourceMappingURL=QLearning.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"QLearning.js","sourceRoot":"","sources":["../../../src/learning/algorithms/QLearning.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AAEH,2DAAkE;AAWlE;;GAEG;AACH,MAAM,cAAc,GAAoB;IACtC,YAAY,EAAE,GAAG;IACjB,cAAc,EAAE,IAAI;IACpB,eAAe,EAAE,GAAG;IACpB,gBAAgB,EAAE,KAAK;IACvB,kBAAkB,EAAE,IAAI;IACxB,mBAAmB,EAAE,IAAI;IACzB,gBAAgB,EAAE,KAAK;IACvB,SAAS,EAAE,EAAE;CACd,CAAC;AAEF;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAa,SAAU,SAAQ,qCAAiB;IAG9C,YAAY,SAAmC,EAAE;QAC/C,MAAM,UAAU,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;QACpD,KAAK,CAAC,UAAU,CAAC,CAAC;QAClB,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC;QAChC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,6CAA6C,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;IAC1F,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,UAA0B,EAAE,UAAwB;QACzD,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;QACvD,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAE5D,6BAA6B;QAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,YAAY,EAAE,GAAG,CAAC,SAAS,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;QAE1D,sDAAsD;QACtD,wEAAwE;QACxE,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,gBAAgB,IAAI,gBAAgB,CAAC,IAAI,GAAG,CAAC;YAC5D,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC;YACxE,CAAC,CAAC,CAAC,CAAC;QAEN,yBAAyB;QACzB,yDAAyD;QACzD,MAAM,QAAQ,GAAG,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,QAAQ,CAAC;QAC3E,MAAM,OAAO,GAAG,QAAQ,GAAG,QAAQ,CAAC;QACpC,MAAM,IAAI,GAAG,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,GAAG,OAAO,CAAC;QAE3D,iBAAiB;QACjB,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QAE1C,6CAA6C;QAC7C,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,6BAA6B;QACrF,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;IAED;;OAEG;IACO,yBAAyB;QACjC,OAAO,IAAI,CAAC,aAAa,CAAC,eAAe,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,OAAO,YAAY,CAAC;IACtB,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,OAAO,YAAY,CAAC;IACtB,CAAC;IAED;;OAEG;IACH,qBAAqB;QAKnB,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,gBAAgB,EAAE;YAClC,IAAI,EAAE,IAAI,CAAC,gBAAgB,EAAE;YAC7B,KAAK,EAAE,IAAI,CAAC,aAAa,EAAE;SAC5B,CAAC;IACJ,CAAC;CACF;AArFD,8BAqFC"}
@@ -0,0 +1,107 @@
+ /**
+ * SARSALearner - On-policy TD(0) Reinforcement Learning
+ *
+ * Implements SARSA (State-Action-Reward-State-Action) algorithm.
+ * Key differences from Q-Learning:
+ * - On-policy: learns Q-values for the policy being followed (epsilon-greedy)
+ * - Uses actual next action taken, not the max Q-value
+ * - Update rule: Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+ * - More conservative than Q-Learning, safer for exploration
+ */
+ import { AbstractRLLearner, RLConfig } from './AbstractRLLearner';
+ import { TaskExperience, AgentAction, TaskState } from '../types';
+ /**
+ * SARSA configuration (same as base RL config)
+ */
+ export type SARSAConfig = RLConfig;
+ /**
+ * SARSALearner - On-policy Temporal Difference Learning
+ *
+ * SARSA is an on-policy TD control algorithm that learns the Q-values
+ * for the policy being followed (typically epsilon-greedy).
+ *
+ * Key Characteristics:
+ * - Updates based on (State, Action, Reward, next State, next Action)
+ * - Learns Q-values for the actual policy (including exploration)
+ * - More conservative than Q-Learning
+ * - Better for tasks where exploration is risky
+ * - Converges to optimal policy under certain conditions
+ *
+ * Update Rule:
+ * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+ * where a' is the action actually taken in state s' (not necessarily greedy)
+ */
+ export declare class SARSALearner extends AbstractRLLearner {
+ private readonly defaultConfig;
+ private lastStateAction?;
+ constructor(config?: Partial<RLConfig>);
+ /**
+ * Update Q-value using SARSA on-policy update rule
+ * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+ *
+ * @param experience The transition experience (s, a, r, s')
+ * @param nextAction The actual action taken in next state (SARSA requires this!)
+ * If not provided, selects action using current policy (epsilon-greedy)
+ */
+ update(experience: TaskExperience, nextAction?: AgentAction): void;
+ /**
+ * Calculate expected value for next state under current epsilon-greedy policy
+ * This is used when we don't have the actual next action (e.g., in batch updates)
+ *
+ * Expected SARSA: E[Q(s',a')] = ε/|A| * Σ Q(s',a) + (1-ε) * max Q(s',a)
+ */
+ private getExpectedValue;
+ /**
+ * Select next action and update with SARSA
+ * This is the typical SARSA flow: select action, observe reward, select next action, update
+ *
+ * @param currentState Current state
+ * @param currentAction Action taken in current state
+ * @param reward Reward received
+ * @param nextState Next state observed
+ * @param availableActions Actions available in next state
+ * @returns Next action selected (for continued learning)
+ */
+ selectAndUpdate(currentState: TaskState, currentAction: AgentAction, reward: number, nextState: TaskState, availableActions: AgentAction[]): AgentAction;
+ /**
+ * Learn from a complete episode trajectory
+ * Updates all state-action pairs in the trajectory using SARSA
+ *
+ * @param trajectory Array of (state, action, reward) tuples
+ */
+ learnFromEpisode(trajectory: Array<{
+ state: TaskState;
+ action: AgentAction;
+ reward: number;
+ }>): void;
+ /**
+ * Get the default exploration rate for this algorithm
+ */
+ protected getDefaultExplorationRate(): number;
+ /**
+ * Get algorithm name
+ */
+ getAlgorithmName(): string;
+ /**
+ * Get algorithm type (on-policy)
+ */
+ getAlgorithmType(): 'on-policy' | 'off-policy';
+ /**
+ * Get detailed statistics including SARSA-specific metrics
+ */
+ getDetailedStatistics(): {
+ algorithm: string;
+ type: 'on-policy' | 'off-policy';
+ stats: ReturnType<AbstractRLLearner['getStatistics']>;
+ };
+ /**
+ * Compare performance with expected convergence
+ * SARSA typically converges slower but more safely than Q-Learning
+ */
+ getConvergenceMetrics(): {
+ isConverging: boolean;
+ convergenceRate: number;
+ stability: number;
+ };
+ }
+ //# sourceMappingURL=SARSALearner.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"SARSALearner.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/SARSALearner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,iBAAiB,EAAE,QAAQ,EAAE,MAAM,qBAAqB,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAElE;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,QAAQ,CAAC;AAgBnC;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,YAAa,SAAQ,iBAAiB;IACjD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAW;IACzC,OAAO,CAAC,eAAe,CAAC,CAAoC;gBAEhD,MAAM,GAAE,OAAO,CAAC,QAAQ,CAAM;IAO1C;;;;;;;OAOG;IACH,MAAM,CAAC,UAAU,EAAE,cAAc,EAAE,UAAU,CAAC,EAAE,WAAW,GAAG,IAAI;IA8ClE;;;;;OAKG;IACH,OAAO,CAAC,gBAAgB;IAwBxB;;;;;;;;;;OAUG;IACH,eAAe,CACb,YAAY,EAAE,SAAS,EACvB,aAAa,EAAE,WAAW,EAC1B,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,SAAS,EACpB,gBAAgB,EAAE,WAAW,EAAE,GAC9B,WAAW;IAsBd;;;;;OAKG;IACH,gBAAgB,CACd,UAAU,EAAE,KAAK,CAAC;QAChB,KAAK,EAAE,SAAS,CAAC;QACjB,MAAM,EAAE,WAAW,CAAC;QACpB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC,GACD,IAAI;IA0CP;;OAEG;IACH,SAAS,CAAC,yBAAyB,IAAI,MAAM;IAI7C;;OAEG;IACH,gBAAgB,IAAI,MAAM;IAI1B;;OAEG;IACH,gBAAgB,IAAI,WAAW,GAAG,YAAY;IAI9C;;OAEG;IACH,qBAAqB,IAAI;QACvB,SAAS,EAAE,MAAM,CAAC;QAClB,IAAI,EAAE,WAAW,GAAG,YAAY,CAAC;QACjC,KAAK,EAAE,UAAU,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC,CAAC;KACvD;IAQD;;;OAGG;IACH,qBAAqB,IAAI;QACvB,YAAY,EAAE,OAAO,CAAC;QACtB,eAAe,EAAE,MAAM,CAAC;QACxB,SAAS,EAAE,MAAM,CAAC;KACnB;CA+BF"}
@@ -0,0 +1,252 @@
+ "use strict";
+ /**
+ * SARSALearner - On-policy TD(0) Reinforcement Learning
+ *
+ * Implements SARSA (State-Action-Reward-State-Action) algorithm.
+ * Key differences from Q-Learning:
+ * - On-policy: learns Q-values for the policy being followed (epsilon-greedy)
+ * - Uses actual next action taken, not the max Q-value
+ * - Update rule: Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+ * - More conservative than Q-Learning, safer for exploration
+ */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.SARSALearner = void 0;
+ const AbstractRLLearner_1 = require("./AbstractRLLearner");
+ /**
+ * Default SARSA configuration
+ */
+ const DEFAULT_SARSA_CONFIG = {
+ learningRate: 0.1,
+ discountFactor: 0.95,
+ explorationRate: 0.3,
+ explorationDecay: 0.995,
+ minExplorationRate: 0.01,
+ useExperienceReplay: true,
+ replayBufferSize: 10000,
+ batchSize: 32
+ };
+ /**
+ * SARSALearner - On-policy Temporal Difference Learning
+ *
+ * SARSA is an on-policy TD control algorithm that learns the Q-values
+ * for the policy being followed (typically epsilon-greedy).
+ *
+ * Key Characteristics:
+ * - Updates based on (State, Action, Reward, next State, next Action)
+ * - Learns Q-values for the actual policy (including exploration)
+ * - More conservative than Q-Learning
+ * - Better for tasks where exploration is risky
+ * - Converges to optimal policy under certain conditions
+ *
+ * Update Rule:
+ * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+ * where a' is the action actually taken in state s' (not necessarily greedy)
+ */
+ class SARSALearner extends AbstractRLLearner_1.AbstractRLLearner {
+ constructor(config = {}) {
+ const fullConfig = { ...DEFAULT_SARSA_CONFIG, ...config };
+ super(fullConfig);
+ this.defaultConfig = fullConfig;
+ this.logger.info('SARSALearner initialized with on-policy TD(0)', { config: fullConfig });
+ }
+ /**
+ * Update Q-value using SARSA on-policy update rule
+ * Q(s,a) ← Q(s,a) + α[r + γQ(s',a') - Q(s,a)]
+ *
+ * @param experience The transition experience (s, a, r, s')
+ * @param nextAction The actual action taken in next state (SARSA requires this!)
+ * If not provided, selects action using current policy (epsilon-greedy)
+ */
+ update(experience, nextAction) {
+ const stateKey = this.encodeState(experience.state);
+ const actionKey = this.encodeAction(experience.action);
+ const nextStateKey = this.encodeState(experience.nextState);
+ // Get current Q-value Q(s,a)
+ const stateActions = this.qTable.get(stateKey);
+ const currentQ = stateActions?.get(actionKey)?.value ?? 0;
+ // SARSA: Get Q-value for next action that will actually be taken
+ // This is the key difference from Q-Learning (which uses max Q-value)
+ let nextQ = 0;
+ if (nextAction) {
+ // Use provided next action (typical in online learning)
+ const nextActionKey = this.encodeAction(nextAction);
+ const nextStateActions = this.qTable.get(nextStateKey);
+ nextQ = nextStateActions?.get(nextActionKey)?.value ?? 0;
+ }
+ else {
+ // If no next action provided, we need to select one using epsilon-greedy
+ // This happens in batch updates from experience replay
+ // We approximate by using a greedy action (conservative estimate)
+ const nextStateActions = this.qTable.get(nextStateKey);
+ if (nextStateActions && nextStateActions.size > 0) {
+ // Use expected SARSA approximation: average over all actions weighted by policy
+ nextQ = this.getExpectedValue(experience.nextState, nextStateActions);
+ }
+ }
+ // SARSA update rule
+ // Q(s,a) = Q(s,a) + α * [r + γ * Q(s',a') - Q(s,a)]
+ const tdTarget = experience.reward + this.config.discountFactor * nextQ;
+ const tdError = tdTarget - currentQ;
+ const newQ = currentQ + this.config.learningRate * tdError;
+ // Update Q-value
+ this.setQValue(stateKey, actionKey, newQ);
+ // Add to experience replay buffer if enabled
+ if (this.replayBuffer) {
+ this.replayBuffer.add(experience, Math.abs(tdError)); // Priority based on TD error
+ }
+ this.stepCount++;
+ }
+ /**
+ * Calculate expected value for next state under current epsilon-greedy policy
+ * This is used when we don't have the actual next action (e.g., in batch updates)
+ *
+ * Expected SARSA: E[Q(s',a')] = ε/|A| * Σ Q(s',a) + (1-ε) * max Q(s',a)
+ */
+ getExpectedValue(nextState, nextStateActions) {
+ if (nextStateActions.size === 0) {
+ return 0;
+ }
+ const epsilon = this.config.explorationRate;
+ const numActions = nextStateActions.size;
+ // Calculate average Q-value (for random exploration)
+ let sumQ = 0;
+ let maxQ = -Infinity;
+ for (const qValue of nextStateActions.values()) {
+ sumQ += qValue.value;
+ maxQ = Math.max(maxQ, qValue.value);
+ }
+ const avgQ = sumQ / numActions;
+ // Expected value under epsilon-greedy policy
+ // ε * (average of all actions) + (1-ε) * (max action)
+ return epsilon * avgQ + (1 - epsilon) * maxQ;
+ }
+ /**
+ * Select next action and update with SARSA
+ * This is the typical SARSA flow: select action, observe reward, select next action, update
+ *
+ * @param currentState Current state
+ * @param currentAction Action taken in current state
+ * @param reward Reward received
+ * @param nextState Next state observed
+ * @param availableActions Actions available in next state
+ * @returns Next action selected (for continued learning)
+ */
+ selectAndUpdate(currentState, currentAction, reward, nextState, availableActions) {
+ // Select next action using epsilon-greedy policy
+ const nextAction = this.selectAction(nextState, availableActions);
+ // Create experience
+ const experience = {
+ taskId: `sarsa-${Date.now()}`,
+ taskType: 'online-learning',
+ state: currentState,
+ action: currentAction,
+ reward,
+ nextState,
+ timestamp: new Date(),
+ agentId: 'sarsa-learner'
+ };
+ // Update Q-value using SARSA rule with actual next action
+ this.update(experience, nextAction);
+ return nextAction;
+ }
+ /**
+ * Learn from a complete episode trajectory
+ * Updates all state-action pairs in the trajectory using SARSA
+ *
+ * @param trajectory Array of (state, action, reward) tuples
+ */
+ learnFromEpisode(trajectory) {
+ // SARSA updates each transition with the next action in the trajectory
+ for (let i = 0; i < trajectory.length - 1; i++) {
+ const current = trajectory[i];
+ const next = trajectory[i + 1];
+ const experience = {
+ taskId: `episode-${Date.now()}-${i}`,
+ taskType: 'episode-learning',
+ state: current.state,
+ action: current.action,
+ reward: current.reward,
+ nextState: next.state,
+ timestamp: new Date(),
+ agentId: 'sarsa-learner'
+ };
+ // Update with the actual next action from trajectory
+ this.update(experience, next.action);
+ }
+ // Handle terminal state (last transition)
+ if (trajectory.length > 0) {
+ const last = trajectory[trajectory.length - 1];
+ const terminalExperience = {
+ taskId: `episode-${Date.now()}-terminal`,
+ taskType: 'episode-learning',
+ state: last.state,
+ action: last.action,
+ reward: last.reward,
+ nextState: last.state, // Terminal state transitions to itself
+ timestamp: new Date(),
+ agentId: 'sarsa-learner'
+ };
+ // Terminal state has no next action, Q(terminal, any) = 0
+ this.update(terminalExperience);
+ }
+ this.endEpisode();
+ }
+ /**
+ * Get the default exploration rate for this algorithm
+ */
+ getDefaultExplorationRate() {
+ return this.defaultConfig.explorationRate;
+ }
+ /**
+ * Get algorithm name
+ */
+ getAlgorithmName() {
+ return 'SARSA';
+ }
+ /**
+ * Get algorithm type (on-policy)
+ */
+ getAlgorithmType() {
+ return 'on-policy';
+ }
+ /**
+ * Get detailed statistics including SARSA-specific metrics
+ */
+ getDetailedStatistics() {
+ return {
+ algorithm: this.getAlgorithmName(),
+ type: this.getAlgorithmType(),
+ stats: this.getStatistics()
+ };
+ }
+ /**
+ * Compare performance with expected convergence
+ * SARSA typically converges slower but more safely than Q-Learning
+ */
+ getConvergenceMetrics() {
+ const stats = this.getStatistics();
+ // Check if Q-values are stabilizing
+ const avgQValue = stats.avgQValue;
+ const qValueRange = stats.maxQValue - stats.minQValue;
+ // Convergence indicators:
+ // 1. Low exploration rate (mostly exploiting)
+ // 2. Reasonable Q-value range (not diverging)
+ // 3. Sufficient episodes for learning
+ const isConverging = stats.explorationRate < 0.1 && // Low exploration
+ qValueRange < 10 && // Bounded Q-values
+ stats.episodes > 20; // Sufficient training
+ const convergenceRate = stats.episodes > 0
+ ? Math.min(1.0, stats.episodes / 100)
+ : 0;
+ const stability = qValueRange > 0
+ ? 1.0 - Math.min(1.0, qValueRange / 20)
+ : 0.5;
+ return {
+ isConverging,
+ convergenceRate,
+ stability
+ };
+ }
+ }
+ exports.SARSALearner = SARSALearner;
+ //# sourceMappingURL=SARSALearner.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"SARSALearner.js","sourceRoot":"","sources":["../../../src/learning/algorithms/SARSALearner.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AAEH,2DAAkE;AAQlE;;GAEG;AACH,MAAM,oBAAoB,GAAa;IACrC,YAAY,EAAE,GAAG;IACjB,cAAc,EAAE,IAAI;IACpB,eAAe,EAAE,GAAG;IACpB,gBAAgB,EAAE,KAAK;IACvB,kBAAkB,EAAE,IAAI;IACxB,mBAAmB,EAAE,IAAI;IACzB,gBAAgB,EAAE,KAAK;IACvB,SAAS,EAAE,EAAE;CACd,CAAC;AAEF;;;;;;;;;;;;;;;;GAgBG;AACH,MAAa,YAAa,SAAQ,qCAAiB;IAIjD,YAAY,SAA4B,EAAE;QACxC,MAAM,UAAU,GAAG,EAAE,GAAG,oBAAoB,EAAE,GAAG,MAAM,EAAE,CAAC;QAC1D,KAAK,CAAC,UAAU,CAAC,CAAC;QAClB,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC;QAChC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,+CAA+C,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;IAC5F,CAAC;IAED;;;;;;;OAOG;IACH,MAAM,CAAC,UAA0B,EAAE,UAAwB;QACzD,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACpD,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC;QACvD,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAE5D,6BAA6B;QAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC/C,MAAM,QAAQ,GAAG,YAAY,EAAE,GAAG,CAAC,SAAS,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;QAE1D,iEAAiE;QACjE,sEAAsE;QACtE,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,IAAI,UAAU,EAAE,CAAC;YACf,wDAAwD;YACxD,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC;YACpD,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,GAAG,gBAAgB,EAAE,GAAG,CAAC,aAAa,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC;QAC3D,CAAC;aAAM,CAAC;YACN,yEAAyE;YACzE,uDAAuD;YACvD,kEAAkE;YAClE,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;YACvD,IAAI,gBAAgB,IAAI,gBAAgB,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBAClD,gFAAgF;gBAChF,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,UAAU,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAC;YACxE,CAAC;QACH,CAAC;QAED,oBAAoB;QACpB,oDAAoD;QACpD,MAAM,QAAQ,GAAG,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,GAAG,KAAK,CAAC;QACxE,MAAM,OAAO,GAAG,QAAQ,GAAG,QAAQ,CAAC;QACpC,MAAM,IAAI,GAAG,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,GAAG,OAAO,CAAC;QAE3D,iBAAiB;QACjB,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,CAAC;QAE1C,6CAA6C;QAC7C,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,6BAA6B;QACrF,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,gBAAgB,CAAC,SAAoB,EAAE,gBAAkC;QAC/E,IAAI,gBAAgB,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAChC,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC;QAC5C,MAAM,UAAU,GAAG,gBAAgB,CAAC,IAAI,CAAC;QAEzC,qDAAqD;QACrD,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,IAAI,IAAI,GAAG,CAAC,QAAQ,CAAC;QAErB,KAAK,MAAM,MAAM,IAAI,gBAAgB,CAAC,MAAM,EAAE,EAAE,CAAC;YAC/C,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC;YACrB,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QACtC,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,GAAG,UAAU,CAAC;QAE/B,6CAA6C;QAC7C,sDAAsD;QACtD,OAAO,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC,GAAG,IAAI,CAAC;IAC/C,CAAC;IAED;;;;;;;;;;OAUG;IACH,eAAe,CACb,YAAuB,EACvB,aAA0B,EAC1B,MAAc,EACd,SAAoB,EACpB,gBAA+B;QAE/B,iDAAiD;QACjD,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,EAAE,gBAAgB,CAAC,CAAC;QAElE,oBAAoB;QACpB,MAAM,UAAU,GAAmB;YACjC,MAAM,EAAE,SAAS,IAAI,CAAC,GAAG,EAAE,EAAE;YAC7B,QAAQ,EAAE,iBAAiB;YAC3B,KAAK,EAAE,YAAY;YACnB,MAAM,EAAE,aAAa;YACrB,MAAM;YACN,SAAS;YACT,SAAS,EAAE,IAAI,IAAI,EAAE;YACrB,OAAO,EAAE,eAAe;SACzB,CAAC;QAEF,0DAA0D;QAC1D,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QAEpC,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;;;OAKG;IACH,gBAAgB,CACd,UAIE;QAEF,uEAAuE;QACvE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/C,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC9B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAE/B,MAAM,UAAU,GA
AmB;gBACjC,MAAM,EAAE,WAAW,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE;gBACpC,QAAQ,EAAE,kBAAkB;gBAC5B,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,SAAS,EAAE,IAAI,CAAC,KAAK;gBACrB,SAAS,EAAE,IAAI,IAAI,EAAE;gBACrB,OAAO,EAAE,eAAe;aACzB,CAAC;YAEF,qDAAqD;YACrD,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,CAAC;QAED,0CAA0C;QAC1C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,GAAG,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAC/C,MAAM,kBAAkB,GAAmB;gBACzC,MAAM,EAAE,WAAW,IAAI,CAAC,GAAG,EAAE,WAAW;gBACxC,QAAQ,EAAE,kBAAkB;gBAC5B,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,SAAS,EAAE,IAAI,CAAC,KAAK,EAAE,uCAAuC;gBAC9D,SAAS,EAAE,IAAI,IAAI,EAAE;gBACrB,OAAO,EAAE,eAAe;aACzB,CAAC;YAEF,0DAA0D;YAC1D,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,CAAC,UAAU,EAAE,CAAC;IACpB,CAAC;IAED;;OAEG;IACO,yBAAyB;QACjC,OAAO,IAAI,CAAC,aAAa,CAAC,eAAe,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,OAAO,WAAW,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,qBAAqB;QAKnB,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,gBAAgB,EAAE;YAClC,IAAI,EAAE,IAAI,CAAC,gBAAgB,EAAE;YAC7B,KAAK,EAAE,IAAI,CAAC,aAAa,EAAE;SAC5B,CAAC;IACJ,CAAC;IAED;;;OAGG;IACH,qBAAqB;QAKnB,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QAEnC,oCAAoC;QACpC,MAAM,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;QAClC,MAAM,WAAW,GAAG,KAAK,CAAC,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;QAEtD,0BAA0B;QAC1B,8CAA8C;QAC9C,8CAA8C;QAC9C,sCAAsC;QAEtC,MAAM,YAAY,GAChB,KAAK,CAAC,eAAe,GAAG,GAAG,IAAI,kBAAkB;YACjD,WAAW,GAAG,EAAE,IAAI,mBAAmB;YACvC,KAAK,CAAC,QAAQ,GAAG,EAAE,CAAC,CAAC,sBAAsB;QAE7C,MAAM,eAAe,GAAG,KAAK,CAAC,QAAQ,GAAG,CAAC;YACxC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC;YACrC,CAAC,CAAC,CAAC,CAAC;QAEN,MAAM,SAAS,GAAG,WAAW,GAAG,CAAC;YAC/B,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,WAAW,GAAG,EAAE,CAAC;YACvC,CAAC,CAAC,GAAG,CAAC;QAER,OAAO;YACL,YAAY;YACZ,eAAe;YACf,SAAS;SACV,CAAC;IACJ,CAAC;CACF;AAvQD,oCAuQC"}
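When update() is called without a next action, SARSALearner falls back to the expected-SARSA estimate documented in the hunk above. A minimal, self-contained TypeScript sketch of that fallback (illustrative only, not code from the package; the Q-table is reduced to a plain array of values):

// Hedged sketch of the expected-SARSA value under an ε-greedy policy:
// E[Q(s',·)] = ε · mean(Q(s',a)) + (1 − ε) · max(Q(s',a))
function expectedSarsaValue(nextQValues: number[], epsilon: number): number {
  if (nextQValues.length === 0) return 0; // unseen next state, as in the class
  const maxQ = Math.max(...nextQValues);
  const avgQ = nextQValues.reduce((sum, q) => sum + q, 0) / nextQValues.length;
  return epsilon * avgQ + (1 - epsilon) * maxQ;
}

// With the package default ε = 0.3 and Q(s',·) = [0.2, 0.6, 1.0]:
// 0.3 * 0.6 + 0.7 * 1.0 = 0.88
console.log(expectedSarsaValue([0.2, 0.6, 1.0], 0.3)); // 0.88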
@@ -0,0 +1,29 @@
+ /**
+ * Learning Algorithms - Reinforcement Learning Implementations
+ *
+ * This module provides various RL algorithms for agent learning:
+ * - AbstractRLLearner: Base class for all RL algorithms
+ * - QLearning: Off-policy TD(0) algorithm
+ * - SARSALearner: On-policy TD(0) algorithm
+ * - ActorCriticLearner: Advantage Actor-Critic (A2C) algorithm
+ * - PPOLearner: Proximal Policy Optimization (PPO-Clip) algorithm
+ */
+ import { AbstractRLLearner, RLConfig, QValue } from './AbstractRLLearner';
+ import { QLearning, QLearningConfig } from '../QLearning';
+ import { SARSALearner, SARSAConfig } from './SARSALearner';
+ import { ActorCriticLearner, ActorCriticConfig, createDefaultActorCriticConfig } from './ActorCriticLearner';
+ import { PPOLearner, PPOConfig, createDefaultPPOConfig } from './PPOLearner';
+ export { AbstractRLLearner, RLConfig, QValue };
+ export { QLearning, QLearningConfig };
+ export { SARSALearner, SARSAConfig };
+ export { ActorCriticLearner, ActorCriticConfig, createDefaultActorCriticConfig };
+ export { PPOLearner, PPOConfig, createDefaultPPOConfig };
+ /**
+ * Supported RL algorithm types
+ */
+ export type RLAlgorithmType = 'q-learning' | 'sarsa' | 'actor-critic' | 'ppo';
+ /**
+ * Factory function to create RL algorithm instances
+ */
+ export declare function createRLAlgorithm(type: RLAlgorithmType, config?: any): AbstractRLLearner;
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/learning/algorithms/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,iBAAiB,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,qBAAqB,CAAC;AAC1E,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC3D,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,8BAA8B,EAAE,MAAM,sBAAsB,CAAC;AAC7G,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAE7E,OAAO,EAAE,iBAAiB,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;AAC/C,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AACrC,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,8BAA8B,EAAE,CAAC;AACjF,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,sBAAsB,EAAE,CAAC;AAEzD;;GAEG;AACH,MAAM,MAAM,eAAe,GAAG,YAAY,GAAG,OAAO,GAAG,cAAc,GAAG,KAAK,CAAC;AAE9E;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,eAAe,EACrB,MAAM,CAAC,EAAE,GAAG,GACX,iBAAiB,CAanB"}
@@ -0,0 +1,44 @@
+ "use strict";
+ /**
+ * Learning Algorithms - Reinforcement Learning Implementations
+ *
+ * This module provides various RL algorithms for agent learning:
+ * - AbstractRLLearner: Base class for all RL algorithms
+ * - QLearning: Off-policy TD(0) algorithm
+ * - SARSALearner: On-policy TD(0) algorithm
+ * - ActorCriticLearner: Advantage Actor-Critic (A2C) algorithm
+ * - PPOLearner: Proximal Policy Optimization (PPO-Clip) algorithm
+ */
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.createDefaultPPOConfig = exports.PPOLearner = exports.createDefaultActorCriticConfig = exports.ActorCriticLearner = exports.SARSALearner = exports.QLearning = exports.AbstractRLLearner = void 0;
+ exports.createRLAlgorithm = createRLAlgorithm;
+ const AbstractRLLearner_1 = require("./AbstractRLLearner");
+ Object.defineProperty(exports, "AbstractRLLearner", { enumerable: true, get: function () { return AbstractRLLearner_1.AbstractRLLearner; } });
+ const QLearning_1 = require("../QLearning");
+ Object.defineProperty(exports, "QLearning", { enumerable: true, get: function () { return QLearning_1.QLearning; } });
+ const SARSALearner_1 = require("./SARSALearner");
+ Object.defineProperty(exports, "SARSALearner", { enumerable: true, get: function () { return SARSALearner_1.SARSALearner; } });
+ const ActorCriticLearner_1 = require("./ActorCriticLearner");
+ Object.defineProperty(exports, "ActorCriticLearner", { enumerable: true, get: function () { return ActorCriticLearner_1.ActorCriticLearner; } });
+ Object.defineProperty(exports, "createDefaultActorCriticConfig", { enumerable: true, get: function () { return ActorCriticLearner_1.createDefaultActorCriticConfig; } });
+ const PPOLearner_1 = require("./PPOLearner");
+ Object.defineProperty(exports, "PPOLearner", { enumerable: true, get: function () { return PPOLearner_1.PPOLearner; } });
+ Object.defineProperty(exports, "createDefaultPPOConfig", { enumerable: true, get: function () { return PPOLearner_1.createDefaultPPOConfig; } });
+ /**
+ * Factory function to create RL algorithm instances
+ */
+ function createRLAlgorithm(type, config) {
+ switch (type) {
+ case 'q-learning':
+ return new QLearning_1.QLearning(config);
+ case 'sarsa':
+ return new SARSALearner_1.SARSALearner(config);
+ case 'actor-critic':
+ return new ActorCriticLearner_1.ActorCriticLearner(config ?? (0, ActorCriticLearner_1.createDefaultActorCriticConfig)());
+ case 'ppo':
+ return new PPOLearner_1.PPOLearner(config ?? (0, PPOLearner_1.createDefaultPPOConfig)());
+ default:
+ throw new Error(`Unknown RL algorithm type: ${type}`);
+ }
+ }
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/learning/algorithms/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AAsBH,8CAgBC;AApCD,2DAA0E;AAMjE,kGANA,qCAAiB,OAMA;AAL1B,4CAA0D;AAMjD,0FANA,qBAAS,OAMA;AALlB,iDAA2D;AAMlD,6FANA,2BAAY,OAMA;AALrB,6DAA6G;AAMpG,mGANA,uCAAkB,OAMA;AAAqB,+GANA,mDAA8B,OAMA;AAL9E,6CAA6E;AAMpE,2FANA,uBAAU,OAMA;AAAa,uGANA,mCAAsB,OAMA;AAOtD;;GAEG;AACH,SAAgB,iBAAiB,CAC/B,IAAqB,EACrB,MAAY;IAEZ,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,YAAY;YACf,OAAO,IAAI,qBAAS,CAAC,MAAM,CAAC,CAAC;QAC/B,KAAK,OAAO;YACV,OAAO,IAAI,2BAAY,CAAC,MAAM,CAAC,CAAC;QAClC,KAAK,cAAc;YACjB,OAAO,IAAI,uCAAkB,CAAC,MAAM,IAAI,IAAA,mDAA8B,GAAE,CAAC,CAAC;QAC5E,KAAK,KAAK;YACR,OAAO,IAAI,uBAAU,CAAC,MAAM,IAAI,IAAA,mCAAsB,GAAE,CAAC,CAAC;QAC5D;YACE,MAAM,IAAI,KAAK,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC"}
@@ -1,6 +1,7 @@
  /**
  * Learning System - Phase 2 (Milestone 2.2)
  * Enhanced (v1.3.3+) - ML Root Cause Analysis and Fix Recommendations
+ * Enhanced (v2.2.0+) - Self-Learning Upgrade with RL Algorithms
  *
  * Exports all learning components for agent performance improvement.
  */
@@ -17,4 +18,6 @@ export * from './FlakyFixRecommendations';
  export * from './StatisticalAnalysis';
  export * from './SwarmIntegration';
  export { FixRecommendationEngine } from './FixRecommendationEngine';
+ export { AbstractRLLearner, RLConfig, QValue, SARSALearner, SARSAConfig, ActorCriticLearner, ActorCriticConfig, createDefaultActorCriticConfig, PPOLearner, PPOConfig, createDefaultPPOConfig, createRLAlgorithm } from './algorithms';
+ export { ExperienceSharingProtocol, ExperienceSharingConfig, SharedExperience, SharingStats, PeerConnection, SharingEvent } from './ExperienceSharingProtocol';
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/learning/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,cAAc,SAAS,CAAC;AACxB,cAAc,kBAAkB,CAAC;AACjC,cAAc,aAAa,CAAC;AAC5B,cAAc,0BAA0B,CAAC;AACzC,cAAc,sBAAsB,CAAC;AACrC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,wBAAwB,CAAC;AACvC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,oBAAoB,CAAC;AAGnC,OAAO,EACL,uBAAuB,EACxB,MAAM,2BAA2B,CAAC"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/learning/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,cAAc,SAAS,CAAC;AACxB,cAAc,kBAAkB,CAAC;AACjC,cAAc,aAAa,CAAC;AAC5B,cAAc,0BAA0B,CAAC;AACzC,cAAc,sBAAsB,CAAC;AACrC,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,qBAAqB,CAAC;AACpC,cAAc,wBAAwB,CAAC;AACvC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,uBAAuB,CAAC;AACtC,cAAc,oBAAoB,CAAC;AAGnC,OAAO,EACL,uBAAuB,EACxB,MAAM,2BAA2B,CAAC;AAInC,OAAO,EACL,iBAAiB,EACjB,QAAQ,EACR,MAAM,EACN,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,iBAAiB,EACjB,8BAA8B,EAC9B,UAAU,EACV,SAAS,EACT,sBAAsB,EACtB,iBAAiB,EAClB,MAAM,cAAc,CAAC;AAGtB,OAAO,EACL,yBAAyB,EACzB,uBAAuB,EACvB,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,YAAY,EACb,MAAM,6BAA6B,CAAC"}
@@ -2,6 +2,7 @@
  /**
  * Learning System - Phase 2 (Milestone 2.2)
  * Enhanced (v1.3.3+) - ML Root Cause Analysis and Fix Recommendations
+ * Enhanced (v2.2.0+) - Self-Learning Upgrade with RL Algorithms
  *
  * Exports all learning components for agent performance improvement.
  */
@@ -20,7 +21,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.FixRecommendationEngine = void 0;
+ exports.ExperienceSharingProtocol = exports.createRLAlgorithm = exports.createDefaultPPOConfig = exports.PPOLearner = exports.createDefaultActorCriticConfig = exports.ActorCriticLearner = exports.SARSALearner = exports.AbstractRLLearner = exports.FixRecommendationEngine = void 0;
  __exportStar(require("./types"), exports);
  __exportStar(require("./LearningEngine"), exports);
  __exportStar(require("./QLearning"), exports);
@@ -36,4 +37,17 @@ __exportStar(require("./SwarmIntegration"), exports);
  // Enhanced fix recommendations (NEW in v1.3.3+)
  var FixRecommendationEngine_1 = require("./FixRecommendationEngine");
  Object.defineProperty(exports, "FixRecommendationEngine", { enumerable: true, get: function () { return FixRecommendationEngine_1.FixRecommendationEngine; } });
+ // RL Algorithms (NEW in v2.2.0+)
+ // Note: RLAlgorithmType is already exported from LearningEngine, so we use explicit exports
+ var algorithms_1 = require("./algorithms");
+ Object.defineProperty(exports, "AbstractRLLearner", { enumerable: true, get: function () { return algorithms_1.AbstractRLLearner; } });
+ Object.defineProperty(exports, "SARSALearner", { enumerable: true, get: function () { return algorithms_1.SARSALearner; } });
+ Object.defineProperty(exports, "ActorCriticLearner", { enumerable: true, get: function () { return algorithms_1.ActorCriticLearner; } });
+ Object.defineProperty(exports, "createDefaultActorCriticConfig", { enumerable: true, get: function () { return algorithms_1.createDefaultActorCriticConfig; } });
+ Object.defineProperty(exports, "PPOLearner", { enumerable: true, get: function () { return algorithms_1.PPOLearner; } });
+ Object.defineProperty(exports, "createDefaultPPOConfig", { enumerable: true, get: function () { return algorithms_1.createDefaultPPOConfig; } });
+ Object.defineProperty(exports, "createRLAlgorithm", { enumerable: true, get: function () { return algorithms_1.createRLAlgorithm; } });
+ // Experience Sharing (NEW in v2.2.0+)
+ var ExperienceSharingProtocol_1 = require("./ExperienceSharingProtocol");
+ Object.defineProperty(exports, "ExperienceSharingProtocol", { enumerable: true, get: function () { return ExperienceSharingProtocol_1.ExperienceSharingProtocol; } });
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/learning/index.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;;;;;;;;;;;;;;;AAEH,0CAAwB;AACxB,mDAAiC;AACjC,8CAA4B;AAC5B,2DAAyC;AACzC,uDAAqC;AACrC,oDAAkC;AAClC,sDAAoC;AACpC,sDAAoC;AACpC,yDAAuC;AACvC,4DAA0C;AAC1C,wDAAsC;AACtC,qDAAmC;AAEnC,gDAAgD;AAChD,qEAEmC;AADjC,kIAAA,uBAAuB,OAAA"}
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/learning/index.ts"],"names":[],"mappings":";AAAA;;;;;;GAMG;;;;;;;;;;;;;;;;;AAEH,0CAAwB;AACxB,mDAAiC;AACjC,8CAA4B;AAC5B,2DAAyC;AACzC,uDAAqC;AACrC,oDAAkC;AAClC,sDAAoC;AACpC,sDAAoC;AACpC,yDAAuC;AACvC,4DAA0C;AAC1C,wDAAsC;AACtC,qDAAmC;AAEnC,gDAAgD;AAChD,qEAEmC;AADjC,kIAAA,uBAAuB,OAAA;AAGzB,iCAAiC;AACjC,4FAA4F;AAC5F,2CAasB;AAZpB,+GAAA,iBAAiB,OAAA;AAGjB,0GAAA,YAAY,OAAA;AAEZ,gHAAA,kBAAkB,OAAA;AAElB,4HAAA,8BAA8B,OAAA;AAC9B,wGAAA,UAAU,OAAA;AAEV,oHAAA,sBAAsB,OAAA;AACtB,+GAAA,iBAAiB,OAAA;AAGnB,sCAAsC;AACtC,yEAOqC;AANnC,sIAAA,yBAAyB,OAAA"}
@@ -31,6 +31,8 @@ export interface TaskExperience {
  nextState: TaskState;
  timestamp: Date;
  agentId: string;
+ /** Whether this experience represents a terminal state (episode end) */
+ done?: boolean;
  }
  /**
  * State representation for reinforcement learning
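The new optional done flag marks an episode-terminal transition on TaskExperience. The hunks shown here only add the field and do not show a consumer; by convention, a terminal transition drops the bootstrap term from the TD target, which would look like the following sketch (an assumption for illustration, not code from the package):

// Hedged sketch of how a terminal flag is conventionally applied to a TD target.
function tdTarget(reward: number, maxNextQ: number, discountFactor: number, done?: boolean): number {
  // At episode end there is no future return to bootstrap from.
  return done ? reward : reward + discountFactor * maxNextQ;
}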