moflo 4.8.32 → 4.8.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/bin/generate-code-map.mjs +955 -955
  2. package/bin/index-guidance.mjs +905 -905
  3. package/bin/index-tests.mjs +728 -728
  4. package/bin/setup-project.mjs +252 -252
  5. package/package.json +10 -5
  6. package/src/@claude-flow/cli/dist/src/commands/doctor.js +1339 -1107
  7. package/src/@claude-flow/cli/dist/src/index.js +2 -18
  8. package/src/@claude-flow/cli/dist/src/mcp-tools/hooks-tools.js +17 -0
  9. package/src/@claude-flow/cli/dist/src/memory/memory-initializer.js +4 -7
  10. package/src/@claude-flow/cli/dist/src/version.js +6 -0
  11. package/src/@claude-flow/cli/package.json +1 -1
  12. package/src/@claude-flow/neural/README.md +260 -0
  13. package/src/@claude-flow/neural/dist/algorithms/a2c.js +361 -0
  14. package/src/@claude-flow/neural/dist/algorithms/curiosity.js +392 -0
  15. package/src/@claude-flow/neural/dist/algorithms/decision-transformer.js +415 -0
  16. package/src/@claude-flow/neural/dist/algorithms/dqn.js +303 -0
  17. package/src/@claude-flow/neural/dist/algorithms/index.js +74 -0
  18. package/src/@claude-flow/neural/dist/algorithms/ppo.js +331 -0
  19. package/src/@claude-flow/neural/dist/algorithms/q-learning.js +259 -0
  20. package/src/@claude-flow/neural/dist/algorithms/sarsa.js +297 -0
  21. package/src/@claude-flow/neural/dist/application/index.js +7 -0
  22. package/src/@claude-flow/neural/dist/application/services/neural-application-service.js +161 -0
  23. package/src/@claude-flow/neural/dist/domain/entities/pattern.js +134 -0
  24. package/src/@claude-flow/neural/dist/domain/index.js +8 -0
  25. package/src/@claude-flow/neural/dist/domain/services/learning-service.js +195 -0
  26. package/src/@claude-flow/neural/dist/index.js +201 -0
  27. package/src/@claude-flow/neural/dist/modes/balanced.js +234 -0
  28. package/src/@claude-flow/neural/dist/modes/base.js +77 -0
  29. package/src/@claude-flow/neural/dist/modes/batch.js +316 -0
  30. package/src/@claude-flow/neural/dist/modes/edge.js +310 -0
  31. package/src/@claude-flow/neural/dist/modes/index.js +13 -0
  32. package/src/@claude-flow/neural/dist/modes/real-time.js +196 -0
  33. package/src/@claude-flow/neural/dist/modes/research.js +389 -0
  34. package/src/@claude-flow/neural/dist/pattern-learner.js +603 -0
  35. package/src/@claude-flow/neural/dist/reasoning-bank.js +993 -0
  36. package/src/@claude-flow/neural/dist/reasoningbank-adapter.js +463 -0
  37. package/src/@claude-flow/neural/dist/sona-integration.js +326 -0
  38. package/src/@claude-flow/neural/dist/sona-manager.js +695 -0
  39. package/src/@claude-flow/neural/dist/types.js +11 -0
  40. package/src/@claude-flow/neural/package.json +26 -0
package/src/@claude-flow/neural/dist/algorithms/a2c.js
@@ -0,0 +1,361 @@
+ /**
+  * Advantage Actor-Critic (A2C)
+  *
+  * Implements synchronous A2C algorithm with:
+  * - Shared actor-critic network
+  * - N-step returns
+  * - Entropy regularization
+  * - Advantage normalization
+  *
+  * Performance Target: <10ms per update step
+  */
+ /**
+  * Default A2C configuration
+  */
+ export const DEFAULT_A2C_CONFIG = {
+     algorithm: 'a2c',
+     learningRate: 0.0007,
+     gamma: 0.99,
+     entropyCoef: 0.01,
+     valueLossCoef: 0.5,
+     maxGradNorm: 0.5,
+     epochs: 1,
+     miniBatchSize: 32,
+     nSteps: 5,
+     useGAE: true,
+     gaeLambda: 0.95,
+ };
+ /**
+  * A2C Algorithm Implementation
+  */
+ export class A2CAlgorithm {
+     config;
+     // Shared network weights
+     sharedWeights;
+     policyHead;
+     valueHead;
+     // Optimizer state
+     sharedMomentum;
+     policyMomentum;
+     valueMomentum;
+     // Experience buffer for n-step
+     buffer = [];
+     // Dimensions
+     inputDim = 768;
+     hiddenDim = 64;
+     numActions = 4;
+     // Statistics
+     updateCount = 0;
+     avgPolicyLoss = 0;
+     avgValueLoss = 0;
+     avgEntropy = 0;
+     constructor(config = {}) {
+         this.config = { ...DEFAULT_A2C_CONFIG, ...config };
+         // Initialize network
+         const scale = Math.sqrt(2 / this.inputDim);
+         this.sharedWeights = new Float32Array(this.inputDim * this.hiddenDim);
+         this.policyHead = new Float32Array(this.hiddenDim * this.numActions);
+         this.valueHead = new Float32Array(this.hiddenDim);
+         for (let i = 0; i < this.sharedWeights.length; i++) {
+             this.sharedWeights[i] = (Math.random() - 0.5) * scale;
+         }
+         for (let i = 0; i < this.policyHead.length; i++) {
+             this.policyHead[i] = (Math.random() - 0.5) * 0.1;
+         }
+         for (let i = 0; i < this.valueHead.length; i++) {
+             this.valueHead[i] = (Math.random() - 0.5) * 0.1;
+         }
+         // Initialize momentum
+         this.sharedMomentum = new Float32Array(this.sharedWeights.length);
+         this.policyMomentum = new Float32Array(this.policyHead.length);
+         this.valueMomentum = new Float32Array(this.valueHead.length);
+     }
+     /**
+      * Add experience from trajectory
+      */
+     addExperience(trajectory) {
+         for (const step of trajectory.steps) {
+             const { probs, value, entropy } = this.evaluate(step.stateAfter);
+             const action = this.hashAction(step.action);
+             this.buffer.push({
+                 state: step.stateAfter,
+                 action,
+                 reward: step.reward,
+                 value,
+                 logProb: Math.log(probs[action] + 1e-8),
+                 entropy,
+             });
+         }
+     }
+     /**
+      * Perform A2C update
+      * Target: <10ms
+      */
+     update() {
+         const startTime = performance.now();
+         if (this.buffer.length < this.config.nSteps) {
+             return { policyLoss: 0, valueLoss: 0, entropy: 0 };
+         }
+         const batchSize = this.buffer.length;
+         // Compute returns and advantages
+         const returns = this.computeReturns();
+         const advantages = this.computeAdvantages(returns);
+         // Initialize gradients
+         const sharedGrad = new Float32Array(this.sharedWeights.length);
+         const policyGrad = new Float32Array(this.policyHead.length);
+         const valueGrad = new Float32Array(this.valueHead.length);
+         let totalPolicyLoss = 0;
+         let totalValueLoss = 0;
+         let totalEntropy = 0;
+         // Process all experiences
+         for (let i = 0; i < batchSize; i++) {
+             const exp = this.buffer[i];
+             const advantage = advantages[i];
+             const return_ = returns[i];
+             // Get current policy and value
+             const { probs, value, hidden } = this.forwardWithHidden(exp.state);
+             const logProb = Math.log(probs[exp.action] + 1e-8);
+             // Policy loss
+             const policyLoss = -logProb * advantage;
+             totalPolicyLoss += policyLoss;
+             // Value loss
+             const valueLoss = (value - return_) ** 2;
+             totalValueLoss += valueLoss;
+             // Entropy
+             let entropy = 0;
+             for (const p of probs) {
+                 if (p > 0)
+                     entropy -= p * Math.log(p);
+             }
+             totalEntropy += entropy;
+             // Accumulate gradients
+             this.accumulateGradients(sharedGrad, policyGrad, valueGrad, exp.state, hidden, exp.action, advantage, value - return_);
+         }
+         // Add entropy bonus to policy gradient
+         for (let i = 0; i < policyGrad.length; i++) {
+             policyGrad[i] -= this.config.entropyCoef * totalEntropy / batchSize;
+         }
+         // Apply gradients
+         this.applyGradients(sharedGrad, policyGrad, valueGrad, batchSize);
+         // Clear the buffer; the averages below use the batch size captured
+         // above so they never divide by the now-empty buffer's length
+         this.buffer = [];
+         this.updateCount++;
+         this.avgPolicyLoss = totalPolicyLoss / batchSize;
+         this.avgValueLoss = totalValueLoss / batchSize;
+         this.avgEntropy = totalEntropy / batchSize;
+         const elapsed = performance.now() - startTime;
+         if (elapsed > 10) {
+             console.warn(`A2C update exceeded target: ${elapsed.toFixed(2)}ms > 10ms`);
+         }
+         return {
+             policyLoss: this.avgPolicyLoss,
+             valueLoss: this.avgValueLoss,
+             entropy: this.avgEntropy,
+         };
+     }
+     /**
+      * Get action from policy
+      */
+     getAction(state) {
+         const { probs, value } = this.evaluate(state);
+         const action = this.sampleAction(probs);
+         return { action, value };
+     }
+     /**
+      * Get statistics
+      */
+     getStats() {
+         return {
+             updateCount: this.updateCount,
+             bufferSize: this.buffer.length,
+             avgPolicyLoss: this.avgPolicyLoss,
+             avgValueLoss: this.avgValueLoss,
+             avgEntropy: this.avgEntropy,
+         };
+     }
+     // ==========================================================================
+     // Private Methods
+     // ==========================================================================
+     evaluate(state) {
+         const { probs, value } = this.forward(state);
+         let entropy = 0;
+         for (const p of probs) {
+             if (p > 0)
+                 entropy -= p * Math.log(p);
+         }
+         return { probs, value, entropy };
+     }
+     forward(state) {
+         // Same pass as forwardWithHidden, discarding the hidden activations
+         const { probs, value } = this.forwardWithHidden(state);
+         return { probs, value };
+     }
+     forwardWithHidden(state) {
+         // Shared hidden layer
+         const hidden = new Float32Array(this.hiddenDim);
+         for (let h = 0; h < this.hiddenDim; h++) {
+             let sum = 0;
+             for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+                 sum += state[i] * this.sharedWeights[i * this.hiddenDim + h];
+             }
+             hidden[h] = Math.max(0, sum); // ReLU
+         }
+         // Policy head
+         const logits = new Float32Array(this.numActions);
+         for (let a = 0; a < this.numActions; a++) {
+             let sum = 0;
+             for (let h = 0; h < this.hiddenDim; h++) {
+                 sum += hidden[h] * this.policyHead[h * this.numActions + a];
+             }
+             logits[a] = sum;
+         }
+         const probs = this.softmax(logits);
+         // Value head
+         let value = 0;
+         for (let h = 0; h < this.hiddenDim; h++) {
+             value += hidden[h] * this.valueHead[h];
+         }
+         return { probs, value, hidden };
+     }
+     computeReturns() {
+         const returns = new Array(this.buffer.length).fill(0);
+         let cumReturn = 0;
+         // Bootstrap from last value if not terminal
+         if (this.buffer.length > 0) {
+             cumReturn = this.buffer[this.buffer.length - 1].value;
+         }
+         for (let t = this.buffer.length - 1; t >= 0; t--) {
+             cumReturn = this.buffer[t].reward + this.config.gamma * cumReturn;
+             returns[t] = cumReturn;
+         }
+         return returns;
+     }
+     computeAdvantages(returns) {
+         if (this.config.useGAE) {
+             return this.computeGAE();
+         }
+         // Simple advantage: return - value
+         const advantages = new Array(this.buffer.length).fill(0);
+         for (let i = 0; i < this.buffer.length; i++) {
+             advantages[i] = returns[i] - this.buffer[i].value;
+         }
+         return this.normalizeAdvantages(advantages);
+     }
+     computeGAE() {
+         // GAE recurrence: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
+         // A_t = delta_t + gamma * lambda * A_{t+1}
+         const advantages = new Array(this.buffer.length).fill(0);
+         let lastGae = 0;
+         for (let t = this.buffer.length - 1; t >= 0; t--) {
+             const nextValue = t < this.buffer.length - 1
+                 ? this.buffer[t + 1].value
+                 : 0;
+             const delta = this.buffer[t].reward + this.config.gamma * nextValue - this.buffer[t].value;
+             lastGae = delta + this.config.gamma * this.config.gaeLambda * lastGae;
+             advantages[t] = lastGae;
+         }
+         return this.normalizeAdvantages(advantages);
+     }
+     normalizeAdvantages(advantages) {
+         // Normalize to zero mean and unit variance
+         const mean = advantages.reduce((a, b) => a + b, 0) / advantages.length;
+         const std = Math.sqrt(advantages.reduce((a, b) => a + (b - mean) ** 2, 0) / advantages.length) + 1e-8;
+         return advantages.map(a => (a - mean) / std);
+     }
+     accumulateGradients(sharedGrad, policyGrad, valueGrad, state, hidden, action, advantage, valueError) {
+         // Policy gradient
+         for (let h = 0; h < this.hiddenDim; h++) {
+             policyGrad[h * this.numActions + action] += hidden[h] * advantage;
+         }
+         // Value gradient
+         for (let h = 0; h < this.hiddenDim; h++) {
+             valueGrad[h] += hidden[h] * valueError * this.config.valueLossCoef;
+         }
+         // Shared layer gradient (backprop through both heads)
+         for (let h = 0; h < this.hiddenDim; h++) {
+             if (hidden[h] > 0) { // ReLU gradient
+                 const policySignal = advantage * this.policyHead[h * this.numActions + action];
+                 const valueSignal = valueError * this.valueHead[h] * this.config.valueLossCoef;
+                 const totalSignal = policySignal + valueSignal;
+                 for (let i = 0; i < Math.min(state.length, this.inputDim); i++) {
+                     sharedGrad[i * this.hiddenDim + h] += state[i] * totalSignal;
+                 }
+             }
+         }
+     }
+     applyGradients(sharedGrad, policyGrad, valueGrad, batchSize) {
+         const lr = this.config.learningRate / batchSize;
+         const beta = 0.9;
+         // SGD with momentum; note that maxGradNorm is applied as an
+         // element-wise clip here, not a true global-norm clip
+         const clip = (g) => Math.max(Math.min(g, this.config.maxGradNorm), -this.config.maxGradNorm);
+         // Apply to shared weights
+         for (let i = 0; i < this.sharedWeights.length; i++) {
+             this.sharedMomentum[i] = beta * this.sharedMomentum[i] + (1 - beta) * clip(sharedGrad[i]);
+             this.sharedWeights[i] -= lr * this.sharedMomentum[i];
+         }
+         // Apply to policy head
+         for (let i = 0; i < this.policyHead.length; i++) {
+             this.policyMomentum[i] = beta * this.policyMomentum[i] + (1 - beta) * clip(policyGrad[i]);
+             this.policyHead[i] -= lr * this.policyMomentum[i];
+         }
+         // Apply to value head
+         for (let i = 0; i < this.valueHead.length; i++) {
+             this.valueMomentum[i] = beta * this.valueMomentum[i] + (1 - beta) * clip(valueGrad[i]);
+             this.valueHead[i] -= lr * this.valueMomentum[i];
+         }
+     }
+     softmax(logits) {
+         // Numerically stable softmax: subtract the max logit before exponentiating
+         const max = Math.max(...logits);
+         const exps = new Float32Array(logits.length);
+         let sum = 0;
+         for (let i = 0; i < logits.length; i++) {
+             exps[i] = Math.exp(logits[i] - max);
+             sum += exps[i];
+         }
+         for (let i = 0; i < exps.length; i++) {
+             exps[i] /= sum;
+         }
+         return exps;
+     }
+     sampleAction(probs) {
+         // Inverse-CDF sampling from the categorical policy distribution
+         const r = Math.random();
+         let cumSum = 0;
+         for (let i = 0; i < probs.length; i++) {
+             cumSum += probs[i];
+             if (r < cumSum)
+                 return i;
+         }
+         return probs.length - 1;
+     }
+     hashAction(action) {
+         // Map an arbitrary action string to a discrete action index
+         let hash = 0;
+         for (let i = 0; i < action.length; i++) {
+             hash = (hash * 31 + action.charCodeAt(i)) % this.numActions;
+         }
+         return hash;
+     }
+ }
+ /**
+  * Factory function
+  */
+ export function createA2C(config) {
+     return new A2CAlgorithm(config);
+ }
+ //# sourceMappingURL=a2c.js.map
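
Usage note (editor's sketch, not from the package): the snippet below shows one way the exported factory might be driven. The import path, the 768-dimensional Float32Array state, the string action labels, and the { steps: [...] } trajectory shape are assumptions inferred from addExperience(), update(), and getAction() in the code above.

// a2c-usage-sketch.mjs (hypothetical file; the import path is an assumption)
import { createA2C } from './package/src/@claude-flow/neural/dist/algorithms/a2c.js';

const a2c = createA2C({ nSteps: 5, useGAE: true });

// Hypothetical trajectory matching the schema addExperience() reads:
// each step carries stateAfter (an embedding), action (a string), reward (a number).
const state = new Float32Array(768).fill(0.1);
const trajectory = {
    steps: Array.from({ length: 5 }, (_, i) => ({
        stateAfter: state,
        action: `step-${i}`,
        reward: i === 4 ? 1 : 0,
    })),
};

a2c.addExperience(trajectory);
const { policyLoss, valueLoss, entropy } = a2c.update();
const { action, value } = a2c.getAction(state); // action is an index in [0, numActions)
console.log({ policyLoss, valueLoss, entropy, action, value }, a2c.getStats());

update() returns zeroed losses until at least nSteps (default 5) experiences are buffered, which is why the sketch feeds a five-step trajectory before calling it; with useGAE enabled, advantages come from the lambda-weighted recurrence in computeGAE() rather than plain return-minus-value.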