npm - @woosh/meep-engine - Versions diffs - 2.138.15 → 2.138.17 - Mend

@woosh/meep-engine 2.138.15 → 2.138.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/src/engine/intelligence/mcts/MonteCarlo.d.ts CHANGED Viewed

@@ -14,6 +14,12 @@ export class MonteCarloTreeSearch<S> {
      * @type {StateNode|null}
      */
     root: StateNode<any, any> | null;
+    /**
+     * Number of distinct players whose payoffs we track. Set in {@link initialize}.
+     * Sizes the per-node `heuristicValue` and `totalScore` buffers.
+     * @type {number}
+     */
+    numPlayers: number;
     /**
      *
      * @type {function(state:S, source:StateNode):MoveEdge[]}
@@ -24,6 +30,30 @@ export class MonteCarloTreeSearch<S> {
      * @type {function(state:S):StateType}
      */
     computeTerminalFlag: (arg0: state) => S;
+    /**
+     * Returns the player ID (0-based) who chooses the next move from a given state.
+     * Called when a non-terminal node is expanded. Players don't need to alternate
+     * or follow any fixed pattern — the framework just asks the game.
+     * @type {function(state:S):number}
+     */
+    computeActivePlayer: (arg0: state) => S;
+    /**
+     * Returns the per-player payoff vector for a terminal state.
+     * Indexed by player ID. The game decides the zero-sum/cooperative shape.
+     * @type {function(state:S):Float64Array}
+     */
+    computeOutcome: (arg0: state) => S;
+    /**
+     *
+     * @type {function(S):S}
+     */
+    cloneState: (arg0: S) => S;
+    /**
+     * Heuristic estimate of a non-terminal state's per-player value.
+     * Optional — if `null`, new nodes start with zero-valued heuristic vectors.
+     * @type {null|function(node:StateNode, state:S):Float64Array}
+     */
+    heuristic: null | ((arg0: node) => StateNode<any, any>);
     /**
      * Depth to which plays will be explored
      * @type {number}
@@ -36,14 +66,15 @@ export class MonteCarloTreeSearch<S> {
     random: Function;
     /**
      * @param {S} rootState
+     * @param {number} numPlayers Total number of players whose payoffs are tracked. Use 1 for solitaire / planning.
      * @param {function(state:S, source:StateNode):MoveEdge[]} computeValidMoves
      * @param {function(state:S):StateType} computeTerminalFlag
+     * @param {function(state:S):number} computeActivePlayer
+     * @param {function(state:S):Float64Array} computeOutcome called on terminal states; must return a vector of length `numPlayers`
      * @param {function(S):S} cloneState
-     * @param {function(StateNode, S):number} heuristic Estimation function for evaluation of intermediate stated, guides exploration
+     * @param {null|function(node:StateNode, state:S):Float64Array} [heuristic] optional per-player estimator for intermediate states
      */
-    initialize({ rootState, computeValidMoves, computeTerminalFlag, cloneState, heuristic }: S): void;
-    cloneState: any;
-    heuristic: any;
+    initialize({ rootState, numPlayers, computeValidMoves, computeTerminalFlag, computeActivePlayer, computeOutcome, cloneState, heuristic }: S): void;
     /**
      *
      * @param {StateNode} node

package/src/engine/intelligence/mcts/MonteCarlo.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"MonteCarlo.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/MonteCarlo.js"],"names":[],"mappings":"~~AAsCA~~;;;;GAIG;AACH;IAEI;;;OAGG;IACH,WAFU,CAAC,CAEM;IAEjB;;;OAGG;IACH,MAFU,sBAAU,IAAI,CAEZ;IAEZ;;;OAGG;IACH,oCAFyB,CAAC,CAED;IAEzB;;;OAGG;IACH,sCAFyB,CAAC,CAEC;IAE3B;;;OAGG;IACH,qBAFU,MAAM,CAEW;IAE3B;;;OAGG;IACH,iBAAyB;IAEzB~~;;;;;;OAMG~~;IACH,~~yFANW~~,CAAC,~~QA2BX~~;~~IANG,gBAA4B;IAC5B,eAA0B;IAO9B~~;;;;;OAKG;IACH,+CAHW,CAAC,uBAsEX;IAED;;;OAGG;IACH,WAFa,CAAC,~~CA+Cb~~;CACJ;~~0BAvOoC~~,gBAAgB"}
1	+ {"version":3,"file":"MonteCarlo.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/MonteCarlo.js"],"names":[],"mappings":"AA0CA;;;;GAIG;AACH;IAEI;;;OAGG;IACH,WAFU,CAAC,CAEM;IAEjB;;;OAGG;IACH,MAFU,sBAAU,IAAI,CAEZ;IAEZ;;;;OAIG;IACH,YAFU,MAAM,CAED;IAEf;;;OAGG;IACH,oCAFyB,CAAC,CAED;IAEzB;;;OAGG;IACH,sCAFyB,CAAC,CAEC;IAE3B;;;;;OAKG;IACH,sCAFyB,CAAC,CAEC;IAE3B;;;;OAIG;IACH,iCAFyB,CAAC,CAEJ;IAEtB;;;OAGG;IACH,mBAFmB,CAAC,KAAE,CAAC,CAEL;IAElB;;;;OAIG;IACH,WAFU,IAAI,wCAAwB,CAErB;IAEjB;;;OAGG;IACH,qBAFU,MAAM,CAEW;IAE3B;;;OAGG;IACH,iBAAyB;IAEzB;;;;;;;;;OASG;IACH,0IATW,CAAC,QA0CX;IAED;;;;;OAKG;IACH,+CAHW,CAAC,uBAsEX;IAED;;;OAGG;IACH,WAFa,CAAC,CAgDb;CACJ;0BA/RoC,gBAAgB"}

package/src/engine/intelligence/mcts/MonteCarlo.js CHANGED Viewed

@@ -1,5 +1,4 @@
 import { assert } from "../../../core/assert.js";
-import { returnZero } from "../../../core/function/returnZero.js";
 import { mix } from "../../../core/math/mix.js";
 import { seededRandom } from "../../../core/math/random/seededRandom.js";
 import { MoveEdge } from "./MoveEdge.js";
@@ -13,6 +12,9 @@ import { StateNode, StateType } from "./StateNode.js";
 const C_ks = 1 / Math.sqrt(2);
 /**
+ * UCB1-style score for the parent choosing among its children. The exploitation
+ * term is read from whichever component the parent's active player cares about —
+ * each decision-maker maximizes their own outcome (maxN selection).
  *
  * @param {StateNode} parent
  * @param {StateNode} child
@@ -26,8 +28,10 @@ function computeNodeSelectionScore(parent, child) {
         return 0;
     }
+    const player = parent.activePlayer;
     // Exploitation heuristic
-    const Q = mix((child.wins + 1) / playouts, child.heuristicValue, 0.65);
+    const Q = mix(child.totalScore[player] / playouts, child.heuristicValue[player], 0.65);
     // Based on UCB1
     // exploration heuristic
@@ -55,6 +59,13 @@ export class MonteCarloTreeSearch {
      */
     root = null;
+    /**
+     * Number of distinct players whose payoffs we track. Set in {@link initialize}.
+     * Sizes the per-node `heuristicValue` and `totalScore` buffers.
+     * @type {number}
+     */
+    numPlayers = 0;
     /**
      *
      * @type {function(state:S, source:StateNode):MoveEdge[]}
@@ -67,6 +78,34 @@ export class MonteCarloTreeSearch {
      */
     computeTerminalFlag = null;
+    /**
+     * Returns the player ID (0-based) who chooses the next move from a given state.
+     * Called when a non-terminal node is expanded. Players don't need to alternate
+     * or follow any fixed pattern — the framework just asks the game.
+     * @type {function(state:S):number}
+     */
+    computeActivePlayer = null;
+    /**
+     * Returns the per-player payoff vector for a terminal state.
+     * Indexed by player ID. The game decides the zero-sum/cooperative shape.
+     * @type {function(state:S):Float64Array}
+     */
+    computeOutcome = null;
+    /**
+     *
+     * @type {function(S):S}
+     */
+    cloneState = null;
+    /**
+     * Heuristic estimate of a non-terminal state's per-player value.
+     * Optional — if `null`, new nodes start with zero-valued heuristic vectors.
+     * @type {null|function(node:StateNode, state:S):Float64Array}
+     */
+    heuristic = null;
     /**
      * Depth to which plays will be explored
      * @type {number}
@@ -81,32 +120,47 @@ export class MonteCarloTreeSearch {
     /**
      * @param {S} rootState
+     * @param {number} numPlayers Total number of players whose payoffs are tracked. Use 1 for solitaire / planning.
      * @param {function(state:S, source:StateNode):MoveEdge[]} computeValidMoves
      * @param {function(state:S):StateType} computeTerminalFlag
+     * @param {function(state:S):number} computeActivePlayer
+     * @param {function(state:S):Float64Array} computeOutcome called on terminal states; must return a vector of length `numPlayers`
      * @param {function(S):S} cloneState
-     * @param {function(StateNode, S):number} heuristic Estimation function for evaluation of intermediate stated, guides exploration
+     * @param {null|function(node:StateNode, state:S):Float64Array} [heuristic] optional per-player estimator for intermediate states
      */
     initialize(
         {
             rootState,
+            numPlayers,
             computeValidMoves,
             computeTerminalFlag,
+            computeActivePlayer,
+            computeOutcome,
             cloneState,
-            heuristic = returnZero
+            heuristic = null
         }
     ) {
+        assert.isInteger(numPlayers, `numPlayers`);
+        assert.greaterThanOrEqual(numPlayers, 1, `numPlayers`);
         assert.isFunction(computeValidMoves, `computeValidMoves`);
         assert.isFunction(computeTerminalFlag, `computeTerminalFlag`);
+        assert.isFunction(computeActivePlayer, `computeActivePlayer`);
+        assert.isFunction(computeOutcome, `computeOutcome`);
         assert.isFunction(cloneState, `cloneState`);
+        this.numPlayers = numPlayers;
         this.computeValidMoves = computeValidMoves;
         this.computeTerminalFlag = computeTerminalFlag;
+        this.computeActivePlayer = computeActivePlayer;
+        this.computeOutcome = computeOutcome;
         this.cloneState = cloneState;
         this.heuristic = heuristic;
         this.rootState = rootState;
         this.root = new StateNode();
+        this.root.allocate(numPlayers);
     }
     /**
@@ -170,7 +224,7 @@ export class MonteCarloTreeSearch {
             if (!bestMove.isTargetMaterialized()) {
                 //materialize the target state
-                materializedEdgeTarget(state, node, bestMove, this.computeTerminalFlag, this.heuristic);
+                materializeEdgeTarget(state, node, bestMove, this);
             } else {
                 //just follow the edge
@@ -190,8 +244,8 @@ export class MonteCarloTreeSearch {
      */
     playout() {
         const computeValidMoves = this.computeValidMoves;
-        const computeTerminalFlag = this.computeTerminalFlag;
+        const computeActivePlayer = this.computeActivePlayer;
+        const computeOutcome = this.computeOutcome;
         const state = this.cloneState(this.rootState);
@@ -202,35 +256,36 @@ export class MonteCarloTreeSearch {
         while (!node.isTerminal() && node.depth < this.maxExplorationDepth) {
             if (!node.isExpanded()) {
-                node.expand(state, computeValidMoves, computeTerminalFlag);
+                node.expand(state, computeValidMoves, computeActivePlayer, computeOutcome);
             }
             const child = this.selectRandom(node, state);
             if (child === node) {
                 // prevent infinite recursion
-                // this should not happen?
+                // can happen e.g. when expand produced no moves (NoMoves terminal)
                 break;
             }
             node = child;
         }
+        let outcome;
         if (!node.isTerminal() && node.depth >= this.maxExplorationDepth) {
-            //cap the state by depth, propagate heuristic score
+            // cap the state by depth, propagate heuristic score as payoff stand-in
             node.type = StateType.DepthCapped;
+            outcome = node.heuristicValue;
+        } else if (node.outcome !== null) {
+            // terminal node with cached payoff
+            outcome = node.outcome;
+        } else {
+            // Defensive: shouldn't reach here under normal control flow, but if
+            // a node became terminal without a cached outcome, fall back to heuristic.
+            outcome = node.heuristicValue;
         }
-        // record play-through
-        const terminalFlag = node.type;
-        if (terminalFlag === StateType.Win) {
-            node.addPlayouts(1, 1, 0);
-        } else if (terminalFlag === StateType.Loss) {
-            node.addPlayouts(1, 0, 1);
-        } else if (terminalFlag === StateType.Tie || terminalFlag === StateType.DepthCapped) {
-            node.addPlayouts(1, 0, 0);
-        }
+        node.backpropagate(outcome);
         return state;
     }
@@ -241,18 +296,18 @@ export class MonteCarloTreeSearch {
  * @param {S} state
  * @param {StateNode} source
  * @param {MoveEdge} edge
- * @param {function(S):StateType} computeTerminalFlag
- * @param {function(StateNode, S)} heuristic
+ * @param {MonteCarloTreeSearch} search
  */
-function materializedEdgeTarget(state, source, edge, computeTerminalFlag, heuristic) {
+function materializeEdgeTarget(state, source, edge, search) {
     const child = new StateNode();
+    child.allocate(search.numPlayers);
     child.parent = source;
     child.depth = source.depth + 1;
     const computedState = edge.move(state);
-    const terminalFlag = computeTerminalFlag(computedState);
+    const terminalFlag = search.computeTerminalFlag(computedState);
     assert.enum(terminalFlag, StateType, 'terminalFlag');
@@ -260,16 +315,31 @@ function materializedEdgeTarget(state, source, edge, computeTerminalFlag, heuris
     edge.target = child;
-    const childHeuristicScore = heuristic(child, computedState);
-    assert.notNaN(childHeuristicScore, 'childHeuristicScore');
+    if (terminalFlag === StateType.Undecided) {
+        // active player will be set when this node is expanded (we don't know
+        // yet whether it has any legal moves)
+        if (search.heuristic !== null) {
+            const h = search.heuristic(child, computedState);
-    child.heuristicValue = childHeuristicScore;
+            assert.notNull(h, 'heuristic returned null');
+            child.heuristicValue.set(h);
+        }
+    } else {
+        // terminal — game decides per-player payoff, cache and mirror into heuristicValue.
+        // Copy into the node's own buffer so the user is free to reuse one across calls;
+        // outcome and heuristicValue share storage within the node (safe since bubble-up
+        // skips terminals — see StateNode.aggregateHeuristicScore).
+        const out = search.computeOutcome(computedState);
+        child.heuristicValue.set(out);
+        child.outcome = child.heuristicValue;
+    }
-    // bubble the heuristic score up the tree
-    // child.bubbleUpHeuristicScore(); // heuristic value changes sign depending on the team making the move, so aggregation becomes tricky
+    // Propagate the new node's value up the tree via maxN backup. This refines
+    // ancestors' heuristicValue with information from the freshly-materialized
+    // leaf — useful guidance for subsequent UCB selections at those ancestors.
+    child.bubbleUpHeuristicScore();
     return computedState;
 }

package/src/engine/intelligence/mcts/StateNode.d.ts CHANGED Viewed

@@ -1,9 +1,11 @@
+/**
+ * Coarse classification of a node's terminal status, independent of payoff.
+ * The actual per-player payoff lives in {@link StateNode#outcome}.
+ */
 export type StateType = number;
 export namespace StateType {
     let Undecided: number;
-    let Win: number;
-    let Loss: number;
-    let Tie: number;
+    let Terminal: number;
     let DepthCapped: number;
     let NoMoves: number;
 }
@@ -19,27 +21,37 @@ export class StateNode<State, Action> {
      */
     depth: number;
     /**
-     *
+     * Total number of explored playouts through this node.
      * @type {number}
      */
-    wins: number;
+    playouts: number;
     /**
-     * Number of leses in the subtree of this state
-     * @type {number}
+     * Per-player heuristic estimate of the value of this state.
+     * For terminal nodes this mirrors {@link StateNode#outcome}.
+     * For internal nodes this is refined by maxN backup as children materialize.
+     * Index = player ID.
+     * @type {Float64Array | null}
      */
-    losses: number;
+    heuristicValue: Float64Array | null;
     /**
-     * total number of explored playouts
-     * @type {number}
+     * Per-player cumulative payoff aggregated by backpropagation.
+     * Average value for player `p` is `totalScore[p] / playouts` once `playouts > 0`.
+     * @type {Float64Array | null}
      */
-    playouts: number;
+    totalScore: Float64Array | null;
     /**
-     *
+     * Per-player payoff at this terminal node, cached at materialization.
+     * `null` for non-terminal nodes (uses {@link StateNode#heuristicValue} as a stand-in).
+     * @type {Float64Array | null}
+     */
+    outcome: Float64Array | null;
+    /**
+     * Player ID of whoever chooses the next move from this state.
+     * `-1` for terminal nodes (no decision to make).
      * @type {number}
      */
-    heuristicValue: number;
+    activePlayer: number;
     /**
-     * parent node, previous state
      * @type {null|StateNode}
      */
     parent: null | StateNode<any, any>;
@@ -53,27 +65,37 @@ export class StateNode<State, Action> {
      * @type {StateType}
      */
     type: StateType;
+    /**
+     * Allocate the per-player buffers. Must be called before any backprop or selection.
+     * @param {number} numPlayers
+     */
+    allocate(numPlayers: number): void;
     bubbleUpHeuristicScore(): void;
     /**
-     * Aggregate heuristic score from children
+     * maxN backup: the active player picks the child that maximizes their own
+     * component; the chosen child's entire heuristic vector is copied here.
+     *
+     * This generalizes minimax across N players and asymmetric turn orders: the
+     * non-active components ride along, representing "what would happen to player Y
+     * if X plays the way X wants from this state".
      */
     aggregateHeuristicScore(): void;
     /**
      * @param state
      * @param {function(State, source:StateNode):MoveEdge[]} computeValidMoves
-     * @param computeTerminalFlag
+     * @param {function(State):number} computeActivePlayer
+     * @param {function(State):Float64Array} computeOutcome called when expansion produces no moves
      * @returns {number} number of children
      */
-    expand(state: any, computeValidMoves: any, computeTerminalFlag: any): number;
+    expand(state: any, computeValidMoves: any, computeActivePlayer: (arg0: State) => number, computeOutcome: (arg0: State) => Float64Array): number;
     /**
-     *
-     * @param {number} playouts
-     * @param {number} wins
-     * @param {number} losses
+     * Walk up from this node to the root, adding the per-player outcome to each
+     * ancestor's cumulative score and incrementing their visit count.
+     * @param {Float64Array} outcomeVector indexed by player ID
      */
-    addPlayouts(playouts: number, wins: number, losses: number): void;
+    backpropagate(outcomeVector: Float64Array): void;
     /**
-     * Whenever this is a terminal state or not (win/loss)
+     * Whether this is a terminal state or not (win/loss/tie/no-moves/depth-cap)
      * @returns {boolean}
      */
     isTerminal(): boolean;
@@ -83,7 +105,8 @@ export class StateNode<State, Action> {
      */
     isExpanded(): boolean;
     /**
-     *
+     * Pick the best moves from this node's perspective — i.e. for whichever
+     * player owns the decision here.
      * @returns {MoveEdge[]}
      */
     pickBestMoves(): MoveEdge<any>[];

package/src/engine/intelligence/mcts/StateNode.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"StateNode.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/StateNode.js"],"names":[],"mappings":"~~wBAKU~~,MAAM~~;;;;;;;;;AAyDhB~~;;;;GAIG;AACH;IAEI;;;OAGG;IACH,OAFU,MAAM,CAEN;IAEV;;;OAGG;IACH,~~MAFU~~,MAAM,CAEP;~~IAET;;;OAGG~~;IACH,~~QAFU~~,~~MAAM~~,~~CAEL~~;~~IAGX;;;OAGG~~;IACH,~~UAFU~~,~~MAAM~~,~~CAEH~~;~~IAEb;;;OAGG~~;IACH,~~gBAFU~~,MAAM,~~CAEG~~;~~IAEnB;;;OAGG~~;IACH,QAFU,IAAI,sBAAU,CAEV;IAEd;;;OAGG;IACH,OAFU,IAAI,GAAC,eAAU,CAEZ;IAEb;;;OAGG;IACH,MAFU,SAAS,CAEQ;~~IAG3B~~,+~~BASC~~;IAED~~;;OAEG~~;IACH,~~gCAuCC~~;IAED~~;;;;;OAKG~~;IACH,~~sEAFa~~,MAAM,~~CAyBlB;IAGD;;;;;OAKG;IACH~~,~~sBAJW~~,~~MAAM~~,~~QACN~~,MAAM,~~UACN~~,~~MAAM~~,~~QAchB~~;IAGD;;;OAGG;IACH,cAFa,OAAO,CAInB;IAED;;;OAGG;IACH,cAFa,OAAO,CAInB;IAED~~;;;OAGG~~;IACH,iBAFa,eAAU,~~CAmCtB~~;IAED;;;OAGG;IACH,4DA4BC;CACJ;~~yBA1TwB~~,eAAe"}
1	+ {"version":3,"file":"StateNode.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/StateNode.js"],"names":[],"mappings":";;;;wBAMU,MAAM;;;;;;;AA0ChB;;;;GAIG;AACH;IAEI;;;OAGG;IACH,OAFU,MAAM,CAEN;IAEV;;;OAGG;IACH,UAFU,MAAM,CAEH;IAEb;;;;;;OAMG;IACH,gBAFU,YAAY,GAAG,IAAI,CAEP;IAEtB;;;;OAIG;IACH,YAFU,YAAY,GAAG,IAAI,CAEX;IAElB;;;;OAIG;IACH,SAFU,YAAY,GAAG,IAAI,CAEd;IAEf;;;;OAIG;IACH,cAFU,MAAM,CAEE;IAElB;;OAEG;IACH,QAFU,IAAI,sBAAU,CAEV;IAEd;;;OAGG;IACH,OAFU,IAAI,GAAC,eAAU,CAEZ;IAEb;;;OAGG;IACH,MAFU,SAAS,CAEQ;IAE3B;;;OAGG;IACH,qBAFW,MAAM,QAOhB;IAED,+BAQC;IAED;;;;;;;OAOG;IACH,gCA0CC;IAED;;;;;;OAMG;IACH,uEAJoB,KAAK,KAAE,MAAM,yBACb,KAAK,KAAE,YAAY,GAC1B,MAAM,CA+ClB;IAGD;;;;OAIG;IACH,6BAFW,YAAY,QAqBtB;IAGD;;;OAGG;IACH,cAFa,OAAO,CAInB;IAED;;;OAGG;IACH,cAFa,OAAO,CAInB;IAED;;;;OAIG;IACH,iBAFa,eAAU,CAwCtB;IAED;;;OAGG;IACH,4DA4BC;CACJ;yBA1WwB,eAAe"}