@woosh/meep-engine 2.138.15 → 2.138.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/engine/graphics/impostors/octahedral/shader/ImpostorShaderDepthV0.d.ts.map +1 -1
- package/src/engine/graphics/impostors/octahedral/shader/ImpostorShaderDepthV0.js +29 -6
- package/src/engine/graphics/shaders/TerrainShader.js +2 -2
- package/src/engine/intelligence/mcts/MonteCarlo.d.ts +35 -4
- package/src/engine/intelligence/mcts/MonteCarlo.d.ts.map +1 -1
- package/src/engine/intelligence/mcts/MonteCarlo.js +101 -31
- package/src/engine/intelligence/mcts/StateNode.d.ts +47 -24
- package/src/engine/intelligence/mcts/StateNode.d.ts.map +1 -1
- package/src/engine/intelligence/mcts/StateNode.js +364 -316
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"description": "Pure JavaScript game engine. Fully featured and production ready.",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"author": "Alexander Goldring",
|
|
8
|
-
"version": "2.138.15",
|
|
8
|
+
"version": "2.138.16",
|
|
9
9
|
"main": "build/meep.module.js",
|
|
10
10
|
"module": "build/meep.module.js",
|
|
11
11
|
"exports": {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ImpostorShaderDepthV0.d.ts","sourceRoot":"","sources":["../../../../../../../src/engine/graphics/impostors/octahedral/shader/ImpostorShaderDepthV0.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"ImpostorShaderDepthV0.d.ts","sourceRoot":"","sources":["../../../../../../../src/engine/graphics/impostors/octahedral/shader/ImpostorShaderDepthV0.js"],"names":[],"mappings":"AAsXA;;;;GAIG;AACH;IACI,cAoCC;CACJ;kCA5ZM,OAAO"}
|
|
@@ -155,12 +155,35 @@ const shader_vx = `
|
|
|
155
155
|
// camera in object space is therefore the LIGHT's position in
|
|
156
156
|
// object space, so the impostor card naturally orients to face the
|
|
157
157
|
// light — exactly what we want for a shadow caster.
|
|
158
|
-
|
|
159
|
-
//
|
|
160
|
-
//
|
|
161
|
-
//
|
|
162
|
-
//
|
|
163
|
-
|
|
158
|
+
// For a directional light's ORTHOGRAPHIC shadow camera, three.js
|
|
159
|
+
// positions the shadow camera at the light entity's world
|
|
160
|
+
// position — which is independent of the light's actual ray
|
|
161
|
+
// direction. Using "cameraPos_OS - uOffset" would index the atlas
|
|
162
|
+
// by "direction to shadow camera position", which generally does
|
|
163
|
+
// NOT equal the parallel light-ray direction (in our scene the
|
|
164
|
+
// light sits at (30, 70, 30) but rays travel along (0.17, -1, 0.17),
|
|
165
|
+
// which puts the camera position on the +X,+Z side while the
|
|
166
|
+
// light arrives from -X,-Z — i.e. mirrored on X and Z). The
|
|
167
|
+
// receiver (lit shader) uses the actual light direction for its
|
|
168
|
+
// shadow coord, so we have to match it on the caster side too.
|
|
169
|
+
//
|
|
170
|
+
// Row 2 of modelViewMatrix encodes the camera's +Z axis in OS,
|
|
171
|
+
// which for a three.js camera is "away from the looking direction"
|
|
172
|
+
// — i.e. the direction OPPOSITE the way the light travels, which
|
|
173
|
+
// is what we want for atlas indexing (the bake's D_frame =
|
|
174
|
+
// direction-from-centre-to-bake-camera = -look-direction).
|
|
175
|
+
//
|
|
176
|
+
// NOTE: signs verified by empirical comparison with the receiver
|
|
177
|
+
// side, since the conventions interlock through three.js's
|
|
178
|
+
// matrix-to-uniform pipeline in ways that are easy to get wrong
|
|
179
|
+
// on paper. The negation below comes from the observation that
|
|
180
|
+
// the un-negated formula produced a shadow in the exact-opposite
|
|
181
|
+
// direction of what the truth mesh casts.
|
|
182
|
+
vec3 pivotToCameraRay = -normalize(vec3(
|
|
183
|
+
modelViewMatrix[0][2],
|
|
184
|
+
modelViewMatrix[1][2],
|
|
185
|
+
modelViewMatrix[2][2]
|
|
186
|
+
));
|
|
164
187
|
|
|
165
188
|
vec2 framesMinusOne = vec2(uFrames - 1.0);
|
|
166
189
|
vec2 octahedral_uv = clamp(VectorToGrid(pivotToCameraRay) * 0.5 + 0.5, 0.0, 1.0);
|
|
@@ -280,10 +280,10 @@ function fragment() {
|
|
|
280
280
|
float weight = texture(splatWeightMap, vec3(uv, i)).x;
|
|
281
281
|
|
|
282
282
|
weightSum += weight;
|
|
283
|
-
colorSum += diffuseData*weight;
|
|
283
|
+
colorSum += diffuseData * weight;
|
|
284
284
|
}
|
|
285
285
|
|
|
286
|
-
return weightSum > 0.0 ? colorSum / weightSum : vec4(0.0);
|
|
286
|
+
return weightSum > 0.0 ? (colorSum / weightSum) : vec4(0.0);
|
|
287
287
|
}
|
|
288
288
|
|
|
289
289
|
${ShaderChunks.clouds_pars_fragment}
|
|
@@ -14,6 +14,12 @@ export class MonteCarloTreeSearch<S> {
|
|
|
14
14
|
* @type {StateNode|null}
|
|
15
15
|
*/
|
|
16
16
|
root: StateNode<any, any> | null;
|
|
17
|
+
/**
|
|
18
|
+
* Number of distinct players whose payoffs we track. Set in {@link initialize}.
|
|
19
|
+
* Sizes the per-node `heuristicValue` and `totalScore` buffers.
|
|
20
|
+
* @type {number}
|
|
21
|
+
*/
|
|
22
|
+
numPlayers: number;
|
|
17
23
|
/**
|
|
18
24
|
*
|
|
19
25
|
* @type {function(state:S, source:StateNode):MoveEdge[]}
|
|
@@ -24,6 +30,30 @@ export class MonteCarloTreeSearch<S> {
|
|
|
24
30
|
* @type {function(state:S):StateType}
|
|
25
31
|
*/
|
|
26
32
|
computeTerminalFlag: (arg0: state) => S;
|
|
33
|
+
/**
|
|
34
|
+
* Returns the player ID (0-based) who chooses the next move from a given state.
|
|
35
|
+
* Called when a non-terminal node is expanded. Players don't need to alternate
|
|
36
|
+
* or follow any fixed pattern — the framework just asks the game.
|
|
37
|
+
* @type {function(state:S):number}
|
|
38
|
+
*/
|
|
39
|
+
computeActivePlayer: (arg0: state) => S;
|
|
40
|
+
/**
|
|
41
|
+
* Returns the per-player payoff vector for a terminal state.
|
|
42
|
+
* Indexed by player ID. The game decides the zero-sum/cooperative shape.
|
|
43
|
+
* @type {function(state:S):Float64Array}
|
|
44
|
+
*/
|
|
45
|
+
computeOutcome: (arg0: state) => S;
|
|
46
|
+
/**
|
|
47
|
+
*
|
|
48
|
+
* @type {function(S):S}
|
|
49
|
+
*/
|
|
50
|
+
cloneState: (arg0: S) => S;
|
|
51
|
+
/**
|
|
52
|
+
* Heuristic estimate of a non-terminal state's per-player value.
|
|
53
|
+
* Optional — if `null`, new nodes start with zero-valued heuristic vectors.
|
|
54
|
+
* @type {null|function(node:StateNode, state:S):Float64Array}
|
|
55
|
+
*/
|
|
56
|
+
heuristic: null | ((arg0: node) => StateNode<any, any>);
|
|
27
57
|
/**
|
|
28
58
|
* Depth to which plays will be explored
|
|
29
59
|
* @type {number}
|
|
@@ -36,14 +66,15 @@ export class MonteCarloTreeSearch<S> {
|
|
|
36
66
|
random: Function;
|
|
37
67
|
/**
|
|
38
68
|
* @param {S} rootState
|
|
69
|
+
* @param {number} numPlayers Total number of players whose payoffs are tracked. Use 1 for solitaire / planning.
|
|
39
70
|
* @param {function(state:S, source:StateNode):MoveEdge[]} computeValidMoves
|
|
40
71
|
* @param {function(state:S):StateType} computeTerminalFlag
|
|
72
|
+
* @param {function(state:S):number} computeActivePlayer
|
|
73
|
+
* @param {function(state:S):Float64Array} computeOutcome called on terminal states; must return a vector of length `numPlayers`
|
|
41
74
|
* @param {function(S):S} cloneState
|
|
42
|
-
* @param {function(StateNode, S):
|
|
75
|
+
* @param {null|function(node:StateNode, state:S):Float64Array} [heuristic] optional per-player estimator for intermediate states
|
|
43
76
|
*/
|
|
44
|
-
initialize({ rootState, computeValidMoves, computeTerminalFlag, cloneState, heuristic }: S): void;
|
|
45
|
-
cloneState: any;
|
|
46
|
-
heuristic: any;
|
|
77
|
+
initialize({ rootState, numPlayers, computeValidMoves, computeTerminalFlag, computeActivePlayer, computeOutcome, cloneState, heuristic }: S): void;
|
|
47
78
|
/**
|
|
48
79
|
*
|
|
49
80
|
* @param {StateNode} node
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"MonteCarlo.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/MonteCarlo.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"MonteCarlo.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/MonteCarlo.js"],"names":[],"mappings":"AA0CA;;;;GAIG;AACH;IAEI;;;OAGG;IACH,WAFU,CAAC,CAEM;IAEjB;;;OAGG;IACH,MAFU,sBAAU,IAAI,CAEZ;IAEZ;;;;OAIG;IACH,YAFU,MAAM,CAED;IAEf;;;OAGG;IACH,oCAFyB,CAAC,CAED;IAEzB;;;OAGG;IACH,sCAFyB,CAAC,CAEC;IAE3B;;;;;OAKG;IACH,sCAFyB,CAAC,CAEC;IAE3B;;;;OAIG;IACH,iCAFyB,CAAC,CAEJ;IAEtB;;;OAGG;IACH,mBAFmB,CAAC,KAAE,CAAC,CAEL;IAElB;;;;OAIG;IACH,WAFU,IAAI,wCAAwB,CAErB;IAEjB;;;OAGG;IACH,qBAFU,MAAM,CAEW;IAE3B;;;OAGG;IACH,iBAAyB;IAEzB;;;;;;;;;OASG;IACH,0IATW,CAAC,QA0CX;IAED;;;;;OAKG;IACH,+CAHW,CAAC,uBAsEX;IAED;;;OAGG;IACH,WAFa,CAAC,CAgDb;CACJ;0BA/RoC,gBAAgB"}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { assert } from "../../../core/assert.js";
|
|
2
|
-
import { returnZero } from "../../../core/function/returnZero.js";
|
|
3
2
|
import { mix } from "../../../core/math/mix.js";
|
|
4
3
|
import { seededRandom } from "../../../core/math/random/seededRandom.js";
|
|
5
4
|
import { MoveEdge } from "./MoveEdge.js";
|
|
@@ -13,6 +12,9 @@ import { StateNode, StateType } from "./StateNode.js";
|
|
|
13
12
|
const C_ks = 1 / Math.sqrt(2);
|
|
14
13
|
|
|
15
14
|
/**
|
|
15
|
+
* UCB1-style score for the parent choosing among its children. The exploitation
|
|
16
|
+
* term is read from whichever component the parent's active player cares about —
|
|
17
|
+
* each decision-maker maximizes their own outcome (maxN selection).
|
|
16
18
|
*
|
|
17
19
|
* @param {StateNode} parent
|
|
18
20
|
* @param {StateNode} child
|
|
@@ -26,8 +28,10 @@ function computeNodeSelectionScore(parent, child) {
|
|
|
26
28
|
return 0;
|
|
27
29
|
}
|
|
28
30
|
|
|
31
|
+
const player = parent.activePlayer;
|
|
32
|
+
|
|
29
33
|
// Exploitation heuristic
|
|
30
|
-
const Q = mix(
|
|
34
|
+
const Q = mix(child.totalScore[player] / playouts, child.heuristicValue[player], 0.65);
|
|
31
35
|
|
|
32
36
|
// Based on UCB1
|
|
33
37
|
// exploration heuristic
|
|
@@ -55,6 +59,13 @@ export class MonteCarloTreeSearch {
|
|
|
55
59
|
*/
|
|
56
60
|
root = null;
|
|
57
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Number of distinct players whose payoffs we track. Set in {@link initialize}.
|
|
64
|
+
* Sizes the per-node `heuristicValue` and `totalScore` buffers.
|
|
65
|
+
* @type {number}
|
|
66
|
+
*/
|
|
67
|
+
numPlayers = 0;
|
|
68
|
+
|
|
58
69
|
/**
|
|
59
70
|
*
|
|
60
71
|
* @type {function(state:S, source:StateNode):MoveEdge[]}
|
|
@@ -67,6 +78,34 @@ export class MonteCarloTreeSearch {
|
|
|
67
78
|
*/
|
|
68
79
|
computeTerminalFlag = null;
|
|
69
80
|
|
|
81
|
+
/**
|
|
82
|
+
* Returns the player ID (0-based) who chooses the next move from a given state.
|
|
83
|
+
* Called when a non-terminal node is expanded. Players don't need to alternate
|
|
84
|
+
* or follow any fixed pattern — the framework just asks the game.
|
|
85
|
+
* @type {function(state:S):number}
|
|
86
|
+
*/
|
|
87
|
+
computeActivePlayer = null;
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Returns the per-player payoff vector for a terminal state.
|
|
91
|
+
* Indexed by player ID. The game decides the zero-sum/cooperative shape.
|
|
92
|
+
* @type {function(state:S):Float64Array}
|
|
93
|
+
*/
|
|
94
|
+
computeOutcome = null;
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
*
|
|
98
|
+
* @type {function(S):S}
|
|
99
|
+
*/
|
|
100
|
+
cloneState = null;
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Heuristic estimate of a non-terminal state's per-player value.
|
|
104
|
+
* Optional — if `null`, new nodes start with zero-valued heuristic vectors.
|
|
105
|
+
* @type {null|function(node:StateNode, state:S):Float64Array}
|
|
106
|
+
*/
|
|
107
|
+
heuristic = null;
|
|
108
|
+
|
|
70
109
|
/**
|
|
71
110
|
* Depth to which plays will be explored
|
|
72
111
|
* @type {number}
|
|
@@ -81,32 +120,47 @@ export class MonteCarloTreeSearch {
|
|
|
81
120
|
|
|
82
121
|
/**
|
|
83
122
|
* @param {S} rootState
|
|
123
|
+
* @param {number} numPlayers Total number of players whose payoffs are tracked. Use 1 for solitaire / planning.
|
|
84
124
|
* @param {function(state:S, source:StateNode):MoveEdge[]} computeValidMoves
|
|
85
125
|
* @param {function(state:S):StateType} computeTerminalFlag
|
|
126
|
+
* @param {function(state:S):number} computeActivePlayer
|
|
127
|
+
* @param {function(state:S):Float64Array} computeOutcome called on terminal states; must return a vector of length `numPlayers`
|
|
86
128
|
* @param {function(S):S} cloneState
|
|
87
|
-
* @param {function(StateNode, S):
|
|
129
|
+
* @param {null|function(node:StateNode, state:S):Float64Array} [heuristic] optional per-player estimator for intermediate states
|
|
88
130
|
*/
|
|
89
131
|
initialize(
|
|
90
132
|
{
|
|
91
133
|
rootState,
|
|
134
|
+
numPlayers,
|
|
92
135
|
computeValidMoves,
|
|
93
136
|
computeTerminalFlag,
|
|
137
|
+
computeActivePlayer,
|
|
138
|
+
computeOutcome,
|
|
94
139
|
cloneState,
|
|
95
|
-
heuristic =
|
|
140
|
+
heuristic = null
|
|
96
141
|
}
|
|
97
142
|
) {
|
|
143
|
+
assert.isInteger(numPlayers, `numPlayers`);
|
|
144
|
+
assert.greaterThanOrEqual(numPlayers, 1, `numPlayers`);
|
|
145
|
+
|
|
98
146
|
assert.isFunction(computeValidMoves, `computeValidMoves`);
|
|
99
147
|
assert.isFunction(computeTerminalFlag, `computeTerminalFlag`);
|
|
148
|
+
assert.isFunction(computeActivePlayer, `computeActivePlayer`);
|
|
149
|
+
assert.isFunction(computeOutcome, `computeOutcome`);
|
|
100
150
|
assert.isFunction(cloneState, `cloneState`);
|
|
101
151
|
|
|
152
|
+
this.numPlayers = numPlayers;
|
|
102
153
|
this.computeValidMoves = computeValidMoves;
|
|
103
154
|
this.computeTerminalFlag = computeTerminalFlag;
|
|
155
|
+
this.computeActivePlayer = computeActivePlayer;
|
|
156
|
+
this.computeOutcome = computeOutcome;
|
|
104
157
|
this.cloneState = cloneState;
|
|
105
158
|
this.heuristic = heuristic;
|
|
106
159
|
|
|
107
160
|
this.rootState = rootState;
|
|
108
161
|
|
|
109
162
|
this.root = new StateNode();
|
|
163
|
+
this.root.allocate(numPlayers);
|
|
110
164
|
}
|
|
111
165
|
|
|
112
166
|
/**
|
|
@@ -170,7 +224,7 @@ export class MonteCarloTreeSearch {
|
|
|
170
224
|
|
|
171
225
|
if (!bestMove.isTargetMaterialized()) {
|
|
172
226
|
//materialize the target state
|
|
173
|
-
|
|
227
|
+
materializeEdgeTarget(state, node, bestMove, this);
|
|
174
228
|
|
|
175
229
|
} else {
|
|
176
230
|
//just follow the edge
|
|
@@ -190,8 +244,8 @@ export class MonteCarloTreeSearch {
|
|
|
190
244
|
*/
|
|
191
245
|
playout() {
|
|
192
246
|
const computeValidMoves = this.computeValidMoves;
|
|
193
|
-
|
|
194
|
-
const
|
|
247
|
+
const computeActivePlayer = this.computeActivePlayer;
|
|
248
|
+
const computeOutcome = this.computeOutcome;
|
|
195
249
|
|
|
196
250
|
const state = this.cloneState(this.rootState);
|
|
197
251
|
|
|
@@ -202,35 +256,36 @@ export class MonteCarloTreeSearch {
|
|
|
202
256
|
while (!node.isTerminal() && node.depth < this.maxExplorationDepth) {
|
|
203
257
|
|
|
204
258
|
if (!node.isExpanded()) {
|
|
205
|
-
node.expand(state, computeValidMoves,
|
|
259
|
+
node.expand(state, computeValidMoves, computeActivePlayer, computeOutcome);
|
|
206
260
|
}
|
|
207
261
|
|
|
208
262
|
const child = this.selectRandom(node, state);
|
|
209
263
|
|
|
210
264
|
if (child === node) {
|
|
211
265
|
// prevent infinite recursion
|
|
212
|
-
//
|
|
266
|
+
// can happen e.g. when expand produced no moves (NoMoves terminal)
|
|
213
267
|
break;
|
|
214
268
|
}
|
|
215
269
|
|
|
216
270
|
node = child;
|
|
217
271
|
}
|
|
218
272
|
|
|
273
|
+
let outcome;
|
|
274
|
+
|
|
219
275
|
if (!node.isTerminal() && node.depth >= this.maxExplorationDepth) {
|
|
220
|
-
//cap the state by depth, propagate heuristic score
|
|
276
|
+
// cap the state by depth, propagate heuristic score as payoff stand-in
|
|
221
277
|
node.type = StateType.DepthCapped;
|
|
278
|
+
outcome = node.heuristicValue;
|
|
279
|
+
} else if (node.outcome !== null) {
|
|
280
|
+
// terminal node with cached payoff
|
|
281
|
+
outcome = node.outcome;
|
|
282
|
+
} else {
|
|
283
|
+
// Defensive: shouldn't reach here under normal control flow, but if
|
|
284
|
+
// a node became terminal without a cached outcome, fall back to heuristic.
|
|
285
|
+
outcome = node.heuristicValue;
|
|
222
286
|
}
|
|
223
287
|
|
|
224
|
-
|
|
225
|
-
const terminalFlag = node.type;
|
|
226
|
-
|
|
227
|
-
if (terminalFlag === StateType.Win) {
|
|
228
|
-
node.addPlayouts(1, 1, 0);
|
|
229
|
-
} else if (terminalFlag === StateType.Loss) {
|
|
230
|
-
node.addPlayouts(1, 0, 1);
|
|
231
|
-
} else if (terminalFlag === StateType.Tie || terminalFlag === StateType.DepthCapped) {
|
|
232
|
-
node.addPlayouts(1, 0, 0);
|
|
233
|
-
}
|
|
288
|
+
node.backpropagate(outcome);
|
|
234
289
|
|
|
235
290
|
return state;
|
|
236
291
|
}
|
|
@@ -241,18 +296,18 @@ export class MonteCarloTreeSearch {
|
|
|
241
296
|
* @param {S} state
|
|
242
297
|
* @param {StateNode} source
|
|
243
298
|
* @param {MoveEdge} edge
|
|
244
|
-
* @param {
|
|
245
|
-
* @param {function(StateNode, S)} heuristic
|
|
299
|
+
* @param {MonteCarloTreeSearch} search
|
|
246
300
|
*/
|
|
247
|
-
function materializedEdgeTarget(state, source, edge, computeTerminalFlag, heuristic) {
|
|
301
|
+
function materializeEdgeTarget(state, source, edge, search) {
|
|
248
302
|
|
|
249
303
|
const child = new StateNode();
|
|
304
|
+
child.allocate(search.numPlayers);
|
|
250
305
|
child.parent = source;
|
|
251
306
|
child.depth = source.depth + 1;
|
|
252
307
|
|
|
253
308
|
const computedState = edge.move(state);
|
|
254
309
|
|
|
255
|
-
const terminalFlag = computeTerminalFlag(computedState);
|
|
310
|
+
const terminalFlag = search.computeTerminalFlag(computedState);
|
|
256
311
|
|
|
257
312
|
assert.enum(terminalFlag, StateType, 'terminalFlag');
|
|
258
313
|
|
|
@@ -260,16 +315,31 @@ function materializedEdgeTarget(state, source, edge, computeTerminalFlag, heuris
|
|
|
260
315
|
|
|
261
316
|
edge.target = child;
|
|
262
317
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
318
|
+
if (terminalFlag === StateType.Undecided) {
|
|
319
|
+
// active player will be set when this node is expanded (we don't know
|
|
320
|
+
// yet whether it has any legal moves)
|
|
321
|
+
if (search.heuristic !== null) {
|
|
322
|
+
const h = search.heuristic(child, computedState);
|
|
266
323
|
|
|
267
|
-
|
|
324
|
+
assert.notNull(h, 'heuristic returned null');
|
|
268
325
|
|
|
326
|
+
child.heuristicValue.set(h);
|
|
327
|
+
}
|
|
328
|
+
} else {
|
|
329
|
+
// terminal — game decides per-player payoff, cache and mirror into heuristicValue.
|
|
330
|
+
// Copy into the node's own buffer so the user is free to reuse one across calls;
|
|
331
|
+
// outcome and heuristicValue share storage within the node (safe since bubble-up
|
|
332
|
+
// skips terminals — see StateNode.aggregateHeuristicScore).
|
|
333
|
+
const out = search.computeOutcome(computedState);
|
|
334
|
+
|
|
335
|
+
child.heuristicValue.set(out);
|
|
336
|
+
child.outcome = child.heuristicValue;
|
|
337
|
+
}
|
|
269
338
|
|
|
270
|
-
//
|
|
271
|
-
//
|
|
339
|
+
// Propagate the new node's value up the tree via maxN backup. This refines
|
|
340
|
+
// ancestors' heuristicValue with information from the freshly-materialized
|
|
341
|
+
// leaf — useful guidance for subsequent UCB selections at those ancestors.
|
|
342
|
+
child.bubbleUpHeuristicScore();
|
|
272
343
|
|
|
273
344
|
return computedState;
|
|
274
345
|
}
|
|
275
|
-
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coarse classification of a node's terminal status, independent of payoff.
|
|
3
|
+
* The actual per-player payoff lives in {@link StateNodeoutcome }.
|
|
4
|
+
*/
|
|
1
5
|
export type StateType = number;
|
|
2
6
|
export namespace StateType {
|
|
3
7
|
let Undecided: number;
|
|
4
|
-
let
|
|
5
|
-
let Loss: number;
|
|
6
|
-
let Tie: number;
|
|
8
|
+
let Terminal: number;
|
|
7
9
|
let DepthCapped: number;
|
|
8
10
|
let NoMoves: number;
|
|
9
11
|
}
|
|
@@ -19,27 +21,37 @@ export class StateNode<State, Action> {
|
|
|
19
21
|
*/
|
|
20
22
|
depth: number;
|
|
21
23
|
/**
|
|
22
|
-
*
|
|
24
|
+
* Total number of explored playouts through this node.
|
|
23
25
|
* @type {number}
|
|
24
26
|
*/
|
|
25
|
-
|
|
27
|
+
playouts: number;
|
|
26
28
|
/**
|
|
27
|
-
*
|
|
28
|
-
* @
|
|
29
|
+
* Per-player heuristic estimate of the value of this state.
|
|
30
|
+
* For terminal nodes this mirrors {@link StateNode#outcome}.
|
|
31
|
+
* For internal nodes this is refined by maxN backup as children materialize.
|
|
32
|
+
* Index = player ID.
|
|
33
|
+
* @type {Float64Array | null}
|
|
29
34
|
*/
|
|
30
|
-
|
|
35
|
+
heuristicValue: Float64Array | null;
|
|
31
36
|
/**
|
|
32
|
-
*
|
|
33
|
-
*
|
|
37
|
+
* Per-player cumulative payoff aggregated by backpropagation.
|
|
38
|
+
* Average value for player `p` is `totalScore[p] / playouts` once `playouts > 0`.
|
|
39
|
+
* @type {Float64Array | null}
|
|
34
40
|
*/
|
|
35
|
-
|
|
41
|
+
totalScore: Float64Array | null;
|
|
36
42
|
/**
|
|
37
|
-
*
|
|
43
|
+
* Per-player payoff at this terminal node, cached at materialization.
|
|
44
|
+
* `null` for non-terminal nodes (uses {@link StateNode#heuristicValue} as a stand-in).
|
|
45
|
+
* @type {Float64Array | null}
|
|
46
|
+
*/
|
|
47
|
+
outcome: Float64Array | null;
|
|
48
|
+
/**
|
|
49
|
+
* Player ID of whoever chooses the next move from this state.
|
|
50
|
+
* `-1` for terminal nodes (no decision to make).
|
|
38
51
|
* @type {number}
|
|
39
52
|
*/
|
|
40
|
-
|
|
53
|
+
activePlayer: number;
|
|
41
54
|
/**
|
|
42
|
-
* parent node, previous state
|
|
43
55
|
* @type {null|StateNode}
|
|
44
56
|
*/
|
|
45
57
|
parent: null | StateNode<any, any>;
|
|
@@ -53,27 +65,37 @@ export class StateNode<State, Action> {
|
|
|
53
65
|
* @type {StateType}
|
|
54
66
|
*/
|
|
55
67
|
type: StateType;
|
|
68
|
+
/**
|
|
69
|
+
* Allocate the per-player buffers. Must be called before any backprop or selection.
|
|
70
|
+
* @param {number} numPlayers
|
|
71
|
+
*/
|
|
72
|
+
allocate(numPlayers: number): void;
|
|
56
73
|
bubbleUpHeuristicScore(): void;
|
|
57
74
|
/**
|
|
58
|
-
*
|
|
75
|
+
* maxN backup: the active player picks the child that maximizes their own
|
|
76
|
+
* component; the chosen child's entire heuristic vector is copied here.
|
|
77
|
+
*
|
|
78
|
+
* This generalizes minimax across N players and asymmetric turn orders: the
|
|
79
|
+
* non-active components ride along, representing "what would happen to player Y
|
|
80
|
+
* if X plays the way X wants from this state".
|
|
59
81
|
*/
|
|
60
82
|
aggregateHeuristicScore(): void;
|
|
61
83
|
/**
|
|
62
84
|
* @param state
|
|
63
85
|
* @param {function(State, source:StateNode):MoveEdge[]} computeValidMoves
|
|
64
|
-
* @param
|
|
86
|
+
* @param {function(State):number} computeActivePlayer
|
|
87
|
+
* @param {function(State):Float64Array} computeOutcome called when expansion produces no moves
|
|
65
88
|
* @returns {number} number of children
|
|
66
89
|
*/
|
|
67
|
-
expand(state: any, computeValidMoves: any,
|
|
90
|
+
expand(state: any, computeValidMoves: any, computeActivePlayer: (arg0: State) => number, computeOutcome: (arg0: State) => Float64Array): number;
|
|
68
91
|
/**
|
|
69
|
-
*
|
|
70
|
-
*
|
|
71
|
-
* @param {
|
|
72
|
-
* @param {number} losses
|
|
92
|
+
* Walk up from this node to the root, adding the per-player outcome to each
|
|
93
|
+
* ancestor's cumulative score and incrementing their visit count.
|
|
94
|
+
* @param {Float64Array} outcomeVector indexed by player ID
|
|
73
95
|
*/
|
|
74
|
-
|
|
96
|
+
backpropagate(outcomeVector: Float64Array): void;
|
|
75
97
|
/**
|
|
76
|
-
* Whenever this is a terminal state or not (win/loss)
|
|
98
|
+
* Whenever this is a terminal state or not (win/loss/tie/no-moves/depth-cap)
|
|
77
99
|
* @returns {boolean}
|
|
78
100
|
*/
|
|
79
101
|
isTerminal(): boolean;
|
|
@@ -83,7 +105,8 @@ export class StateNode<State, Action> {
|
|
|
83
105
|
*/
|
|
84
106
|
isExpanded(): boolean;
|
|
85
107
|
/**
|
|
86
|
-
*
|
|
108
|
+
* Pick the best moves from this node's perspective — i.e. for whichever
|
|
109
|
+
* player owns the decision here.
|
|
87
110
|
* @returns {MoveEdge[]}
|
|
88
111
|
*/
|
|
89
112
|
pickBestMoves(): MoveEdge<any>[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"StateNode.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/StateNode.js"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"StateNode.d.ts","sourceRoot":"","sources":["../../../../../src/engine/intelligence/mcts/StateNode.js"],"names":[],"mappings":";;;;wBAMU,MAAM;;;;;;;AA0ChB;;;;GAIG;AACH;IAEI;;;OAGG;IACH,OAFU,MAAM,CAEN;IAEV;;;OAGG;IACH,UAFU,MAAM,CAEH;IAEb;;;;;;OAMG;IACH,gBAFU,YAAY,GAAG,IAAI,CAEP;IAEtB;;;;OAIG;IACH,YAFU,YAAY,GAAG,IAAI,CAEX;IAElB;;;;OAIG;IACH,SAFU,YAAY,GAAG,IAAI,CAEd;IAEf;;;;OAIG;IACH,cAFU,MAAM,CAEE;IAElB;;OAEG;IACH,QAFU,IAAI,sBAAU,CAEV;IAEd;;;OAGG;IACH,OAFU,IAAI,GAAC,eAAU,CAEZ;IAEb;;;OAGG;IACH,MAFU,SAAS,CAEQ;IAE3B;;;OAGG;IACH,qBAFW,MAAM,QAOhB;IAED,+BAQC;IAED;;;;;;;OAOG;IACH,gCA0CC;IAED;;;;;;OAMG;IACH,uEAJoB,KAAK,KAAE,MAAM,yBACb,KAAK,KAAE,YAAY,GAC1B,MAAM,CA+ClB;IAGD;;;;OAIG;IACH,6BAFW,YAAY,QAqBtB;IAGD;;;OAGG;IACH,cAFa,OAAO,CAInB;IAED;;;OAGG;IACH,cAFa,OAAO,CAInB;IAED;;;;OAIG;IACH,iBAFa,eAAU,CAwCtB;IAED;;;OAGG;IACH,4DA4BC;CACJ;yBA1WwB,eAAe"}
|
|
@@ -1,316 +1,364 @@
|
|
|
1
|
-
import { assert } from "../../../core/assert.js";
|
|
2
|
-
import { MoveEdge } from "./MoveEdge.js";
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
*
|
|
6
|
-
* @
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
/**
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
*
|
|
72
|
-
*
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
*
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
node
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
*
|
|
246
|
-
* @
|
|
247
|
-
*/
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
const
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
1
|
+
import { assert } from "../../../core/assert.js";
|
|
2
|
+
import { MoveEdge } from "./MoveEdge.js";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Coarse classification of a node's terminal status, independent of payoff.
|
|
6
|
+
* The actual per-player payoff lives in {@link StateNode#outcome}.
|
|
7
|
+
* @enum {number}
|
|
8
|
+
*/
|
|
9
|
+
export const StateType = {
|
|
10
|
+
/** Game can still continue from this state. */
|
|
11
|
+
Undecided: 0,
|
|
12
|
+
/** Game ended at this state per the game's rules. Payoff in `outcome`. */
|
|
13
|
+
Terminal: 1,
|
|
14
|
+
/** Search hit `maxExplorationDepth` here; heuristic stands in for payoff. */
|
|
15
|
+
DepthCapped: 2,
|
|
16
|
+
/** Expansion produced no legal moves. Treated as terminal; payoff in `outcome`. */
|
|
17
|
+
NoMoves: 3
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Average outcome for `player` across all playouts that visited this child node.
|
|
22
|
+
* Falls back to heuristic when the node has not been rolled out yet.
|
|
23
|
+
* @param {MoveEdge} move
|
|
24
|
+
* @param {number} player
|
|
25
|
+
* @returns {number}
|
|
26
|
+
*/
|
|
27
|
+
function pickBestScore(move, player) {
|
|
28
|
+
assert.isNonNegativeInteger(player, 'player');
|
|
29
|
+
|
|
30
|
+
const node = move.target;
|
|
31
|
+
|
|
32
|
+
if (node.playouts === 0) {
|
|
33
|
+
return node.heuristicValue[player];
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
return node.totalScore[player] / node.playouts;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
let stack_pointer = 0;
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
*
|
|
44
|
+
* @type {StateNode[]}
|
|
45
|
+
*/
|
|
46
|
+
const stack = [];
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* @template State, Action
|
|
51
|
+
* @author Alex Goldring
|
|
52
|
+
* @copyright Company Named Limited (c) 2025
|
|
53
|
+
*/
|
|
54
|
+
export class StateNode {
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* How deep is the node in the tree
|
|
58
|
+
* @type {number}
|
|
59
|
+
*/
|
|
60
|
+
depth = 0;
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Total number of explored playouts through this node.
|
|
64
|
+
* @type {number}
|
|
65
|
+
*/
|
|
66
|
+
playouts = 0;
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* Per-player heuristic estimate of the value of this state.
|
|
70
|
+
* For terminal nodes this mirrors {@link StateNode#outcome}.
|
|
71
|
+
* For internal nodes this is refined by maxN backup as children materialize.
|
|
72
|
+
* Index = player ID.
|
|
73
|
+
* @type {Float64Array | null}
|
|
74
|
+
*/
|
|
75
|
+
heuristicValue = null;
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Per-player cumulative payoff aggregated by backpropagation.
|
|
79
|
+
* Average value for player `p` is `totalScore[p] / playouts` once `playouts > 0`.
|
|
80
|
+
* @type {Float64Array | null}
|
|
81
|
+
*/
|
|
82
|
+
totalScore = null;
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Per-player payoff at this terminal node, cached at materialization.
|
|
86
|
+
* `null` for non-terminal nodes (uses {@link StateNode#heuristicValue} as a stand-in).
|
|
87
|
+
* @type {Float64Array | null}
|
|
88
|
+
*/
|
|
89
|
+
outcome = null;
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Player ID of whoever chooses the next move from this state.
|
|
93
|
+
* `-1` for terminal nodes (no decision to make).
|
|
94
|
+
* @type {number}
|
|
95
|
+
*/
|
|
96
|
+
activePlayer = -1;
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* @type {null|StateNode}
|
|
100
|
+
*/
|
|
101
|
+
parent = null;
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
*
|
|
105
|
+
* @type {null|MoveEdge[]}
|
|
106
|
+
*/
|
|
107
|
+
moves = null;
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
*
|
|
111
|
+
* @type {StateType}
|
|
112
|
+
*/
|
|
113
|
+
type = StateType.Undecided;
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Allocate the per-player buffers. Must be called before any backprop or selection.
|
|
117
|
+
* @param {number} numPlayers
|
|
118
|
+
*/
|
|
119
|
+
allocate(numPlayers) {
|
|
120
|
+
assert.isNonNegativeInteger(numPlayers, 'numPlayers');
|
|
121
|
+
|
|
122
|
+
this.heuristicValue = new Float64Array(numPlayers);
|
|
123
|
+
this.totalScore = new Float64Array(numPlayers);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
bubbleUpHeuristicScore() {
|
|
127
|
+
let r = this.parent;
|
|
128
|
+
|
|
129
|
+
while (r !== null) {
|
|
130
|
+
r.aggregateHeuristicScore();
|
|
131
|
+
|
|
132
|
+
r = r.parent;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* maxN backup: the active player picks the child that maximizes their own
|
|
138
|
+
* component; the chosen child's entire heuristic vector is copied here.
|
|
139
|
+
*
|
|
140
|
+
* This generalizes minimax across N players and asymmetric turn orders: the
|
|
141
|
+
* non-active components ride along, representing "what would happen to player Y
|
|
142
|
+
* if X plays the way X wants from this state".
|
|
143
|
+
*/
|
|
144
|
+
aggregateHeuristicScore() {
|
|
145
|
+
const moves = this.moves;
|
|
146
|
+
|
|
147
|
+
if (moves === null) {
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
const p = this.activePlayer;
|
|
152
|
+
|
|
153
|
+
if (p < 0) {
|
|
154
|
+
// terminal node — no decision to make, heuristicValue already reflects outcome
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
const own = this.heuristicValue;
|
|
159
|
+
let bestVec = own;
|
|
160
|
+
let bestVal = own[p];
|
|
161
|
+
|
|
162
|
+
const n = moves.length;
|
|
163
|
+
|
|
164
|
+
for (let i = 0; i < n; i++) {
|
|
165
|
+
/**
|
|
166
|
+
* @type {MoveEdge}
|
|
167
|
+
*/
|
|
168
|
+
const move = moves[i];
|
|
169
|
+
|
|
170
|
+
if (!move.isTargetMaterialized()) {
|
|
171
|
+
continue;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
const childVec = move.target.heuristicValue;
|
|
175
|
+
const val = childVec[p];
|
|
176
|
+
|
|
177
|
+
if (val > bestVal) {
|
|
178
|
+
bestVal = val;
|
|
179
|
+
bestVec = childVec;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
if (bestVec !== own) {
|
|
184
|
+
own.set(bestVec);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* @param state
|
|
190
|
+
* @param {function(State, source:StateNode):MoveEdge[]} computeValidMoves
|
|
191
|
+
* @param {function(State):number} computeActivePlayer
|
|
192
|
+
* @param {function(State):Float64Array} computeOutcome called when expansion produces no moves
|
|
193
|
+
* @returns {number} number of children
|
|
194
|
+
*/
|
|
195
|
+
expand(state, computeValidMoves, computeActivePlayer, computeOutcome) {
|
|
196
|
+
assert.defined(state, 'state');
|
|
197
|
+
assert.isFunction(computeValidMoves, 'computeValidMoves');
|
|
198
|
+
assert.isFunction(computeActivePlayer, 'computeActivePlayer');
|
|
199
|
+
assert.isFunction(computeOutcome, 'computeOutcome');
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
*
|
|
204
|
+
* @type {MoveEdge[]}
|
|
205
|
+
*/
|
|
206
|
+
const moves = computeValidMoves(state, this);
|
|
207
|
+
|
|
208
|
+
assert.notNull(moves, 'moves');
|
|
209
|
+
assert.defined(moves, 'moves');
|
|
210
|
+
assert.isArray(moves, 'moves');
|
|
211
|
+
|
|
212
|
+
const numMoves = moves.length;
|
|
213
|
+
|
|
214
|
+
this.moves = moves;
|
|
215
|
+
|
|
216
|
+
if (numMoves === 0) {
|
|
217
|
+
// No legal moves → terminal by exhaustion. Cache outcome so subsequent
|
|
218
|
+
// visits don't recompute, and seed heuristicValue with it.
|
|
219
|
+
// Copy into our own buffer so the user is free to reuse the source;
|
|
220
|
+
// outcome and heuristicValue share storage within the node (safe since
|
|
221
|
+
// terminal nodes are skipped by aggregateHeuristicScore).
|
|
222
|
+
this.type = StateType.NoMoves;
|
|
223
|
+
this.activePlayer = -1;
|
|
224
|
+
|
|
225
|
+
const out = computeOutcome(state);
|
|
226
|
+
|
|
227
|
+
assert.defined(out, 'out');
|
|
228
|
+
assert.isArrayLike(out, 'out');
|
|
229
|
+
|
|
230
|
+
this.heuristicValue.set(out);
|
|
231
|
+
this.outcome = this.heuristicValue;
|
|
232
|
+
} else {
|
|
233
|
+
|
|
234
|
+
this.activePlayer = computeActivePlayer(state);
|
|
235
|
+
|
|
236
|
+
assert.isNumber(this.activePlayer, 'activePlayer');
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
return numMoves;
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Walk up from this node to the root, adding the per-player outcome to each
|
|
245
|
+
* ancestor's cumulative score and incrementing their visit count.
|
|
246
|
+
* @param {Float64Array} outcomeVector indexed by player ID
|
|
247
|
+
*/
|
|
248
|
+
backpropagate(outcomeVector) {
|
|
249
|
+
assert.defined(outcomeVector, 'outcomeVector');
|
|
250
|
+
assert.isArrayLike(outcomeVector, 'outcomeVector');
|
|
251
|
+
assert(outcomeVector.length > 0, 'outcomeVector must not be empty');
|
|
252
|
+
|
|
253
|
+
const len = outcomeVector.length;
|
|
254
|
+
let node = this;
|
|
255
|
+
|
|
256
|
+
do {
|
|
257
|
+
node.playouts += 1;
|
|
258
|
+
|
|
259
|
+
const score = node.totalScore;
|
|
260
|
+
for (let p = 0; p < len; p++) {
|
|
261
|
+
score[p] += outcomeVector[p];
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
node = node.parent;
|
|
265
|
+
|
|
266
|
+
} while (node !== null);
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* Whenever this is a terminal state or not (win/loss/tie/no-moves/depth-cap)
|
|
272
|
+
* @returns {boolean}
|
|
273
|
+
*/
|
|
274
|
+
isTerminal() {
|
|
275
|
+
return this.type !== StateType.Undecided;
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
*
|
|
280
|
+
* @returns {boolean}
|
|
281
|
+
*/
|
|
282
|
+
isExpanded() {
|
|
283
|
+
return this.moves !== null;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
/**
|
|
287
|
+
* Pick the best moves from this node's perspective — i.e. for whichever
|
|
288
|
+
* player owns the decision here.
|
|
289
|
+
* @returns {MoveEdge[]}
|
|
290
|
+
*/
|
|
291
|
+
pickBestMoves() {
|
|
292
|
+
const moves = this.moves;
|
|
293
|
+
|
|
294
|
+
if (moves === null) {
|
|
295
|
+
return [];
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
const numMoves = moves.length;
|
|
299
|
+
|
|
300
|
+
if (numMoves === 0) {
|
|
301
|
+
return [];
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
// For a decision node, score by the active player's component.
|
|
305
|
+
// For a terminal node (no active player), fall back to player 0 — caller
|
|
306
|
+
// shouldn't normally ask for best moves on a terminal but we don't crash.
|
|
307
|
+
const player = this.activePlayer >= 0 ? this.activePlayer : 0;
|
|
308
|
+
|
|
309
|
+
const firstMove = moves[0];
|
|
310
|
+
|
|
311
|
+
let result = [firstMove];
|
|
312
|
+
let bestScore = pickBestScore(firstMove, player);
|
|
313
|
+
|
|
314
|
+
for (let i = 1; i < numMoves; i++) {
|
|
315
|
+
|
|
316
|
+
const move = moves[i];
|
|
317
|
+
|
|
318
|
+
const score = pickBestScore(move, player);
|
|
319
|
+
|
|
320
|
+
if (score > bestScore) {
|
|
321
|
+
bestScore = score;
|
|
322
|
+
result = [move];
|
|
323
|
+
} else if (score === bestScore) {
|
|
324
|
+
result.push(move);
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
return result;
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
*
|
|
333
|
+
* @param {function(StateNode)} visitor
|
|
334
|
+
*/
|
|
335
|
+
traverse(visitor) {
|
|
336
|
+
const stackOffset = stack_pointer;
|
|
337
|
+
|
|
338
|
+
stack[stack_pointer++] = this;
|
|
339
|
+
|
|
340
|
+
let n;
|
|
341
|
+
|
|
342
|
+
while (stack_pointer-- > stackOffset) {
|
|
343
|
+
|
|
344
|
+
n = stack[stack_pointer];
|
|
345
|
+
|
|
346
|
+
visitor(n);
|
|
347
|
+
|
|
348
|
+
if (n.isExpanded()) {
|
|
349
|
+
|
|
350
|
+
const moves = n.moves;
|
|
351
|
+
const numMoves = moves.length;
|
|
352
|
+
|
|
353
|
+
for (let i = 0; i < numMoves; i++) {
|
|
354
|
+
const moveEdge = moves[i];
|
|
355
|
+
|
|
356
|
+
if (moveEdge.isTargetMaterialized()) {
|
|
357
|
+
stack[stack_pointer++] = moveEdge.target;
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
}
|
|
364
|
+
}
|