mapspinner 0.1.76 → 0.1.77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/workflows/fps-2x.js +119 -0
- package/package.json +1 -1
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
 * Descriptor for the `fps-2x` workflow.
 *
 * Fields:
 * - `name`: workflow identifier.
 * - `description`: one-line summary of what the sweep does.
 * - `whenToUse`: guidance on when this workflow applies and what it needs to run.
 * - `phases`: ordered `{ title, detail }` entries; the titles match the phase names
 *   used by the `phase(...)` calls and the `phase:` agent options in this script.
 *
 * NOTE(review): the consumer of this export is not visible in this file; presumably
 * the workflow runner reads it for listing/selection -- confirm against the runner.
 */
export const meta = {
  name: 'fps-2x',
  description: 'Exhaustive TV8 FPS-doubling sweep: fan out per-surface finders for VISUAL-NEUTRAL frame-time cuts, adversarially verify each preserves the render byte-for-byte, arbitrate into one ranked cut plan toward a ~2x target.',
  whenToUse: 'When the user wants every possible FPS win with ZERO visual change (the "double the fps, do not touch the look" ask). Broader + more adversarial than fps-perf: covers VS-vertex-throughput, FS, CPU-frame, draw-state, LOD/quad-count, memory/bandwidth, AND dead-code, and every candidate must carry a same-pose visual-neutrality witness (reliefSD/albedoSD/shadingSD + screenshot) or it is dropped. Needs a headed browser for __diag.gpuTimer (EXT_disjoint_timer_query_webgl2 is absent in headless node-gl); the chain feeds measured numbers in via args.measured.',
  phases: [
    { title: 'Map', detail: 'parallel readers map every per-frame runtime cost across 7 surfaces' },
    // FS share is kept in step with the baseline quoted in the ranking prompt
    // (fs 3.9 of full 21.1 ms, i.e. ~19%); it previously read ~17%.
    { title: 'Rank', detail: 'arbitrate candidates against the LIVE-MEASURED doctrine (throughput-bound; ALU/THC/octave dead; FS ~19%)' },
    { title: 'Propose', detail: 'concrete edit per ranked candidate + the exact visual-neutrality + gpuTimer witness' },
    { title: 'Verify', detail: 'adversarial refute-by-default: does it change the render, seam, break LOD, or fail compile?' },
    { title: 'Plan', detail: 'synthesize surviving cuts into one ordered apply-and-measure plan' },
  ],
}
|
|
13
|
+
|
|
14
|
+
// ----------------------------------------------------------------------------
|
|
15
|
+
// TV8 fps-2x. The DOCTRINE this workflow is built on (all LIVE-MEASURED, ANGLE AMD D3D11,
|
|
16
|
+
// oblique 6km deck, 537 quads, GRID 11):
|
|
17
|
+
// - fullMs 21.1 = vsRasterMs 17.2 (81%) + fsMs 3.9 (19%). VS+raster is the budget.
|
|
18
|
+
// - The bottleneck is VERTEX/TRIANGLE THROUGHPUT (vertex count x raster), NOT per-vertex ALU:
|
|
19
|
+
// octMax 12->3 (4x less fractal ALU) left fullMs FLAT (20.4->20.4).
|
|
20
|
+
// - THC height-pool (O(1) baked fetch replacing composeHeight) is a DEAD net lever: it cuts
|
|
21
|
+
// vsRaster 16.9->13.2 but pushes fsMs 3.6->7.6, fullMs flat. Do NOT re-chase ALU-class cuts.
|
|
22
|
+
// - So the ONLY live levers are: (a) fewer vertices (GRID down / fewer quads via splitFactor+cull),
|
|
23
|
+
// (b) FS per-pixel cost (now 19%, no longer 1%), (c) CPU-frame + draw-state (instbuf cache,
|
|
24
|
+
// redundant uniform/bind uploads, getError sync), (d) dead-code removal.
|
|
25
|
+
// - GRID 11->8 is the headline vertex-count lever but is BLOCKED by jagged biome-crossover lines
|
|
26
|
+
// (climate varying interpolated across coarse tris). The unblock = per-PIXEL FS biome sampling.
|
|
27
|
+
// - REFUTED, do not re-attempt: 3-tap forward normal (biases slope -> rock-everywhere up close),
|
|
28
|
+
// analytic-derivative normal (broadShapeMD, node-rejected), baked-octa normal (flattens shading).
|
|
29
|
+
// The VISUAL GATE is absolute: every candidate must prove the render is unchanged at the same pose
|
|
30
|
+
// (reliefSD/albedoSD/shadingSD within tolerance + screenshot identical) or it is dropped.
|
|
31
|
+
// ----------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
// The cost surfaces swept by the Map phase. Each row is [key, path, lens]:
//   key  - short surface id, interpolated into agent labels and prompts
//   path - the source location(s) named in the reader's prompt
//   lens - what the reader should look for on that surface
// Rows are expanded into { key, path, lens } objects below.
const SURFACES = [
  [
    'vs-throughput',
    'src/shaders/terrain.glsl + src/gl-render.js',
    'per-vertex VS cost that scales with VERTEX COUNT: GRID mesh size, skirt ring verts, the lit-normal FD tap count, vtxDisplace. Vertex-count reductions only (ALU is proven off the critical path). Any GRID drop must name how the biome-crossover jaggies are prevented.',
  ],
  [
    'lod-quad-count',
    'src/planet-orchestrator.js',
    'leaf/quad count per frame: splitFactor, altSplitMul PEAK + high-alt holdoff/popBoost, maxLevel, near-radius, behind-limb cull + forward-cone rescue, frustum/horizon cull. Any quad drawn but sub-pixel or off-screen = wasted throughput.',
  ],
  [
    'fs-shade',
    'src/shaders/terrain.glsl FS + atmosphere.glsl',
    'per-pixel FS cost (now ~19% of frame): biome/strata/river/canyon masks, biplanar+2-scale RNM rock detail-normal, slope/gorge AO, analytic aerial perspective, snoise3 tap count, branches always-taken vs always-skipped at runtime defaults, taps multiplied by 0.',
  ],
  [
    'cpu-frame',
    'src/planet-orchestrator.js + src/gl-render.js',
    'CPU per-frame: quadtree split/update, instance-buffer Float32Array rebuild + bufferData upload, draw-state setup, sampleGroundM collision readback, per-frame getError/finish sync stalls, work repeated on non-moved frames.',
  ],
  [
    'draw-state',
    'src/gl-render.js',
    'redundant GL state per frame: useProgram/bind*/uniform* uploaded every frame though invariant, redundant VAO/FBO/texture binds, getUniformLocation in the hot path, getError sync points.',
  ],
  [
    'memory-bandwidth',
    'src/gl-render.js + src/planet-orchestrator.js',
    'vertex-fetch + texture bandwidth: instance/vertex attr widths (FLOAT vs SHORT/HALF_FLOAT, packed normals), HPF/pool texture formats + filtering, overdraw. Bounded quantization that keeps planet-scale positions exact (highp islands).',
  ],
  [
    'dead-code',
    'src/ + scripts/ + shaders',
    'unused exports, unreferenced fns/vars, dead window.__ levers, always-zero uniforms whose JS setter is gone, #ifdef branches never compiled in the render program, inlined-to-0 noise sites, orphan files. Removal is visual-neutral by definition; each needs a zero-live-reference witness.',
  ],
].map(([key, path, lens]) => ({ key, path, lens }))
|
|
42
|
+
|
|
43
|
+
// JSON-schema-shaped description of what each Map-phase reader must return:
// an object with a required `costs` array, each entry describing one per-frame cost.
// Built inside an IIFE so the helper below does not leak into module scope.
const COST_SCHEMA = (() => {
  // One property schema; `description` is only emitted when supplied.
  const field = (type, description) => (description === undefined ? { type } : { type, description })

  const costEntry = {
    type: 'object',
    properties: {
      name: field('string'),
      surface: field('string'),
      stage: field('string', 'VS | FS | CPU-frame | draw-state | LOD-quad-count | memory | dead-code'),
      perWhat: field('string', 'per-vertex | per-pixel | per-quad | per-frame | static'),
      cutIdea: field('string'),
      visualNeutral: field('boolean', 'true if the cut provably does NOT change the rendered image'),
      visualWitness: field('string', 'the exact metric/screenshot test that proves no visual change'),
      expectedSaving: field('string'),
      regressionRisk: field('string', 'low|medium|high + why'),
    },
    required: ['name', 'surface', 'stage', 'cutIdea', 'visualNeutral', 'expectedSaving'],
  }

  return {
    type: 'object',
    properties: {
      costs: { type: 'array', items: costEntry },
    },
    required: ['costs'],
  }
})()
|
|
60
|
+
|
|
61
|
+
// Map phase: one read-only agent per entry in SURFACES, dispatched through parallel().
// Each agent is asked to return data shaped like COST_SCHEMA.
// NOTE(review): phase/parallel/agent are not defined in this file -- presumably
// globals injected by the workflow runner; confirm against the runner.
phase('Map')
const maps = await parallel(
  SURFACES.map((s) => {
    // parallel() is handed thunks, so the agent call itself is deferred until the thunk runs.
    return () => {
      const prompt = [
        `Read the relevant TV8 files for the "${s.key}" surface (${s.path}) and map every per-frame RUNTIME cost it carries. Lens: ${s.lens}\n\n`,
        `HARD CONSTRAINT: the goal is ~2x FPS with ZERO visual change. For each cost give name, surface="${s.key}", stage, perWhat, a concrete cutIdea, visualNeutral (true ONLY if it cannot change the rendered image), the exact visualWitness that proves it, expectedSaving, regressionRisk.\n`,
        `DOCTRINE (live-measured, do not contradict without measurement): bottleneck is vertex/triangle THROUGHPUT not ALU; octave/THC/ALU cuts are DEAD (fullMs flat); FS is ~19%; GRID 11->8 needs per-pixel biome first; forward-normal + analytic-derivative + baked-octa normals are REFUTED. Do NOT propose those. Do NOT edit anything; read+map only.`,
      ].join('')
      const options = { label: `map:${s.key}`, phase: 'Map', schema: COST_SCHEMA }
      return agent(prompt, options)
    }
  })
)
// Falsy results are skipped; the remaining per-surface cost lists are merged into one flat list.
const costs = maps.filter((result) => Boolean(result)).flatMap((result) => result.costs)
|
|
71
|
+
|
|
72
|
+
// Rank phase: a single arbitration agent turns the mapped costs into a ranked list.
phase('Rank')

// `typeof` keeps this from throwing when no `args` binding exists at all.
// A caller-supplied args.measured is used when truthy; otherwise measured is null.
const measured = typeof args === 'object' && args !== null && args.measured ? args.measured : null

if (measured) {
  log(`measured gpuTimer split: ${JSON.stringify(measured)}`)
} else {
  log('no args.measured passed; ranking from the live-measured doctrine baseline (full 21.1 / vs 17.2 / fs 3.9 @ deck)')
}

const ranking = await agent(
  [
    'Arbitrate these TV8 per-frame costs into a ranked FPS-doubling plan (target ~2x, ZERO visual change). ',
    // The prompt cites the measured split when one was passed, else the hard-coded baseline.
    measured
      ? `MEASURED split (authoritative): ${JSON.stringify(measured)}. `
      : 'Baseline doctrine: full 21.1ms = vs 17.2 (81%) + fs 3.9 (19%) @ oblique 6km deck. ',
    'Drop any candidate that is not visualNeutral or contradicts the dead-lever record (ALU/octave/THC/forward-normal/analytic-derivative). ',
    `Weight by expectedSaving x confidence. The headline lever (per-pixel biome -> GRID 11->8) and FS-deadwork + CPU/draw cuts likely dominate. Costs:\n${JSON.stringify(costs, null, 2)}`,
  ].join(''),
  {
    label: 'rank',
    phase: 'Rank',
    schema: {
      type: 'object',
      properties: {
        summary: { type: 'string' },
        reachableSpeedup: {
          type: 'string',
          description: 'honest estimate of total achievable speedup with zero visual change, e.g. ~1.4x',
        },
        ranked: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              name: { type: 'string' },
              rank: { type: 'number' },
              surface: { type: 'string' },
              expectedSaving: { type: 'string' },
              visualWitness: { type: 'string' },
            },
            required: ['name', 'rank', 'expectedSaving', 'visualWitness'],
          },
        },
      },
      required: ['summary', 'reachableSpeedup', 'ranked'],
    },
  }
)
|
|
89
|
+
|
|
90
|
+
// Propose + Verify: each ranked lever goes through two agent stages via pipeline().
//   stage 1 (Propose): asks for a concrete edit plus its gpuTimer and visual-neutrality witnesses
//   stage 2 (Verify):  asks a second agent to refute that proposal, defaulting to unsafe on doubt
// Each resulting element is { section, surface, proposal, verdict }.
// NOTE(review): pipeline/agent/phase are not defined in this file -- presumably runner
// globals. The stage-2 callback signature used here implies pipeline() passes the previous
// stage's result followed by the original item; confirm against the runner.
phase('Propose')

// Array.prototype.sort sorts in place, so sort a copy and leave the agent's `ranking`
// result untouched.
const rankedLevers = [...ranking.ranked].sort((a, b) => a.rank - b.rank)

const cuts = await pipeline(
  rankedLevers,
  sec => agent(
    `Propose the concrete code edit for TV8 FPS lever "${sec.name}" (${sec.surface}, expected ${sec.expectedSaving}). ` +
    `Give file, exact oldSnippet, newSnippet, the re-measured __diag.gpuTimer fullMs delta to expect, and the VISUAL-NEUTRALITY witness (${sec.visualWitness}): same-pose reliefSD/albedoSD/shadingSD within tolerance + screenshot identical + glError 0. If the edit could change ANY pixel, say safe=false.`,
    { label: `propose:${sec.name}`, phase: 'Propose', schema: { type: 'object', properties: {
      file: { type: 'string' }, oldSnippet: { type: 'string' }, newSnippet: { type: 'string' },
      gpuWitness: { type: 'string' }, visualWitness: { type: 'string' }, safe: { type: 'boolean' }, notes: { type: 'string' },
    }, required: ['file', 'visualWitness', 'safe'] } }
  ),
  async (proposal, sec) => {
    const verdict = await agent(
      `Adversarially REFUTE this TV8 FPS cut for "${sec.name}". Could it change the rendered image (any pixel), reintroduce a tile-edge seam, break LOD invariance, alter biome/lighting/shape, break a uniform, regress a refuted lever, or fail to compile? Default real=false/safe=false if ANY doubt. Proposal:\n${JSON.stringify(proposal, null, 2)}`,
      { label: `verify:${sec.name}`, phase: 'Verify', schema: { type: 'object', properties: {
        real: { type: 'boolean' }, safe: { type: 'boolean' }, visualRisk: { type: 'string' }, reason: { type: 'string' },
      }, required: ['real', 'safe', 'reason'] } }
    )
    // Keep the proposal and its verdict together so the Plan step can gate on both.
    return { section: sec.name, surface: sec.surface, proposal, verdict }
  }
)
|
|
108
|
+
|
|
109
|
+
phase('Plan')
|
|
110
|
+
const safe = cuts.filter(Boolean).filter(c => c.proposal.safe && c.verdict.safe && c.verdict.real)
|
|
111
|
+
const gated = cuts.filter(Boolean).filter(c => !(c.proposal.safe && c.verdict.safe && c.verdict.real))
|
|
112
|
+
return {
|
|
113
|
+
summary: ranking.summary,
|
|
114
|
+
reachableSpeedup: ranking.reachableSpeedup,
|
|
115
|
+
measured,
|
|
116
|
+
applyOrder: safe.map(c => ({ section: c.section, surface: c.surface, file: c.proposal.file, gpuWitness: c.proposal.gpuWitness, visualWitness: c.proposal.visualWitness })),
|
|
117
|
+
gated: gated.map(c => ({ section: c.section, reason: c.verdict.reason, visualRisk: c.verdict.visualRisk })),
|
|
118
|
+
note: 'Apply ONE cut at a time on a headed browser; re-measure __diag.gpuTimer AND assert the same-pose visual metrics + screenshot are unchanged before keeping it. Any perceptible change = revert that cut first, investigate second. Batch terrain.glsl edits into one reloadShaders (each shader edit = a cold compile). If the honest reachable speedup is below 2x, report the measured ceiling with per-lever attribution rather than claiming the target.',
|
|
119
|
+
}
|