romdevtools 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +30 -0
- package/CHANGELOG.md +73 -0
- package/examples/genesis/templates/platformer.c +5 -1
- package/examples/genesis/templates/two_plane_parallax.c +166 -0
- package/package.json +1 -1
- package/src/host/LibretroHost.js +55 -1
- package/src/host/framebuffer.js +37 -0
- package/src/mcp/tools/audio.js +2 -2
- package/src/mcp/tools/frame.js +13 -34
- package/src/mcp/tools/index.js +2 -2
- package/src/mcp/tools/metasprite-tools.js +1 -1
- package/src/mcp/tools/platform-tools.js +18 -11
- package/src/mcp/tools/project.js +9 -1
- package/src/mcp/tools/rendering-context.js +1 -1
- package/src/mcp/tools/symbols.js +130 -39
- package/src/mcp/tools/tile-inspect.js +1 -1
- package/src/mcp/tools/toolchain.js +3 -2
- package/src/mcp/tools/watch-memory.js +60 -8
- package/src/platforms/gb/MENTAL_MODEL.md +18 -0
- package/src/platforms/gb/lib/c/SDCC_GOTCHAS.md +91 -0
- package/src/platforms/gbc/MENTAL_MODEL.md +13 -0
- package/src/platforms/gbc/lib/c/SDCC_GOTCHAS.md +91 -0
- package/src/platforms/genesis/MENTAL_MODEL.md +161 -0
- package/src/platforms/genesis/TROUBLESHOOTING.md +32 -0
- package/src/platforms/gg/lib/c/gg_crt0.s +30 -0
- package/src/platforms/sms/lib/c/sms_crt0.s +40 -0
- package/src/toolchains/sdcc/preflight-lint.js +164 -8
|
@@ -188,3 +188,94 @@ build({
|
|
|
188
188
|
},
|
|
189
189
|
})
|
|
190
190
|
```
|
|
191
|
+
|
|
192
|
+
## sm83 codegen traps in plain game logic (WRAM integer/array code)
|
|
193
|
+
|
|
194
|
+
Every footgun above is about VRAM / OAM-DMA / the cart header — the stuff
|
|
195
|
+
that makes sprites vanish. This section is the opposite: **plain WRAM game
|
|
196
|
+
logic** — PRNGs, collision grids, score math. Two such "miscompiles" were
|
|
197
|
+
reported from a real GBC Columns build session and chased to ground here.
|
|
198
|
+
**Verdict: neither was an sm83 codegen bug.** They are documented so you
|
|
199
|
+
don't burn hours blaming the compiler for what is actually a memory-layout
|
|
200
|
+
or static-init trap.
|
|
201
|
+
|
|
202
|
+
### NOT a bug: 32-bit math / `uint32_t` shifts ≥ 16
|
|
203
|
+
|
|
204
|
+
Reported: *"`static uint32_t rng=0x1357; rng ^= rng<<13; rng ^= rng>>17;
|
|
205
|
+
rng ^= rng<<5;` degenerates — every `1+xorshift()%6` roll comes out the
|
|
206
|
+
same (near-monochrome)."*
|
|
207
|
+
|
|
208
|
+
**Reproduced on sm83: it does NOT degenerate.** A ROM that seeds the PRNG,
|
|
209
|
+
calls `xorshift()` 20×, and writes `1 + (result % 6)` to WRAM reads back a
|
|
210
|
+
fully-varied `5,5,5,1,5,5,4,1,3,2,1,...` — the exact sequence a reference
|
|
211
|
+
implementation produces. Full 32-bit fidelity was confirmed byte-for-byte
|
|
212
|
+
across several seeds (`0xDEADBEEF`, `0x00000001`, …). The `<<13` / `>>17` /
|
|
213
|
+
`<<5` shifts (including the ≥16-bit right shift) and `% 6` are all correct.
|
|
214
|
+
**Do not rewrite a working 32-bit xorshift into 16-bit to "dodge" this.**
|
|
215
|
+
32-bit ops are bigger/slower than 16-bit on an 8-bit CPU, so prefer 16-bit
|
|
216
|
+
PRNGs for *speed* — but not for correctness; both are correct.
|
|
217
|
+
|
|
218
|
+
### The REAL trap behind "monochrome RNG": writing game state to a fixed `0xC0xx` WRAM address that overlaps your statics
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
This is what actually produces the reported symptom. SDCC links the C
|
|
222
|
+
runtime's `_DATA` / `_INITIALIZED` segment (every value-initialised
|
|
223
|
+
`static`, e.g. `static uint32_t rng = 0x1357;`) **at the very bottom of
|
|
224
|
+
WRAM, starting `$C000`**, with `_BSS` (zero-init statics like
|
|
225
|
+
`static uint8_t grid[78];`) right after it. If your code also pokes a
|
|
226
|
+
**hardcoded** `$C000`-area pointer for game state —
|
|
227
|
+
|
|
228
|
+
```c
|
|
229
|
+
volatile uint8_t *board = (volatile uint8_t *)0xC000; /* DON'T */
|
|
230
|
+
board[i] = piece; /* clobbers `rng` and friends! */
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
— you are scribbling directly over your own statics. Then `xorshift()`
|
|
234
|
+
reads a trashed `rng`, the PRNG collapses, and every roll looks the same.
|
|
235
|
+
It presents *exactly* like a compiler bug; it is not.
|
|
236
|
+
|
|
237
|
+
**Fixes (any one):**
|
|
238
|
+
- **Best — let the linker place it.** Use a `static` array and take its
|
|
239
|
+
address; never hardcode a WRAM pointer:
|
|
240
|
+
`static uint8_t board[6*13]; ... board[i] = piece;`
|
|
241
|
+
- If you *must* use a fixed address, put it well clear of the runtime data:
|
|
242
|
+
`$C200`+ is safe for small projects (statics here end far below `$C100`;
|
|
243
|
+
`shadow_oam` is pinned at `$C100`). Confirm with the linker map — build
|
|
244
|
+
with `includeSymbols:true` and look at `s__DATA` / `s__BSS` (e.g.
|
|
245
|
+
`s__DATA = $C000`, `s__BSS = $C006`): your scratch RAM must start ABOVE
|
|
246
|
+
the end of `_BSS`.
|
|
247
|
+
- **Diagnose it in seconds:** read `system_ram` offset 0 right after boot
|
|
248
|
+
and compare against your initialised statics' expected bytes. If a
|
|
249
|
+
`static uint32_t x = 0x1357;` doesn't read back `57 13 00 00` at its map
|
|
250
|
+
address, something is overwriting it.
|
|
251
|
+
|
|
252
|
+
### NOT a bug: short `for` loop with an indexed `static` array read
|
|
253
|
+
|
|
254
|
+
Reported: *"`for(i=0;i<3;i++){ if(grid[r*6+col]) return 1; }` reads the
|
|
255
|
+
wrong cells (pieces lock mid-air / floating gaps); unrolling the 3
|
|
256
|
+
iterations fixed it."*
|
|
257
|
+
|
|
258
|
+
**Reproduced on sm83: the looped form reads the CORRECT cells.** A ROM that
|
|
259
|
+
seeds `grid[]` with a sparse occupied/empty pattern and runs `collides()`
|
|
260
|
+
both looped and hand-unrolled, for 8 straddling `(col,topy)` inputs, gets
|
|
261
|
+
**identical, correct** results from both forms (`1,0,1,0,1,1,1,1`). The
|
|
262
|
+
`grid[r*6+col]` index math and the 3-iteration loop are fine. If your real
|
|
263
|
+
collision check "floats," look first at the WRAM-collision trap above (a
|
|
264
|
+
clobbered `grid[]`), at off-by-one row/col limits, or at signed/unsigned
|
|
265
|
+
mix-ups — not at loop codegen. **Don't pre-emptively unroll loops as a
|
|
266
|
+
compiler workaround; with the stack-overflow fix in place, sm83 loops with
|
|
267
|
+
indexed array reads are reliable.**
|
|
268
|
+
|
|
269
|
+
### z80 (SMS/GG) ONLY — fixed: value-initialised statics booted as 0
|
|
270
|
+
|
|
271
|
+
Investigating the above on the **z80** port (SMS/GG share the SDCC family)
|
|
272
|
+
surfaced a real bug — but a **crt0** bug, not codegen. The bundled
|
|
273
|
+
`sms_crt0.s` / `gg_crt0.s` placed `_INITIALIZER` (the ROM image of
|
|
274
|
+
value-initialised statics) *after* the `_DATA` RAM block in the area list,
|
|
275
|
+
so sdld put it in RAM; the gsinit `ldir` then copied uninitialised RAM onto
|
|
276
|
+
itself and **every `static uint8_t x = 5;` booted as 0** (and BSS wasn't
|
|
277
|
+
zeroed either). On z80 *this* is what made the xorshift PRNG monochrome
|
|
278
|
+
(seed `rng` booted 0 → stayed 0). Fixed 2026-06-08 by ROM-placing
|
|
279
|
+
`_INITIALIZER` + adding a `_DATA` zero loop, mirroring this sm83 crt0 (which
|
|
280
|
+
was already correct — hence sm83 was never affected). If you bring your own
|
|
281
|
+
z80 crt0, model gsinit on `gb_crt0.s`.
|
|
@@ -161,6 +161,167 @@ while (1) {
|
|
|
161
161
|
your sprite updates never appear on screen. It's the single most
|
|
162
162
|
important call in any SGDK game loop.
|
|
163
163
|
|
|
164
|
+
## Scrolling, parallax & the feel trap ⭐
|
|
165
|
+
|
|
166
|
+
This is the section to read before you build a side-scroller. The #1
|
|
167
|
+
"my horizontal movement feels choppy/juddery" bug on Genesis is a
|
|
168
|
+
software mistake, not a hardware limit:
|
|
169
|
+
|
|
170
|
+
> ### ⚠️ DO NOT rewrite full tilemaps in the frame loop.
|
|
171
|
+
> The Genesis scrolls in HARDWARE. Moving the world is **two register
|
|
172
|
+
> writes** (`VDP_setHorizontalScroll`), which are free. If instead you
|
|
173
|
+
> redraw the plane each frame (a big `VDP_setTileMapXY`/`VDP_loadTileMap`
|
|
174
|
+
> burst or a per-frame DMA), you overrun vblank, drop frames, and the
|
|
175
|
+
> scroll judders. **Paint the planes ONCE at setup; the loop only nudges
|
|
176
|
+
> scroll registers and re-stages sprites.** Use the
|
|
177
|
+
> `template:"two_plane_parallax"` scaffold as the known-good shape.
|
|
178
|
+
|
|
179
|
+
### Hardware scroll, the whole loop
|
|
180
|
+
|
|
181
|
+
A two-plane parallax scroller's *entire* per-frame render cost is:
|
|
182
|
+
|
|
183
|
+
```c
|
|
184
|
+
VDP_setHorizontalScroll(BG_A, -camX); // foreground: 1:1 with world
|
|
185
|
+
VDP_setHorizontalScroll(BG_B, -(camX >> 4)); // background: 1/16 speed = far depth
|
|
186
|
+
/* ...stage sprites in SCREEN space... */
|
|
187
|
+
VDP_setSprite(0, playerScreenX, playerY, SPRITE_SIZE(2,2), attr);
|
|
188
|
+
VDP_updateSprites(1, DMA); // flush the SAT
|
|
189
|
+
SYS_doVBlankProcess(); // flush DMA queue, sync vblank
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
No `VDP_setTileMapXY` / `VDP_fillTileMapRect` / `VDP_loadTileMap` in the
|
|
193
|
+
loop. Those are SETUP calls (and tiny one-off updates — a coin that
|
|
194
|
+
vanishes, a door that opens). They are NOT for whole-plane runtime
|
|
195
|
+
redraws. Positive `camX` scrolls the plane LEFT, so you write the
|
|
196
|
+
NEGATIVE camera offset. `VDP_setVerticalScroll` is the vertical twin
|
|
197
|
+
(it writes VSRAM — see `genesis_vsram`).
|
|
198
|
+
|
|
199
|
+
### Logical plane size vs HARDWARE plane size
|
|
200
|
+
|
|
201
|
+
A common confusion: **the Genesis has ONE shared plane-size setting for
|
|
202
|
+
BOTH planes A and B** (VDP register 16). You pick 32×32 / 64×32 / 32×64 /
|
|
203
|
+
64×64 *cells* once; you do NOT get an independent size per plane. So a
|
|
204
|
+
"32-cell-wide level" still lives inside a 64-cell **physical** plane if
|
|
205
|
+
that's the hardware size you set — the extra cells are just offscreen
|
|
206
|
+
buffer. The scroll value wraps within the physical plane
|
|
207
|
+
(64 cells = 512 px), which is exactly what makes a fully-painted plane
|
|
208
|
+
tile forever with no redraw. Don't fight this: pick a hardware plane
|
|
209
|
+
size and treat your logical world coords separately.
|
|
210
|
+
|
|
211
|
+
| Plane size (cells) | Pixels | Use |
|
|
212
|
+
|--------------------|----------|--------------------------------------|
|
|
213
|
+
| 32×32 | 256×256 | single-screen / small wrap |
|
|
214
|
+
| **64×32** (default)| 512×256 | horizontal scroller (one plane wide) |
|
|
215
|
+
| 32×64 | 256×512 | vertical scroller |
|
|
216
|
+
| 64×64 | 512×512 | uses the most VRAM for name tables |
|
|
217
|
+
|
|
218
|
+
### How Sonic-style large maps REALLY work (wider than one plane)
|
|
219
|
+
|
|
220
|
+
You do NOT make the plane "as wide as the level," and you do NOT redraw
|
|
221
|
+
the plane. The 64-cell hardware plane is a **circular buffer**: as the
|
|
222
|
+
camera advances, the column scrolling OFF the left re-appears on the
|
|
223
|
+
right (the scroll wraps mod 512 px). You keep the visible window full by
|
|
224
|
+
updating exactly **ONE offscreen column** each time the camera crosses
|
|
225
|
+
an 8-px tile boundary:
|
|
226
|
+
|
|
227
|
+
```c
|
|
228
|
+
// camX in pixels; world is an array wider than 512 px.
|
|
229
|
+
s16 newTileCol = camX >> 3;
|
|
230
|
+
if (newTileCol != lastTileCol) {
|
|
231
|
+
// the column about to enter view on the right edge:
|
|
232
|
+
s16 worldCol = (camX + SCREEN_W) >> 3;
|
|
233
|
+
s16 planeCol = worldCol & 63; // wrap into the 64-cell plane
|
|
234
|
+
drawWorldColumn(planeCol, worldCol); // ONE column, ~28 cells — tiny
|
|
235
|
+
lastTileCol = newTileCol;
|
|
236
|
+
}
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
That's ~28 tile writes per 8 px of travel, not a 1792-cell plane redraw.
|
|
240
|
+
The `template:"platformer"` scaffold scrolls within one plane (no
|
|
241
|
+
streaming); add the column-stream above to go wider. (Real Sonic also
|
|
242
|
+
splits the screen with H-blank raster effects for independent strips —
|
|
243
|
+
that's an IRQ/raster topic, see the `asm` template.)
|
|
244
|
+
|
|
245
|
+
## Why does horizontal movement feel choppy? — motion-trace it headlessly ⭐
|
|
246
|
+
|
|
247
|
+
When movement feels off, don't trial-and-error with screenshots. Sample
|
|
248
|
+
the player's world-X, the camera scroll, and the actual VDP scroll
|
|
249
|
+
values over ~180 frames while holding a direction, and read the curve.
|
|
250
|
+
Two signatures to look for:
|
|
251
|
+
|
|
252
|
+
1. **Camera scroll changes while the sprite's screen-X barely moves**
|
|
253
|
+
(or vice-versa) → your camera-follow math is off; the world slides
|
|
254
|
+
under a frozen-looking player, or the player slides on a frozen world.
|
|
255
|
+
2. **Scroll JUMPS** (non-monotone, big steps) → you're scrolling by a
|
|
256
|
+
non-constant amount per frame (variable-rate camera, or you only
|
|
257
|
+
update scroll on a tile boundary instead of every frame).
|
|
258
|
+
|
|
259
|
+
The exact call — hold RIGHT, sample player-X + both planes' HSCROLL +
|
|
260
|
+
VSRAM over 180 frames. Expose the player/camera vars as `volatile`
|
|
261
|
+
globals so they resolve (see "Reading your C globals headlessly"); the
|
|
262
|
+
HSCROLL table lives in VRAM (`video_ram`), default base **$F000**
|
|
263
|
+
(`frame({op:'verify'})`'s render summary prints "H-scroll table: $Fxxx"):
|
|
264
|
+
|
|
265
|
+
```js
|
|
266
|
+
b = build({output:'romWithDebug', platform:'genesis', source, inline:true,
|
|
267
|
+
resolveSymbols:['g_player_x','g_cam_x']})
|
|
268
|
+
// → resolvedSymbols.g_player_x.ramOffset (system_ram offset)
|
|
269
|
+
recordSession({
|
|
270
|
+
frames:180, sampleEvery:10, includeScreenshots:false,
|
|
271
|
+
holdInputs:[{right:true}],
|
|
272
|
+
memorySamples:[
|
|
273
|
+
{label:'player_x', region:'system_ram', offset: PLAYER_X_OFF, length:2},
|
|
274
|
+
{label:'cam_x', region:'system_ram', offset: CAM_X_OFF, length:2},
|
|
275
|
+
{label:'hscrollA', region:'video_ram', offset:0xF000, length:2},
|
|
276
|
+
{label:'hscrollB', region:'video_ram', offset:0xF002, length:2},
|
|
277
|
+
{label:'vsram', region:'genesis_vsram', offset:0, length:4},
|
|
278
|
+
],
|
|
279
|
+
})
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Read the columns: `player_x` should ramp smoothly; `hscrollA` should
|
|
283
|
+
move 1:1 with the camera and `hscrollB` at the parallax ratio; both
|
|
284
|
+
should be **monotone** (no jumps) while RIGHT is held. ⚠ Genesis WRAM +
|
|
285
|
+
VRAM read **word-byte-swapped** in gpgx (a 16-bit `0x00F0` reads as
|
|
286
|
+
bytes `F0 00`) — account for the swap, or read single bytes. For a
|
|
287
|
+
compact value-vs-frame curve of just the HSCROLL table use
|
|
288
|
+
`watch({on:'mem', region:'video_ram', offset:0xF000, length:4,
|
|
289
|
+
format:'series', pressDuring:[{frame:0, button:'right', holdFrames:180}]})`.
|
|
290
|
+
|
|
291
|
+
## Is the loop doing too much VDP work? — per-frame DMA budget ⭐
|
|
292
|
+
|
|
293
|
+
The render-side cause of choppy scroll is **too many VDP/DMA bytes per
|
|
294
|
+
frame** (a tilemap rewrite, an asset re-upload). Measure it directly,
|
|
295
|
+
no core rebuild:
|
|
296
|
+
|
|
297
|
+
```js
|
|
298
|
+
watch({on:'dma', perFrame:true, frames:120,
|
|
299
|
+
pressDuring:[{frame:0, button:'right', holdFrames:120}]})
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
returns a per-frame timeline `[{frame, dmas, bytes, romBytes, ramBytes}]`
|
|
303
|
+
plus `avgBytesPerFrame`, `peakFrame`/`peakBytes`, and `spikes`.
|
|
304
|
+
|
|
305
|
+
- A **smooth hardware-scroll loop** shows a LOW, FLAT curve — after boot
|
|
306
|
+
it's mostly the steady SAT/scroll refresh (`ramBytes`, single/low
|
|
307
|
+
double digits per frame).
|
|
308
|
+
- A **`spikes` entry** (bytes ≫ average, especially `romBytes` — an
|
|
309
|
+
asset upload FROM cart ROM) is the "I rewrote a tilemap / re-uploaded
|
|
310
|
+
tiles in the frame loop" smell. Move that work to setup, or stream
|
|
311
|
+
ONE column per 8-px scroll step (above).
|
|
312
|
+
|
|
313
|
+
**CEILING / what this does NOT catch:** this counts mem→VDP **DMA**
|
|
314
|
+
bytes (the dominant cost). Plain CPU writes to the VDP data port —
|
|
315
|
+
`VDP_setTileMapXY` without DMA, single-cell pokes — are not DMA and are
|
|
316
|
+
NOT counted; catching *those* would need a core-side VDP-data-port write
|
|
317
|
+
hook (a gpgx patch, not shipped). In practice the expensive per-frame
|
|
318
|
+
mistakes (whole-plane fills, `VDP_loadTileMap`, big `DMA_*` transfers)
|
|
319
|
+
ALL go through DMA and DO show up here, so the budget is a reliable
|
|
320
|
+
choppiness diagnostic today. There is no exposed per-frame
|
|
321
|
+
"vblank-cycles-used / overrun" counter either — infer overrun from the
|
|
322
|
+
byte budget (DMA bandwidth in vblank is finite: ~7.6 KB to VRAM in NTSC
|
|
323
|
+
vblank, more in PAL; a frame moving multiple KB to VRAM is at risk).
|
|
324
|
+
|
|
164
325
|
## Input
|
|
165
326
|
|
|
166
327
|
`u16 pad = JOY_readJoypad(JOY_1)` returns a packed bitmask. The
|
|
@@ -230,3 +230,35 @@ pixels per byte). The shipped `hello_sprite`, `tile_engine`, `shmup`,
|
|
|
230
230
|
`platformer`, `puzzle` templates use this approach. **But never
|
|
231
231
|
hand-encode a full-screen image this way** — that's the red/choppy
|
|
232
232
|
failure above.
|
|
233
|
+
|
|
234
|
+
## "Horizontal movement / scrolling feels choppy or judders"
|
|
235
|
+
|
|
236
|
+
Almost always: **you're rewriting the tilemap in the frame loop.** The
|
|
237
|
+
Genesis scrolls in HARDWARE — moving the world is two register writes
|
|
238
|
+
(`VDP_setHorizontalScroll`), which cost nothing. If you instead redraw a
|
|
239
|
+
plane every frame (a `VDP_fillTileMapRect` / `VDP_loadTileMap` / big
|
|
240
|
+
`DMA_*` each frame), you overrun the vblank DMA budget and drop frames →
|
|
241
|
+
judder. Fix: paint the planes ONCE at setup; the loop only nudges scroll
|
|
242
|
+
registers + re-stages sprites. The `template:"two_plane_parallax"`
|
|
243
|
+
scaffold is the known-good shape.
|
|
244
|
+
|
|
245
|
+
Diagnose it without guessing (no core rebuild):
|
|
246
|
+
|
|
247
|
+
- **Per-frame VDP work:** `watch({on:'dma', perFrame:true, frames:120,
|
|
248
|
+
pressDuring:[{frame:0, button:'right', holdFrames:120}]})` → a per-frame
|
|
249
|
+
`[{frame, bytes, romBytes, ramBytes}]` timeline + `spikes`. A smooth
|
|
250
|
+
loop is a LOW, FLAT curve (just the SAT/scroll refresh). A `spikes`
|
|
251
|
+
entry (bytes ≫ avg, esp. `romBytes`) IS the per-frame asset-upload /
|
|
252
|
+
tilemap-rewrite mistake. (Counts DMA bytes only — non-DMA single-cell
|
|
253
|
+
`VDP_setTileMapXY` pokes aren't counted; the expensive whole-plane work
|
|
254
|
+
always uses DMA and does show.)
|
|
255
|
+
- **Motion curve:** sample the player-X + both planes' HSCROLL ($F000 in
|
|
256
|
+
`video_ram`) over 180 frames while holding a direction — see
|
|
257
|
+
MENTAL_MODEL.md "Why does horizontal movement feel choppy?". Look for
|
|
258
|
+
scroll that jumps (non-constant per-frame delta) or a camera that moves
|
|
259
|
+
while the sprite's screen-X is frozen.
|
|
260
|
+
|
|
261
|
+
For a world WIDER than one 512-px plane, don't make the plane bigger and
|
|
262
|
+
don't redraw it — stream ONE offscreen column per 8-px camera step
|
|
263
|
+
(circular-buffer the 64-cell plane). See MENTAL_MODEL.md "How Sonic-style
|
|
264
|
+
large maps REALLY work".
|
|
@@ -31,6 +31,8 @@
|
|
|
31
31
|
.globl l__INITIALIZER
|
|
32
32
|
.globl s__INITIALIZER
|
|
33
33
|
.globl s__INITIALIZED
|
|
34
|
+
.globl s__DATA
|
|
35
|
+
.globl l__DATA
|
|
34
36
|
|
|
35
37
|
;; ─── Reset vector at $0000 ────────────────────────────────────────
|
|
36
38
|
.area _HEADER (ABS)
|
|
@@ -83,7 +85,15 @@
|
|
|
83
85
|
;; call main. The initializer area is filled by sdcc when it sees
|
|
84
86
|
;; global initializations.
|
|
85
87
|
|
|
88
|
+
;; AREA ORDERING IS LOAD-BEARING. `_INITIALIZER` (the ROM image of
|
|
89
|
+
;; every value-initialised `static` global) MUST be declared in the
|
|
90
|
+
;; ROM group here — BEFORE the `_DATA` RAM block. If it isn't, sdld
|
|
91
|
+
;; places `_INITIALIZER` in RAM right after `_INITIALIZED`, so the
|
|
92
|
+
;; gsinit copy below copies uninitialised RAM onto itself and every
|
|
93
|
+
;; `static uint8_t x = 5;` boots as 0. (Bug found 2026-06-08; see the
|
|
94
|
+
;; matching note in sms_crt0.s — both z80 crt0s were missing this.)
|
|
86
95
|
.area _HOME
|
|
96
|
+
.area _INITIALIZER
|
|
87
97
|
.area _CODE
|
|
88
98
|
.area _GSINIT
|
|
89
99
|
.area _GSFINAL
|
|
@@ -97,6 +107,26 @@
|
|
|
97
107
|
.area _CODE
|
|
98
108
|
|
|
99
109
|
gsinit:
|
|
110
|
+
;; ── Zero the BSS segment (`_DATA`). ──────────────────────────
|
|
111
|
+
;; Every uninitialised `static` global lands in `_DATA` and MUST
|
|
112
|
+
;; read back 0 at boot. Mirrors the sm83 GB crt0's gsinit_data loop.
|
|
113
|
+
ld bc, #l__DATA
|
|
114
|
+
ld a, b
|
|
115
|
+
or a, c
|
|
116
|
+
jr Z, gsinit_bss_done
|
|
117
|
+
ld hl, #s__DATA
|
|
118
|
+
ld (hl), #0x00
|
|
119
|
+
ld d, h
|
|
120
|
+
ld e, l
|
|
121
|
+
inc de
|
|
122
|
+
dec bc
|
|
123
|
+
ld a, b
|
|
124
|
+
or a, c
|
|
125
|
+
jr Z, gsinit_bss_done
|
|
126
|
+
ldir ; propagate the 0 across _DATA
|
|
127
|
+
gsinit_bss_done:
|
|
128
|
+
|
|
129
|
+
;; ── Copy `_INITIALIZER` (ROM) → `_INITIALIZED` (RAM). ────────
|
|
100
130
|
ld bc, #l__INITIALIZER
|
|
101
131
|
ld a, b
|
|
102
132
|
or a, c
|
|
@@ -22,6 +22,8 @@
|
|
|
22
22
|
.globl l__INITIALIZER
|
|
23
23
|
.globl s__INITIALIZER
|
|
24
24
|
.globl s__INITIALIZED
|
|
25
|
+
.globl s__DATA
|
|
26
|
+
.globl l__DATA
|
|
25
27
|
|
|
26
28
|
;; ─── Reset vector at $0000 ────────────────────────────────────────
|
|
27
29
|
.area _HEADER (ABS)
|
|
@@ -72,7 +74,19 @@
|
|
|
72
74
|
;; call main. The initializer area is filled by sdcc when it sees
|
|
73
75
|
;; global initializations.
|
|
74
76
|
|
|
77
|
+
;; AREA ORDERING IS LOAD-BEARING. `_INITIALIZER` (the ROM image of
|
|
78
|
+
;; every value-initialised `static` global) MUST be declared in the
|
|
79
|
+
;; ROM group here — BEFORE the `_DATA` RAM block. If it isn't, sdld
|
|
80
|
+
;; places `_INITIALIZER` in RAM right after `_INITIALIZED`, so the
|
|
81
|
+
;; gsinit copy below copies uninitialised RAM onto itself and every
|
|
82
|
+
;; `static uint8_t x = 5;` boots as 0. (Bug found 2026-06-08: a GBC
|
|
83
|
+
;; Columns agent's `static uint32_t rng = 0x1357;` booted as 0, so
|
|
84
|
+
;; the xorshift PRNG stayed 0 and every "random" roll came out the
|
|
85
|
+
;; same — a "monochrome RNG" that looked like an SDCC codegen bug
|
|
86
|
+
;; but was really this missing ROM placement. The sm83 GB crt0 has
|
|
87
|
+
;; always placed _INITIALIZER in ROM; the z80 crt0s never did.)
|
|
75
88
|
.area _HOME
|
|
89
|
+
.area _INITIALIZER
|
|
76
90
|
.area _CODE
|
|
77
91
|
.area _GSINIT
|
|
78
92
|
.area _GSFINAL
|
|
@@ -86,6 +100,32 @@
|
|
|
86
100
|
.area _CODE
|
|
87
101
|
|
|
88
102
|
gsinit:
|
|
103
|
+
;; ── Zero the BSS segment (`_DATA`). ──────────────────────────
|
|
104
|
+
;; Every uninitialised `static` global lands in `_DATA` and MUST
|
|
105
|
+
;; read back 0 at boot. Without this, `static uint8_t flag;` boots
|
|
106
|
+
;; with whatever power-on WRAM byte was there (gambatte/gpgx leave
|
|
107
|
+
;; garbage), and `if (flag)` spuriously fires. Mirrors the sm83 GB
|
|
108
|
+
;; crt0's gsinit_data loop.
|
|
109
|
+
ld bc, #l__DATA
|
|
110
|
+
ld a, b
|
|
111
|
+
or a, c
|
|
112
|
+
jr Z, gsinit_bss_done
|
|
113
|
+
ld hl, #s__DATA
|
|
114
|
+
ld (hl), #0x00
|
|
115
|
+
ld d, h
|
|
116
|
+
ld e, l
|
|
117
|
+
inc de
|
|
118
|
+
dec bc
|
|
119
|
+
ld a, b
|
|
120
|
+
or a, c
|
|
121
|
+
jr Z, gsinit_bss_done
|
|
122
|
+
ldir ; propagate the 0 across _DATA
|
|
123
|
+
gsinit_bss_done:
|
|
124
|
+
|
|
125
|
+
;; ── Copy `_INITIALIZER` (ROM) → `_INITIALIZED` (RAM). ────────
|
|
126
|
+
;; The value-initialised-statics path: `static uint8_t lives = 3;`
|
|
127
|
+
;; lives in _INITIALIZED at runtime; its initial value sits in
|
|
128
|
+
;; _INITIALIZER in ROM (now correctly ROM-placed, see above).
|
|
89
129
|
ld bc, #l__INITIALIZER
|
|
90
130
|
ld a, b
|
|
91
131
|
or a, c
|
|
@@ -152,12 +152,29 @@ export function lintSdccSource(source, file = "main.c", opts = {}) {
|
|
|
152
152
|
}
|
|
153
153
|
|
|
154
154
|
// ─── __xdata / VRAM byte-copy miscompile ────────────────────────
|
|
155
|
-
// SDCC sm83 miscompiles `for (i...) dst[i] = src[i];` when dst is an
|
|
155
|
+
// SDCC sm83 miscompiles `for (i...) dst[i] = src[i];` ONLY when dst is an
|
|
156
156
|
// __xdata pointer (e.g. into VRAM $8000) — it writes through the return
|
|
157
|
-
// address and crashes the CPU.
|
|
158
|
-
//
|
|
159
|
-
//
|
|
160
|
-
//
|
|
157
|
+
// address and crashes the CPU. A plain WRAM array copy (`static uint8_t
|
|
158
|
+
// rb[78]; ... rb[i]=grid[i];`) is perfectly fine. The old lint flagged the
|
|
159
|
+
// SHAPE unconditionally as a "warning" — every WRAM copy in every genre
|
|
160
|
+
// scaffold cried wolf, training agents to distrust the linter. We now
|
|
161
|
+
// classify the DESTINATION identifier before deciding the severity:
|
|
162
|
+
//
|
|
163
|
+
// • PROVABLY VRAM/__xdata → "warning" (the real crash-class footgun)
|
|
164
|
+
// - dst is declared as a POINTER (`type *dst`) — only pointers can
|
|
165
|
+
// alias __xdata; an indexed write through one is the bug.
|
|
166
|
+
// - dst is a known-VRAM name (contains "vram", or starts with bgmap / tilemap / tiledata / chrram; case-insensitive).
|
|
167
|
+
// - dst is assigned from a cast/literal in $8000-$9FFF anywhere in
|
|
168
|
+
// the source (e.g. `dst = (uint8_t*)0x9800;`).
|
|
169
|
+
// • PLAIN RAM ARRAY → SUPPRESS (declared `type dst[N];` here).
|
|
170
|
+
// • UNKNOWN (bare ident, no decl in this TU) → "info" — visible, not
|
|
171
|
+
// scary. Better to occasionally downgrade a real VRAM case to info
|
|
172
|
+
// than to keep crying wolf on WRAM.
|
|
173
|
+
//
|
|
174
|
+
// The crash cases that MATTER (the documented memcpy_vram footgun) are
|
|
175
|
+
// pointer-to-VRAM, which the "provably VRAM" path still catches as a
|
|
176
|
+
// warning.
|
|
177
|
+
const dstClass = classifyCopyDest(lines);
|
|
161
178
|
for (let i = 0; i < lines.length; i++) {
|
|
162
179
|
const code = lines[i].replace(/\/\/.*$/, "").replace(/\/\*.*?\*\//g, "");
|
|
163
180
|
// indexed-to-indexed copy: ident[idx] = ident[idx]; (same index token)
|
|
@@ -168,17 +185,72 @@ export function lintSdccSource(source, file = "main.c", opts = {}) {
|
|
|
168
185
|
// Require a for-loop driving this copy (this line or the 2 above).
|
|
169
186
|
const ctx = (lines[i] + "\n" + (lines[i - 1] || "") + "\n" + (lines[i - 2] || ""));
|
|
170
187
|
if (!/\bfor\s*\(/.test(ctx)) continue;
|
|
188
|
+
const dst = cp[1];
|
|
189
|
+
// A VRAM-suggestive NAME promotes an otherwise-unknown dest to VRAM even
|
|
190
|
+
// with no decl in this TU (e.g. `vram_buf[i] = tiles[i];`). A declared
|
|
191
|
+
// plain array still wins as "array" (suppress) — names rarely collide.
|
|
192
|
+
const klass = dstClass.get(dst) || (isVramName(dst) ? "vram" : "unknown");
|
|
193
|
+
if (klass === "array") continue; // plain WRAM array — provably safe, suppress
|
|
194
|
+
const isVram = klass === "vram";
|
|
171
195
|
issues.push({
|
|
172
|
-
|
|
196
|
+
// Provably-VRAM → warning (the crash-class footgun). Unknown bare
|
|
197
|
+
// pointer → info (visible, not "your code is broken"). Never critical:
|
|
198
|
+
// even the VRAM case only miscompiles, it isn't an unconditional hang.
|
|
199
|
+
severity: isVram ? "warning" : "info",
|
|
173
200
|
file,
|
|
174
201
|
line: i + 1,
|
|
175
202
|
stage: "lint",
|
|
176
|
-
message:
|
|
177
|
-
|
|
203
|
+
message: isVram
|
|
204
|
+
? `byte-copy loop \`${dst}[${idx1}] = ${cp[3]}[${idx2}]\` into VRAM/__xdata — ${portLabel} miscompiles this`
|
|
205
|
+
: `byte-copy loop \`${dst}[${idx1}] = ${cp[3]}[${idx2}]\` — safe for WRAM arrays, but miscompiles if '${dst}' points into VRAM/__xdata`,
|
|
206
|
+
details: isVram
|
|
207
|
+
? `${portLabel} miscompiles this pattern when '${dst}' points into VRAM ($8000-$9FFF) or another __xdata region — it writes through the return address and crashes the CPU (PC near $002B, sprites/tiles never show). Use \`memcpy_vram(${dst}, ${cp[3]}, n)\` (in gb_runtime.c) instead. See GB TROUBLESHOOTING § the #1 SDCC footgun.`
|
|
208
|
+
: `If '${dst}' is a plain WRAM array (\`type ${dst}[N];\`) this is FINE — ignore. ${portLabel} only miscompiles it when '${dst}' is a pointer into VRAM ($8000-$9FFF)/__xdata, where it writes through the return address and crashes the CPU. If '${dst}' is a VRAM pointer, use \`memcpy_vram(${dst}, ${cp[3]}, n)\` instead. See GB TROUBLESHOOTING § the #1 SDCC footgun.`,
|
|
178
209
|
ref: "xdata-copy-miscompile",
|
|
179
210
|
});
|
|
180
211
|
}
|
|
181
212
|
|
|
213
|
+
// ─── hardcoded $C000-area WRAM pointer overlaps the C statics ───
|
|
214
|
+
// SDCC links `_DATA`/`_INITIALIZED` (value-init statics) + `_BSS` (zero-init
|
|
215
|
+
// statics) at the BOTTOM of WRAM starting $C000. A program that ALSO pokes a
|
|
216
|
+
// hardcoded pointer into that low range (e.g. `(uint8_t*)0xC000`) scribbles
|
|
217
|
+
// over its own statics — the seed of a PRNG, a collision grid, the score —
|
|
218
|
+
// and the symptom looks EXACTLY like an SDCC codegen bug (a 32-bit xorshift
|
|
219
|
+
// that "degenerates" because its seed got clobbered, never a real
|
|
220
|
+
// miscompile). This was the real root cause behind a GBC Columns agent's
|
|
221
|
+
// "monochrome RNG" report (2026-06-08); the math itself compiles correctly.
|
|
222
|
+
//
|
|
223
|
+
// ONLY for the sm83/z80 GB/SMS-family, whose WRAM base is $C000. Flag the
|
|
224
|
+
// low 256 bytes ($C000-$C0FF) where _DATA/_INITIALIZED live (small projects'
|
|
225
|
+
// statics sit here; $C100 is shadow_oam; $C200+ is the documented-safe
|
|
226
|
+
// scratch floor). INFO severity — visible, not "your code is broken": a
|
|
227
|
+
// hardcoded low pointer is occasionally legitimate (e.g. you've checked the
|
|
228
|
+
// map). NEVER critical.
|
|
229
|
+
if (port === "sm83" || port === "z80") {
|
|
230
|
+
// pointer cast/decl/assignment to a $C0xx literal: `(uint8_t*)0xC000`,
|
|
231
|
+
// `uint8_t *p = (uint8_t*)0xC010;` (a pointer cast is required — a bare `p = 0xC0FF;` is NOT matched)
|
|
232
|
+
// Matches a pointer CAST applied to a $C0xx hex literal and captures the four
// hex digits of the address. C hex literals are case-insensitive, so the
// `0x`/`0X` prefix and the leading `C0`/`c0` are accepted in either case
// (previously `(uint8_t*)0xc010` slipped through unflagged). A bare integer
// assignment with no cast in front of the literal is intentionally not matched.
const ptrLit = /\(\s*(?:volatile\s+|const\s+|unsigned\s+|signed\s+)*[A-Za-z_]\w*\s*\*+\s*\)\s*0[xX]([Cc]0[0-9a-fA-F]{2})\b/;
|
|
233
|
+
const seen = new Set();
|
|
234
|
+
for (let i = 0; i < lines.length; i++) {
|
|
235
|
+
const code = lines[i].replace(/\/\/.*$/, "").replace(/\/\*.*?\*\//g, "");
|
|
236
|
+
const m = code.match(ptrLit);
|
|
237
|
+
if (!m) continue;
|
|
238
|
+
const addr = parseInt(m[1], 16); // already the full $C0xx address
|
|
239
|
+
const key = i + ":" + addr;
|
|
240
|
+
if (seen.has(key)) continue;
|
|
241
|
+
seen.add(key);
|
|
242
|
+
issues.push({
|
|
243
|
+
severity: "info",
|
|
244
|
+
file,
|
|
245
|
+
line: i + 1,
|
|
246
|
+
stage: "lint",
|
|
247
|
+
message: `hardcoded WRAM pointer $${addr.toString(16).toUpperCase()} overlaps the C static-data segment ($C000-)`,
|
|
248
|
+
details: `${portLabel} links your value- and zero-initialised \`static\` globals (PRNG seeds, grids, scores) at the BOTTOM of WRAM from $C000. A hardcoded pointer into $C000-$C0FF can scribble over them — the classic symptom is a PRNG/array that looks "miscompiled" (e.g. an xorshift whose seed got clobbered so every roll is identical) when the math is actually fine. Prefer a \`static\` array and let the linker place it; if you must hardcode, use $C200+ and verify with the linker map (build with includeSymbols:true → check s__DATA/s__BSS). $C100 is shadow_oam. See ${port === "sm83" ? "GB/GBC" : "SMS/GG"} SDCC_GOTCHAS.md § "sm83 codegen traps in plain game logic".`,
|
|
249
|
+
ref: "wram-static-overlap",
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
182
254
|
// Mid-block declarations (rough heuristic — flags any `type name [=...] ;`
|
|
183
255
|
// that appears after a non-decl, non-blank statement at deeper indent
|
|
184
256
|
// than the function opening brace).
|
|
@@ -206,6 +278,90 @@ export function lintSources(sources, opts = {}) {
|
|
|
206
278
|
|
|
207
279
|
// ─── helpers ───────────────────────────────────────────────────────
|
|
208
280
|
|
|
281
|
+
/**
 * Decide, from spelling alone, whether an identifier looks like a VRAM
 * symbol (GB/GBC/SMS naming conventions). Matching is case-insensitive.
 *
 * Two rules, either of which is sufficient:
 *   - the name contains "vram" anywhere (vram_ptr, pVRAM, VRAMbase), or
 *   - the name STARTS with one of the usual BG-map / tile-data spellings:
 *     bgmap, tilemap, tiledata, chrram, _VRAM.
 *
 * No declaration is consulted here; the verdict depends on the name only.
 *
 * @param {string} n identifier to test
 * @returns {boolean} true when the name matches either rule
 */
function isVramName(n) {
  const mentionsVram = /vram/i;
  const knownPrefix = /^(?:bgmap|tilemap|tiledata|chrram|_VRAM)/i;

  if (mentionsVram.test(n)) {
    return true;
  }
  return knownPrefix.test(n);
}
|
|
292
|
+
|
|
293
|
+
/**
 * Build a name → class map for the identifiers declared or assigned in a C
 * translation unit, for use when judging the destination of a copy loop:
 *
 *   - "vram"  — the name is declared as a pointer, or is assigned / initialised
 *               with a literal in 0x8000–0x9FFF, or is spelled like a VRAM
 *               symbol (see isVramName) → treat as the dangerous case.
 *   - "array" — the name is declared as a plain `type name[N]` array and
 *               nothing marked it "vram" → treat as a safe RAM array.
 *   - absent  — nothing was learned about the name (unknown).
 *
 * This is a whole-TU pass: every line is scanned, so a `uint8_t *dst;` decl
 * far from the loop, or a later `dst = (uint8_t*)0x9800;`, still counts.
 *
 * Precedence: "vram" always wins over "array", regardless of line order.
 *
 * Comments are removed before matching. Block comments are tracked ACROSS
 * lines, so declarations inside a multi-line (commented-out) region are
 * ignored, and a `//` inside a block comment does not truncate the live code
 * after it. String literals are not parsed: comment markers inside a string
 * are still treated as comments (heuristic limitation).
 *
 * @param {string[]} lines source split into lines
 * @returns {Map<string,"vram"|"array">}
 */
function classifyCopyDest(lines) {
  /** @type {Map<string,"vram"|"array">} */
  const klass = new Map();
  const setVram = (n) => { klass.set(n, "vram"); };
  // Never downgrade a name that is already known to be VRAM.
  const setArray = (n) => { if (klass.get(n) !== "vram") klass.set(n, "array"); };

  // A literal/cast value lands in VRAM if it's 0x8000–0x9FFF.
  const inVramRange = (hexOrDec) => {
    const v = /^0x/i.test(hexOrDec) ? Number.parseInt(hexOrDec, 16) : Number.parseInt(hexOrDec, 10);
    return Number.isFinite(v) && v >= 0x8000 && v <= 0x9fff;
  };

  // Type keywords that introduce a declaration (subset is fine — we only
  // need to tell "pointer decl" from "array decl").
  const TYPE = "(?:unsigned\\s+|signed\\s+)?(?:char|short|int|long|void|u?int(?:8|16|32|64)_t|u8|u16|u32|u64|uint8|uint16|uint32|uint64|int8|int16|int32|int64|size_t|[A-Z][A-Za-z0-9_]*_t)";
  const QUAL = "(?:static\\s+|const\\s+|register\\s+|volatile\\s+|extern\\s+|auto\\s+|__xdata\\s+|__at\\s*\\([^)]*\\)\\s*)*";
  // Pointer declaration: `<quals> <type> * name` (one or more `*`).
  const ptrDeclRe = new RegExp(`\\b${QUAL}${TYPE}\\s*\\*+\\s*([A-Za-z_]\\w*)`, "g");
  // Array declaration: `<quals> <type> name[ ... ]` (NOT a pointer).
  const arrDeclRe = new RegExp(`\\b${QUAL}${TYPE}\\s+([A-Za-z_]\\w*)\\s*\\[`, "g");
  // Pointer assigned a VRAM literal/cast: name = (cast?) 0x8xxx/0x9xxx
  //   e.g.  dst = (uint8_t*)0x9800;   p = (void*)0x8000;   q = 0x8800;
  const vramAssignRe = /\b([A-Za-z_]\w*)\s*=\s*(?:\([^)]*\)\s*)?(0x[0-9a-fA-F]+|\d{4,})\b/g;
  // Pointer DECL with an inline VRAM initializer:
  //   uint8_t *dst = (uint8_t*)0x8000;
  const ptrInitVramRe = new RegExp(`\\b${QUAL}${TYPE}\\s*\\*+\\s*([A-Za-z_]\\w*)\\s*=\\s*(?:\\([^)]*\\)\\s*)?(0x[0-9a-fA-F]+|\\d{4,})`, "g");

  // True while the scan is inside a `/* … */` that has not closed yet.
  let inBlockComment = false;

  // Return `line` with comments removed, scanning left to right so that the
  // FIRST comment marker decides (a `//` inside `/* … */` is not a line
  // comment, and a `/*` after `//` does not open a block comment).
  const stripComments = (line) => {
    let kept = "";
    let pos = 0;
    while (pos < line.length) {
      if (inBlockComment) {
        const close = line.indexOf("*/", pos);
        if (close === -1) break; // comment continues on the next line
        pos = close + 2;
        inBlockComment = false;
        continue;
      }
      const lineAt = line.indexOf("//", pos);
      const blockAt = line.indexOf("/*", pos);
      if (lineAt === -1 && blockAt === -1) {
        kept += line.slice(pos);
        break;
      }
      if (blockAt === -1 || (lineAt !== -1 && lineAt < blockAt)) {
        // Line comment comes first: everything after it is dropped.
        kept += line.slice(pos, lineAt);
        break;
      }
      kept += line.slice(pos, blockAt);
      inBlockComment = true;
      pos = blockAt + 2;
    }
    return kept;
  };

  for (let i = 0; i < lines.length; i++) {
    const code = stripComments(lines[i]);
    if (code === "") continue;

    // The regexes are global, so reset lastIndex before each fresh scan.

    // 1) pointer decl with a VRAM-range initializer → VRAM (strongest).
    ptrInitVramRe.lastIndex = 0;
    for (let m; (m = ptrInitVramRe.exec(code)); ) {
      if (inVramRange(m[2])) setVram(m[1]);
    }
    // 2) any name assigned a VRAM-range literal/cast → VRAM.
    vramAssignRe.lastIndex = 0;
    for (let m; (m = vramAssignRe.exec(code)); ) {
      if (inVramRange(m[2])) setVram(m[1]);
    }
    // 3) pointer declarations → VRAM-candidate (only pointers can alias
    //    __xdata; an indexed write through one is the documented bug).
    ptrDeclRe.lastIndex = 0;
    for (let m; (m = ptrDeclRe.exec(code)); ) setVram(m[1]);
    // 4) plain array declarations → safe RAM array (unless already VRAM).
    arrDeclRe.lastIndex = 0;
    for (let m; (m = arrDeclRe.exec(code)); ) setArray(m[1]);
  }

  // 5) name-based VRAM override: any classified name spelled like VRAM is
  //    VRAM even if a same-named array decl classified it as "array".
  //    Only existing keys are rewritten, so iterating while setting is safe.
  for (const n of klass.keys()) if (isVramName(n)) setVram(n);

  return klass;
}
|
|
364
|
+
|
|
209
365
|
/**
|
|
210
366
|
* Detect mid-block variable declarations (C89 violation). Simple state
|
|
211
367
|
* machine: track block depth + whether we've seen a non-decl statement
|