romdevtools 0.28.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/AGENTS.md +53 -43
  2. package/CHANGELOG.md +91 -0
  3. package/README.md +3 -3
  4. package/examples/README.md +7 -7
  5. package/examples/atari2600/templates/platformer.asm +1225 -332
  6. package/examples/atari2600/templates/puzzle.asm +1056 -0
  7. package/examples/atari2600/templates/racing.asm +906 -275
  8. package/examples/atari2600/templates/shmup.asm +1031 -239
  9. package/examples/atari2600/templates/sports.asm +1135 -253
  10. package/examples/atari7800/templates/platformer.c +991 -156
  11. package/examples/atari7800/templates/puzzle.c +1091 -148
  12. package/examples/atari7800/templates/racing.c +952 -124
  13. package/examples/atari7800/templates/shmup.c +812 -134
  14. package/examples/atari7800/templates/sports.c +820 -184
  15. package/examples/c64/templates/platformer.c +879 -164
  16. package/examples/c64/templates/puzzle.c +855 -178
  17. package/examples/c64/templates/racing.c +873 -97
  18. package/examples/c64/templates/shmup.c +757 -161
  19. package/examples/c64/templates/sports.c +755 -100
  20. package/examples/gb/templates/platformer.c +841 -179
  21. package/examples/gb/templates/puzzle.c +986 -246
  22. package/examples/gb/templates/racing.c +754 -174
  23. package/examples/gb/templates/shmup.c +673 -175
  24. package/examples/gb/templates/sports.c +790 -159
  25. package/examples/gba/templates/platformer.c +626 -165
  26. package/examples/gba/templates/puzzle.c +519 -269
  27. package/examples/gba/templates/racing.c +511 -206
  28. package/examples/gba/templates/shmup.c +564 -179
  29. package/examples/gba/templates/sports.c +454 -174
  30. package/examples/gbc/templates/platformer.c +944 -180
  31. package/examples/gbc/templates/puzzle.c +363 -109
  32. package/examples/gbc/templates/racing.c +884 -180
  33. package/examples/gbc/templates/shmup.c +821 -185
  34. package/examples/gbc/templates/sports.c +870 -162
  35. package/examples/genesis/templates/platformer.c +747 -129
  36. package/examples/genesis/templates/puzzle.c +694 -261
  37. package/examples/genesis/templates/racing.c +726 -203
  38. package/examples/genesis/templates/shmup.c +535 -142
  39. package/examples/genesis/templates/sports.c +495 -158
  40. package/examples/gg/templates/platformer.c +880 -215
  41. package/examples/gg/templates/puzzle.c +875 -216
  42. package/examples/gg/templates/racing.c +915 -172
  43. package/examples/gg/templates/shmup.c +714 -191
  44. package/examples/gg/templates/sports.c +732 -129
  45. package/examples/lynx/templates/platformer.c +604 -69
  46. package/examples/lynx/templates/puzzle.c +498 -158
  47. package/examples/lynx/templates/racing.c +538 -102
  48. package/examples/lynx/templates/shmup.c +458 -131
  49. package/examples/lynx/templates/sports.c +496 -72
  50. package/examples/msx/platformer/main.c +649 -162
  51. package/examples/msx/puzzle/main.c +742 -240
  52. package/examples/msx/racing/main.c +669 -178
  53. package/examples/msx/shmup/main.c +460 -178
  54. package/examples/msx/sports/main.c +592 -126
  55. package/examples/nes/templates/platformer.c +589 -171
  56. package/examples/nes/templates/puzzle.c +563 -242
  57. package/examples/nes/templates/racing.c +502 -208
  58. package/examples/nes/templates/shmup.c +339 -145
  59. package/examples/nes/templates/sports.c +341 -183
  60. package/examples/pce/platformer/main.c +874 -205
  61. package/examples/pce/puzzle/main.c +802 -287
  62. package/examples/pce/racing/main.c +783 -208
  63. package/examples/pce/shmup/main.c +638 -212
  64. package/examples/pce/sports/main.c +586 -169
  65. package/examples/porting-across-platforms/README.md +1 -1
  66. package/examples/sms/templates/platformer.c +762 -177
  67. package/examples/sms/templates/puzzle.c +752 -212
  68. package/examples/sms/templates/racing.c +808 -145
  69. package/examples/sms/templates/shmup.c +599 -162
  70. package/examples/sms/templates/sports.c +630 -122
  71. package/examples/snes/templates/music_demo.c +7 -0
  72. package/examples/snes/templates/platformer-data.asm +123 -24
  73. package/examples/snes/templates/platformer-hdr.asm +57 -0
  74. package/examples/snes/templates/platformer.c +586 -165
  75. package/examples/snes/templates/puzzle-data.asm +116 -21
  76. package/examples/snes/templates/puzzle-hdr.asm +57 -0
  77. package/examples/snes/templates/puzzle.c +614 -235
  78. package/examples/snes/templates/racing-data.asm +390 -32
  79. package/examples/snes/templates/racing-hdr.asm +57 -0
  80. package/examples/snes/templates/racing.c +807 -196
  81. package/examples/snes/templates/shmup-data.asm +87 -29
  82. package/examples/snes/templates/shmup-hdr.asm +57 -0
  83. package/examples/snes/templates/shmup.c +459 -198
  84. package/examples/snes/templates/sports-data.asm +48 -2
  85. package/examples/snes/templates/sports-hdr.asm +57 -0
  86. package/examples/snes/templates/sports.c +414 -163
  87. package/package.json +12 -12
  88. package/src/cores/wasm/bluemsx_libretro.js +1 -1
  89. package/src/cores/wasm/bluemsx_libretro.wasm +0 -0
  90. package/src/cores/wasm/fceumm_libretro.js +1 -1
  91. package/src/cores/wasm/fceumm_libretro.wasm +0 -0
  92. package/src/cores/wasm/gambatte_libretro.js +1 -1
  93. package/src/cores/wasm/gambatte_libretro.wasm +0 -0
  94. package/src/cores/wasm/geargrafx_libretro.js +1 -1
  95. package/src/cores/wasm/geargrafx_libretro.wasm +0 -0
  96. package/src/cores/wasm/genesis_plus_gx_libretro.js +1 -1
  97. package/src/cores/wasm/genesis_plus_gx_libretro.wasm +0 -0
  98. package/src/cores/wasm/handy_libretro.js +1 -1
  99. package/src/cores/wasm/handy_libretro.wasm +0 -0
  100. package/src/cores/wasm/mgba_libretro.js +1 -1
  101. package/src/cores/wasm/mgba_libretro.wasm +0 -0
  102. package/src/cores/wasm/prosystem_libretro.js +1 -1
  103. package/src/cores/wasm/prosystem_libretro.wasm +0 -0
  104. package/src/cores/wasm/snes9x_libretro.js +1 -1
  105. package/src/cores/wasm/snes9x_libretro.wasm +0 -0
  106. package/src/cores/wasm/stella2014_libretro.js +1 -1
  107. package/src/cores/wasm/stella2014_libretro.wasm +0 -0
  108. package/src/cores/wasm/vice_x64_libretro.js +1 -1
  109. package/src/cores/wasm/vice_x64_libretro.wasm +0 -0
  110. package/src/host/LibretroHost.js +84 -8
  111. package/src/http/tool-registry.js +11 -11
  112. package/src/mcp/tools/cheats.js +2 -1
  113. package/src/mcp/tools/frame.js +3 -2
  114. package/src/mcp/tools/index.js +3 -3
  115. package/src/mcp/tools/input.js +5 -4
  116. package/src/mcp/tools/lifecycle.js +6 -4
  117. package/src/mcp/tools/memory.js +131 -24
  118. package/src/mcp/tools/platform-docs.js +1 -1
  119. package/src/mcp/tools/preview-tile.js +6 -2
  120. package/src/mcp/tools/project.js +1098 -130
  121. package/src/mcp/tools/record.js +6 -7
  122. package/src/mcp/tools/rom-id.js +5 -1
  123. package/src/mcp/tools/run-until.js +12 -4
  124. package/src/mcp/tools/snippets.js +6 -6
  125. package/src/mcp/tools/sprite-pipeline.js +14 -2
  126. package/src/mcp/tools/state.js +2 -1
  127. package/src/mcp/tools/tile-inspect.js +8 -1
  128. package/src/mcp/tools/toolchain.js +12 -1
  129. package/src/mcp/tools/watch-memory.js +53 -10
  130. package/src/observer/bus.js +73 -0
  131. package/src/observer/livestream.html +4 -2
  132. package/src/observer/tool-wrap.js +17 -14
  133. package/src/platforms/_guides/ROMHACKING_PLAYBOOK.md +32 -3
  134. package/src/platforms/atari7800/MENTAL_MODEL.md +5 -5
  135. package/src/platforms/atari7800/TROUBLESHOOTING.md +5 -5
  136. package/src/platforms/c64/MENTAL_MODEL.md +11 -4
  137. package/src/platforms/c64/TROUBLESHOOTING.md +13 -0
  138. package/src/platforms/gb/MENTAL_MODEL.md +3 -3
  139. package/src/platforms/gb/TROUBLESHOOTING.md +61 -8
  140. package/src/platforms/gb/lib/c/README.md +10 -11
  141. package/src/platforms/gb/lib/c/gb_crt0.s +27 -3
  142. package/src/platforms/gb/lib/c/patch-header.js +13 -3
  143. package/src/platforms/gba/MENTAL_MODEL.md +4 -4
  144. package/src/platforms/gba/TROUBLESHOOTING.md +3 -3
  145. package/src/platforms/gba/lib/c/gba_sfx.c +40 -0
  146. package/src/platforms/gba/lib/c/gba_sfx.h +10 -0
  147. package/src/platforms/gbc/MENTAL_MODEL.md +4 -4
  148. package/src/platforms/gbc/TROUBLESHOOTING.md +4 -4
  149. package/src/platforms/gbc/UPSTREAM_SOURCES.md +1 -1
  150. package/src/platforms/gbc/lib/c/README.md +10 -11
  151. package/src/platforms/gbc/lib/c/gb_crt0.s +26 -3
  152. package/src/platforms/gbc/lib/c/patch-header.js +13 -3
  153. package/src/platforms/genesis/MENTAL_MODEL.md +3 -3
  154. package/src/platforms/genesis/TROUBLESHOOTING.md +2 -2
  155. package/src/platforms/gg/MENTAL_MODEL.md +4 -4
  156. package/src/platforms/gg/TROUBLESHOOTING.md +3 -3
  157. package/src/platforms/gg/UPSTREAM_SOURCES.md +1 -1
  158. package/src/platforms/gg/lib/c/joypad_read.c +29 -0
  159. package/src/platforms/lynx/MENTAL_MODEL.md +1 -1
  160. package/src/platforms/lynx/TROUBLESHOOTING.md +3 -3
  161. package/src/platforms/msx/MENTAL_MODEL.md +5 -5
  162. package/src/platforms/msx/TROUBLESHOOTING.md +2 -2
  163. package/src/platforms/msx/lib/c/msx_hw.h +1 -0
  164. package/src/platforms/msx/lib/c/msx_vdp.c +25 -0
  165. package/src/platforms/nes/MENTAL_MODEL.md +2 -2
  166. package/src/platforms/nes/lib/c/nes_runtime.c +149 -34
  167. package/src/platforms/nes/lib/c/nes_runtime.h +34 -1
  168. package/src/platforms/pce/MENTAL_MODEL.md +5 -5
  169. package/src/platforms/pce/TROUBLESHOOTING.md +1 -1
  170. package/src/platforms/pce/lib/c/pce_hw.h +11 -0
  171. package/src/platforms/pce/lib/c/pce_video.c +32 -0
  172. package/src/platforms/sms/MENTAL_MODEL.md +6 -6
  173. package/src/platforms/snes/MENTAL_MODEL.md +2 -2
  174. package/src/platforms/snes/TROUBLESHOOTING.md +40 -1
  175. package/src/toolchains/cc65/presets/nes/chr-ram-runtime.cfg +13 -8
  176. package/src/toolchains/cc65/presets/nes/chr-ram-runtime.crt0.s +58 -5
  177. package/src/toolchains/cc65/presets/nes/chr-rom.crt0.s +52 -3
  178. package/src/toolchains/cc65/presets/pce/rom32k.cfg +52 -0
  179. package/src/toolchains/index.js +27 -11
@@ -1,202 +1,1145 @@
1
- /* puzzle.c — Atari 7800 single falling block (minimal).
1
+ /* ── puzzle.c — Atari 7800 falling-trio match puzzle (complete example) ───────
2
2
  *
3
- * SCAFFOLD CAVEAT: the original "match-3 6×12 grid" scaffold is
4
- * deferred. The per-scanline DL pool approach used in sports.c works
5
- * with up to ~3 objects per line within RAM budget; a full 6-column
6
- * grid (6 objects per line) overflows the 7800's 2 KB RAM1.
3
+ * PIVOT PURGE — a COMPLETE, working game: title screen, 1P marathon (levels +
4
+ * cascade chains) and 2P SIMULTANEOUS VERSUS (split boards, garbage attacks,
5
+ * both wells falling at once on the two joystick ports), in-session hi-score,
6
+ * music + SFX, full teaching markers — and the 7800's signature constraint
7
+ * worked the OTHER way from the shmup: where the dense shooter spreads 30
8
+ * objects so only ~3 ever share a scanline, a puzzle WELL is the worst case
9
+ * for MARIA — a whole ROW of 6 gems lands on the same 8 scanlines at once,
10
+ * which is 6 objects per line, double the 3-per-line DMA budget. The fix is
11
+ * the load-bearing idiom of this file: each well row is drawn as ONE wide
12
+ * DL object built from a RAM canvas (the same canvas-as-a-drawable trick the
13
+ * text path uses), so a 6-wide row costs ONE object per line, not six. That
14
+ * is what makes 2P (two wells = TWO objects per line) fit at all.
7
15
  *
8
- * This minimal version demonstrates the working MARIA pattern (see
9
- * default.c + MENTAL_MODEL.md) with one falling block. Extend by
10
- * adding more rows of DLs to render landed pieces but keep the
11
- * total DL pool small.
16
+ * The game: a falling-trio match. A trio of coloured cells drops into a 6x12
17
+ * well; LEFT/RIGHT move it, the fire button (port joystick) CYCLES its three
18
+ * colours (the 7800 pad has one button — cycle replaces the NES A/B rotate),
19
+ * DOWN soft-drops. When the trio lands, any straight run of 3+ same-coloured
20
+ * cells (horizontal, vertical, or diagonal) clears; survivors fall and
21
+ * cascades chain for multiplied score.
12
22
  *
13
- * Joystick LEFT/RIGHT moves the block, FIRE speeds it up. When it
14
- * lands at bottom, it respawns at top with a new colour.
23
+ * 2P VERSUS design (simultaneous, split board): two 6x12 wells side by side —
24
+ * P1 left on joystick port 0, P2 right on joystick port 1 both falling at
25
+ * once. Clears ATTACK: each chain step sends one garbage row (random cells
26
+ * with one gap, capped at 4 per attack) rising from the bottom of the
27
+ * opponent's well. First player whose stack reaches the rim loses. Both wells
28
+ * update each frame; the whole thing fits the MARIA budget because each well
29
+ * row is ONE canvas-backed DL object (see the idiom above) — two wells = at
30
+ * most two objects per scanline, inside the 3-per-line ceiling.
31
+ *
32
+ * THIS FILE IS MEANT TO BE FORKED AND MODIFIED into your own game — even a
33
+ * very different one. The markers tell you what's what:
34
+ * HARDWARE IDIOM (load-bearing) — dodges a documented 7800/MARIA footgun;
35
+ * reshape your gameplay around it (see TROUBLESHOOTING before changing).
36
+ * GAME LOGIC (clay) — match rules, garbage, tuning, art: reshape freely.
37
+ *
38
+ * What depends on what:
39
+ * atari7800_sfx.{h,c} — TIA one-shot effects (we give it voice 1; the
40
+ * inline music player below owns voice 0 — TIA only HAS two voices).
41
+ * cc65's atari7800 target crt0 + atari7800.cfg — boot, BSS in RAM1
42
+ * ($1800-$203F), C parameter stack at the TOP of RAM3 growing DOWN
43
+ * ($2800 →). This game claims the BOTTOM of RAM3 (from $2200 up) for its
44
+ * display-list pool / title canvases — see the RAM MAP below.
45
+ *
46
+ * PERSISTENCE — honest note: the canonical 7800 save path is the High Score
47
+ * Cart (HSC): a pass-through cartridge with 2KB battery RAM at $1000-$17FF
48
+ * plus a directory ROM. The bundled prosystem core does NOT implement HSC
49
+ * (probed 2026-06: retro_get_memory(SAVE_RAM) size = 0, and the core binary
50
+ * has no HSC code at all), so this game keeps the hi-score IN-SESSION ONLY
51
+ * (it survives play → title → play, dies on power-off). Do not fake
52
+ * persistence the hardware path can't back — if a future core round adds
53
+ * HSC, wire hiscore into $1000-$17FF and it becomes real.
54
+ *
55
+ * Frame budget (NTSC): steady state is tiny — input + one gravity step per
56
+ * well + the few canvas rows that changed. The spike is resolve_board() at
57
+ * lock time (the full 4-direction match scan over 72 cells in cc65 code): it
58
+ * can spill a frame or two past vblank. That's fine — MARIA keeps re-walking
59
+ * the same display lists at 60Hz, so a slow CPU tick shows as (at most) a
60
+ * one-frame hitch on the falling trio, never corruption. That budget only
61
+ * holds because of the #pragma optimize(on) right below — read its comment
62
+ * before deleting it.
15
63
  */
64
+
16
65
  #include <stdint.h>
66
+ #include <string.h>
17
67
  #include "atari7800_sfx.h"
18
68
 
69
+ /* ── HARDWARE IDIOM (load-bearing — reshape gameplay around this; see TROUBLESHOOTING) ──
70
+ * cc65 SHIPS WITH ITS OPTIMIZER OFF, and this toolchain does not pass -O —
71
+ * each translation unit must opt in. Without this pragma the unoptimized
72
+ * emit pass made the main loop take ~9 frames per sim tick instead of 1-2
73
+ * (measured on the 7800 shmup: 8.8 → 1.7 frames/tick on prosystem), and
74
+ * every TICK-DENOMINATED timer silently stretched 4-5x in wall-clock terms:
75
+ * the gravity delay, the ready-pause, the lock thunk — all ~4.5x too slow, so
76
+ * pieces crawled and the game "looked broken". But the DLL, the zone
77
+ * pointers, and every canvas were byte-perfect when read back from RAM. The
78
+ * footgun generalizes: on a 1.79MHz 6502 the C optimizer is not a nicety, it
79
+ * IS the frame budget, and a too-slow loop shows up as broken GAME RULES
80
+ * (stretched timers, missed 1-frame input edges), not as a slow-looking
81
+ * screen — MARIA keeps repainting the same display lists at a rock-steady
82
+ * 60Hz no matter how far behind the CPU falls. If your fork feels like
83
+ * molasses or "ignores" short button taps, check this pragma is still here
84
+ * before debugging the display lists. */
85
+ #pragma optimize(on)
86
+
87
+ /* The title screen renders this — examples({op:'fork'}) stamps your game's
88
+ * name here automatically. Keep it ≤16 chars of A-Z 0-9 space dash. */
89
+ #define GAME_TITLE "PIVOT PURGE"
90
+
91
+ /* ── MARIA + TIA + RIOT registers (full list in MENTAL_MODEL.md) ── */
19
92
  #define BACKGRND (*(volatile uint8_t*)0x20)
20
93
  #define P0C1 (*(volatile uint8_t*)0x21)
21
94
  #define P0C2 (*(volatile uint8_t*)0x22)
22
95
  #define P0C3 (*(volatile uint8_t*)0x23)
23
96
  #define P1C1 (*(volatile uint8_t*)0x25)
24
- #define P2C1 (*(volatile uint8_t*)0x29)
97
+ #define P1C2 (*(volatile uint8_t*)0x26)
98
+ #define P1C3 (*(volatile uint8_t*)0x27)
25
99
  #define MSTAT (*(volatile uint8_t*)0x28)
100
+ #define P2C1 (*(volatile uint8_t*)0x29)
101
+ #define P2C2 (*(volatile uint8_t*)0x2A)
102
+ #define P2C3 (*(volatile uint8_t*)0x2B)
26
103
  #define DPPH (*(volatile uint8_t*)0x2C)
104
+ #define P3C1 (*(volatile uint8_t*)0x2D)
105
+ #define P3C2 (*(volatile uint8_t*)0x2E)
106
+ #define P3C3 (*(volatile uint8_t*)0x2F)
27
107
  #define DPPL (*(volatile uint8_t*)0x30)
108
+ #define P4C1 (*(volatile uint8_t*)0x31)
109
+ #define P4C2 (*(volatile uint8_t*)0x32)
110
+ #define P4C3 (*(volatile uint8_t*)0x33)
28
111
  #define CHARBASE (*(volatile uint8_t*)0x34)
112
+ #define P5C1 (*(volatile uint8_t*)0x35)
29
113
  #define OFFSET (*(volatile uint8_t*)0x38)
114
+ #define P6C1 (*(volatile uint8_t*)0x39)
30
115
  #define CTRL (*(volatile uint8_t*)0x3C)
31
- #define SWCHA (*(volatile uint8_t*)0x280)
32
- #define INPT4 (*(volatile uint8_t*)0x0C)
33
-
34
- /* SWCHA bit order is Right(0x80)/Left(0x40)/Down(0x20)/Up(0x10) — the
35
- * old 0x20/0x10 masks here were the DOWN/UP bits, so the stick's
36
- * vertical axis steered horizontally. */
37
- #define JOY_LEFT 0x40
38
- #define JOY_RIGHT 0x80
39
-
40
- #define COLS 8
41
- #define CELL_W_PIX 8
42
- #define TOP_Y 40
43
- #define BOT_Y 180
44
-
45
- /* Solid 8-row block, 2 bytes wide (= 8 pixels in 160A). */
46
- static const uint8_t block_row0[2] = { 0xFF, 0xFF };
47
- static const uint8_t block_row1[2] = { 0xFF, 0xFF };
48
- static const uint8_t block_row2[2] = { 0xFF, 0xFF };
49
- static const uint8_t block_row3[2] = { 0xFF, 0xFF };
50
- static const uint8_t block_row4[2] = { 0xFF, 0xFF };
51
- static const uint8_t block_row5[2] = { 0xFF, 0xFF };
52
- static const uint8_t block_row6[2] = { 0xFF, 0xFF };
53
- static const uint8_t block_row7[2] = { 0xFF, 0xFF };
54
-
55
- #define MK_DL(name) static uint8_t name[7] = { 0, 0x40, 0, 0x1E, 80, 0, 0 }
56
- MK_DL(dl_row0); MK_DL(dl_row1); MK_DL(dl_row2); MK_DL(dl_row3);
57
- MK_DL(dl_row4); MK_DL(dl_row5); MK_DL(dl_row6); MK_DL(dl_row7);
116
+ #define P7C1 (*(volatile uint8_t*)0x3D)
58
117
 
59
- static uint8_t dl_empty[2] = { 0, 0 };
118
+ /* TIA audio (shared with the music player below; atari7800_sfx.c has the
119
+ * same defines — the chip is tiny enough that duplicating 6 lines beats a
120
+ * header dependency the fork machinery would have to carry). */
121
+ #define AUDC0 (*(volatile uint8_t*)0x15)
122
+ #define AUDC1 (*(volatile uint8_t*)0x16)
123
+ #define AUDF0 (*(volatile uint8_t*)0x17)
124
+ #define AUDF1 (*(volatile uint8_t*)0x18)
125
+ #define AUDV0 (*(volatile uint8_t*)0x19)
126
+ #define AUDV1 (*(volatile uint8_t*)0x1A)
127
+
128
+ #define SWCHA (*(volatile uint8_t*)0x280)
129
+ #define INPT4 (*(volatile uint8_t*)0x0C) /* P1 fire, active low (bit 7) */
130
+ #define INPT5 (*(volatile uint8_t*)0x0D) /* P2 fire, active low (bit 7) */
131
+
132
+ /* ── HARDWARE IDIOM (load-bearing — reshape gameplay around this; see TROUBLESHOOTING) ──
133
+ * SWCHA joystick bit order — the #1 7800 input footgun. After the ~SWCHA
134
+ * invert, port 0 (left jack) lives in the HIGH nibble as
135
+ * Right($80) Left($40) Down($20) Up($10), and port 1 (right jack) in the
136
+ * LOW nibble as Right($08) Left($04) Down($02) Up($01). Writing the masks
137
+ * in "natural reading order" (UP=0x80…) is exactly REVERSED and makes the
138
+ * stick's vertical axis steer horizontally — a bug weird enough to
139
+ * misdiagnose as a core problem. Verified bit-by-bit against prosystem.
140
+ * 2P versus uses BOTH ports: player 0 reads the high nibble + INPT4 fire,
141
+ * player 1 the low nibble + INPT5 fire. */
142
+ #define J1_RIGHT 0x80
143
+ #define J1_LEFT 0x40
144
+ #define J1_DOWN 0x20
145
+ #define J1_UP 0x10
146
+ #define J2_RIGHT 0x08
147
+ #define J2_LEFT 0x04
148
+ #define J2_DOWN 0x02
149
+ #define J2_UP 0x01
150
+
151
+ /* ════════════════════════════════════════════════════════════════════════
152
+ * RAM MAP — the 7800 gives you 4KB ($1800-$27FF) and the stock cc65 config
153
+ * only hands the linker the first 2112 bytes of it:
154
+ *
155
+ * $1800-$203F RAM1 — cc65 DATA + BSS (everything `static` below)
156
+ * $2040-$20FF (gap the cc65 cfg skips — unused here)
157
+ * $2100-$213F RAM2 — unused here
158
+ * $2200-$275D RAM3 bottom — OUR display-list pool / title-canvas arena
159
+ * (POOLB): raw pointer, invisible to the linker, 1358 bytes
160
+ * (97 pool lines; the wells need more BSS so more pool lives
161
+ * here than in the shmup — see THE DISPLAY-LIST POOL)
162
+ * $275E-$27FF RAM3 top — cc65 C parameter stack (crt0 starts it at $2800
163
+ * growing DOWN; ~162 bytes is plenty for these call depths,
164
+ * but if you add deep recursion, shrink the boards/canvases
165
+ * before growing pool_a back into BSS)
166
+ * ════════════════════════════════════════════════════════════════════════ */
167
+ #define POOLB ((uint8_t*)0x2200)
168
+
169
+ /* ── Screen layout (243 NTSC zone-lines; the visible frame is ~lines 9-232) ──
170
+ * lines 0- 15 blank (top overscan) 1 DLL entry, 16 tall
171
+ * lines 16- 23 HUD text row (RAM canvas) 8 entries, 1 tall each
172
+ * lines 24- 25 divider band 1 entry, 2 tall
173
+ * lines 26-145 THE WELLS — 120 one-line zones 120 entries (the pool)
174
+ * lines 146-147 base band (well floor surface) 1 entry, 2 tall
175
+ * lines 148-242 decor stripes (cabinet glow) 12 entries, 8/7 tall
176
+ * Total: 143 DLL entries = 429 bytes (vs 729 for the naive all-1-line DLL —
177
+ * mixed zone heights are how real 7800 games keep the DLL small).
178
+ * The WELL pool holds the two wells' row objects, the well frames, AND the
179
+ * falling trios — every one of them is a display-list object (no tilemap). */
180
+ #define FIELD_LINES 120
181
+ #define FIELD_DLL_OFF 30 /* byte offset of well-area entry 0 in dll[] */
182
+
183
+ /* ── GAME LOGIC (clay — reshape freely) ──────────────────────────────────────
184
+ * Board geometry. A 6-wide, 12-tall well; cell colours 1..3, 0 = empty.
185
+ * Each cell is CELL_PX pixels (8 wide × 8 tall), so a well is 48px wide and
186
+ * 96 zone-lines tall. WELL_LINE0 is the well's top zone-line in the pool. */
187
+ #define GRID_W 6
188
+ #define GRID_H 12
189
+ #define CELL_PX 8
190
+ #define WELL_LINES (GRID_H * CELL_PX) /* 96 zone-lines */
191
+ #define WELL_LINE0 12 /* well top at pool line 12 */
192
+ #define EMPTY 0
193
+
194
+ /* Well X (left pixel) per layout: 1P single centred well, 2P split board. */
195
+ #define WELL_1P_X 56
196
+ #define WELL_VS_P0 24
197
+ #define WELL_VS_P1 88
198
+
199
+ /* ── GAME LOGIC (clay — reshape freely) ──────────────────────────────────────
200
+ * Cell art. 160A mode: 1 byte = 4 pixels of 2 bits each; pixel value 1/2/3 =
201
+ * colour 1/2/3 of the palette the DL entry names, 0 = transparent. The settled
202
+ * well cells are NOT kept as three coloured bitmaps: a well row is composited
203
+ * into a RAM CANVAS (see the idiom below) where a cell's colour is the 2-bit
204
+ * VALUE stamped in, all sharing ONE well palette — that's what lets ONE wide
205
+ * object show all three colours of a row at once. The falling TRIO is drawn by
206
+ * stamping its cells' colour values straight into the same canvas (overlay_trio
207
+ * below); there are no separate trio bitmaps or objects. */
208
+ /* settled-well rows AND the falling trio use palette 4 (one shared well palette,
209
+ * three lumas keyed by the 2-bit value). The well frame is BAKED into the row
210
+ * canvas at value 3 (see the WELL CANVAS note), so it shares the same palette. */
211
+ #define WELL_PAL 4
60
212
 
61
- /* ── Background well ──────────────────────────────────────────────
62
- * Without a full-screen drawable the display list emits only the
63
- * falling block and ~99% of the screen stays the flat BACKGRND colour
64
- * (reads as "blank"). Each well zone draws three full-width segments:
65
- * a side wall (palette 2), the playfield well in the centre where the
66
- * piece falls (palette 1), the other wall (palette 2). Width =
67
- * byte[3] low 5 bits (32-n); high 3 bits = palette. */
68
- static const uint8_t band_pix[16] = {
69
- 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
70
- 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
213
+ /* ── GAME LOGIC (clay) — 8x8 text font, 1 bit per pixel, 7px glyphs.
214
+ * The 7800 has NO text mode and no tilemap; text is just more objects.
215
+ * The text path here: expand glyphs into a 32-byte-wide RAM canvas
216
+ * (= 128px, 16 characters), then show the canvas with ONE wide DL entry
217
+ * per scanline. One drawable per line beats one-DL-entry-per-character
218
+ * by 16x in MARIA DMA time. Index order: 0-9 A-Z dash space. */
219
+ static const uint8_t FONT[38 * 8] = {
220
+ 0x70,0x88,0x98,0xA8,0xC8,0x88,0x70,0x00, /* 0 */
221
+ 0x20,0x60,0x20,0x20,0x20,0x20,0x70,0x00, /* 1 */
222
+ 0x70,0x88,0x08,0x30,0x40,0x80,0xF8,0x00, /* 2 */
223
+ 0x70,0x88,0x08,0x30,0x08,0x88,0x70,0x00, /* 3 */
224
+ 0x10,0x30,0x50,0x90,0xF8,0x10,0x10,0x00, /* 4 */
225
+ 0xF8,0x80,0xF0,0x08,0x08,0x88,0x70,0x00, /* 5 */
226
+ 0x30,0x40,0x80,0xF0,0x88,0x88,0x70,0x00, /* 6 */
227
+ 0xF8,0x08,0x10,0x20,0x40,0x40,0x40,0x00, /* 7 */
228
+ 0x70,0x88,0x88,0x70,0x88,0x88,0x70,0x00, /* 8 */
229
+ 0x70,0x88,0x88,0x78,0x08,0x10,0x60,0x00, /* 9 */
230
+ 0x20,0x50,0x88,0x88,0xF8,0x88,0x88,0x00, /* A */
231
+ 0xF0,0x88,0x88,0xF0,0x88,0x88,0xF0,0x00, /* B */
232
+ 0x70,0x88,0x80,0x80,0x80,0x88,0x70,0x00, /* C */
233
+ 0xF0,0x88,0x88,0x88,0x88,0x88,0xF0,0x00, /* D */
234
+ 0xF8,0x80,0x80,0xF0,0x80,0x80,0xF8,0x00, /* E */
235
+ 0xF8,0x80,0x80,0xF0,0x80,0x80,0x80,0x00, /* F */
236
+ 0x70,0x88,0x80,0xB8,0x88,0x88,0x70,0x00, /* G */
237
+ 0x88,0x88,0x88,0xF8,0x88,0x88,0x88,0x00, /* H */
238
+ 0x70,0x20,0x20,0x20,0x20,0x20,0x70,0x00, /* I */
239
+ 0x38,0x10,0x10,0x10,0x10,0x90,0x60,0x00, /* J */
240
+ 0x88,0x90,0xA0,0xC0,0xA0,0x90,0x88,0x00, /* K */
241
+ 0x80,0x80,0x80,0x80,0x80,0x80,0xF8,0x00, /* L */
242
+ 0x88,0xD8,0xA8,0xA8,0x88,0x88,0x88,0x00, /* M */
243
+ 0x88,0xC8,0xA8,0x98,0x88,0x88,0x88,0x00, /* N */
244
+ 0x70,0x88,0x88,0x88,0x88,0x88,0x70,0x00, /* O */
245
+ 0xF0,0x88,0x88,0xF0,0x80,0x80,0x80,0x00, /* P */
246
+ 0x70,0x88,0x88,0x88,0xA8,0x90,0x68,0x00, /* Q */
247
+ 0xF0,0x88,0x88,0xF0,0xA0,0x90,0x88,0x00, /* R */
248
+ 0x78,0x80,0x80,0x70,0x08,0x08,0xF0,0x00, /* S */
249
+ 0xF8,0x20,0x20,0x20,0x20,0x20,0x20,0x00, /* T */
250
+ 0x88,0x88,0x88,0x88,0x88,0x88,0x70,0x00, /* U */
251
+ 0x88,0x88,0x88,0x88,0x88,0x50,0x20,0x00, /* V */
252
+ 0x88,0x88,0x88,0xA8,0xA8,0xD8,0x88,0x00, /* W */
253
+ 0x88,0x88,0x50,0x20,0x50,0x88,0x88,0x00, /* X */
254
+ 0x88,0x88,0x50,0x20,0x20,0x20,0x20,0x00, /* Y */
255
+ 0xF8,0x08,0x10,0x20,0x40,0x80,0xF8,0x00, /* Z */
256
+ 0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x00, /* - */
257
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* space */
71
258
  };
72
- /* 12 bytes (48 px) wall @ x0, 16 bytes (64 px) well @ x48,
73
- * 12 bytes (48 px) wall @ x112, terminator. */
74
- static uint8_t dl_well[16] = {
75
- 0, 0x40, 0, (2 << 5) | 20, 0,
76
- 0, 0x40, 0, (1 << 5) | 16, 48,
77
- 0, 0x40, 0, (2 << 5) | 20, 112,
78
- 0
259
+ /* nibble-to-2bpp expansion: each set bit becomes a 2-bit pixel of value 1 (palette c1) */
260
+ static const uint8_t NIB2[16] = {
261
+ 0x00,0x01,0x04,0x05,0x10,0x11,0x14,0x15,
262
+ 0x40,0x41,0x44,0x45,0x50,0x51,0x54,0x55,
79
263
  };
80
264
 
81
- static void set_well_addr(void) {
82
- uint16_t a = (uint16_t)(uintptr_t)band_pix;
83
- dl_well[0] = dl_well[5] = dl_well[10] = (uint8_t)(a & 0xFF);
84
- dl_well[2] = dl_well[7] = dl_well[12] = (uint8_t)(a >> 8);
265
+ /* ── HARDWARE IDIOM (load-bearing — reshape gameplay around this; see TROUBLESHOOTING) ──
266
+ * Solid band drawable for multi-line zones AND the well frames. Inside a zone
267
+ * of height H, MARIA fetches scanline l's pixels from ADDR + (H-1-l)*256 — the
268
+ * "offset addressing quirk". A multi-line drawable therefore needs valid data
269
+ * at the SAME low-byte offset across H consecutive 256-byte pages. For solid
270
+ * colour bands we sidestep alignment entirely: a 2KB ROM run of 0x55 means ANY
271
+ * address inside the first page works for zones up to 8 tall (8 pages × 256).
272
+ * Costs 2KB of a 32KB cart — ROM is the cheap resource here. The well frames
273
+ * reuse SOLID8: a frame rail is a thin colour-1 object drawn into the one-line
274
+ * well zones it spans (1-line zones ⇒ the quirk vanishes, any SOLID8 address
275
+ * works). */
276
+ #define S16 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
277
+ #define S256 S16,S16,S16,S16,S16,S16,S16,S16,S16,S16,S16,S16,S16,S16,S16,S16
278
+ static const uint8_t SOLID8[2048] = { S256,S256,S256,S256,S256,S256,S256,S256 };
279
+
280
+ /* Full-width band DL: a DL drawable is at most 32 bytes (128px), so a
281
+ * 160px line takes TWO 5-byte entries + terminator = 11 bytes. 5-byte
282
+ * form: lo, $40 (extended, write-mode 0 = 160A), hi, palette|width, X.
283
+ * Width 32 encodes as 0 in the low 5 bits — legal ONLY in 5-byte form. */
284
+ #define MK_BAND(name, pal) static uint8_t name[11] = { \
285
+ 0, 0x40, 0, ((pal) << 5) | 0, 0, /* 128px @ x=0 */ \
286
+ 0, 0x40, 0, ((pal) << 5) | 24, 128, /* 32px @ x=128 */ \
287
+ 0 }
288
+ MK_BAND(dl_band_a, 6);
289
+ MK_BAND(dl_band_b, 7);
290
+ MK_BAND(dl_base, 5); /* the well floor surface band */
291
+ static uint8_t dl_empty[2] = { 0, 0 };
292
+
293
+ /* ════════════════════════════════════════════════════════════════════════
294
+ * ── HARDWARE IDIOM (load-bearing — reshape gameplay around this; see TROUBLESHOOTING) ──
295
+ * THE DISPLAY-LIST POOL — how the wells get drawn (the 7800's signature, here
296
+ * applied to its WORST case). Same machinery the dense 7800 shmup uses for its
297
+ * swarm; here it draws the well frames, the falling trios, and — through the
298
+ * canvas trick below — the settled wells.
299
+ *
300
+ * MARIA hierarchy refresher: DPP → DLL (one entry per ZONE: height + DL
301
+ * pointer) → DL (one 4/5-byte entry per OBJECT crossing that zone) → pixel
302
+ * bytes. There is no sprite table; "an object" IS a DL entry.
303
+ *
304
+ * The well area is 120 one-scanline zones. Each has a fixed 14-byte DL slot:
305
+ * room for TWO wide 5-byte object entries (one row per well in 2P) + the
306
+ * terminator byte (MARIA reads the NEXT entry's mode byte after each entry; a
307
+ * 0 there ends the line — forget the terminator and MARIA walks into garbage
308
+ * and the screen dies). 5+5 = 10, terminator at 11 ≤ 14: comfortable.
309
+ *
310
+ * WHY ONE OBJECT PER WELL-ROW — the MARIA DMA budget, the dial this whole game
311
+ * turns: MARIA steals the bus from the CPU to fetch each line's DL + pixels
312
+ * (~113 DMA cycles per scanline before the line visibly runs out). A puzzle
313
+ * WELL is the WORST case: a full row of 6 cells lands on the same 8 scanlines
314
+ * — 6 objects per line, double the ~3-per-line budget; the back half would
315
+ * flicker out every frame. So THE WELL ROW IS NOT DRAWN AS 6 OBJECTS. Each
316
+ * well row is composited into a 14-byte RAM canvas (frame column + 6 cells +
317
+ * frame column = 56px) and shown as ONE wide 5-byte DL object per scanline —
318
+ * 1 object per line, not 6. Two wells (2P) = 2 objects per line. The falling
319
+ * TRIO is NOT a separate object either: it's overlaid straight into the canvas
320
+ * (see the trio-overlay note), so even a trio scanline stays at ≤2 objects.
321
+ *
322
+ * The pool is SPLIT across two RAM regions because no single linker region
323
+ * fits 1680 bytes + the DLL + the canvases (see RAM MAP). We push MORE of it
324
+ * into raw RAM3 than the shmup does (which kept 47 lines in BSS) because the
325
+ * boards + match mask + well canvases also need BSS — so only 23 lines live in
326
+ * BSS and the rest (97) in POOLB:
327
+ * lines 0-22 → pool_a[] (BSS, RAM1) 23 * 14 = 322 bytes
328
+ * lines 23-119 → POOLB ($2200, raw RAM3) 97 * 14 = 1358 bytes
329
+ * POOLB then ends at $274E ($2200 + 1358), leaving ~$B2 (178 bytes) for the cc65 C stack
330
+ * growing down from $2800 — enough for this game's shallow call depth, but if
331
+ * you add deep recursion, shrink the boards/canvases before growing pool_a.
332
+ * line_dl() resolves a well-area line to its slot; nothing else knows the split.
333
+ *
334
+ * Rebuild-vs-patch doctrine (MENTAL_MODEL.md): the DLL is built ONCE and only
335
+ * its 3-byte well-area entries are repointed at state changes (with DMA off);
336
+ * per-frame work only rewrites bytes INSIDE existing 14-byte slots and inside
337
+ * the well canvases. Tearing down the DLL itself mid-game races MARIA's walker
338
+ * — the classic "works one frame then the screen falls apart" 7800 bug.
339
+ * ════════════════════════════════════════════════════════════════════════ */
340
+ #define LINE_BYTES 14 /* per-line DL slot: 2 wide row entries (5B
341
+ * each, one per well in 2P) + terminator */
342
+ #define POOLA_LINES 23
343
+ static uint8_t pool_a[POOLA_LINES * LINE_BYTES];
344
+ static uint8_t line_used[FIELD_LINES];
345
+
346
+ /* line_dl(i): the 14-byte DL slot for well-area line i. Computed inline (no
347
+ * cached pointer array) — on a 4KB machine the 240-byte pointer table is a
348
+ * luxury we spend on RAM the canvases need instead. Lines 0..22 live in
349
+ * pool_a (BSS); 23..119 in POOLB (raw RAM3). */
350
+ static uint8_t* line_dl(uint8_t i) {
351
+ return (i < POOLA_LINES)
352
+ ? pool_a + (uint16_t)i * LINE_BYTES
353
+ : POOLB + (uint16_t)(i - POOLA_LINES) * LINE_BYTES;
354
+ }
355
+
356
+ static uint8_t dll[143 * 3];
357
+ static uint8_t hud_canvas[8 * 32]; /* 16-char text row, lives in BSS */
358
+ static uint8_t hud_dls[8 * 7]; /* one 5-byte DL + term per row */
359
+
360
+ /* ── HARDWARE IDIOM (load-bearing) — the WELL CANVASES, and why ONE object per
361
+ * well line. A 14-byte (56px) canvas per BOARD ROW per well: byte 0 = the
362
+ * left frame column, bytes 1..12 = the 6 cells (48px), byte 13 = the right
363
+ * frame column. 12 rows × 14 bytes × 2 wells = 336 bytes in BSS. The frame is
364
+ * BAKED INTO the canvas (drawn with WELL_PAL value 3) rather than emitted as
365
+ * its own side-rail objects — because the per-line DL SLOT is only 14 bytes
366
+ * (room for the terminator after one 5-byte wide entry + one 4-byte trio
367
+ * entry = 9 bytes used, terminator at 10). Two separate 4-byte rail objects
368
+ * PLUS the 5-byte row would be 13 bytes and the terminator would spill into
369
+ * the NEXT line's slot — the classic off-by-one that walks MARIA into garbage.
370
+ * So the frame rides inside the single wide row object; each well line costs
371
+ * exactly ONE wide object (+ the trio where it overlaps). The same 14-byte
372
+ * image shows on all CELL_PX scanlines of the row (1-line zones ⇒ the
373
+ * offset-addressing quirk vanishes). composite_row() rebuilds a row only when
374
+ * that board changed (lock/clear/garbage), so the per-frame emit just points
375
+ * at the standing canvases. */
376
+ #define CANVAS_ROW_BYTES (1 + GRID_W * 2 + 1) /* 14 bytes = 56px (frame+cells) */
377
+ #define FRAME_V 3 /* frame uses WELL_PAL value 3 */
378
+ static uint8_t well_canvas[2][GRID_H * CANVAS_ROW_BYTES];
379
+
380
+ /* ── HARDWARE IDIOM (load-bearing) — emit a WELL ROW as ONE wide 5-byte object
381
+ * per scanline. canvas = the row's 14-byte (56px) image; the SAME image is
382
+ * shown on all CELL_PX scanlines of the row. This is the move that turns a
383
+ * 6-objects-per-line row into a 1-object-per-line row. We hand-write the
384
+ * 5-byte direct entry; width 56px = 14 bytes encodes as (32 - 14). */
385
+ static void emit_well_row(uint8_t y, const uint8_t* canvas, uint8_t x) {
386
+ uint8_t r, off;
387
+ uint8_t* dl;
388
+ uint16_t a = (uint16_t)(uintptr_t)canvas;
389
+ for (r = 0; r < CELL_PX; ++r) {
390
+ off = line_used[y];
391
+ if (off + 5 <= LINE_BYTES - 1) { /* room for a 5-byte entry + term */
392
+ dl = line_dl(y) + off;
393
+ dl[0] = (uint8_t)(a & 0xFF);
394
+ dl[1] = 0x40; /* 5-byte form, 160A write mode */
395
+ dl[2] = (uint8_t)(a >> 8);
396
+ dl[3] = (uint8_t)((WELL_PAL << 5) | (32 - CANVAS_ROW_BYTES));
397
+ dl[4] = x;
398
+ line_used[y] = off + 5;
399
+ }
400
+ ++y;
401
+ }
402
+ }
403
+
404
+ /* ── HARDWARE IDIOM (load-bearing — the per-frame budget, the 7800 lesson of
405
+ * this file) — REBUILD-vs-PATCH, applied to the per-frame loop itself. A naive
406
+ * version re-emits all 12 well rows × 8 lines × 2 wells (≈1500 byte writes)
407
+ * EVERY frame; on a 1.79MHz 6502 that overran one 60Hz frame so badly the sim
408
+ * effectively ran at ~3Hz and every timer stretched ~19x — the exact
409
+ * "stretched timers look like broken rules" footgun the #pragma comment warns
410
+ * about, here caused by per-frame WORK, not the missing optimizer. The fix:
411
+ * the wells only change on a lock/clear/garbage, so we write their DL entries
412
+ * ONCE (build_wells) and leave them STANDING in the slots. Per frame we only
413
+ * overlay the falling trio into the canvas the entries already point at (see
414
+ * the trio-overlay note) — a few dozen byte writes, no DL traffic at all. */
415
+ static void wells_open(void) { memset(line_used, 0, FIELD_LINES); }
416
+
417
+ static void terminate_all(void) { /* next entry's MODE byte = 0 each line */
418
+ uint8_t i;
419
+ for (i = 0; i < FIELD_LINES; ++i)
420
+ line_dl(i)[line_used[i] + 1] = 0;
421
+ }
422
+
423
+ /* ── HARDWARE IDIOM (load-bearing) — DLL construction + zone repointing.
424
+ * Built once at boot; dll_zone appends one 3-byte entry (offset byte =
425
+ * height-1; DLI/holey bits stay 0 — no NMI handler, no holey DMA here). */
426
static uint8_t* dllp;   /* write cursor: next free 3-byte DLL entry */

/* Append one DLL entry at the cursor: byte 0 = height - 1, then the DL
 * address high byte, then low byte; the cursor advances by 3. */
static void dll_zone(uint8_t height, uint16_t dl) {
  uint8_t* entry = dllp;
  entry[0] = height - 1;
  entry[1] = (uint8_t)(dl >> 8);
  entry[2] = (uint8_t)(dl & 0xFF);
  dllp = entry + 3;
}
433
+
434
+ /* Repoint ONE well-area line's DLL entry (title/menu/game-over text overlays
435
+ * borrow well zones; play repoints them back at the pool slots). */
436
+ static void point_field_zone(uint8_t fline, uint16_t dl) {
437
+ uint8_t* e = dll + FIELD_DLL_OFF + (uint16_t)fline * 3;
438
+ e[0] = 0;
439
+ e[1] = (uint8_t)(dl >> 8);
440
+ e[2] = (uint8_t)(dl & 0xFF);
441
+ }
442
+
443
+ /* ── GAME LOGIC (clay) — text rendering into a 32-byte-wide RAM canvas ── */
444
static uint8_t glyph_index(char c) {
  /* Font slot for a character: '0'-'9' -> 0..9, 'A'-'Z' -> 10..35,
   * '-' -> 36, anything else -> 37 (the space glyph). */
  uint8_t glyph = 37;
  if (c == '-') {
    glyph = 36;
  } else if (c >= 'A' && c <= 'Z') {
    glyph = (uint8_t)(10 + c - 'A');
  } else if (c >= '0' && c <= '9') {
    glyph = (uint8_t)(c - '0');
  }
  return glyph;
}
450
+
451
+ static void draw_text(uint8_t* canvas, uint8_t col, const char* s) {
452
+ uint8_t r, b;
453
+ const uint8_t* g;
454
+ uint8_t* dst;
455
+ while (*s && col < 16) {
456
+ g = FONT + ((uint16_t)glyph_index(*s) << 3);
457
+ dst = canvas + ((uint16_t)col << 1);
458
+ for (r = 0; r < 8; ++r) {
459
+ b = g[r];
460
+ dst[0] = NIB2[b >> 4];
461
+ dst[1] = NIB2[b & 0x0F];
462
+ dst += 32;
463
+ }
464
+ ++s;
465
+ ++col;
466
+ }
467
+ }
468
+
469
static void digits5(char* d, uint16_t v) {
  /* Write v as exactly five zero-padded decimal digits into d[0..4],
   * filling from the least significant digit backwards. No NUL is written. */
  char* p = d + 5;
  while (p != d) {
    --p;
    *p = (char)('0' + v % 10);
    v /= 10;
  }
}
473
+
474
+ /* Build the 8 one-line DLs that display an arbitrary RAM canvas at x=16
475
+ * (centered 128px). pal picks the text colour palette. dls = 8*7 bytes. */
476
static void canvas_dls(uint8_t* dls, const uint8_t* canvas, uint8_t pal) {
  /* Emit eight 7-byte records, one per canvas row (rows are 32 bytes
   * apart): a 5-byte entry at x=16 followed by two zero bytes. */
  uint8_t row;
  uint16_t addr = (uint16_t)(uintptr_t)canvas;
  uint8_t* e = dls;
  for (row = 0; row < 8; ++row) {
    e[0] = (uint8_t)(addr & 0xFF);
    e[1] = 0x40;                   /* 5-byte form, 160A write mode */
    e[2] = (uint8_t)(addr >> 8);
    e[3] = (uint8_t)(pal << 5);    /* width field 0 = 32 bytes */
    e[4] = 16;
    e[5] = 0;
    e[6] = 0;                      /* terminator for the next read */
    e += 7;
    addr = (uint16_t)(addr + 32);
  }
}
491
+
492
+ /* ── GAME LOGIC (clay) — the music. Two-voice TIA tune loop. ─────────────────
493
+ * The TIA's frequency divider is 5 bits — ~32 pitches TOTAL, none of them
494
+ * in tune with each other. Don't fight it: write the melody IN the TIA's
495
+ * crooked scale and it reads as "gritty 7800", fight it and it reads as
496
+ * "wrong". The note tables ARE the song — edit them to recompose.
497
+ * Voice 0 = melody (AUDC 4, square-ish). Voice 1 = bass (AUDC 6, deep
498
+ * buzz) — and voice 1 is SHARED with sound effects (TIA has only two
499
+ * voices): when the game fires an effect, sfx_hold mutes the bass for the
500
+ * effect's length, then the bass re-enters on its next note. That
501
+ * steal-and-return is the standard 2-voice arbitration trick. */
502
+ static const uint8_t MEL_F[16] = { 19,17,15,17, 19,15,12,255, 17,15,13,15, 17,13,15,255 };
503
+ static const uint8_t MEL_L[16] = { 8, 8, 8, 8, 8, 8,16, 8, 8, 8, 8, 8, 8, 8,16, 8 };
504
+ static const uint8_t BAS_F[8] = { 27,27,23,23, 25,25,29,25 };
505
+ static uint8_t mel_i, mel_t, bas_i, bas_t, sfx_hold;
506
+
507
+ static void music_tick(void) {
508
+ if (mel_t) --mel_t;
509
+ if (mel_t == 0) {
510
+ mel_i = (uint8_t)((mel_i + 1) & 15);
511
+ mel_t = MEL_L[mel_i];
512
+ if (MEL_F[mel_i] == 255) {
513
+ AUDV0 = 0; /* 255 = rest */
514
+ } else {
515
+ AUDC0 = 4; AUDF0 = MEL_F[mel_i]; AUDV0 = 6;
516
+ }
517
+ }
518
+ if (sfx_hold) { /* an effect owns voice 1 */
519
+ --sfx_hold;
520
+ if (sfx_hold == 0) bas_t = 1; /* bass re-enters next tick */
521
+ return;
522
+ }
523
+ if (bas_t) --bas_t;
524
+ if (bas_t == 0) {
525
+ bas_i = (uint8_t)((bas_i + 1) & 7);
526
+ bas_t = 16;
527
+ AUDC1 = 6; AUDF1 = BAS_F[bas_i]; AUDV1 = 5;
528
+ }
529
+ }
530
+
531
+ /* Effects (voice 1 via atari7800_sfx; sfx_hold keeps the bass out). */
532
+ static void fx_move(void) { sfx_tone(1, 18, 3); sfx_hold = 4; }
533
+ static void fx_cycle(void) { sfx_tone(1, 10, 3); sfx_hold = 4; }
534
+ static void fx_lock(void) { sfx_tone(1, 24, 4); sfx_hold = 5; }
535
+ static void fx_clear(void) { sfx_tone(1, 6, 6); sfx_hold = 7; }
536
+ static void fx_garb(void) { sfx_noise(10); sfx_hold = 11; }
537
+ static void fx_over(void) { sfx_noise(22); sfx_hold = 23; }
538
+ static void fx_start(void) { sfx_tone(1, 8, 6); sfx_hold = 7; }
539
+
540
+ /* ── GAME LOGIC (clay — reshape freely) — game state ─────────────────────────
541
+ * Fixed object pools, no allocation (1.79MHz CPU, 4KB RAM — a heap is a cost
542
+ * with no payer). Two 6×12 boards live in BSS (72 bytes each); a 72-byte
543
+ * match mask too. */
544
+ static uint8_t board[2][GRID_H][GRID_W];
545
+ static uint8_t matched[GRID_H][GRID_W];
546
+
547
+ static uint8_t two_p; /* 0 = 1P marathon, 1 = 2P versus */
548
+ static uint8_t well_x[2]; /* left pixel of each well */
549
+ static uint8_t piece_x[2]; /* falling trio column 0..5 */
550
+ static int8_t piece_y[2]; /* row of its TOP cell (<0 = above rim) */
551
+ static uint8_t piece_col[2][3]; /* trio colours, top to bottom */
552
+ static uint8_t fall_t[2]; /* frames until next gravity step */
553
+ static uint8_t prev_fire[2]; /* edge-trigger the cycle button */
554
+ static uint8_t prev_lr[2]; /* edge-trigger left/right */
555
+ static uint16_t score[2];
556
+ static uint16_t hiscore;
557
+ static uint16_t cleared_total; /* 1P: cells cleared, drives level */
558
+ static uint8_t level; /* 1P: 1..9, speeds up the fall */
559
+ static uint8_t alive[2]; /* 2P: still in the game */
560
+ static uint8_t ready_pause; /* freeze frames after spawn/start */
561
+ static uint8_t dirty, over_lock;
562
+ static uint8_t dirty_wells; /* a board changed → rebuild well DLs */
563
+ static int8_t trio_rows[2][3]; /* canvas rows the trio overlaid last
564
+ * frame, to wipe (-1 = none) */
565
+ static uint16_t rng = 0xACE1;
566
+
567
+ #define ST_TITLE 0
568
+ #define ST_PLAY 1
569
+ #define ST_OVER 2
570
+ static uint8_t state;
571
+ static uint8_t winner; /* 2P: who won (for the over text) */
572
+
573
+ #define VS_FALL_DELAY 26 /* 2P: fixed gravity (frames/row) */
574
+ #define GARBAGE_CAP 4 /* max garbage rows per attack */
575
+
576
+ static uint8_t random8(void) { /* xorshift16 — cheap + fine */
577
+ uint16_t r = rng;
578
+ r ^= r << 7;
579
+ r ^= r >> 9;
580
+ r ^= r << 8;
581
+ rng = r;
582
+ return (uint8_t)r;
583
+ }
584
+
585
+ /* ── HARDWARE IDIOM (load-bearing) — composite ONE board row into its 14-byte
586
+ * canvas: a left frame column (value 3), then each of the 6 cells writes a
587
+ * 2-byte (8px) 2bpp value at the cell's colour (1/2/3), then a right frame
588
+ * column. Empty cells write 0 (transparent → the BACKGRND shows through,
589
+ * reading as the recessed well). All cells AND the frame share the WELL_PAL
590
+ * palette, so the colour comes from the 2-bit VALUE, not a palette switch —
591
+ * which is the whole reason one wide object can show three colours at once. */
592
+ static void composite_row(uint8_t p, uint8_t row) {
593
+ uint8_t c, col, v;
594
+ uint8_t* dst = well_canvas[p] + (uint16_t)row * CANVAS_ROW_BYTES;
595
+ dst[0] = (uint8_t)(FRAME_V * 0x55); /* left frame column (4px) */
596
+ for (c = 0; c < GRID_W; ++c) {
597
+ col = board[p][row][c];
598
+ v = col ? (uint8_t)(col * 0x55) : 0; /* fill all 4 px of the byte */
599
+ dst[1 + (uint16_t)c * 2] = v;
600
+ dst[1 + (uint16_t)c * 2 + 1] = v;
601
+ }
602
+ dst[CANVAS_ROW_BYTES - 1] = (uint8_t)(FRAME_V * 0x55); /* right frame col */
603
+ }
604
+
605
+ static void composite_all(uint8_t p) {
606
+ uint8_t r;
607
+ for (r = 0; r < GRID_H; ++r) composite_row(p, r);
608
+ dirty_wells = 1; /* the standing well DLs need rebuild */
85
609
  }
86
610
 
87
- static uint16_t bg_zone_dl(int zone) {
88
- if (zone >= 32 && zone < 200) return (uint16_t)(uintptr_t)dl_well;
89
- return (uint16_t)(uintptr_t)dl_empty;
611
+ /* ── GAME LOGIC (clay — reshape freely) — match scan: mark every straight run
612
+ * of 3+ same-coloured cells in all 4 directions (a cell can belong to several
613
+ * runs — the mask de-dupes), return how many cells matched. This is the
614
+ * resolve-time spike the header's frame-budget note talks about. */
615
+ static const int8_t DIRS4[4][2] = { {0,1}, {1,0}, {1,1}, {1,-1} };
616
+
617
+ static uint8_t mark_and_count(uint8_t p) {
618
+ uint8_t r, c, d, len, k, cnt, col;
619
+ int8_t dr, dc;
620
+ int sr, sc;
621
+ cnt = 0;
622
+ for (r = 0; r < GRID_H; ++r)
623
+ for (c = 0; c < GRID_W; ++c) matched[r][c] = 0;
624
+ for (r = 0; r < GRID_H; ++r) {
625
+ for (c = 0; c < GRID_W; ++c) {
626
+ col = board[p][r][c];
627
+ if (col == EMPTY) continue;
628
+ for (d = 0; d < 4; ++d) {
629
+ dr = DIRS4[d][0]; dc = DIRS4[d][1];
630
+ sr = (int)r - dr; sc = (int)c - dc;
631
+ if (sr >= 0 && sr < GRID_H && sc >= 0 && sc < GRID_W
632
+ && board[p][sr][sc] == col) continue; /* not the run's start */
633
+ len = 1;
634
+ sr = (int)r + dr; sc = (int)c + dc;
635
+ while (sr >= 0 && sr < GRID_H && sc >= 0 && sc < GRID_W
636
+ && board[p][sr][sc] == col) { ++len; sr += dr; sc += dc; }
637
+ if (len >= 3) {
638
+ sr = r; sc = c;
639
+ for (k = 0; k < len; ++k) {
640
+ if (!matched[sr][sc]) { matched[sr][sc] = 1; ++cnt; }
641
+ sr += dr; sc += dc;
642
+ }
643
+ }
644
+ }
645
+ }
646
+ }
647
+ return cnt;
90
648
  }
91
649
 
92
- #define DLL_ZONES 243
93
- static uint8_t dll[DLL_ZONES * 3];
650
+ /* Collapse each column so survivors rest on the floor. */
651
+ static void apply_gravity(uint8_t p) {
652
+ uint8_t c;
653
+ int8_t r, w;
654
+ for (c = 0; c < GRID_W; ++c) {
655
+ w = GRID_H - 1;
656
+ for (r = GRID_H - 1; r >= 0; --r)
657
+ if (board[p][r][c] != EMPTY) { board[p][w][c] = board[p][r][c]; --w; }
658
+ for (; w >= 0; --w) board[p][w][c] = EMPTY;
659
+ }
660
+ }
661
+
662
+ /* ── GAME LOGIC (clay) — game-over overlay (defined later; the lock path calls
663
+ * it through this forward declaration). ── */
664
+ static void paint_gameover(void);
665
+
666
+ static void game_over(void) {
667
+ uint16_t best = score[0];
668
+ if (two_p && score[1] > best) best = score[1];
669
+ if (best > hiscore) {
670
+ hiscore = best;
671
+ /* HSC NOTE (see file header): on real hardware with a High Score Cart you
672
+ * would write the record into HSC RAM ($1000-$17FF) here. The bundled
673
+ * prosystem core has no HSC support and exposes no SAVE_RAM, so the record
674
+ * honestly lives only as long as the session. */
675
+ }
676
+ fx_over();
677
+ paint_gameover();
678
+ }
94
679
 
95
- static int piece_x_col;
96
- static int piece_y;
97
- static uint8_t fall_timer;
98
- static uint8_t prev_btn;
99
- static uint8_t color_cycle;
680
+ /* ── GAME LOGIC (clay) — clear matches, drop survivors, chain cascades.
681
+ * Returns the chain depth (0 = the lock matched nothing). ── */
682
+ static uint8_t resolve_board(uint8_t p) {
683
+ uint8_t n, r, c, chain;
684
+ uint16_t amt;
685
+ chain = 0;
686
+ for (;;) {
687
+ n = mark_and_count(p);
688
+ if (n == 0) break;
689
+ ++chain;
690
+ for (r = 0; r < GRID_H; ++r)
691
+ for (c = 0; c < GRID_W; ++c)
692
+ if (matched[r][c]) board[p][r][c] = EMPTY;
693
+ amt = (uint16_t)n * 10;
694
+ if (chain > 1) amt *= chain; /* cascades pay multiplied */
695
+ score[p] += amt;
696
+ if (score[p] > 99999u) score[p] = 99999u;
697
+ dirty = 1;
698
+ fx_clear();
699
+ apply_gravity(p);
700
+ if (!two_p) {
701
+ cleared_total += n;
702
+ while (level < 9 && cleared_total >= (uint16_t)level * 10) ++level;
703
+ }
704
+ }
705
+ composite_all(p);
706
+ return chain;
707
+ }
100
708
 
101
- static void set_dl_addr(uint8_t* dl, const uint8_t* row) {
102
- uint16_t a = (uint16_t)(uintptr_t)row;
103
- dl[0] = (uint8_t)(a & 0xFF);
104
- dl[2] = (uint8_t)(a >> 8);
709
+ /* ── GAME LOGIC (clay) VERSUS attack: garbage rows rise from the bottom of
710
+ * the victim's well (random cells with one gap — matchable, so a skilled
711
+ * victim digs out). If the rim row is already occupied when a garbage row
712
+ * pushes up, the victim tops out and loses. ── */
713
+ static void garbage_insert(uint8_t v, uint8_t nrows) {
714
+ uint8_t k, c, gap;
715
+ int8_t r;
716
+ fx_garb();
717
+ for (k = 0; k < nrows; ++k) {
718
+ for (c = 0; c < GRID_W; ++c)
719
+ if (board[v][0][c] != EMPTY) { winner = (uint8_t)(v ^ 1); alive[v] = 0; game_over(); return; }
720
+ for (r = 0; r < GRID_H - 1; ++r)
721
+ for (c = 0; c < GRID_W; ++c)
722
+ board[v][r][c] = board[v][r + 1][c];
723
+ gap = random8() % GRID_W;
724
+ for (c = 0; c < GRID_W; ++c)
725
+ board[v][GRID_H - 1][c] = (c == gap) ? EMPTY : (uint8_t)(1 + random8() % 3);
726
+ if (piece_y[v] > -3) --piece_y[v]; /* keep the trio board-relative */
727
+ }
728
+ composite_all(v);
729
+ dirty = 1;
105
730
  }
106
731
 
107
- static void set_dll_entry(int idx, uint16_t dl_ptr) {
108
- dll[idx * 3 + 0] = 0;
109
- dll[idx * 3 + 1] = (uint8_t)(dl_ptr >> 8);
110
- dll[idx * 3 + 2] = (uint8_t)(dl_ptr & 0xFF);
732
+ /* Can the trio occupy column x, rows y..y+2? Cells above the rim are fine
733
+ * (pieces enter from above); below the floor or on a cell is not. */
734
+ static uint8_t can_place(uint8_t p, int8_t x, int8_t y) {
735
+ int8_t i, cy;
736
+ if (x < 0 || x >= GRID_W) return 0;
737
+ for (i = 0; i < 3; ++i) {
738
+ cy = (int8_t)(y + i);
739
+ if (cy < 0) continue;
740
+ if (cy >= GRID_H) return 0;
741
+ if (board[p][cy][x] != EMPTY) return 0;
742
+ }
743
+ return 1;
111
744
  }
112
745
 
113
- static void set_x(uint8_t x) {
114
- dl_row0[4] = x; dl_row1[4] = x; dl_row2[4] = x; dl_row3[4] = x;
115
- dl_row4[4] = x; dl_row5[4] = x; dl_row6[4] = x; dl_row7[4] = x;
746
+ static void spawn_piece(uint8_t p) {
747
+ piece_x[p] = GRID_W / 2;
748
+ piece_y[p] = 0; /* enter the trio FULLY inside the well (all
749
+ * 3 cells visible at once) — the well is only
750
+ * 12 rows, so an off-screen entry would flash
751
+ * past; top-out is detected by a lock landing
752
+ * with rows still ≤0 occupied. */
753
+ piece_col[p][0] = (uint8_t)(1 + random8() % 3);
754
+ piece_col[p][1] = (uint8_t)(1 + random8() % 3);
755
+ piece_col[p][2] = (uint8_t)(1 + random8() % 3);
756
+ if (!can_place(p, (int8_t)piece_x[p], piece_y[p])) { /* this well topped out */
757
+ if (two_p) { alive[p] = 0; winner = (uint8_t)(p ^ 1); }
758
+ game_over();
759
+ }
116
760
  }
117
761
 
118
- static void build_dll(int y) {
119
- int i;
120
- for (i = 0; i < DLL_ZONES; i++) {
121
- uint16_t dl;
122
- int d = i - y;
123
- switch (d) {
124
- case 0: dl = (uint16_t)(uintptr_t)dl_row0; break;
125
- case 1: dl = (uint16_t)(uintptr_t)dl_row1; break;
126
- case 2: dl = (uint16_t)(uintptr_t)dl_row2; break;
127
- case 3: dl = (uint16_t)(uintptr_t)dl_row3; break;
128
- case 4: dl = (uint16_t)(uintptr_t)dl_row4; break;
129
- case 5: dl = (uint16_t)(uintptr_t)dl_row5; break;
130
- case 6: dl = (uint16_t)(uintptr_t)dl_row6; break;
131
- case 7: dl = (uint16_t)(uintptr_t)dl_row7; break;
132
- default: dl = bg_zone_dl(i); break;
762
+ /* ── GAME LOGIC (clay) — land the trio, resolve, attack, respawn. ── */
763
+ static void lock_piece(uint8_t p) {
764
+ int8_t i, y;
765
+ uint8_t chain;
766
+ for (i = 0; i < 3; ++i) {
767
+ y = (int8_t)(piece_y[p] + i);
768
+ if (y >= 0) board[p][y][piece_x[p]] = piece_col[p][i];
769
+ }
770
+ fx_lock();
771
+ composite_all(p);
772
+ dirty = 1;
773
+ if (piece_y[p] < 0) { /* locked above the rim */
774
+ if (two_p) { alive[p] = 0; winner = (uint8_t)(p ^ 1); }
775
+ game_over();
776
+ return;
777
+ }
778
+ chain = resolve_board(p);
779
+ if (state != ST_PLAY) return;
780
+ if (chain && two_p) {
781
+ garbage_insert(p ^ 1, chain > GARBAGE_CAP ? GARBAGE_CAP : chain);
782
+ if (state != ST_PLAY) return; /* garbage topped them out */
783
+ }
784
+ spawn_piece(p);
785
+ }
786
+
787
+ /* ── GAME LOGIC (clay) — per-player input + gravity. Edge-triggered moves
788
+ * (one cell per press), held DOWN soft-drops. The single fire button CYCLES
789
+ * the trio's three colours (the 7800 pad has one button — this replaces the
790
+ * NES A/B two-way rotate). ── */
791
+ static void update_player(uint8_t p, uint8_t pad, uint8_t fire) {
792
+ uint8_t lf, rt, lr, t;
793
+ if (p == 0) { rt = (uint8_t)(pad & J1_RIGHT); lf = (uint8_t)(pad & J1_LEFT); }
794
+ else { rt = (uint8_t)(pad & J2_RIGHT); lf = (uint8_t)(pad & J2_LEFT); }
795
+ lr = (uint8_t)((lf ? 1 : 0) | (rt ? 2 : 0));
796
+
797
+ if ((lr & 1) && !(prev_lr[p] & 1) &&
798
+ can_place(p, (int8_t)(piece_x[p] - 1), piece_y[p])) { --piece_x[p]; fx_move(); }
799
+ if ((lr & 2) && !(prev_lr[p] & 2) &&
800
+ can_place(p, (int8_t)(piece_x[p] + 1), piece_y[p])) { ++piece_x[p]; fx_move(); }
801
+ prev_lr[p] = lr;
802
+
803
+ if (fire && !prev_fire[p]) { /* cycle colours downward */
804
+ t = piece_col[p][2];
805
+ piece_col[p][2] = piece_col[p][1];
806
+ piece_col[p][1] = piece_col[p][0];
807
+ piece_col[p][0] = t;
808
+ fx_cycle();
809
+ }
810
+ prev_fire[p] = fire;
811
+
812
+ /* soft drop on held DOWN */
813
+ if (p == 0) { if (pad & J1_DOWN) fall_t[p] += 4; }
814
+ else { if (pad & J2_DOWN) fall_t[p] += 4; }
815
+
816
+ ++fall_t[p];
817
+ {
818
+ uint8_t fd = two_p ? VS_FALL_DELAY
819
+ : (uint8_t)(34 - ((level << 1) + level)); /* 31..7 */
820
+ if (fall_t[p] >= fd) {
821
+ fall_t[p] = 0;
822
+ if (can_place(p, (int8_t)piece_x[p], (int8_t)(piece_y[p] + 1)))
823
+ ++piece_y[p];
824
+ else
825
+ lock_piece(p); /* may end the game */
133
826
  }
134
- set_dll_entry(i, dl);
135
827
  }
136
828
  }
137
829
 
830
+ /* ── GAME LOGIC (clay) — HUD: "S00000 H00000 L1" (1P) / "00000 V 00000" (2P)
831
+ * composed into the canvas. ── */
832
+ static void draw_hud(void) {
833
+ if (two_p) {
834
+ static char vbuf[18] = "00000 V 00000";
835
+ digits5(vbuf, score[0]);
836
+ digits5(vbuf + 12, score[1]);
837
+ memset(hud_canvas, 0, sizeof(hud_canvas));
838
+ draw_text(hud_canvas, 0, vbuf);
839
+ } else {
840
+ static char buf[17] = "S00000 H00000 L1";
841
+ digits5(buf + 1, score[0]);
842
+ digits5(buf + 8, hiscore);
843
+ buf[15] = (char)('0' + level);
844
+ memset(hud_canvas, 0, sizeof(hud_canvas));
845
+ draw_text(hud_canvas, 0, buf);
846
+ }
847
+ dirty = 0;
848
+ }
849
+
850
+ static void draw_hud_title(void) {
851
+ static char buf[9] = "HI 00000";
852
+ digits5(buf + 3, hiscore);
853
+ memset(hud_canvas, 0, sizeof(hud_canvas));
854
+ draw_text(hud_canvas, 4, buf);
855
+ }
856
+
857
+ /* ── HARDWARE IDIOM (load-bearing) — paint functions bracket structural
858
+ * display-list changes with MARIA DMA OFF ($7F) / ON ($40), the 7800's
859
+ * version of the NES "rendering off before nametable writes" rule: MARIA
860
+ * may be mid-walk through the very lists being rewritten, and repointing
861
+ * dozens of zones under it glitches (or with bad luck hangs) the frame.
862
+ * CTRL $40 = DMA on, 160A read mode, colour burst on — forget to restore
863
+ * it and the screen stays the flat BACKGRND colour forever. ── */
864
+
865
+ /* Title screen: borrow well zones for three text overlays composed in POOLB
866
+ * (the pool isn't drawing wells on the title, so its RAM is free — 4KB
867
+ * machines make you reuse like this). Title is double-height by pointing TWO
868
+ * consecutive 1-line zones at each canvas row — zero extra RAM, pure DLL
869
+ * trickery. */
870
+ static void paint_title(void) {
871
+ uint8_t i;
872
+ uint8_t* c0 = POOLB; /* title canvas (256 bytes) */
873
+ uint8_t* c1 = POOLB + 256; /* menu line 1 (256 bytes) */
874
+ uint8_t* c2 = POOLB + 512; /* menu line 2 (256 bytes) */
875
+ uint8_t* td = POOLB + 768; /* 3 lines * 8 row-DLs * 7 */
876
+ CTRL = 0x7F; /* DMA off */
877
+ memset(POOLB, 0, 768);
878
+ draw_text(c0, (uint8_t)((16 - (sizeof(GAME_TITLE) - 1)) / 2), GAME_TITLE);
879
+ draw_text(c1, 1, "1P - FIRE PLAY");
880
+ draw_text(c2, 1, "2P PAD2 VERSUS");
881
+ canvas_dls(td, c0, 0); /* white */
882
+ canvas_dls(td + 56, c1, 5); /* HUD green */
883
+ canvas_dls(td + 112, c2, 5);
884
+ for (i = 0; i < FIELD_LINES; ++i)
885
+ point_field_zone(i, (uint16_t)(uintptr_t)dl_empty);
886
+ for (i = 0; i < 16; ++i) /* double-height title rows */
887
+ point_field_zone((uint8_t)(8 + i),
888
+ (uint16_t)(uintptr_t)(td + ((i >> 1) * 7)));
889
+ for (i = 0; i < 8; ++i) {
890
+ point_field_zone((uint8_t)(56 + i), (uint16_t)(uintptr_t)(td + 56 + i * 7));
891
+ point_field_zone((uint8_t)(76 + i), (uint16_t)(uintptr_t)(td + 112 + i * 7));
892
+ }
893
+ draw_hud_title();
894
+ state = ST_TITLE;
895
+ CTRL = 0x40; /* DMA back on */
896
+ }
897
+
898
+ /* Game over: the pool RAM becomes the message overlay (same reuse trick as
899
+ * the title), the rest of the well area goes blank. */
900
+ static void paint_gameover(void) {
901
+ uint8_t i;
902
+ uint8_t* c0 = POOLB;
903
+ uint8_t* c1 = POOLB + 256;
904
+ uint8_t* td = POOLB + 768;
905
+ static char buf[12] = "SCORE 00000";
906
+ CTRL = 0x7F;
907
+ memset(POOLB, 0, 768);
908
+ if (two_p) draw_text(c0, 4, winner ? "P2 WINS" : "P1 WINS");
909
+ else draw_text(c0, 3, "GAME OVER");
910
+ digits5(buf + 6, two_p ? score[winner ? 1 : 0] : score[0]);
911
+ draw_text(c1, 2, buf);
912
+ canvas_dls(td, c0, 0);
913
+ canvas_dls(td + 56, c1, 5);
914
+ for (i = 0; i < FIELD_LINES; ++i)
915
+ point_field_zone(i, (uint16_t)(uintptr_t)dl_empty);
916
+ for (i = 0; i < 8; ++i) {
917
+ point_field_zone((uint8_t)(40 + i), (uint16_t)(uintptr_t)(td + i * 7));
918
+ point_field_zone((uint8_t)(60 + i), (uint16_t)(uintptr_t)(td + 56 + i * 7));
919
+ }
920
+ over_lock = 30; /* swallow the held fire button */
921
+ state = ST_OVER;
922
+ CTRL = 0x40;
923
+ }
924
+
925
+ /* ── GAME LOGIC (clay) — start a run ── */
926
+ static void start_game(uint8_t versus) {
927
+ uint8_t p, r, c, i;
928
+ CTRL = 0x7F;
929
+ two_p = versus;
930
+ well_x[0] = versus ? WELL_VS_P0 : WELL_1P_X;
931
+ well_x[1] = WELL_VS_P1;
932
+ for (i = 0; i < FIELD_LINES; ++i) /* well zones → pool slots */
933
+ point_field_zone(i, (uint16_t)(uintptr_t)line_dl(i));
934
+ wells_open();
935
+ terminate_all(); /* all lines empty + termed */
936
+ for (p = 0; p < 2; ++p) {
937
+ for (r = 0; r < GRID_H; ++r)
938
+ for (c = 0; c < GRID_W; ++c) board[p][r][c] = EMPTY;
939
+ composite_all(p);
940
+ score[p] = 0;
941
+ fall_t[p] = 0;
942
+ prev_fire[p] = 1; /* the button that started *
943
+ * the run shouldn't cycle */
944
+ prev_lr[p] = 0;
945
+ alive[p] = (uint8_t)((p == 0) || versus);
946
+ }
947
+ cleared_total = 0;
948
+ level = 1;
949
+ winner = 0;
950
+ trio_rows[0][0] = trio_rows[0][1] = trio_rows[0][2] = -1;
951
+ trio_rows[1][0] = trio_rows[1][1] = trio_rows[1][2] = -1;
952
+ rng ^= (uint16_t)(hiscore * 251) ^ 0x1234;
953
+ ready_pause = 40; /* a "ready" breather */
954
+ draw_hud();
955
+ fx_start();
956
+ state = ST_PLAY;
957
+ spawn_piece(0);
958
+ if (versus) spawn_piece(1);
959
+ CTRL = 0x40;
960
+ }
961
+
138
962
  static void vblank_wait(void) {
139
- while (MSTAT & 0x80) { }
140
- while (!(MSTAT & 0x80)) { }
963
+ while (MSTAT & 0x80) { } /* leave the current vblank */
964
+ while (!(MSTAT & 0x80)) { } /* catch the next one starting */
965
+ }
966
+
967
+ /* ── HARDWARE IDIOM (load-bearing) — emit ONE well's STATIC part: per scanline
968
+ * a SINGLE wide canvas object (the row image, frame baked in — see the WELL
969
+ * CANVAS note). The 4px-wide frame columns sit just outside the 48px cell area, so
970
+ * the wide object is placed at well_x - 4 to keep cell column c at exactly
971
+ * well_x + c*8 (where the collision math expects it). Called only on a board
972
+ * change (build_wells), never per frame. */
973
+ static void build_well(uint8_t p) {
974
+ uint8_t r;
975
+ uint8_t ox = (uint8_t)(well_x[p] - 4);
976
+ for (r = 0; r < GRID_H; ++r)
977
+ emit_well_row((uint8_t)(WELL_LINE0 + (uint16_t)r * CELL_PX),
978
+ well_canvas[p] + (uint16_t)r * CANVAS_ROW_BYTES, ox);
979
+ }
980
+
981
+ /* Rebuild both wells' static DL entries + snapshot the per-line base length.
982
+ * Call after any board change (start, lock, clear, garbage). DMA stays on —
983
+ * we only rewrite bytes INSIDE existing slots, never the DLL zones. */
984
+ static void build_wells(void) {
985
+ wells_open();
986
+ build_well(0);
987
+ if (two_p) build_well(1);
988
+ terminate_all();
989
+ }
990
+
991
+ /* ── HARDWARE IDIOM (load-bearing) — the FALLING TRIO is drawn by OVERLAYING
992
+ * its cells into the standing well canvas, NOT as extra DL objects. Why: in 2P
993
+ * every well-scanline already carries TWO wide row objects (one per well, 5
994
+ * bytes each = 10 of the 14-byte slot); a separate 4-byte trio object would be
995
+ * 14 bytes with no room for the line terminator, spilling into the next line's
996
+ * slot and walking MARIA into garbage (the off-by-one that blanks the screen).
997
+ * Overlaying the trio into the canvas keeps it to ONE object per line, and —
998
+ * because build_wells already pointed the DL at the canvas — costs only a few
999
+ * canvas-byte writes, no DL rewrite. The previous frame's overlay is wiped by
1000
+ * recompositing the touched rows from the board (clear_trio_overlay).
+ *
+ * clear_trio_overlay(p): trio_rows[p][i] holds the board row that overlay_trio
+ * last painted for trio cell i, or -1 when none was painted. Every recorded
+ * row is handed to composite_row once and its entry is reset to -1, so a
+ * second call without an intervening overlay_trio does nothing. */
1001
+ static void clear_trio_overlay(uint8_t p) {
1002
+ uint8_t i;
1003
+ for (i = 0; i < 3; ++i) /* the trio has three cells */
1004
+ if (trio_rows[p][i] >= 0) { composite_row(p, (uint8_t)trio_rows[p][i]); trio_rows[p][i] = -1; } /* restore the row, then mark it clean */
1005
+ }
1006
+
1007
+ /* Paint player p's falling trio into that player's well canvas: three cells
+ * stacked at rows piece_y[p] .. piece_y[p]+2 in column piece_x[p]. Rows outside
+ * 0 .. GRID_H-1 are skipped. trio_rows[p][i] records the row painted for cell i
+ * (-1 when skipped) so clear_trio_overlay can restore exactly those rows.
+ * NOTE(review): piece_x[p] is not range-checked here — presumably the caller
+ * keeps it inside the well; confirm. */
+ static void overlay_trio(uint8_t p) {
1008
+ uint8_t i, col, v;
1009
+ int8_t cy; /* signed: the row may be negative */
1010
+ for (i = 0; i < 3; ++i) {
1011
+ cy = (int8_t)(piece_y[p] + i); /* board row of trio cell i */
1012
+ trio_rows[p][i] = -1; /* default: nothing painted for this cell */
1013
+ if (cy >= 0 && cy < GRID_H) {
1014
+ uint8_t* dst = well_canvas[p] + (uint16_t)cy * CANVAS_ROW_BYTES; /* start of row cy in the canvas */
1015
+ col = piece_col[p][i];
1016
+ v = (uint8_t)(col * 0x55); /* 0x55 = 01010101b: repeats a 2-bit value in all four 2-bit positions; assumes col <= 3 — TODO confirm */
1017
+ dst[1 + (uint16_t)piece_x[p] * 2] = v; /* +1 skips left frame col */
1018
+ dst[1 + (uint16_t)piece_x[p] * 2 + 1] = v; /* each cell spans two canvas bytes */
1019
+ trio_rows[p][i] = cy; /* remember the row for clear_trio_overlay */
1020
+ }
1021
+ }
141
1022
  }
142
1023
 
143
1024
  void main(void) { /* entry point: one-time display setup, then the endless per-frame loop */
144
- uint16_t dll_addr;
145
-
146
- set_dl_addr(dl_row0, block_row0);
147
- set_dl_addr(dl_row1, block_row1);
148
- set_dl_addr(dl_row2, block_row2);
149
- set_dl_addr(dl_row3, block_row3);
150
- set_dl_addr(dl_row4, block_row4);
151
- set_dl_addr(dl_row5, block_row5);
152
- set_dl_addr(dl_row6, block_row6);
153
- set_dl_addr(dl_row7, block_row7);
154
-
155
- piece_x_col = COLS / 2;
156
- piece_y = TOP_Y;
157
- color_cycle = 0;
158
- set_well_addr();
159
- set_x((uint8_t)(60 + piece_x_col * CELL_W_PIX));
160
- build_dll(piece_y);
161
-
162
- BACKGRND = 0x00; /* black surround */
163
- P0C1 = 0x46; /* falling piece (red) */
164
- P0C2 = 0x46;
165
- P0C3 = 0x46;
166
- P1C1 = 0x02; /* well interior (dark blue-grey) */
167
- P2C1 = 0x08; /* well walls (steel) */
1025
+ uint8_t i; /* loop counter for the DLL build */
1026
+ uint16_t a; /* scratch 16-bit address, split into low/high bytes below */
1027
+
1028
+ /* ── HARDWARE IDIOM (load-bearing) — boot order: build EVERYTHING the DLL
1029
+ * will reference, then point DPP at it, THEN enable DMA. Enabling DMA over
1030
+ * a half-built DLL is the 7800 black-screen classic. ── */
1031
+
1032
+ /* (pool split is resolved on demand by line_dl(); see its comment.) */
1033
+
1034
+ /* Patch the ROM band drawables' data pointers (SOLID8). */
1035
+ a = (uint16_t)(uintptr_t)SOLID8;
1036
+ dl_band_a[0] = dl_band_a[5] = (uint8_t)(a & 0xFF); /* address low byte -> bytes 0 and 5 */
1037
+ dl_band_a[2] = dl_band_a[7] = (uint8_t)(a >> 8); /* address high byte -> bytes 2 and 7 */
1038
+ dl_band_b[0] = dl_band_b[5] = (uint8_t)(a & 0xFF); /* same patch for band b */
1039
+ dl_band_b[2] = dl_band_b[7] = (uint8_t)(a >> 8);
1040
+ dl_base[0] = dl_base[5] = (uint8_t)(a & 0xFF); /* same patch for the floor band */
1041
+ dl_base[2] = dl_base[7] = (uint8_t)(a >> 8);
1042
+
1043
+ canvas_dls(hud_dls, hud_canvas, 5); /* NOTE(review): helper defined elsewhere; presumably fills the HUD DLs from hud_canvas — confirm what the 5 means */
1044
+
1045
+ /* The DLL — the screen layout, built once (see the layout table above).
1046
+ * 143 entries, mixed zone heights; only the 120 well-area entries are ever
1047
+ * repointed after this. */
1048
+ dllp = dll; /* dll_zone presumably appends at dllp — confirm */
1049
+ dll_zone(16, (uint16_t)(uintptr_t)dl_empty); /* lines 0-15 */
1050
+ for (i = 0; i < 8; ++i) /* HUD 16-23 */
1051
+ dll_zone(1, (uint16_t)(uintptr_t)(hud_dls + i * 7)); /* HUD display lists are spaced 7 bytes apart */
1052
+ dll_zone(2, (uint16_t)(uintptr_t)dl_band_a); /* divider */
1053
+ for (i = 0; i < FIELD_LINES; ++i) /* wells 26-145 */
1054
+ dll_zone(1, (uint16_t)(uintptr_t)line_dl(i)); /* one 1-line zone per field line */
1055
+ dll_zone(2, (uint16_t)(uintptr_t)dl_base); /* floor band */
1056
+ /* Below-floor decor stripes — also our anti-blank-screen ballast: with DMA
1057
+ * fetching only objects, everything else is the single flat BACKGRND
1058
+ * colour, and a mostly-one-colour frame reads as "dead". */
1059
+ dll_zone(8, (uint16_t)(uintptr_t)dl_band_a); /* eleven 8-line zones follow, alternating band and empty */
1060
+ dll_zone(8, (uint16_t)(uintptr_t)dl_empty);
1061
+ dll_zone(8, (uint16_t)(uintptr_t)dl_band_b);
1062
+ dll_zone(8, (uint16_t)(uintptr_t)dl_empty);
1063
+ dll_zone(8, (uint16_t)(uintptr_t)dl_band_a);
1064
+ dll_zone(8, (uint16_t)(uintptr_t)dl_empty);
1065
+ dll_zone(8, (uint16_t)(uintptr_t)dl_band_b);
1066
+ dll_zone(8, (uint16_t)(uintptr_t)dl_empty);
1067
+ dll_zone(8, (uint16_t)(uintptr_t)dl_band_a);
1068
+ dll_zone(8, (uint16_t)(uintptr_t)dl_empty);
1069
+ dll_zone(8, (uint16_t)(uintptr_t)dl_band_b); /* …through 235 */
1070
+ dll_zone(7, (uint16_t)(uintptr_t)dl_empty); /* 236-242 */
1071
+
1072
+ /* Palettes (Atari colour byte = hue<<4 | luminance). */
1073
+ BACKGRND = 0x00; /* cabinet black */
1074
+ P0C1 = 0x0F; /* title text white */
1075
+ P1C1 = 0x3A; /* trio colour 1 (red/gold) */
1076
+ P2C1 = 0xBA; /* trio colour 2 (green) */
1077
+ P3C1 = 0x9A; /* trio colour 3 (blue) */
1078
+ /* well palette: value 1 = red/gold, value 2 = green, value 3 = blue —
1079
+ * same hues as the trio so a locked cell matches the piece that placed it. */
1080
+ P4C1 = 0x36; P4C2 = 0xB6; P4C3 = 0x96;
1081
+ P5C1 = 0xC8; /* HUD green / frame / floor */
1082
+ P6C1 = 0x54; /* decor band deep purple */
1083
+ P7C1 = 0x58; /* decor band brighter purple */
168
1084
  CHARBASE = 0;
169
- OFFSET = 0;
1085
+ OFFSET = 0; /* must stay 0 (7800 standard) */
1086
+
1087
+ a = (uint16_t)(uintptr_t)dll; /* DLL address, written to DPPL/DPPH as low then high byte */
1088
+ DPPL = (uint8_t)(a & 0xFF);
1089
+ DPPH = (uint8_t)(a >> 8);
170
1090
 
171
- dll_addr = (uint16_t)(uintptr_t)dll;
172
- DPPL = (uint8_t)(dll_addr & 0xFF);
173
- DPPH = (uint8_t)(dll_addr >> 8);
174
- CTRL = 0x40;
175
1091
  sfx_init();
1092
+ hiscore = 0; /* in-session only — see header */
1093
+ paint_title(); /* …turns DMA on */
176
1094
 
177
1095
  for (;;) {
178
- uint8_t pad, btn;
1096
+ uint8_t pad, f1, f2; /* this frame's stick bits and the two fire flags */
179
1097
  vblank_wait();
180
1098
  sfx_update();
1099
+ music_tick();
181
1100
 
182
- pad = ~SWCHA;
183
- if (pad & JOY_LEFT && piece_x_col > 0) { piece_x_col--; set_x((uint8_t)(60 + piece_x_col * CELL_W_PIX)); }
184
- if (pad & JOY_RIGHT && piece_x_col < COLS - 1) { piece_x_col++; set_x((uint8_t)(60 + piece_x_col * CELL_W_PIX)); }
185
-
186
- btn = (INPT4 & 0x80) ? 0 : 1;
187
- if (btn && !prev_btn) { fall_timer = 18; sfx_tone(0, 4, 4); }
188
- prev_btn = btn;
189
-
190
- fall_timer++;
191
- if (fall_timer >= 18) { /* was 30 — 'moving down very slowly' */
192
- fall_timer = 0;
193
- piece_y++;
194
- if (piece_y >= BOT_Y) {
195
- piece_y = TOP_Y;
196
- color_cycle = (uint8_t)((color_cycle + 1) % 3);
197
- P0C1 = P0C2 = P0C3 = (color_cycle == 0) ? 0x46 : (color_cycle == 1) ? 0xC8 : 0x96;
198
- }
199
- build_dll(piece_y);
1101
+ pad = (uint8_t)~SWCHA; /* bitwise inverse of SWCHA; the same byte is passed to both players */
1102
+ f1 = (uint8_t)(!(INPT4 & 0x80)); /* 1 when INPT4 bit 7 is clear */
1103
+ f2 = (uint8_t)(!(INPT5 & 0x80)); /* 1 when INPT5 bit 7 is clear */
1104
+
1105
+ if (state == ST_TITLE) {
1106
+ /* ── GAME LOGIC (clay) title: P1 fire = 1P, P2 fire = 2P versus ── */
1107
+ if (f1 && !prev_fire[0]) start_game(0); /* fresh press only: flag set now, clear last frame */
1108
+ else if (f2 && !prev_fire[1]) start_game(1);
1109
+ prev_fire[0] = f1; prev_fire[1] = f2; /* latch for next frame's edge detect */
1110
+ continue; /* nothing else runs on the title screen */
1111
+ }
1112
+
1113
+ if (state == ST_OVER) {
1114
+ if (over_lock) { --over_lock; prev_fire[0] = f1; prev_fire[1] = f2; continue; } /* while over_lock counts down, fire is latched but not acted on */
1115
+ if ((f1 || f2) && !prev_fire[0] && !prev_fire[1]) paint_title(); /* needs either fire now AND both fire flags clear last frame */
1116
+ prev_fire[0] = f1; prev_fire[1] = f2;
1117
+ continue;
200
1118
  }
1119
+
1120
+ /* ── ST_PLAY ───────────────────────────────────────────────────── */
1121
+ if (ready_pause) { /* ready breather, frozen */
1122
+ --ready_pause;
1123
+ prev_fire[0] = f1; prev_fire[1] = f2; /* keep the latch current so a held button is not a fresh press afterwards */
1124
+ } else {
1125
+ update_player(0, pad, f1); /* NOTE(review): prev_fire is not latched in this branch — presumably update_player does it; confirm */
1126
+ if (state == ST_PLAY && two_p && alive[1]) update_player(1, pad, f2); /* player 2 runs only if player 1's update left the game in play */
1127
+ if (state != ST_PLAY) continue; /* a lock/garbage ended the game */
1128
+ }
1129
+
1130
+ /* ── HARDWARE IDIOM (load-bearing) — the per-frame draw pass is now CHEAP
1131
+ * (see REBUILD-vs-PATCH + the trio-overlay note): the wells' DL entries are
1132
+ * already standing in the slots and point at the canvases, so per frame we
1133
+ * only WIPE last frame's trio (recomposite the touched rows from the board)
1134
+ * and OVERLAY this frame's trio into the canvas — a few dozen byte writes,
1135
+ * no DL traffic. The wells' DL entries are (re)built only on a board change
1136
+ * (a lock/clear/garbage flips dirty_wells via composite_all). ── */
1137
+ clear_trio_overlay(0); /* wipe first, so the rebuild and the new overlay start from board-only rows */
1138
+ if (two_p) clear_trio_overlay(1);
1139
+ if (dirty_wells) { build_wells(); dirty_wells = 0; } /* rebuild only when flagged, then clear the flag */
1140
+ overlay_trio(0);
1141
+ if (two_p) overlay_trio(1);
1142
+
1143
+ if (dirty) draw_hud(); /* NOTE(review): dirty is not cleared here — presumably draw_hud resets it; confirm */
201
1144
  }
202
1145
  }