web-a2e 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. package/.clangd +5 -0
  2. package/.mcp.json +12 -0
  3. package/CLAUDE.md +362 -0
  4. package/CMakeLists.txt +774 -0
  5. package/LICENSE +21 -0
  6. package/README.md +392 -0
  7. package/build-wasm/generated/roms.cpp +2447 -0
  8. package/docker-compose.staging.yml +9 -0
  9. package/docs/basic-rom-disassembly.md +6663 -0
  10. package/docs/softswitch-comparison.md +273 -0
  11. package/docs/thunderclock-debug.md +89 -0
  12. package/examples/cube.bas +72 -0
  13. package/examples/hello.s +55 -0
  14. package/examples/scroll.s +140 -0
  15. package/package.json +18 -0
  16. package/public/assets/apple-logo-old.png +0 -0
  17. package/public/assets/apple-logo.png +0 -0
  18. package/public/assets/drive-closed-light-on.png +0 -0
  19. package/public/assets/drive-closed.png +0 -0
  20. package/public/assets/drive-open-light-on.png +0 -0
  21. package/public/assets/drive-open.png +0 -0
  22. package/public/audio-worklet.js +82 -0
  23. package/public/disks/Apple DOS 3.3 January 1983.dsk +0 -0
  24. package/public/disks/ProDOS 2.4.3.po +0 -0
  25. package/public/disks/h32mb.2mg +0 -0
  26. package/public/disks/library.json +26 -0
  27. package/public/docs/llms/llm-assembler.txt +90 -0
  28. package/public/docs/llms/llm-basic-program.txt +256 -0
  29. package/public/docs/llms/llm-disk-drives.txt +72 -0
  30. package/public/docs/llms/llm-file-explorer.txt +50 -0
  31. package/public/docs/llms/llm-hard-drives.txt +80 -0
  32. package/public/docs/llms/llm-main.txt +51 -0
  33. package/public/docs/llms/llm-slot-configuration.txt +66 -0
  34. package/public/icons/icon-192.svg +4 -0
  35. package/public/icons/icon-512.svg +4 -0
  36. package/public/index.html +661 -0
  37. package/public/llms.txt +49 -0
  38. package/public/manifest.json +29 -0
  39. package/public/shaders/burnin.glsl +22 -0
  40. package/public/shaders/crt.glsl +706 -0
  41. package/public/shaders/edge.glsl +109 -0
  42. package/public/shaders/vertex.glsl +8 -0
  43. package/public/sw.js +186 -0
  44. package/roms/341-0027.bin +0 -0
  45. package/roms/341-0160-A-US-UK.bin +0 -0
  46. package/roms/341-0160-A.bin +0 -0
  47. package/roms/342-0273-A-US-UK.bin +0 -0
  48. package/roms/342-0349-B-C0-FF.bin +0 -0
  49. package/roms/Apple Mouse Interface Card ROM - 342-0270-C.bin +0 -0
  50. package/roms/Thunderclock Plus ROM.bin +0 -0
  51. package/scripts/generate_roms.sh +69 -0
  52. package/src/bindings/wasm_interface.cpp +1940 -0
  53. package/src/core/assembler/assembler.cpp +1239 -0
  54. package/src/core/assembler/assembler.hpp +115 -0
  55. package/src/core/audio/audio.cpp +160 -0
  56. package/src/core/audio/audio.hpp +81 -0
  57. package/src/core/basic/basic_detokenizer.cpp +436 -0
  58. package/src/core/basic/basic_detokenizer.hpp +41 -0
  59. package/src/core/basic/basic_tokenizer.cpp +286 -0
  60. package/src/core/basic/basic_tokenizer.hpp +26 -0
  61. package/src/core/basic/basic_tokens.hpp +295 -0
  62. package/src/core/cards/disk2_card.cpp +568 -0
  63. package/src/core/cards/disk2_card.hpp +316 -0
  64. package/src/core/cards/expansion_card.hpp +185 -0
  65. package/src/core/cards/mockingboard/ay8910.cpp +616 -0
  66. package/src/core/cards/mockingboard/ay8910.hpp +159 -0
  67. package/src/core/cards/mockingboard/via6522.cpp +530 -0
  68. package/src/core/cards/mockingboard/via6522.hpp +163 -0
  69. package/src/core/cards/mockingboard_card.cpp +312 -0
  70. package/src/core/cards/mockingboard_card.hpp +159 -0
  71. package/src/core/cards/mouse_card.cpp +654 -0
  72. package/src/core/cards/mouse_card.hpp +190 -0
  73. package/src/core/cards/smartport/block_device.cpp +202 -0
  74. package/src/core/cards/smartport/block_device.hpp +60 -0
  75. package/src/core/cards/smartport/smartport_card.cpp +603 -0
  76. package/src/core/cards/smartport/smartport_card.hpp +120 -0
  77. package/src/core/cards/thunderclock_card.cpp +237 -0
  78. package/src/core/cards/thunderclock_card.hpp +122 -0
  79. package/src/core/cpu/cpu6502.cpp +1609 -0
  80. package/src/core/cpu/cpu6502.hpp +203 -0
  81. package/src/core/debug/condition_evaluator.cpp +470 -0
  82. package/src/core/debug/condition_evaluator.hpp +87 -0
  83. package/src/core/disassembler/disassembler.cpp +552 -0
  84. package/src/core/disassembler/disassembler.hpp +171 -0
  85. package/src/core/disk-image/disk_image.hpp +267 -0
  86. package/src/core/disk-image/dsk_disk_image.cpp +827 -0
  87. package/src/core/disk-image/dsk_disk_image.hpp +204 -0
  88. package/src/core/disk-image/gcr_encoding.cpp +147 -0
  89. package/src/core/disk-image/gcr_encoding.hpp +78 -0
  90. package/src/core/disk-image/woz_disk_image.cpp +1049 -0
  91. package/src/core/disk-image/woz_disk_image.hpp +343 -0
  92. package/src/core/emulator.cpp +2126 -0
  93. package/src/core/emulator.hpp +434 -0
  94. package/src/core/filesystem/dos33.cpp +178 -0
  95. package/src/core/filesystem/dos33.hpp +66 -0
  96. package/src/core/filesystem/pascal.cpp +262 -0
  97. package/src/core/filesystem/pascal.hpp +87 -0
  98. package/src/core/filesystem/prodos.cpp +369 -0
  99. package/src/core/filesystem/prodos.hpp +119 -0
  100. package/src/core/input/keyboard.cpp +227 -0
  101. package/src/core/input/keyboard.hpp +111 -0
  102. package/src/core/mmu/mmu.cpp +1387 -0
  103. package/src/core/mmu/mmu.hpp +236 -0
  104. package/src/core/types.hpp +196 -0
  105. package/src/core/video/video.cpp +680 -0
  106. package/src/core/video/video.hpp +156 -0
  107. package/src/css/assembler-editor.css +1617 -0
  108. package/src/css/base.css +470 -0
  109. package/src/css/basic-debugger.css +791 -0
  110. package/src/css/basic-editor.css +792 -0
  111. package/src/css/controls.css +783 -0
  112. package/src/css/cpu-debugger.css +1413 -0
  113. package/src/css/debug-base.css +160 -0
  114. package/src/css/debug-windows.css +6455 -0
  115. package/src/css/disk-drives.css +406 -0
  116. package/src/css/documentation.css +392 -0
  117. package/src/css/file-explorer.css +867 -0
  118. package/src/css/hard-drive.css +180 -0
  119. package/src/css/layout.css +217 -0
  120. package/src/css/memory-windows.css +798 -0
  121. package/src/css/modals.css +510 -0
  122. package/src/css/monitor.css +425 -0
  123. package/src/css/release-notes.css +101 -0
  124. package/src/css/responsive.css +400 -0
  125. package/src/css/rule-builder.css +340 -0
  126. package/src/css/save-states.css +201 -0
  127. package/src/css/settings-windows.css +1231 -0
  128. package/src/css/window-switcher.css +150 -0
  129. package/src/js/agent/agent-manager.js +643 -0
  130. package/src/js/agent/agent-tools.js +293 -0
  131. package/src/js/agent/agent-version-tools.js +131 -0
  132. package/src/js/agent/assembler-tools.js +357 -0
  133. package/src/js/agent/basic-program-tools.js +894 -0
  134. package/src/js/agent/disk-tools.js +417 -0
  135. package/src/js/agent/file-explorer-tools.js +269 -0
  136. package/src/js/agent/index.js +13 -0
  137. package/src/js/agent/main-tools.js +222 -0
  138. package/src/js/agent/slot-tools.js +303 -0
  139. package/src/js/agent/smartport-tools.js +257 -0
  140. package/src/js/agent/window-tools.js +80 -0
  141. package/src/js/audio/audio-driver.js +417 -0
  142. package/src/js/audio/audio-worklet.js +85 -0
  143. package/src/js/audio/index.js +8 -0
  144. package/src/js/config/default-layout.js +34 -0
  145. package/src/js/config/version.js +8 -0
  146. package/src/js/data/apple2-rom-routines.js +577 -0
  147. package/src/js/debug/assembler-editor-window.js +2993 -0
  148. package/src/js/debug/basic-breakpoint-manager.js +529 -0
  149. package/src/js/debug/basic-program-parser.js +436 -0
  150. package/src/js/debug/basic-program-window.js +2594 -0
  151. package/src/js/debug/basic-variable-inspector.js +447 -0
  152. package/src/js/debug/breakpoint-manager.js +472 -0
  153. package/src/js/debug/cpu-debugger-window.js +2396 -0
  154. package/src/js/debug/index.js +22 -0
  155. package/src/js/debug/label-manager.js +238 -0
  156. package/src/js/debug/memory-browser-window.js +416 -0
  157. package/src/js/debug/memory-heat-map-window.js +481 -0
  158. package/src/js/debug/memory-map-window.js +206 -0
  159. package/src/js/debug/mockingboard-window.js +882 -0
  160. package/src/js/debug/mouse-card-window.js +355 -0
  161. package/src/js/debug/rule-builder-window.js +648 -0
  162. package/src/js/debug/soft-switch-window.js +458 -0
  163. package/src/js/debug/stack-viewer-window.js +221 -0
  164. package/src/js/debug/symbols.js +416 -0
  165. package/src/js/debug/trace-panel.js +291 -0
  166. package/src/js/debug/zero-page-watch-window.js +297 -0
  167. package/src/js/disk-manager/disk-drives-window.js +212 -0
  168. package/src/js/disk-manager/disk-operations.js +284 -0
  169. package/src/js/disk-manager/disk-persistence.js +301 -0
  170. package/src/js/disk-manager/disk-surface-renderer.js +388 -0
  171. package/src/js/disk-manager/drive-sounds.js +139 -0
  172. package/src/js/disk-manager/hard-drive-manager.js +481 -0
  173. package/src/js/disk-manager/hard-drive-persistence.js +187 -0
  174. package/src/js/disk-manager/hard-drive-window.js +57 -0
  175. package/src/js/disk-manager/index.js +890 -0
  176. package/src/js/display/display-settings-window.js +383 -0
  177. package/src/js/display/index.js +10 -0
  178. package/src/js/display/screen-window.js +342 -0
  179. package/src/js/display/webgl-renderer.js +705 -0
  180. package/src/js/file-explorer/disassembler.js +574 -0
  181. package/src/js/file-explorer/dos33.js +266 -0
  182. package/src/js/file-explorer/file-viewer.js +359 -0
  183. package/src/js/file-explorer/index.js +1261 -0
  184. package/src/js/file-explorer/prodos.js +549 -0
  185. package/src/js/file-explorer/utils.js +67 -0
  186. package/src/js/help/documentation-window.js +1096 -0
  187. package/src/js/help/index.js +10 -0
  188. package/src/js/help/release-notes-window.js +85 -0
  189. package/src/js/help/release-notes.js +612 -0
  190. package/src/js/input/gamepad-handler.js +176 -0
  191. package/src/js/input/index.js +12 -0
  192. package/src/js/input/input-handler.js +396 -0
  193. package/src/js/input/joystick-window.js +404 -0
  194. package/src/js/input/mouse-handler.js +99 -0
  195. package/src/js/input/text-selection.js +462 -0
  196. package/src/js/main.js +653 -0
  197. package/src/js/state/index.js +15 -0
  198. package/src/js/state/save-states-window.js +393 -0
  199. package/src/js/state/state-manager.js +409 -0
  200. package/src/js/state/state-persistence.js +218 -0
  201. package/src/js/ui/confirm.js +43 -0
  202. package/src/js/ui/disk-drive-positioner.js +347 -0
  203. package/src/js/ui/reminder-controller.js +129 -0
  204. package/src/js/ui/slot-configuration-window.js +560 -0
  205. package/src/js/ui/theme-manager.js +61 -0
  206. package/src/js/ui/toast.js +44 -0
  207. package/src/js/ui/ui-controller.js +897 -0
  208. package/src/js/ui/window-switcher.js +275 -0
  209. package/src/js/utils/basic-autocomplete.js +832 -0
  210. package/src/js/utils/basic-highlighting.js +473 -0
  211. package/src/js/utils/basic-tokenizer.js +153 -0
  212. package/src/js/utils/basic-tokens.js +117 -0
  213. package/src/js/utils/constants.js +28 -0
  214. package/src/js/utils/indexeddb-helper.js +225 -0
  215. package/src/js/utils/merlin-editor-support.js +905 -0
  216. package/src/js/utils/merlin-highlighting.js +551 -0
  217. package/src/js/utils/storage.js +125 -0
  218. package/src/js/utils/string-utils.js +19 -0
  219. package/src/js/utils/wasm-memory.js +54 -0
  220. package/src/js/windows/base-window.js +690 -0
  221. package/src/js/windows/index.js +9 -0
  222. package/src/js/windows/window-manager.js +375 -0
  223. package/tests/catch2/catch.hpp +17976 -0
  224. package/tests/common/basic_program_builder.cpp +119 -0
  225. package/tests/common/basic_program_builder.hpp +209 -0
  226. package/tests/common/disk_image_builder.cpp +444 -0
  227. package/tests/common/disk_image_builder.hpp +141 -0
  228. package/tests/common/test_helpers.hpp +118 -0
  229. package/tests/gcr/gcr-test.cpp +142 -0
  230. package/tests/integration/check-rom.js +70 -0
  231. package/tests/integration/compare-boot.js +239 -0
  232. package/tests/integration/crash-trace.js +102 -0
  233. package/tests/integration/disk-boot-test.js +264 -0
  234. package/tests/integration/memory-crash.js +108 -0
  235. package/tests/integration/nibble-read-test.js +249 -0
  236. package/tests/integration/phase-test.js +159 -0
  237. package/tests/integration/test_emulator.cpp +291 -0
  238. package/tests/integration/test_emulator_basic.cpp +91 -0
  239. package/tests/integration/test_emulator_debug.cpp +344 -0
  240. package/tests/integration/test_emulator_disk.cpp +153 -0
  241. package/tests/integration/test_emulator_state.cpp +163 -0
  242. package/tests/klaus/6502_functional_test.bin +0 -0
  243. package/tests/klaus/65C02_extended_opcodes_test.bin +0 -0
  244. package/tests/klaus/klaus_6502_test.cpp +184 -0
  245. package/tests/klaus/klaus_65c02_test.cpp +197 -0
  246. package/tests/thunderclock/thunderclock_mmu_test.cpp +304 -0
  247. package/tests/thunderclock/thunderclock_test.cpp +550 -0
  248. package/tests/unit/test_assembler.cpp +521 -0
  249. package/tests/unit/test_audio.cpp +196 -0
  250. package/tests/unit/test_ay8910.cpp +311 -0
  251. package/tests/unit/test_basic_detokenizer.cpp +265 -0
  252. package/tests/unit/test_basic_tokenizer.cpp +382 -0
  253. package/tests/unit/test_block_device.cpp +259 -0
  254. package/tests/unit/test_condition_evaluator.cpp +219 -0
  255. package/tests/unit/test_cpu6502.cpp +1301 -0
  256. package/tests/unit/test_cpu_addressing.cpp +361 -0
  257. package/tests/unit/test_cpu_cycle_counts.cpp +409 -0
  258. package/tests/unit/test_cpu_decimal.cpp +166 -0
  259. package/tests/unit/test_cpu_interrupts.cpp +285 -0
  260. package/tests/unit/test_disassembler.cpp +323 -0
  261. package/tests/unit/test_disk2_card.cpp +330 -0
  262. package/tests/unit/test_dos33.cpp +273 -0
  263. package/tests/unit/test_dsk_disk_image.cpp +315 -0
  264. package/tests/unit/test_expansion_card.cpp +178 -0
  265. package/tests/unit/test_gcr_encoding.cpp +232 -0
  266. package/tests/unit/test_keyboard.cpp +262 -0
  267. package/tests/unit/test_mmu.cpp +555 -0
  268. package/tests/unit/test_mmu_slots.cpp +323 -0
  269. package/tests/unit/test_mockingboard.cpp +352 -0
  270. package/tests/unit/test_mouse_card.cpp +386 -0
  271. package/tests/unit/test_pascal.cpp +248 -0
  272. package/tests/unit/test_prodos.cpp +259 -0
  273. package/tests/unit/test_smartport_card.cpp +321 -0
  274. package/tests/unit/test_thunderclock.cpp +354 -0
  275. package/tests/unit/test_via6522.cpp +323 -0
  276. package/tests/unit/test_video.cpp +319 -0
  277. package/tests/unit/test_woz_disk_image.cpp +257 -0
  278. package/vite.config.js +96 -0
  279. package/wiki/AI-Agent.md +372 -0
  280. package/wiki/Architecture-Overview.md +303 -0
  281. package/wiki/Audio-System.md +449 -0
  282. package/wiki/CPU-Emulation.md +477 -0
  283. package/wiki/Debugger.md +516 -0
  284. package/wiki/Disk-Drives.md +161 -0
  285. package/wiki/Disk-System-Internals.md +547 -0
  286. package/wiki/Display-Settings.md +88 -0
  287. package/wiki/Expansion-Slots.md +187 -0
  288. package/wiki/File-Explorer.md +259 -0
  289. package/wiki/Getting-Started.md +156 -0
  290. package/wiki/Home.md +69 -0
  291. package/wiki/Input-Devices.md +183 -0
  292. package/wiki/Keyboard-Shortcuts.md +158 -0
  293. package/wiki/Memory-System.md +364 -0
  294. package/wiki/Save-States.md +172 -0
  295. package/wiki/Video-Rendering.md +658 -0
@@ -0,0 +1,1239 @@
1
+ /*
2
+ * assembler.cpp - 65C02 multi-pass assembler
3
+ *
4
+ * Written by
5
+ * Mike Daley <michael_daley@icloud.com>
6
+ */
7
+
8
+ #include "assembler.hpp"
9
+ #include "../disassembler/disassembler.hpp"
10
+ #include <algorithm>
11
+ #include <cctype>
12
+ #include <cstdio>
13
+
14
+ namespace a2e {
15
+
16
// Directives the assembler recognises, spelled in uppercase.
// The array ends with a nullptr sentinel so it can be walked without a length.
static const char* DIRECTIVES[] = {
    "ORG",
    "EQU",
    "DS",
    "DFB",
    "DB",
    "DW",
    "DA",
    "DDB",
    "HEX",
    "ASC",
    "DCI",
    nullptr
};
21
+
22
// Merlin directives that are recognised by name but not implemented.
// Uppercase spellings, terminated by a nullptr sentinel.
static const char* UNSUPPORTED_DIRECTIVES[] = {
    "PUT",  "USE",  "MAC",  "EOM",
    "<<<",  "DO",   "ELSE", "FIN",
    "LUP",  "ELUP", "--^",  "OBJ",
    "LST",  "REL",  "TYP",  "SAV",
    "DSK",  "CHN",  "ENT",  "EXT",
    "DUM",  "DEND", "ERR",  "CYC",
    "DAT",  "EXP",  "PAU",  "SW",
    "USR",  "XC",   "MX",   "TR",
    "KBD",  "PMC",  "PAG",  "TTL",
    "SKP",  "CHK",  "IF",   "END",
    "ADR",  "ADRL", "LNK",  "STR",
    "STRL", "REV",
    nullptr
};
31
+
32
+ static bool isDirective(const std::string& s) {
33
+ for (int i = 0; DIRECTIVES[i]; i++) {
34
+ if (s == DIRECTIVES[i]) return true;
35
+ }
36
+ return false;
37
+ }
38
+
39
+ static bool isUnsupportedDirective(const std::string& s) {
40
+ for (int i = 0; UNSUPPORTED_DIRECTIVES[i]; i++) {
41
+ if (s == UNSUPPORTED_DIRECTIVES[i]) return true;
42
+ }
43
+ return false;
44
+ }
45
+
46
// Returns a copy of `s` with every character passed through toupper().
// Each byte is converted to unsigned char first: handing a negative char
// (any byte >= 0x80 where char is signed) to toupper() is undefined behaviour.
static std::string toUpper(const std::string& s) {
    std::string r = s;
    for (char& c : r) {
        c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
    }
    return r;
}
51
+
52
// Advances `p` past any run of spaces and tabs. Stops at the first other
// character, including the terminating NUL.
static void skipSpaces(const char*& p) {
    for (;;) {
        const char ch = *p;
        if (ch != ' ' && ch != '\t') {
            return;
        }
        ++p;
    }
}
55
+
56
// Returns true when `c` may appear inside a symbol name: an alphanumeric
// character or one of '_', '.', ':' and ']'.
// The byte is converted to unsigned char before isalnum(), because passing
// a negative char value to the <cctype> classifiers is undefined behaviour.
static bool isIdentChar(char c) {
    if (isalnum(static_cast<unsigned char>(c))) {
        return true;
    }
    return c == '_' || c == '.' || c == ':' || c == ']';
}
59
+
60
+ // ============================================================================
61
+ // Constructor
62
+ // ============================================================================
63
+
64
+ Assembler::Assembler() : reverseTableBuilt(false), pc(0x0800) {
65
+ memset(reverseOpcodes, 0xFF, sizeof(reverseOpcodes));
66
+ }
67
+
68
+ // ============================================================================
69
+ // Build reverse opcode table from disassembler's forward table
70
+ // ============================================================================
71
+
72
+ void Assembler::buildReverseOpcodeTable() {
73
+ if (reverseTableBuilt) return;
74
+
75
+ const OpcodeInfo* table = getOpcodeTable();
76
+ for (int i = 0; i < 256; i++) {
77
+ uint8_t mnem = table[i].mnemonicIndex;
78
+ uint8_t mode = table[i].mode;
79
+ if (mnem == 0) continue; // skip unknown opcodes
80
+ if (mnem < 99 && mode < 16) {
81
+ reverseOpcodes[mnem][mode] = static_cast<uint8_t>(i);
82
+ }
83
+ }
84
+ reverseTableBuilt = true;
85
+ }
86
+
87
+ // ============================================================================
88
+ // Find mnemonic index by name
89
+ // ============================================================================
90
+
91
+ int Assembler::findMnemonicIndex(const std::string& mnemonic) {
92
+ int count = getMnemonicCount();
93
+ for (int i = 1; i < count; i++) {
94
+ if (mnemonic == getMnemonicByIndex(i)) {
95
+ return i;
96
+ }
97
+ }
98
+ return -1;
99
+ }
100
+
101
+ // ============================================================================
102
+ // Branch detection helpers
103
+ // ============================================================================
104
+
105
+ bool Assembler::isBranchMnemonic(int mnemonicIndex) {
106
+ // Check if the opcode table has this mnemonic in REL mode
107
+ const OpcodeInfo* table = getOpcodeTable();
108
+ for (int i = 0; i < 256; i++) {
109
+ if (table[i].mnemonicIndex == mnemonicIndex &&
110
+ table[i].mode == static_cast<uint8_t>(AddrMode::REL)) {
111
+ return true;
112
+ }
113
+ }
114
+ return false;
115
+ }
116
+
117
+ bool Assembler::isZPRMnemonic(int mnemonicIndex) {
118
+ const OpcodeInfo* table = getOpcodeTable();
119
+ for (int i = 0; i < 256; i++) {
120
+ if (table[i].mnemonicIndex == mnemonicIndex &&
121
+ table[i].mode == static_cast<uint8_t>(AddrMode::ZPR)) {
122
+ return true;
123
+ }
124
+ }
125
+ return false;
126
+ }
127
+
128
+ // ============================================================================
129
+ // Parse source into lines
130
+ // ============================================================================
131
+
132
+ std::vector<Assembler::ParsedLine> Assembler::parseSource(const char* source) {
133
+ std::vector<ParsedLine> lines;
134
+ const char* p = source;
135
+ int lineNum = 1;
136
+
137
+ while (*p) {
138
+ // Find end of line
139
+ const char* lineStart = p;
140
+ while (*p && *p != '\n' && *p != '\r') p++;
141
+
142
+ std::string lineStr(lineStart, p - lineStart);
143
+
144
+ // Skip line endings
145
+ if (*p == '\r') p++;
146
+ if (*p == '\n') p++;
147
+
148
+ ParsedLine parsed = parseLine(lineStr.c_str(), lineNum);
149
+ if (!parsed.mnemonic.empty() || !parsed.label.empty()) {
150
+ lines.push_back(parsed);
151
+ }
152
+
153
+ lineNum++;
154
+ }
155
+
156
+ return lines;
157
+ }
158
+
159
+ // ============================================================================
160
+ // Parse a single line
161
+ // ============================================================================
162
+
163
+ Assembler::ParsedLine Assembler::parseLine(const char* line, int lineNumber) {
164
+ ParsedLine result;
165
+ result.lineNumber = lineNumber;
166
+
167
+ const char* p = line;
168
+
169
+ // Skip empty lines
170
+ skipSpaces(p);
171
+ if (!*p) return result;
172
+
173
+ // Full-line comments
174
+ if (*p == ';' || *p == '*') return result;
175
+
176
+ // Reset p to start of line for column detection
177
+ p = line;
178
+
179
+ // If line starts with non-whitespace, first token is a label
180
+ if (*p && *p != ' ' && *p != '\t' && *p != ';' && *p != '*') {
181
+ const char* start = p;
182
+ while (*p && !isspace(*p)) p++;
183
+ result.label = std::string(start, p - start);
184
+ // Strip optional trailing colon (non-Merlin convention but common)
185
+ if (!result.label.empty() && result.label.back() == ':') {
186
+ result.label.pop_back();
187
+ }
188
+ }
189
+
190
+ // Skip whitespace to opcode
191
+ skipSpaces(p);
192
+ if (!*p || *p == ';') return result;
193
+
194
+ // Extract opcode/mnemonic
195
+ const char* opcStart = p;
196
+ while (*p && !isspace(*p)) p++;
197
+ result.mnemonic = toUpper(std::string(opcStart, p - opcStart));
198
+
199
+ // Skip whitespace to operand
200
+ skipSpaces(p);
201
+ if (!*p || *p == ';') return result;
202
+
203
+ // Extract operand (up to comment or end of line)
204
+ // Respect string delimiters
205
+ const char* opStart = p;
206
+ bool inSingle = false, inDouble = false;
207
+ while (*p) {
208
+ if (*p == '\'' && !inDouble) inSingle = !inSingle;
209
+ else if (*p == '"' && !inSingle) inDouble = !inDouble;
210
+ else if (*p == ';' && !inSingle && !inDouble) break;
211
+ p++;
212
+ }
213
+
214
+ // Trim trailing whitespace from operand
215
+ const char* opEnd = p;
216
+ while (opEnd > opStart && isspace(*(opEnd - 1))) opEnd--;
217
+ result.operand = std::string(opStart, opEnd - opStart);
218
+
219
+ return result;
220
+ }
221
+
222
+ // ============================================================================
223
+ // Expression evaluator (recursive descent)
224
+ // ============================================================================
225
+
226
+ int32_t Assembler::evaluateExpression(const std::string& expr, bool& error,
227
+ std::string& errorMsg, int lineNumber) {
228
+ const char* p = expr.c_str();
229
+ skipSpaces(p);
230
+ int32_t val = evalAddSub(p, error, errorMsg, lineNumber);
231
+ return val;
232
+ }
233
+
234
+ int32_t Assembler::evalAddSub(const char*& p, bool& error,
235
+ std::string& errorMsg, int lineNumber) {
236
+ int32_t left = evalMulDiv(p, error, errorMsg, lineNumber);
237
+ if (error) return 0;
238
+
239
+ while (*p == '+' || *p == '-') {
240
+ char op = *p++;
241
+ skipSpaces(p);
242
+ int32_t right = evalMulDiv(p, error, errorMsg, lineNumber);
243
+ if (error) return 0;
244
+ if (op == '+') left += right;
245
+ else left -= right;
246
+ }
247
+ return left;
248
+ }
249
+
250
+ int32_t Assembler::evalMulDiv(const char*& p, bool& error,
251
+ std::string& errorMsg, int lineNumber) {
252
+ int32_t left = evalUnary(p, error, errorMsg, lineNumber);
253
+ if (error) return 0;
254
+
255
+ while (*p == '*' || *p == '/') {
256
+ // Peek: if * is followed by nothing meaningful (end/space/operator),
257
+ // it might be a standalone PC reference - stop here
258
+ char op = *p;
259
+ if (op == '*') {
260
+ // Check if this is multiplication or PC reference
261
+ // Multiplication only if preceded by a value and followed by a value
262
+ const char* next = p + 1;
263
+ skipSpaces(next);
264
+ if (!*next || *next == '+' || *next == '-' || *next == ')' ||
265
+ *next == ',' || *next == ';') {
266
+ break; // Not multiplication
267
+ }
268
+ }
269
+ p++;
270
+ skipSpaces(p);
271
+ int32_t right = evalUnary(p, error, errorMsg, lineNumber);
272
+ if (error) return 0;
273
+ if (op == '/') {
274
+ if (right == 0) {
275
+ error = true;
276
+ errorMsg = "Division by zero";
277
+ return 0;
278
+ }
279
+ left /= right;
280
+ } else {
281
+ left *= right;
282
+ }
283
+ }
284
+ return left;
285
+ }
286
+
287
+ int32_t Assembler::evalUnary(const char*& p, bool& error,
288
+ std::string& errorMsg, int lineNumber) {
289
+ skipSpaces(p);
290
+
291
+ // Low byte selector: <expr
292
+ if (*p == '<') {
293
+ p++;
294
+ skipSpaces(p);
295
+ int32_t val = evalUnary(p, error, errorMsg, lineNumber);
296
+ return val & 0xFF;
297
+ }
298
+
299
+ // High byte selector: >expr
300
+ if (*p == '>') {
301
+ p++;
302
+ skipSpaces(p);
303
+ int32_t val = evalUnary(p, error, errorMsg, lineNumber);
304
+ return (val >> 8) & 0xFF;
305
+ }
306
+
307
+ // Unary minus
308
+ if (*p == '-') {
309
+ p++;
310
+ skipSpaces(p);
311
+ int32_t val = evalUnary(p, error, errorMsg, lineNumber);
312
+ return -val;
313
+ }
314
+
315
+ return evalPrimary(p, error, errorMsg, lineNumber);
316
+ }
317
+
318
+ int32_t Assembler::evalPrimary(const char*& p, bool& error,
319
+ std::string& errorMsg, int lineNumber) {
320
+ skipSpaces(p);
321
+
322
+ // Parenthesized expression
323
+ if (*p == '(') {
324
+ p++;
325
+ skipSpaces(p);
326
+ int32_t val = evalAddSub(p, error, errorMsg, lineNumber);
327
+ if (error) return 0;
328
+ skipSpaces(p);
329
+ if (*p == ')') p++;
330
+ return val;
331
+ }
332
+
333
+ // Current PC reference: *
334
+ if (*p == '*') {
335
+ p++;
336
+ return static_cast<int32_t>(pc);
337
+ }
338
+
339
+ // Hex number: $xxxx
340
+ if (*p == '$') {
341
+ p++;
342
+ if (!isxdigit(*p)) {
343
+ error = true;
344
+ errorMsg = "Expected hex digit after $";
345
+ return 0;
346
+ }
347
+ int32_t val = 0;
348
+ while (isxdigit(*p)) {
349
+ val = val * 16;
350
+ if (*p >= '0' && *p <= '9') val += *p - '0';
351
+ else if (*p >= 'A' && *p <= 'F') val += *p - 'A' + 10;
352
+ else if (*p >= 'a' && *p <= 'f') val += *p - 'a' + 10;
353
+ p++;
354
+ }
355
+ return val;
356
+ }
357
+
358
+ // Binary number: %01010101
359
+ if (*p == '%') {
360
+ p++;
361
+ if (*p != '0' && *p != '1') {
362
+ error = true;
363
+ errorMsg = "Expected binary digit after %";
364
+ return 0;
365
+ }
366
+ int32_t val = 0;
367
+ while (*p == '0' || *p == '1') {
368
+ val = (val << 1) | (*p - '0');
369
+ p++;
370
+ }
371
+ return val;
372
+ }
373
+
374
+ // Character literal: 'A'
375
+ if (*p == '\'') {
376
+ p++;
377
+ if (!*p) {
378
+ error = true;
379
+ errorMsg = "Unterminated character literal";
380
+ return 0;
381
+ }
382
+ int32_t val = static_cast<uint8_t>(*p);
383
+ p++;
384
+ if (*p == '\'') p++; // skip closing quote
385
+ return val;
386
+ }
387
+
388
+ // Decimal number
389
+ if (isdigit(*p)) {
390
+ int32_t val = 0;
391
+ while (isdigit(*p)) {
392
+ val = val * 10 + (*p - '0');
393
+ p++;
394
+ }
395
+ return val;
396
+ }
397
+
398
+ // Symbol / label reference
399
+ if (isalpha(*p) || *p == '_' || *p == ':' || *p == ']') {
400
+ const char* start = p;
401
+ while (*p && isIdentChar(*p)) p++;
402
+ std::string name(start, p - start);
403
+ std::string upper = toUpper(name);
404
+
405
+ auto it = symbols.find(upper);
406
+ if (it != symbols.end()) {
407
+ return it->second;
408
+ }
409
+
410
+ // Undefined symbol
411
+ error = true;
412
+ errorMsg = "Undefined symbol: " + name;
413
+ return 0;
414
+ }
415
+
416
+ error = true;
417
+ errorMsg = "Unexpected character in expression";
418
+ return 0;
419
+ }
420
+
421
+ // ============================================================================
422
+ // Addressing mode detection
423
+ // ============================================================================
424
+
425
+ uint8_t Assembler::detectAddressingMode(const std::string& mnemonic,
426
+ const std::string& operand,
427
+ int32_t value, bool valueKnown) {
428
+ int mnemIdx = findMnemonicIndex(mnemonic);
429
+ if (mnemIdx < 0) return 0xFF;
430
+
431
+ // No operand
432
+ if (operand.empty()) {
433
+ // Check if IMP exists for this mnemonic
434
+ if (reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::IMP)] != 0xFF) {
435
+ return static_cast<uint8_t>(AddrMode::IMP);
436
+ }
437
+ // Some shift/rotate instructions use ACC with no explicit operand
438
+ if (reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ACC)] != 0xFF) {
439
+ return static_cast<uint8_t>(AddrMode::ACC);
440
+ }
441
+ return 0xFF;
442
+ }
443
+
444
+ std::string op = operand;
445
+
446
+ // Accumulator: "A" for shift/rotate instructions
447
+ if (op == "A" || op == "a") {
448
+ if (reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ACC)] != 0xFF) {
449
+ return static_cast<uint8_t>(AddrMode::ACC);
450
+ }
451
+ }
452
+
453
+ // Immediate: #expr
454
+ if (op[0] == '#') {
455
+ return static_cast<uint8_t>(AddrMode::IMM);
456
+ }
457
+
458
+ // Indirect modes: start with (
459
+ if (op[0] == '(') {
460
+ // (expr,X) - Indexed Indirect or Absolute Indexed Indirect
461
+ if (op.size() >= 4) {
462
+ std::string upper = toUpper(op);
463
+ // Check for (expr,X)
464
+ if (upper.back() == ')') {
465
+ size_t commaPos = upper.rfind(',');
466
+ if (commaPos != std::string::npos) {
467
+ std::string afterComma = upper.substr(commaPos + 1);
468
+ // Remove trailing )
469
+ afterComma.pop_back();
470
+ // Trim spaces
471
+ while (!afterComma.empty() && afterComma[0] == ' ') afterComma.erase(0, 1);
472
+ while (!afterComma.empty() && afterComma.back() == ' ') afterComma.pop_back();
473
+ if (afterComma == "X") {
474
+ // IZX (zero page) or AIX (absolute) based on value
475
+ if (valueKnown && value >= 0 && value <= 255 &&
476
+ reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::IZX)] != 0xFF) {
477
+ return static_cast<uint8_t>(AddrMode::IZX);
478
+ }
479
+ if (reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::AIX)] != 0xFF) {
480
+ return static_cast<uint8_t>(AddrMode::AIX);
481
+ }
482
+ return static_cast<uint8_t>(AddrMode::IZX);
483
+ }
484
+ }
485
+ }
486
+ // (expr),Y - Indirect Indexed
487
+ size_t closeParen = upper.find(')');
488
+ if (closeParen != std::string::npos && closeParen < upper.size() - 1) {
489
+ std::string afterParen = upper.substr(closeParen + 1);
490
+ // Trim spaces
491
+ while (!afterParen.empty() && afterParen[0] == ' ') afterParen.erase(0, 1);
492
+ if (afterParen.size() >= 2 && afterParen[0] == ',' &&
493
+ (afterParen[1] == 'Y' || afterParen[1] == 'y' ||
494
+ (afterParen.size() >= 3 && afterParen[2] == 'Y'))) {
495
+ return static_cast<uint8_t>(AddrMode::IZY);
496
+ }
497
+ }
498
+ }
499
+ // (expr) - Indirect or Zero Page Indirect
500
+ if (op.back() == ')') {
501
+ if (valueKnown && value >= 0 && value <= 255 &&
502
+ reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ZPI)] != 0xFF) {
503
+ return static_cast<uint8_t>(AddrMode::ZPI);
504
+ }
505
+ return static_cast<uint8_t>(AddrMode::IND);
506
+ }
507
+ }
508
+
509
+ // ZPR mode: zp,target (for BBR/BBS)
510
+ if (isZPRMnemonic(mnemIdx)) {
511
+ return static_cast<uint8_t>(AddrMode::ZPR);
512
+ }
513
+
514
+ // Branch instructions: REL mode
515
+ if (isBranchMnemonic(mnemIdx)) {
516
+ return static_cast<uint8_t>(AddrMode::REL);
517
+ }
518
+
519
+ // Check for ,X or ,Y suffix
520
+ {
521
+ std::string upper = toUpper(op);
522
+ size_t commaPos = upper.rfind(',');
523
+ if (commaPos != std::string::npos) {
524
+ std::string suffix = upper.substr(commaPos + 1);
525
+ while (!suffix.empty() && suffix[0] == ' ') suffix.erase(0, 1);
526
+
527
+ if (suffix == "X") {
528
+ if (valueKnown && value >= 0 && value <= 255 &&
529
+ reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ZPX)] != 0xFF) {
530
+ return static_cast<uint8_t>(AddrMode::ZPX);
531
+ }
532
+ return static_cast<uint8_t>(AddrMode::ABX);
533
+ }
534
+ if (suffix == "Y") {
535
+ if (valueKnown && value >= 0 && value <= 255 &&
536
+ reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ZPY)] != 0xFF) {
537
+ return static_cast<uint8_t>(AddrMode::ZPY);
538
+ }
539
+ return static_cast<uint8_t>(AddrMode::ABY);
540
+ }
541
+ }
542
+ }
543
+
544
+ // Plain operand: ZP or ABS based on value
545
+ if (valueKnown && value >= 0 && value <= 255 &&
546
+ reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ZP)] != 0xFF) {
547
+ return static_cast<uint8_t>(AddrMode::ZP);
548
+ }
549
+ return static_cast<uint8_t>(AddrMode::ABS);
550
+ }
551
+
552
+ // ============================================================================
553
+ // Instruction sizing (for pass 1)
554
+ // ============================================================================
555
+
556
+ int Assembler::getInstructionSize(const std::string& mnemonic,
557
+ const std::string& operand,
558
+ bool labelsComplete) {
559
+ if (operand.empty()) {
560
+ // IMP or ACC = 1 byte
561
+ return 1;
562
+ }
563
+
564
+ // Immediate
565
+ if (operand[0] == '#') return 2;
566
+
567
+ int mnemIdx = findMnemonicIndex(mnemonic);
568
+ if (mnemIdx < 0) return 0;
569
+
570
+ // Branch
571
+ if (isBranchMnemonic(mnemIdx)) return 2;
572
+
573
+ // ZPR (BBR/BBS)
574
+ if (isZPRMnemonic(mnemIdx)) return 3;
575
+
576
+ // Accumulator
577
+ if (toUpper(operand) == "A" &&
578
+ reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ACC)] != 0xFF) {
579
+ return 1;
580
+ }
581
+
582
+ // Try to evaluate to determine ZP vs ABS
583
+ if (labelsComplete) {
584
+ bool error = false;
585
+ std::string errorMsg;
586
+
587
+ // Extract base expression (before ,X or ,Y)
588
+ std::string exprStr = operand;
589
+ bool hasIndex = false;
590
+ {
591
+ std::string upper = toUpper(operand);
592
+ size_t commaPos = upper.rfind(',');
593
+ if (commaPos != std::string::npos) {
594
+ std::string suffix = upper.substr(commaPos + 1);
595
+ while (!suffix.empty() && suffix[0] == ' ') suffix.erase(0, 1);
596
+ if (suffix == "X" || suffix == "Y") {
597
+ exprStr = operand.substr(0, commaPos);
598
+ hasIndex = true;
599
+ }
600
+ }
601
+ }
602
+
603
+ // Strip parentheses for indirect modes
604
+ std::string evalStr = exprStr;
605
+ if (!evalStr.empty() && evalStr[0] == '(') {
606
+ evalStr = evalStr.substr(1);
607
+ if (!evalStr.empty() && evalStr.back() == ')') evalStr.pop_back();
608
+ }
609
+
610
+ int32_t val = evaluateExpression(evalStr, error, errorMsg, 0);
611
+ if (!error) {
612
+ uint8_t mode = detectAddressingMode(mnemonic, operand, val, true);
613
+ if (mode != 0xFF) {
614
+ switch (static_cast<AddrMode>(mode)) {
615
+ case AddrMode::IMP:
616
+ case AddrMode::ACC:
617
+ return 1;
618
+ case AddrMode::IMM:
619
+ case AddrMode::ZP:
620
+ case AddrMode::ZPX:
621
+ case AddrMode::ZPY:
622
+ case AddrMode::IZX:
623
+ case AddrMode::IZY:
624
+ case AddrMode::ZPI:
625
+ case AddrMode::REL:
626
+ return 2;
627
+ case AddrMode::ABS:
628
+ case AddrMode::ABX:
629
+ case AddrMode::ABY:
630
+ case AddrMode::IND:
631
+ case AddrMode::AIX:
632
+ case AddrMode::ZPR:
633
+ return 3;
634
+ }
635
+ }
636
+ }
637
+ }
638
+
639
+ // Default: assume ABS (3 bytes) for forward references
640
+ return 3;
641
+ }
642
+
643
+ // ============================================================================
644
+ // Directive sizing
645
+ // ============================================================================
646
+
647
+ int Assembler::getDirectiveSize(const std::string& directive,
648
+ const std::string& operand,
649
+ bool& error, std::string& errorMsg,
650
+ int lineNumber) {
651
+ if (directive == "ORG" || directive == "EQU") return 0;
652
+
653
+ if (directive == "DS") {
654
+ int32_t val = evaluateExpression(operand, error, errorMsg, lineNumber);
655
+ if (error) return 0;
656
+ return static_cast<int>(val);
657
+ }
658
+
659
+ if (directive == "DFB" || directive == "DB") {
660
+ // Count comma-separated values
661
+ int count = 1;
662
+ bool inStr = false;
663
+ for (char c : operand) {
664
+ if (c == '"' || c == '\'') inStr = !inStr;
665
+ if (c == ',' && !inStr) count++;
666
+ }
667
+ return count;
668
+ }
669
+
670
+ if (directive == "DW" || directive == "DA") {
671
+ int count = 1;
672
+ for (char c : operand) {
673
+ if (c == ',') count++;
674
+ }
675
+ return count * 2;
676
+ }
677
+
678
+ if (directive == "DDB") {
679
+ int count = 1;
680
+ for (char c : operand) {
681
+ if (c == ',') count++;
682
+ }
683
+ return count * 2;
684
+ }
685
+
686
+ if (directive == "HEX") {
687
+ // Count hex digit pairs (ignore spaces)
688
+ int digits = 0;
689
+ for (char c : operand) {
690
+ if (isxdigit(c)) digits++;
691
+ }
692
+ return digits / 2;
693
+ }
694
+
695
+ if (directive == "ASC") {
696
+ // Count characters between delimiters
697
+ if (operand.size() >= 2) {
698
+ char delim = operand[0];
699
+ size_t end = operand.find(delim, 1);
700
+ if (end != std::string::npos) {
701
+ return static_cast<int>(end - 1);
702
+ }
703
+ }
704
+ return static_cast<int>(operand.size());
705
+ }
706
+
707
+ if (directive == "DCI") {
708
+ if (operand.size() >= 2) {
709
+ char delim = operand[0];
710
+ size_t end = operand.find(delim, 1);
711
+ if (end != std::string::npos) {
712
+ return static_cast<int>(end - 1);
713
+ }
714
+ }
715
+ return static_cast<int>(operand.size());
716
+ }
717
+
718
+ return 0;
719
+ }
720
+
721
+ // ============================================================================
722
+ // Directive emission
723
+ // ============================================================================
724
+
725
+ void Assembler::emitDirective(const std::string& directive,
726
+ const std::string& operand,
727
+ std::vector<uint8_t>& output,
728
+ bool& error, std::string& errorMsg,
729
+ int lineNumber) {
730
+ if (directive == "ORG" || directive == "EQU") return;
731
+
732
+ if (directive == "DS") {
733
+ int32_t val = evaluateExpression(operand, error, errorMsg, lineNumber);
734
+ if (error) return;
735
+ for (int32_t i = 0; i < val; i++) {
736
+ output.push_back(0);
737
+ }
738
+ return;
739
+ }
740
+
741
+ if (directive == "DFB" || directive == "DB") {
742
+ // Parse comma-separated byte values
743
+ const char* p = operand.c_str();
744
+ while (*p) {
745
+ skipSpaces(p);
746
+ if (!*p) break;
747
+
748
+ // Find end of this value (next comma or end)
749
+ const char* start = p;
750
+ int depth = 0;
751
+ while (*p && (*p != ',' || depth > 0)) {
752
+ if (*p == '(') depth++;
753
+ if (*p == ')') depth--;
754
+ p++;
755
+ }
756
+ std::string val(start, p - start);
757
+ // Trim
758
+ while (!val.empty() && val.back() == ' ') val.pop_back();
759
+
760
+ int32_t v = evaluateExpression(val, error, errorMsg, lineNumber);
761
+ if (error) return;
762
+ output.push_back(static_cast<uint8_t>(v & 0xFF));
763
+
764
+ if (*p == ',') p++;
765
+ }
766
+ return;
767
+ }
768
+
769
+ if (directive == "DW" || directive == "DA") {
770
+ const char* p = operand.c_str();
771
+ while (*p) {
772
+ skipSpaces(p);
773
+ if (!*p) break;
774
+
775
+ const char* start = p;
776
+ int depth = 0;
777
+ while (*p && (*p != ',' || depth > 0)) {
778
+ if (*p == '(') depth++;
779
+ if (*p == ')') depth--;
780
+ p++;
781
+ }
782
+ std::string val(start, p - start);
783
+ while (!val.empty() && val.back() == ' ') val.pop_back();
784
+
785
+ int32_t v = evaluateExpression(val, error, errorMsg, lineNumber);
786
+ if (error) return;
787
+ output.push_back(static_cast<uint8_t>(v & 0xFF));
788
+ output.push_back(static_cast<uint8_t>((v >> 8) & 0xFF));
789
+
790
+ if (*p == ',') p++;
791
+ }
792
+ return;
793
+ }
794
+
795
+ if (directive == "DDB") {
796
+ const char* p = operand.c_str();
797
+ while (*p) {
798
+ skipSpaces(p);
799
+ if (!*p) break;
800
+
801
+ const char* start = p;
802
+ int depth = 0;
803
+ while (*p && (*p != ',' || depth > 0)) {
804
+ if (*p == '(') depth++;
805
+ if (*p == ')') depth--;
806
+ p++;
807
+ }
808
+ std::string val(start, p - start);
809
+ while (!val.empty() && val.back() == ' ') val.pop_back();
810
+
811
+ int32_t v = evaluateExpression(val, error, errorMsg, lineNumber);
812
+ if (error) return;
813
+ // Big-endian
814
+ output.push_back(static_cast<uint8_t>((v >> 8) & 0xFF));
815
+ output.push_back(static_cast<uint8_t>(v & 0xFF));
816
+
817
+ if (*p == ',') p++;
818
+ }
819
+ return;
820
+ }
821
+
822
+ if (directive == "HEX") {
823
+ const char* p = operand.c_str();
824
+ while (*p) {
825
+ if (isspace(*p) || *p == ',') { p++; continue; }
826
+ if (!isxdigit(*p)) {
827
+ error = true;
828
+ errorMsg = "Invalid hex digit";
829
+ return;
830
+ }
831
+ char hi = *p++;
832
+ if (!isxdigit(*p)) {
833
+ error = true;
834
+ errorMsg = "Odd number of hex digits";
835
+ return;
836
+ }
837
+ char lo = *p++;
838
+ auto hexVal = [](char c) -> uint8_t {
839
+ if (c >= '0' && c <= '9') return c - '0';
840
+ if (c >= 'A' && c <= 'F') return c - 'A' + 10;
841
+ if (c >= 'a' && c <= 'f') return c - 'a' + 10;
842
+ return 0;
843
+ };
844
+ output.push_back((hexVal(hi) << 4) | hexVal(lo));
845
+ }
846
+ return;
847
+ }
848
+
849
+ if (directive == "ASC") {
850
+ if (operand.size() < 2) return;
851
+ char delim = operand[0];
852
+ bool highBit = (delim == '"'); // Merlin convention: " sets high bit
853
+ for (size_t i = 1; i < operand.size(); i++) {
854
+ if (operand[i] == delim) break;
855
+ uint8_t ch = static_cast<uint8_t>(operand[i]);
856
+ if (highBit) ch |= 0x80;
857
+ output.push_back(ch);
858
+ }
859
+ return;
860
+ }
861
+
862
+ if (directive == "DCI") {
863
+ if (operand.size() < 2) return;
864
+ char delim = operand[0];
865
+ // Collect characters
866
+ std::vector<uint8_t> chars;
867
+ for (size_t i = 1; i < operand.size(); i++) {
868
+ if (operand[i] == delim) break;
869
+ chars.push_back(static_cast<uint8_t>(operand[i]));
870
+ }
871
+ // Emit all but last with normal, last with high bit set
872
+ for (size_t i = 0; i < chars.size(); i++) {
873
+ uint8_t ch = chars[i];
874
+ if (i == chars.size() - 1) ch |= 0x80;
875
+ output.push_back(ch);
876
+ }
877
+ return;
878
+ }
879
+ }
880
+
881
+ // ============================================================================
882
+ // Main assemble function
883
+ // ============================================================================
884
+
885
+ AsmResult Assembler::assemble(const char* source) {
886
+ AsmResult result;
887
+ result.origin = 0x0800;
888
+ result.endAddress = 0x0800;
889
+ result.success = false;
890
+
891
+ buildReverseOpcodeTable();
892
+ symbols.clear();
893
+
894
+ // Parse source
895
+ auto lines = parseSource(source);
896
+ if (lines.empty()) {
897
+ result.success = true;
898
+ return result;
899
+ }
900
+
901
+ auto addError = [&](int lineNum, const std::string& msg) {
902
+ AsmError err;
903
+ err.lineNumber = lineNum;
904
+ strncpy(err.message, msg.c_str(), ASM_MAX_ERROR_MSG - 1);
905
+ err.message[ASM_MAX_ERROR_MSG - 1] = '\0';
906
+ result.errors.push_back(err);
907
+ };
908
+
909
+ // ========================================================================
910
+ // Pass 1: Collect labels and compute sizes
911
+ // ========================================================================
912
+
913
+ pc = 0x0800;
914
+ result.origin = pc;
915
+
916
+ for (auto& line : lines) {
917
+ std::string mnem = line.mnemonic;
918
+
919
+ // Handle ORG directive
920
+ if (mnem == "ORG") {
921
+ bool error = false;
922
+ std::string errorMsg;
923
+ int32_t val = evaluateExpression(line.operand, error, errorMsg,
924
+ line.lineNumber);
925
+ if (error) {
926
+ addError(line.lineNumber, "ORG: " + errorMsg);
927
+ continue;
928
+ }
929
+ pc = static_cast<uint16_t>(val);
930
+ if (result.output.empty()) {
931
+ result.origin = pc;
932
+ }
933
+ continue;
934
+ }
935
+
936
+ // Record label address
937
+ if (!line.label.empty()) {
938
+ std::string labelUpper = toUpper(line.label);
939
+
940
+ // Handle EQU: label = value
941
+ if (mnem == "EQU") {
942
+ bool error = false;
943
+ std::string errorMsg;
944
+ int32_t val = evaluateExpression(line.operand, error, errorMsg,
945
+ line.lineNumber);
946
+ if (!error) {
947
+ symbols[labelUpper] = val;
948
+ }
949
+ // If error, will be caught in pass 2
950
+ continue;
951
+ }
952
+
953
+ symbols[labelUpper] = static_cast<int32_t>(pc);
954
+ }
955
+
956
+ if (mnem.empty()) continue;
957
+
958
+ // Handle unsupported directives
959
+ if (isUnsupportedDirective(mnem)) {
960
+ addError(line.lineNumber, "Unsupported directive: " + mnem);
961
+ continue;
962
+ }
963
+
964
+ // Directive sizing
965
+ if (isDirective(mnem)) {
966
+ bool error = false;
967
+ std::string errorMsg;
968
+ int size = getDirectiveSize(mnem, line.operand, error, errorMsg,
969
+ line.lineNumber);
970
+ if (error) {
971
+ // Ignore sizing errors in pass 1 (may have forward references)
972
+ size = 0;
973
+ }
974
+ pc += size;
975
+ continue;
976
+ }
977
+
978
+ // Instruction sizing
979
+ int mnemIdx = findMnemonicIndex(mnem);
980
+ if (mnemIdx < 0) {
981
+ addError(line.lineNumber, "Unknown mnemonic: " + mnem);
982
+ continue;
983
+ }
984
+
985
+ int size = getInstructionSize(mnem, line.operand, false);
986
+ if (size == 0) {
987
+ addError(line.lineNumber, "Invalid instruction: " + mnem);
988
+ continue;
989
+ }
990
+ pc += size;
991
+ }
992
+
993
+ // Track if we had pass 1 errors (still run pass 2 to find more errors)
994
+ bool hadPass1Errors = !result.errors.empty();
995
+
996
+ // ========================================================================
997
+ // Pass 2: Encode instructions (run even with errors to find all issues)
998
+ // ========================================================================
999
+
1000
+ pc = result.origin;
1001
+
1002
+ for (auto& line : lines) {
1003
+ std::string mnem = line.mnemonic;
1004
+
1005
+ // Handle ORG
1006
+ if (mnem == "ORG") {
1007
+ bool error = false;
1008
+ std::string errorMsg;
1009
+ int32_t val = evaluateExpression(line.operand, error, errorMsg,
1010
+ line.lineNumber);
1011
+ if (error) {
1012
+ addError(line.lineNumber, "ORG: " + errorMsg);
1013
+ continue;
1014
+ }
1015
+ pc = static_cast<uint16_t>(val);
1016
+ continue;
1017
+ }
1018
+
1019
+ // EQU already handled in pass 1
1020
+ if (mnem == "EQU") {
1021
+ // Re-evaluate to catch errors
1022
+ bool error = false;
1023
+ std::string errorMsg;
1024
+ int32_t val = evaluateExpression(line.operand, error, errorMsg,
1025
+ line.lineNumber);
1026
+ if (error) {
1027
+ addError(line.lineNumber, "EQU: " + errorMsg);
1028
+ } else {
1029
+ symbols[toUpper(line.label)] = val;
1030
+ }
1031
+ continue;
1032
+ }
1033
+
1034
+ if (mnem.empty()) continue;
1035
+
1036
+ // Skip unsupported directives (already errored in pass 1)
1037
+ if (isUnsupportedDirective(mnem)) continue;
1038
+
1039
+ // Handle directives
1040
+ if (isDirective(mnem)) {
1041
+ bool error = false;
1042
+ std::string errorMsg;
1043
+ emitDirective(mnem, line.operand, result.output, error, errorMsg,
1044
+ line.lineNumber);
1045
+ if (error) {
1046
+ addError(line.lineNumber, mnem + ": " + errorMsg);
1047
+ }
1048
+ // Advance PC by actual emitted bytes
1049
+ int size = getDirectiveSize(mnem, line.operand, error, errorMsg,
1050
+ line.lineNumber);
1051
+ pc += size;
1052
+ continue;
1053
+ }
1054
+
1055
+ // Instruction encoding
1056
+ int mnemIdx = findMnemonicIndex(mnem);
1057
+ if (mnemIdx < 0) continue; // Already errored in pass 1
1058
+
1059
+ // Evaluate operand expression
1060
+ std::string exprStr = line.operand;
1061
+ int32_t value = 0;
1062
+ bool valueKnown = false;
1063
+
1064
+ if (!exprStr.empty()) {
1065
+ // Handle ZPR mode (BBR/BBS): zp,target
1066
+ if (isZPRMnemonic(mnemIdx)) {
1067
+ // Split on comma to get zp and target
1068
+ size_t commaPos = exprStr.find(',');
1069
+ if (commaPos == std::string::npos) {
1070
+ addError(line.lineNumber, "ZPR instructions need zp,target operand");
1071
+ continue;
1072
+ }
1073
+ std::string zpStr = exprStr.substr(0, commaPos);
1074
+ std::string targetStr = exprStr.substr(commaPos + 1);
1075
+ // Trim
1076
+ while (!zpStr.empty() && zpStr.back() == ' ') zpStr.pop_back();
1077
+ while (!targetStr.empty() && targetStr[0] == ' ') targetStr.erase(0, 1);
1078
+
1079
+ bool zpError = false, targetError = false;
1080
+ std::string zpErrMsg, targetErrMsg;
1081
+ int32_t zpVal = evaluateExpression(zpStr, zpError, zpErrMsg,
1082
+ line.lineNumber);
1083
+ int32_t targetVal = evaluateExpression(targetStr, targetError,
1084
+ targetErrMsg, line.lineNumber);
1085
+ if (zpError) {
1086
+ addError(line.lineNumber, zpErrMsg);
1087
+ continue;
1088
+ }
1089
+ if (targetError) {
1090
+ addError(line.lineNumber, targetErrMsg);
1091
+ continue;
1092
+ }
1093
+
1094
+ uint8_t opcode = reverseOpcodes[mnemIdx][static_cast<int>(AddrMode::ZPR)];
1095
+ if (opcode == 0xFF) {
1096
+ addError(line.lineNumber, "Invalid mode for " + mnem);
1097
+ continue;
1098
+ }
1099
+
1100
+ // Calculate relative offset from PC+3 (instruction is 3 bytes)
1101
+ int32_t offset = targetVal - (pc + 3);
1102
+ if (offset < -128 || offset > 127) {
1103
+ addError(line.lineNumber, "Branch target out of range");
1104
+ continue;
1105
+ }
1106
+
1107
+ result.output.push_back(opcode);
1108
+ result.output.push_back(static_cast<uint8_t>(zpVal & 0xFF));
1109
+ result.output.push_back(static_cast<uint8_t>(offset & 0xFF));
1110
+ pc += 3;
1111
+ continue;
1112
+ }
1113
+
1114
+ // Strip index suffix for expression evaluation
1115
+ std::string evalStr = exprStr;
1116
+ {
1117
+ std::string upper = toUpper(exprStr);
1118
+ size_t commaPos = upper.rfind(',');
1119
+ if (commaPos != std::string::npos) {
1120
+ std::string suffix = upper.substr(commaPos + 1);
1121
+ while (!suffix.empty() && suffix[0] == ' ') suffix.erase(0, 1);
1122
+ if (suffix == "X" || suffix == "Y") {
1123
+ evalStr = exprStr.substr(0, commaPos);
1124
+ }
1125
+ }
1126
+ }
1127
+
1128
+ // Strip # prefix for immediate
1129
+ if (!evalStr.empty() && evalStr[0] == '#') {
1130
+ evalStr = evalStr.substr(1);
1131
+ }
1132
+
1133
+ // Strip parentheses for indirect
1134
+ if (!evalStr.empty() && evalStr[0] == '(') {
1135
+ evalStr = evalStr.substr(1);
1136
+ if (!evalStr.empty() && evalStr.back() == ')') evalStr.pop_back();
1137
+ }
1138
+
1139
+ // Trim
1140
+ while (!evalStr.empty() && evalStr[0] == ' ') evalStr.erase(0, 1);
1141
+ while (!evalStr.empty() && evalStr.back() == ' ') evalStr.pop_back();
1142
+
1143
+ bool error = false;
1144
+ std::string errorMsg;
1145
+ value = evaluateExpression(evalStr, error, errorMsg, line.lineNumber);
1146
+ if (error) {
1147
+ addError(line.lineNumber, errorMsg);
1148
+ continue;
1149
+ }
1150
+ valueKnown = true;
1151
+ }
1152
+
1153
+ // Detect addressing mode
1154
+ uint8_t mode = detectAddressingMode(mnem, line.operand, value, valueKnown);
1155
+ if (mode == 0xFF) {
1156
+ addError(line.lineNumber, "Cannot determine addressing mode for " + mnem);
1157
+ continue;
1158
+ }
1159
+
1160
+ // Look up opcode
1161
+ uint8_t opcode = reverseOpcodes[mnemIdx][mode];
1162
+ if (opcode == 0xFF) {
1163
+ addError(line.lineNumber, mnem + " does not support this addressing mode");
1164
+ continue;
1165
+ }
1166
+
1167
+ // Emit instruction bytes
1168
+ AddrMode addrMode = static_cast<AddrMode>(mode);
1169
+ switch (addrMode) {
1170
+ case AddrMode::IMP:
1171
+ case AddrMode::ACC:
1172
+ result.output.push_back(opcode);
1173
+ pc += 1;
1174
+ break;
1175
+
1176
+ case AddrMode::IMM:
1177
+ case AddrMode::ZP:
1178
+ case AddrMode::ZPX:
1179
+ case AddrMode::ZPY:
1180
+ case AddrMode::IZX:
1181
+ case AddrMode::IZY:
1182
+ case AddrMode::ZPI:
1183
+ result.output.push_back(opcode);
1184
+ result.output.push_back(static_cast<uint8_t>(value & 0xFF));
1185
+ pc += 2;
1186
+ break;
1187
+
1188
+ case AddrMode::REL: {
1189
+ int32_t offset = value - (pc + 2);
1190
+ if (offset < -128 || offset > 127) {
1191
+ addError(line.lineNumber, "Branch target out of range");
1192
+ continue;
1193
+ }
1194
+ result.output.push_back(opcode);
1195
+ result.output.push_back(static_cast<uint8_t>(offset & 0xFF));
1196
+ pc += 2;
1197
+ break;
1198
+ }
1199
+
1200
+ case AddrMode::ABS:
1201
+ case AddrMode::ABX:
1202
+ case AddrMode::ABY:
1203
+ case AddrMode::IND:
1204
+ case AddrMode::AIX:
1205
+ result.output.push_back(opcode);
1206
+ result.output.push_back(static_cast<uint8_t>(value & 0xFF));
1207
+ result.output.push_back(static_cast<uint8_t>((value >> 8) & 0xFF));
1208
+ pc += 3;
1209
+ break;
1210
+
1211
+ default:
1212
+ addError(line.lineNumber, "Internal error: unhandled addressing mode");
1213
+ continue;
1214
+ }
1215
+ }
1216
+
1217
+ result.endAddress = pc;
1218
+ result.success = result.errors.empty();
1219
+
1220
+ // Copy symbol table into result for inspection
1221
+ result.symbols.clear();
1222
+ result.symbols.reserve(symbols.size());
1223
+ for (const auto& [name, value] : symbols) {
1224
+ AsmSymbol sym;
1225
+ std::strncpy(sym.name, name.c_str(), sizeof(sym.name) - 1);
1226
+ sym.name[sizeof(sym.name) - 1] = '\0';
1227
+ sym.value = value;
1228
+ result.symbols.push_back(sym);
1229
+ }
1230
+ // Sort alphabetically
1231
+ std::sort(result.symbols.begin(), result.symbols.end(),
1232
+ [](const AsmSymbol& a, const AsmSymbol& b) {
1233
+ return std::strcmp(a.name, b.name) < 0;
1234
+ });
1235
+
1236
+ return result;
1237
+ }
1238
+
1239
+ } // namespace a2e