mos6502-workbench 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,725 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MOS6502
4
# Error type raised by the assembler whenever source text cannot be parsed
# or an instruction/directive cannot be encoded.
class AssemblyError < StandardError
end
6
+
7
+ # A minimal two-pass assembler for the subset of 6502 syntax needed by this project.
8
+ #
9
+ # Supported features:
10
+ # - labels with `label:`
11
+ # - directives: `.org`, `.byte`, `.word`
12
+ # - numeric expressions with labels, `+`, `-`, unary `<` and `>`
13
+ # - the implemented official NMOS 6502 opcodes and addressing modes
14
+ class Assembler
15
# Intel HEX end-of-file record (zero-length payload, record type 0x01).
# It is appended after the data records when a program is exported.
INTEL_HEX_EOF_RECORD = ":00000001FF\n"

# A run of emitted machine-code bytes paired with the address of its first byte.
Segment = Struct.new(:start_address, :bytes)
19
+
20
# The assembled program, including labels and one or more emitted segments.
Program = Struct.new(:segments, :labels, :entry_point) do
  # Returns the lowest emitted address.
  #
  # Falls back to the entry point when no segment was emitted.
  #
  # @return [Integer] the first address occupied by any segment
  def start_address
    segments.min_by(&:start_address)&.start_address || entry_point
  end

  # Returns the first address after the final emitted byte.
  #
  # Falls back to the entry point when no segment was emitted.
  #
  # @return [Integer] the exclusive end address of the emitted program
  def end_address
    segments.map { |segment| segment.start_address + segment.bytes.length }.max || entry_point
  end

  # Loads the program into a CPU instance.
  #
  # Each segment is loaded at its own address without touching the reset
  # vector; the vector is written once afterwards when requested.
  #
  # @param cpu [CPU] the destination CPU
  # @param set_reset_vector [Boolean] whether to install the reset vector
  # @param reset [Boolean] whether to reset the CPU after loading
  # @return [CPU] the CPU instance
  def load_into(cpu, set_reset_vector: true, reset: true)
    segments.each do |segment|
      cpu.load(segment.bytes, start_address: segment.start_address, set_reset_vector: false)
    end

    cpu.write_word(CPU::RESET_VECTOR, entry_point) if set_reset_vector
    # Without a reset vector the program counter is passed to reset explicitly.
    cpu.reset(program_counter: set_reset_vector ? nil : entry_point) if reset
    cpu
  end

  # Converts the emitted segments into a flat binary blob.
  #
  # Gaps between segments are filled with `fill_byte`. By default the image
  # begins at the lowest segment address and extends through the final byte
  # of the last segment.
  #
  # @param origin [Integer, nil] the base address of the output image
  # @param size [Integer, nil] the fixed output size in bytes
  # @param fill_byte [Integer] the byte used to fill gaps (masked to 8 bits)
  # @return [String] the flat binary image
  # @raise [AssemblyError] when the size is negative, too small for the
  #   segments, or a segment starts before the origin
  def to_flat_binary(origin: nil, size: nil, fill_byte: 0x00)
    base = origin || start_address
    fill = fill_byte & 0xff
    required_size = [end_address - base, 0].max
    image_size = size || required_size

    raise AssemblyError, 'Binary size must be non-negative' if image_size.negative?
    raise AssemblyError, 'Output image does not fit all segments' if image_size < required_size

    image = Array.new(image_size, fill)

    segments.each do |segment|
      offset = segment.start_address - base
      if offset.negative?
        raise AssemblyError,
              "Segment at $#{format('%04X', segment.start_address)} starts before origin $#{format('%04X', base)}"
      end

      segment.bytes.each_with_index do |byte, index|
        image[offset + index] = byte
      end
    end

    image.pack('C*')
  end

  # Writes the program as a flat binary file.
  #
  # @param path [String] the output file path
  # @param origin [Integer, nil] the base address of the output image
  # @param size [Integer, nil] the fixed output size in bytes
  # @param fill_byte [Integer] the byte used to fill gaps
  # @return [String] the output path
  def write_flat_binary(path, origin: nil, size: nil, fill_byte: 0x00)
    File.binwrite(path, to_flat_binary(origin:, size:, fill_byte:))
    path
  end

  # Converts the emitted segments into Intel HEX text records.
  #
  # Each data record preserves the original segment address, so sparse
  # layouts remain sparse instead of being padded into a single image.
  #
  # @param bytes_per_record [Integer] the maximum payload size for each data record (1..255)
  # @return [String] the Intel HEX document
  # @raise [AssemblyError] when bytes_per_record is not an Integer in 1..255
  def to_intel_hex(bytes_per_record: 16)
    raise AssemblyError, 'Intel HEX bytes_per_record must be positive' unless bytes_per_record.is_a?(Integer) && bytes_per_record.positive?
    # The record length field is a single byte; a larger payload would yield a malformed record.
    raise AssemblyError, 'Intel HEX bytes_per_record must not exceed 255' if bytes_per_record > 0xff

    records = segments.sort_by(&:start_address).flat_map do |segment|
      segment.bytes.each_slice(bytes_per_record).with_index.map do |slice, index|
        address = (segment.start_address + (index * bytes_per_record)) & 0xffff
        intel_hex_record(address, 0x00, slice)
      end
    end

    (records << Assembler::INTEL_HEX_EOF_RECORD).join
  end

  # Writes the program as an Intel HEX file.
  #
  # @param path [String] the output file path
  # @param bytes_per_record [Integer] the maximum payload size for each data record
  # @return [String] the output path
  def write_intel_hex(path, bytes_per_record: 16)
    File.write(path, to_intel_hex(bytes_per_record:))
    path
  end

  private

  # Builds a single Intel HEX record line with the correct checksum.
  #
  # The checksum is the two's complement of the low byte of the sum of the
  # length, address, type and payload bytes.
  #
  # @param address [Integer] the 16-bit record address
  # @param record_type [Integer] the Intel HEX record type
  # @param data [Array<Integer>] the payload bytes
  # @return [String] the encoded record line
  def intel_hex_record(address, record_type, data)
    bytes = [data.length, (address >> 8) & 0xff, address & 0xff, record_type, *data]
    checksum = ((-bytes.sum) & 0xff)
    ":#{bytes.map { |byte| format('%02X', byte) }.join}#{format('%02X', checksum)}\n"
  end
end
144
+
145
# One parsed source line: its 1-based line number, optional label, kind
# (:directive, :instruction or nil for label-only lines), lower-cased
# name, raw operand text, and the original line text.
Statement = Struct.new(:line_number, :label, :kind, :name, :argument, :raw)

# Location counter used until the source sets one with `.org`.
DEFAULT_ORIGIN = CPU::DEFAULT_LOAD_ADDRESS
# Mnemonics whose operand is always treated as a relative branch target.
BRANCH_MNEMONICS = [:bcc, :bcs, :beq, :bmi, :bne, :bpl, :bvc, :bvs].freeze
# Directive names accepted during layout.
DIRECTIVES = ['.org', '.byte', '.word'].freeze
# Upper bound on layout passes before assembly is reported as non-converging.
MAX_PASSES = 8
151
+
152
# Reverse lookup built from CPU::OPCODES: mnemonic => { addressing mode => opcode }.
#
# The destructuring assumes each CPU::OPCODES entry has the shape
# opcode => [mnemonic, mode] (defined outside this file section).
#
# The table is a plain frozen hash with no default block. A default block
# that inserts on miss would raise FrozenError when an unknown mnemonic is
# looked up with `[]`; here such a lookup simply returns nil.
OPCODES_BY_MNEMONIC = CPU::OPCODES.each_with_object({}) do |(opcode, (mnemonic, mode)), table|
  (table[mnemonic] ||= {})[mode] = opcode
end.freeze
155
+
156
# Assembles 6502 source code into a {Program}.
#
# The source is parsed into statements, the layout (labels and addressing
# modes) is resolved, and the final bytes are emitted.
#
# @param source [String] the source code to assemble
# @return [Program] the assembled program
def assemble(source)
  parsed = parse(source)
  label_table, layout = resolve_layout(parsed)
  build_program(parsed, label_table, layout)
end
165
+
166
+ private
167
+
168
# Parses source text into line-oriented statements.
#
# Comments start at the first `;` that is not the payload of a character
# literal, so `lda #';'` keeps its operand. Blank and comment-only lines are
# dropped. A leading `name:` is recorded as the statement's label; a line
# holding only a label yields a statement whose kind is nil.
#
# @param source [String] the assembly source
# @return [Array<Statement>] the parsed statements
def parse(source)
  source.each_line.with_index(1).filter_map do |line, line_number|
    # Longest prefix made of `'x'` literals and non-semicolon characters.
    code = line[/\A(?:'.'|[^;])*/].strip
    next if code.empty?

    label = nil
    if (label_match = code.match(/\A([A-Za-z_][A-Za-z0-9_]*):/))
      label = label_match[1]
      code = label_match.post_match.strip
    end

    if code.empty?
      Statement.new(line_number:, label:, raw: line.rstrip)
    else
      name, argument = code.split(/\s+/, 2)
      # Names beginning with a dot are directives; everything else is an instruction.
      kind = name.start_with?('.') ? :directive : :instruction
      Statement.new(
        line_number:,
        label:,
        kind:,
        name: name.downcase,
        argument: argument&.strip,
        raw: line.rstrip
      )
    end
  end
end
199
+
200
# Resolves labels and instruction sizes until the layout converges.
#
# Every pass starts from DEFAULT_ORIGIN with an empty label table, walks the
# statements in order, and records an address/size entry per directive or
# instruction. Labels not yet seen in the current pass evaluate to nil
# during mode selection (non-strict). The loop stops as soon as two
# consecutive passes produce the same labels and layout.
#
# @param statements [Array<Statement>] the parsed statements
# @return [Array<(Hash, Hash)>] labels and per-line layout metadata
# @raise [AssemblyError] when no stable layout is found within MAX_PASSES
def resolve_layout(statements)
  last_result = nil

  MAX_PASSES.times do
    location = DEFAULT_ORIGIN
    labels = {}
    layout = {}

    statements.each_with_index do |statement, index|
      register_label(statement, labels, location)

      if statement.kind == :directive
        location = layout_directive(statement, layout, index, labels, location)
      elsif statement.kind == :instruction
        mode = select_mode(statement, labels, location, strict: false)
        size = instruction_size(mode)
        layout[index] = { address: location, mode:, size: }
        location = (location + size) & 0xffff
      end
    end

    result = [labels, layout]
    return result if result == last_result

    last_result = result
  end

  raise AssemblyError, "Assembly did not converge after #{MAX_PASSES} passes"
end
235
+
236
# Emits the final bytecode into loadable segments.
#
# Statements are walked in order with a 16-bit location counter. `.org`
# closes the segment collected so far and opens a new one at the given
# address; `.byte`, `.word` (low byte first) and instructions append bytes
# to the open segment.
#
# @param statements [Array<Statement>] the parsed statements
# @param labels [Hash{String => Integer}] resolved labels
# @param layout [Hash{Integer => Hash}] resolved instruction layout
# @return [Program] the assembled program
def build_program(statements, labels, layout)
  pc = DEFAULT_ORIGIN
  segment_start = pc
  pending = []
  segments = []

  statements.each_with_index do |statement, index|
    emitted =
      if statement.kind == :instruction
        encode_instruction(statement, layout.fetch(index).fetch(:mode), labels, pc)
      elsif statement.kind == :directive
        case statement.name
        when '.org'
          flush_segment(segments, segment_start, pending)
          pc = evaluate(statement.argument, labels, pc, strict: true) & 0xffff
          segment_start = pc
          pending = []
          nil # `.org` itself emits no bytes
        when '.byte'
          split_operands(statement.argument).map do |operand|
            evaluate(operand, labels, pc, strict: true) & 0xff
          end
        when '.word'
          split_operands(statement.argument).flat_map do |operand|
            word = evaluate(operand, labels, pc, strict: true) & 0xffff
            [word & 0xff, word >> 8]
          end
        end
      end
    next unless emitted

    pending.concat(emitted)
    pc = (pc + emitted.length) & 0xffff
  end

  flush_segment(segments, segment_start, pending)
  entry_point = determine_entry_point(statements, labels, layout, segments)
  Program.new(segments:, labels:, entry_point:)
end
282
+
283
# Binds the statement's label, if it has one, to the current location counter.
#
# @param statement [Statement] the current statement
# @param labels [Hash{String => Integer}] the label table (mutated)
# @param pc [Integer] the current location counter
# @return [void]
# @raise [AssemblyError] when the label is already present in the table
def register_label(statement, labels, pc)
  name = statement.label
  return unless name

  if labels.key?(name)
    raise AssemblyError, format_error(statement, "Duplicate label #{name}")
  end

  labels[name] = pc
end
296
+
297
# Reserves space for a directive during layout resolution.
#
# `.org` moves the location counter (its expression is evaluated strictly, so
# it may only use labels already defined); `.byte` reserves one byte per
# operand and `.word` two bytes per operand.
#
# @param statement [Statement] the directive statement
# @param layout [Hash] layout metadata (mutated with an entry for `index`)
# @param index [Integer] the statement index
# @param labels [Hash{String => Integer}] currently known labels
# @param pc [Integer] the current location counter
# @return [Integer] the updated location counter
# @raise [AssemblyError] for an unknown directive or an `.org` without an address
def layout_directive(statement, layout, index, labels, pc)
  raise AssemblyError, format_error(statement, "Unknown directive #{statement.name}") unless DIRECTIVES.include?(statement.name)

  case statement.name
  when '.org'
    # Without this guard a bare `.org` would hand nil to the expression
    # evaluator and surface as NoMethodError instead of an AssemblyError.
    if statement.argument.nil? || statement.argument.empty?
      raise AssemblyError, format_error(statement, 'Missing address for .org')
    end

    new_pc = evaluate(statement.argument, labels, pc, strict: true) & 0xffff
    layout[index] = { address: new_pc, size: 0 }
    new_pc
  when '.byte'
    count = split_operands(statement.argument).length
    layout[index] = { address: pc, size: count }
    (pc + count) & 0xffff
  when '.word'
    count = split_operands(statement.argument).length
    layout[index] = { address: pc, size: count * 2 }
    (pc + (count * 2)) & 0xffff
  end
end
323
+
324
# Encodes a single instruction into bytes.
#
# Immediate operands are truncated to their low byte. Zero-page and
# indexed-indirect operands must already fit in one byte: truncating them
# would silently address a different location (for example `lda ($1234),y`,
# or a forward reference on a zero-page-only instruction), so an
# out-of-range value is rejected instead.
#
# @param statement [Statement] the instruction statement
# @param mode [Symbol] the resolved addressing mode
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the instruction address
# @return [Array<Integer>] the encoded instruction bytes
# @raise [AssemblyError] when a zero-page operand does not fit in a byte or a
#   branch target is outside -128..127 of the next instruction
def encode_instruction(statement, mode, labels, pc)
  opcode = OPCODES_BY_MNEMONIC.fetch(statement.name.to_sym).fetch(mode)
  value = operand_value(statement, labels, pc, strict: true)

  case mode
  when :implied, :accumulator
    [opcode]
  when :immediate
    [opcode, value & 0xff]
  when :zero_page, :zero_page_x, :zero_page_y, :indirect_x, :indirect_y
    unless fits_byte?(value)
      raise AssemblyError, format_error(statement, "Operand does not fit #{mode}: #{statement.argument}")
    end

    [opcode, value]
  when :relative
    # Branch offsets are relative to the address of the following instruction.
    offset = value - ((pc + 2) & 0xffff)
    raise AssemblyError, format_error(statement, "Branch target out of range: #{statement.argument}") unless (-128..127).cover?(offset)

    [opcode, offset & 0xff]
  when :absolute, :absolute_x, :absolute_y, :indirect
    # 16-bit operands are stored low byte first.
    [opcode, value & 0xff, (value >> 8) & 0xff]
  end
end
352
+
353
# Selects the final addressing mode for an instruction.
#
# Operand syntaxes that map onto exactly one addressing mode are only checked
# for support by the mnemonic. Bare and indexed operands may be encoded in a
# zero-page or an absolute form; that choice is delegated to
# choose_direct_mode using the evaluated operand value.
#
# @param statement [Statement] the instruction statement
# @param labels [Hash{String => Integer}] known labels
# @param pc [Integer] the current instruction address
# @param strict [Boolean] whether unknown labels should raise
# @return [Symbol] the resolved addressing mode
# @raise [AssemblyError] for unknown mnemonics or unsupported operand forms
def select_mode(statement, labels, pc, strict:)
  modes = OPCODES_BY_MNEMONIC.fetch(statement.name.to_sym, nil)
  raise AssemblyError, format_error(statement, "Unknown instruction #{statement.name}") unless modes

  syntax, expression = operand_syntax(statement)
  value = expression && evaluate(expression, labels, pc, strict:)

  # Syntax families with a short (zero-page) and a long (absolute) encoding.
  direct_forms = {
    bare: %i[zero_page absolute],
    indexed_x: %i[zero_page_x absolute_x],
    indexed_y: %i[zero_page_y absolute_y]
  }
  fixed_forms = %i[implied accumulator immediate indirect indirect_x indirect_y relative]

  if (forms = direct_forms[syntax])
    choose_direct_mode(statement, modes, value, *forms)
  elsif fixed_forms.include?(syntax)
    ensure_mode!(statement, modes, syntax)
  else
    raise AssemblyError, format_error(statement, "Unsupported operand syntax #{statement.argument.inspect}")
  end
end
383
+
384
# Classifies an operand into a syntax family and extracts its expression text.
#
# A missing operand is :implied and a lone `a`/`A` is :accumulator. Any other
# operand of a branch mnemonic is :relative. Remaining operands are matched
# against the immediate, indirect and indexed patterns in order, falling
# back to :bare.
#
# @param statement [Statement] the instruction statement
# @return [Array<(Symbol, String, nil)>] syntax kind and expression text
def operand_syntax(statement)
  operand = statement.argument
  return [:implied, nil] if operand.nil? || operand.empty?
  return [:accumulator, nil] if operand.casecmp('a').zero?
  return [:relative, operand] if BRANCH_MNEMONICS.include?(statement.name.to_sym)

  # Order matters: the parenthesised forms must be tried before the plain
  # indexed forms, which would otherwise also match them.
  patterns = [
    [:immediate, /\A#\s*(.+)\z/],
    [:indirect_x, /\A\(\s*(.+)\s*,\s*x\s*\)\z/i],
    [:indirect_y, /\A\(\s*(.+)\s*\)\s*,\s*y\z/i],
    [:indirect, /\A\(\s*(.+)\s*\)\z/],
    [:indexed_x, /\A(.+)\s*,\s*x\z/i],
    [:indexed_y, /\A(.+)\s*,\s*y\z/i]
  ]

  patterns.each do |kind, pattern|
    found = pattern.match(operand)
    return [kind, found[1]] if found
  end

  [:bare, operand]
end
412
+
413
# Evaluates the expression part of an instruction operand, when there is one.
#
# Operands without an expression (implied or accumulator syntax) yield nil.
#
# @param statement [Statement] the instruction statement
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current instruction address
# @param strict [Boolean] whether unknown labels should raise
# @return [Integer, nil] the evaluated operand
def operand_value(statement, labels, pc, strict:)
  expression = operand_syntax(statement)[1]
  evaluate(expression, labels, pc, strict:) if expression
end
424
+
425
# Chooses between the short (zero-page) and long (absolute) addressing forms.
#
# The short form wins when the mnemonic supports it and the operand is known
# to fit in a byte. Otherwise the long form is used when available. When only
# the short form exists it is used for unresolved operands and rejected for
# known operands that do not fit.
#
# @param statement [Statement] the instruction statement
# @param modes [Hash{Symbol => Integer}] supported modes for the mnemonic
# @param value [Integer, nil] the evaluated operand
# @param short_mode [Symbol] the zero-page form
# @param long_mode [Symbol] the absolute form
# @return [Symbol] the selected mode
# @raise [AssemblyError] when neither form is supported or the operand is too large
def choose_direct_mode(statement, modes, value, short_mode, long_mode)
  has_short = modes.key?(short_mode)
  fits_short = value && fits_byte?(value)

  return short_mode if has_short && fits_short
  return long_mode if modes.key?(long_mode)

  unless has_short
    supported = modes.keys.join(', ')
    raise AssemblyError, format_error(statement, "Unsupported addressing mode for #{statement.name}; supported modes: #{supported}")
  end

  # Only the short form exists: a known value reaching this point is too large.
  if value && !fits_short
    raise AssemblyError,
          format_error(statement, "Operand does not fit #{short_mode}: #{statement.argument}")
  end

  short_mode
end
453
+
454
# Confirms that a mnemonic supports the addressing mode its operand requires.
#
# @param statement [Statement] the instruction statement
# @param modes [Hash{Symbol => Integer}] supported modes
# @param mode [Symbol] the required mode
# @return [Symbol] the mode when supported
# @raise [AssemblyError] when the mnemonic has no opcode for the mode
def ensure_mode!(statement, modes, mode)
  unless modes.key?(mode)
    raise AssemblyError, format_error(statement, "Unsupported addressing mode #{mode} for #{statement.name}")
  end

  mode
end
465
+
466
# Returns the encoded size of an instruction (opcode plus operand bytes) for
# a given addressing mode.
#
# @param mode [Symbol] the addressing mode
# @return [Integer] the instruction size in bytes
# @raise [AssemblyError] when the mode is not a known addressing mode
def instruction_size(mode)
  modes_by_size = {
    1 => %i[implied accumulator],
    2 => %i[immediate zero_page zero_page_x zero_page_y indirect_x indirect_y relative],
    3 => %i[absolute absolute_x absolute_y indirect]
  }

  size, = modes_by_size.find { |_bytes, modes| modes.include?(mode) }
  raise AssemblyError, "Unknown instruction size for #{mode}" unless size

  size
end
482
+
483
# Splits a comma-separated operand list while preserving nested expressions.
#
# Commas inside parentheses do not split. Character literals of the form
# `'x'` are copied through as a unit, so `','` is a single operand and
# `'('` / `')'` do not disturb the parenthesis depth. Empty operands are
# dropped.
#
# @param text [String, nil] the raw operand text
# @return [Array<String>] the individual operands
def split_operands(text)
  return [] if text.nil? || text.empty?

  operands = []
  depth = 0
  current = +''

  # Each chunk is either a three-character literal or a single character.
  text.scan(/'.'|./m) do |chunk|
    case chunk
    when ','
      if depth.zero?
        operands << current.strip
        current = +''
      else
        current << chunk
      end
    when '('
      depth += 1
      current << chunk
    when ')'
      depth -= 1
      current << chunk
    else
      current << chunk
    end
  end

  operands << current.strip unless current.empty?
  operands.reject(&:empty?)
end
517
+
518
# Evaluates a numeric expression used by directives or operands.
#
# The expression is tokenized and parsed from the first token. In strict
# mode any token left over after the parsed expression is an error; in
# non-strict mode trailing tokens are ignored.
#
# @param expression [String] the expression text
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Integer, nil] the computed expression value
# @raise [AssemblyError] in strict mode when unparsed tokens remain
def evaluate(expression, labels, pc, strict:)
  tokens = tokenize(expression)
  result, consumed = parse_expression(tokens, 0, labels, pc, strict:)

  leftover = consumed < tokens.length
  raise AssemblyError, "Unexpected token #{tokens[consumed]}" if leftover && strict

  result
end
532
+
533
# Tokenizes a simple arithmetic expression.
#
# Recognised tokens: the single characters `( ) + - < > *`, `$`-prefixed hex
# literals, `%`-prefixed binary literals, decimal literals, three-character
# `'x'` literals and identifiers. Whitespace separates tokens and is dropped.
#
# @param expression [String] the expression to tokenize
# @return [Array<String>] the tokens
# @raise [AssemblyError] on a malformed literal or an unexpected character
def tokenize(expression)
  tokens = []
  cursor = 0

  while cursor < expression.length
    char = expression[cursor]
    if char.match?(/\s/)
      cursor += 1
      next
    end

    tail = expression[cursor..]
    lexeme =
      case char
      when '(', ')', '+', '-', '<', '>', '*'
        char
      when '$'
        tail[/\A\$[0-9a-fA-F]+/] || raise(AssemblyError, "Invalid hex literal in #{expression.inspect}")
      when '%'
        tail[/\A%[01]+/] || raise(AssemblyError, "Invalid binary literal in #{expression.inspect}")
      when /\d/
        tail[/\A\d+/]
      when "'"
        # Only a quote closed exactly two characters later forms a literal.
        tail[0, 3] if tail[2] == "'"
      when /[A-Za-z_]/
        tail[/\A[A-Za-z_][A-Za-z0-9_]*/]
      end
    raise AssemblyError, "Unexpected character #{char.inspect} in #{expression.inspect}" unless lexeme

    tokens << lexeme
    cursor += lexeme.length
  end

  tokens
end
579
+
580
# Parses a left-associative chain of additions and subtractions.
#
# @param tokens [Array<String>] the token stream
# @param position [Integer] the current token index
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Array<(Integer, nil, Integer)>] the value and next token index
def parse_expression(tokens, position, labels, pc, strict:)
  total, cursor = parse_unary(tokens, position, labels, pc, strict:)

  loop do
    operator = tokens[cursor]
    break unless ['+', '-'].include?(operator)

    operand, cursor = parse_unary(tokens, cursor + 1, labels, pc, strict:)
    total = nil_value_math(total, operand, operator)
  end

  [total, cursor]
end
600
+
601
# Parses prefix operators, parenthesised groups and primary expressions.
#
# Prefix `+` and `-` are sign operators, `<` selects the low byte and `>` the
# high byte of the operand that follows. Unresolved (nil) operands stay nil.
#
# @param tokens [Array<String>] the token stream
# @param position [Integer] the current token index
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Array<(Integer, nil, Integer)>] the value and next token index
# @raise [AssemblyError] at the end of input or on an unclosed parenthesis
def parse_unary(tokens, position, labels, pc, strict:)
  token = tokens[position]
  raise AssemblyError, 'Unexpected end of expression' unless token

  if token == '('
    inner, closing = parse_expression(tokens, position + 1, labels, pc, strict:)
    raise AssemblyError, 'Missing closing parenthesis' unless tokens[closing] == ')'

    return [inner, closing + 1]
  end

  prefix_operators = {
    '+' => ->(operand) { operand },
    '-' => ->(operand) { operand&.-@ },
    '<' => ->(operand) { operand && (operand & 0xff) },
    '>' => ->(operand) { operand && ((operand >> 8) & 0xff) }
  }
  transform = prefix_operators[token]
  return [parse_primary(token, labels, pc, strict:), position + 1] unless transform

  operand, next_position = parse_unary(tokens, position + 1, labels, pc, strict:)
  [transform.call(operand), next_position]
end
634
+
635
# Resolves a single token: a numeric literal, a character literal, the
# current-address marker `*`, or a label reference.
#
# @param token [String] the current token
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Integer, nil] the resolved value, or nil for an unknown label in non-strict mode
# @raise [AssemblyError] for an unknown label in strict mode
def parse_primary(token, labels, pc, strict:)
  return pc if token == '*'
  return token[1..].to_i(16) if token.match?(/\A\$[0-9a-fA-F]+\z/)
  return token[1..].to_i(2) if token.match?(/\A%[01]+\z/)
  return token.to_i if token.match?(/\A\d+\z/)
  return token[1].ord if token.match?(/\A'.'\z/)

  # Anything else is a symbol name; the block runs only when it is undefined.
  labels.fetch(token) do
    raise AssemblyError, "Unknown symbol #{token}" if strict

    nil
  end
end
661
+
662
# Adds or subtracts two operands, propagating nil so that expressions with
# unresolved values stay unresolved during non-strict passes.
#
# @param lhs [Integer, nil] the left-hand side
# @param rhs [Integer, nil] the right-hand side
# @param operator [String] `+` or `-`
# @return [Integer, nil] the arithmetic result
def nil_value_math(lhs, rhs, operator)
  return nil if [lhs, rhs].any?(&:nil?)
  return lhs + rhs if operator == '+'

  lhs - rhs
end
673
+
674
# Tests whether a value can be represented as an unsigned byte.
#
# @param value [Integer] the value to test
# @return [Boolean] true when the value lies in `0x00..0xFF`
def fits_byte?(value)
  value >= 0x00 && value <= 0xff
end
681
+
682
# Closes the segment being collected by appending it to the segment list.
#
# Nothing is appended when no bytes were emitted. The byte array is copied so
# later changes to the caller's buffer do not alter the stored segment.
#
# @param segments [Array<Segment>] the collected segments (mutated)
# @param start_address [Integer] the segment origin
# @param bytes [Array<Integer>] the emitted bytes
# @return [void]
def flush_segment(segments, start_address, bytes)
  return if bytes.empty?

  snapshot = Segment.new(start_address:, bytes: bytes.dup)
  segments.push(snapshot)
end
693
+
694
# Chooses the program entry point for reset/load purposes.
#
# Preference order:
# - a label named `start`
# - the address of the first instruction
# - the first emitted segment address
# - the default origin
#
# @param statements [Array<Statement>] parsed source statements
# @param labels [Hash{String => Integer}] resolved labels
# @param layout [Hash{Integer => Hash}] resolved instruction layout
# @param segments [Array<Segment>] emitted segments
# @return [Integer] the chosen entry point
def determine_entry_point(statements, labels, layout, segments)
  return labels['start'] if labels.key?('start')

  first_instruction = statements.index { |candidate| candidate.kind == :instruction }
  if first_instruction
    layout.fetch(first_instruction).fetch(:address)
  else
    segments.first&.start_address || DEFAULT_ORIGIN
  end
end
715
+
716
# Prefixes an error message with the source line number and appends the
# offending source text (surrounding whitespace removed) in parentheses.
#
# @param statement [Statement] the statement that failed
# @param message [String] the underlying error
# @return [String] the contextualized error message
def format_error(statement, message)
  location = statement.line_number
  excerpt = statement.raw.strip
  "Line #{location}: #{message} (#{excerpt})"
end
724
+ end
725
+ end