mos6502-workbench 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +189 -0
- data/bin/assemble +122 -0
- data/bin/disassemble +58 -0
- data/bin/run_assembled_program +80 -0
- data/bin/run_klaus_functional_test +19 -0
- data/examples/README.md +147 -0
- data/examples/branching.asm +16 -0
- data/examples/countdown.asm +17 -0
- data/examples/labels_and_data.asm +28 -0
- data/examples/primes.asm +61 -0
- data/examples/simple_machine.rb +25 -0
- data/examples/traced_machine.rb +55 -0
- data/lib/mos6502/workbench/assembler.rb +725 -0
- data/lib/mos6502/workbench/bus.rb +264 -0
- data/lib/mos6502/workbench/cpu.rb +1292 -0
- data/lib/mos6502/workbench/device.rb +64 -0
- data/lib/mos6502/workbench/disassembler.rb +234 -0
- data/lib/mos6502/workbench/flags.rb +74 -0
- data/lib/mos6502/workbench/intel_hex.rb +116 -0
- data/lib/mos6502/workbench/machine.rb +140 -0
- data/lib/mos6502/workbench/memory.rb +159 -0
- data/lib/mos6502/workbench/registers.rb +19 -0
- data/lib/mos6502/workbench/tui.rb +537 -0
- data/lib/mos6502/workbench/version.rb +9 -0
- data/lib/mos6502/workbench.rb +12 -0
- metadata +126 -0
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module MOS6502
|
|
4
|
+
# Raised when assembly source cannot be parsed or encoded.
|
|
5
|
+
class AssemblyError < StandardError; end
|
|
6
|
+
|
|
7
|
+
# A minimal two-pass assembler for the subset of 6502 syntax needed by this project.
|
|
8
|
+
#
|
|
9
|
+
# Supported features:
|
|
10
|
+
# - labels with `label:`
|
|
11
|
+
# - directives: `.org`, `.byte`, `.word`
|
|
12
|
+
# - numeric expressions with labels, `+`, `-`, unary `<` and `>`
|
|
13
|
+
# - the implemented official NMOS 6502 opcodes and addressing modes
|
|
14
|
+
class Assembler
|
|
15
|
+
INTEL_HEX_EOF_RECORD = ":00000001FF\n"
|
|
16
|
+
|
|
17
|
+
# A contiguous block of machine code emitted at a specific address.
|
|
18
|
+
Segment = Struct.new(:start_address, :bytes)
|
|
19
|
+
|
|
20
|
+
# The assembled program, including labels and one or more emitted segments.
|
|
21
|
+
Program = Struct.new(:segments, :labels, :entry_point) do
|
|
22
|
+
# Reports where the emitted program begins in memory.
#
# When no segment exists, the entry point is reported instead.
#
# @return [Integer] the smallest segment start address, or `entry_point`
def start_address
  lowest = segments.map(&:start_address).min
  lowest || entry_point
end
|
|
28
|
+
|
|
29
|
+
# Reports the exclusive upper bound of the emitted program.
#
# Each segment ends at `start_address + bytes.length`; the largest such
# value wins. When no segment exists, the entry point is reported instead.
#
# @return [Integer] the address just past the last emitted byte
def end_address
  highest = nil

  segments.each do |segment|
    stop = segment.start_address + segment.bytes.length
    highest = stop if highest.nil? || stop > highest
  end

  highest || entry_point
end
|
|
35
|
+
|
|
36
|
+
# Copies every segment into a CPU and optionally prepares it to run.
#
# Segments are loaded one by one with per-segment vector installation
# disabled; the reset vector is written once afterwards, pointing at
# `entry_point`. When the vector is not installed, the entry point is
# passed to `cpu.reset` directly; otherwise `nil` is passed
# (presumably so the CPU picks the address up from the vector — confirm
# against `CPU#reset`).
#
# @param cpu [CPU] the destination CPU
# @param set_reset_vector [Boolean] whether to write the reset vector
# @param reset [Boolean] whether to reset the CPU once loading is done
# @return [CPU] the same CPU instance that was passed in
def load_into(cpu, set_reset_vector: true, reset: true)
  segments.each do |segment|
    cpu.load(segment.bytes, start_address: segment.start_address, set_reset_vector: false)
  end

  cpu.write_word(CPU::RESET_VECTOR, entry_point) if set_reset_vector

  if reset
    forced_pc = set_reset_vector ? nil : entry_point
    cpu.reset(program_counter: forced_pc)
  end

  cpu
end
|
|
51
|
+
|
|
52
|
+
# Renders all segments into one contiguous binary string.
#
# The image starts at `origin` (or the lowest segment address) and spans
# `size` bytes (or just enough to reach the end of the last segment).
# Every byte not covered by a segment holds `fill_byte` masked to 8 bits.
#
# @param origin [Integer, nil] the address that maps to offset 0 of the image
# @param size [Integer, nil] the exact image length in bytes
# @param fill_byte [Integer] the padding value for uncovered bytes
# @return [String] the packed binary image
# @raise [AssemblyError] when `size` is negative, when the image is too
#   small for the segments, or when a segment starts below the origin
def to_flat_binary(origin: nil, size: nil, fill_byte: 0x00)
  base = origin || start_address
  padding = fill_byte & 0xff

  needed = end_address - base
  needed = 0 if needed.negative?
  total = size || needed

  raise AssemblyError, 'Binary size must be non-negative' if total.negative?
  raise AssemblyError, 'Output image does not fit all segments' if total < needed

  image = Array.new(total, padding)

  segments.each do |segment|
    offset = segment.start_address - base
    if offset.negative?
      raise AssemblyError,
            "Segment at $#{format('%04X', segment.start_address)} starts before origin $#{format('%04X', base)}"
    end

    segment.bytes.each_with_index { |byte, index| image[offset + index] = byte }
  end

  image.pack('C*')
end
|
|
87
|
+
|
|
88
|
+
# Saves the flat binary image of the program to disk.
#
# All keyword arguments are handed to {#to_flat_binary} unchanged and the
# result is written in binary mode.
#
# @param path [String] where to write the image
# @param origin [Integer, nil] the address that maps to offset 0 of the image
# @param size [Integer, nil] the exact image length in bytes
# @param fill_byte [Integer] the padding value for uncovered bytes
# @return [String] the path that was written
def write_flat_binary(path, origin: nil, size: nil, fill_byte: 0x00)
  image = to_flat_binary(origin: origin, size: size, fill_byte: fill_byte)
  File.binwrite(path, image)
  path
end
|
|
99
|
+
|
|
100
|
+
# Converts the emitted segments into Intel HEX text records.
#
# Segments are emitted in ascending address order. Each data record keeps
# the address of the bytes it carries, so sparse layouts remain sparse
# instead of being padded into a single image. The document always ends
# with the end-of-file record.
#
# @param bytes_per_record [Integer] the maximum payload size for each data
#   record (1..255)
# @return [String] the Intel HEX document
# @raise [AssemblyError] when `bytes_per_record` is not a positive Integer
#   or is larger than 255
def to_intel_hex(bytes_per_record: 16)
  unless bytes_per_record.is_a?(Integer) && bytes_per_record.positive?
    raise AssemblyError, 'Intel HEX bytes_per_record must be positive'
  end

  # The record length is encoded as a single byte (two hex digits), so a
  # larger payload would yield a malformed record rather than a longer one.
  raise AssemblyError, 'Intel HEX bytes_per_record must not exceed 255' if bytes_per_record > 0xff

  records = segments.sort_by(&:start_address).flat_map do |segment|
    segment.bytes.each_slice(bytes_per_record).with_index.map do |slice, index|
      # Record addresses are 16-bit, hence the wrap-around mask.
      address = (segment.start_address + (index * bytes_per_record)) & 0xffff
      intel_hex_record(address, 0x00, slice)
    end
  end

  (records << Assembler::INTEL_HEX_EOF_RECORD).join
end
|
|
119
|
+
|
|
120
|
+
# Saves the Intel HEX rendering of the program to disk.
#
# @param path [String] where to write the document
# @param bytes_per_record [Integer] the maximum payload size for each data record
# @return [String] the path that was written
def write_intel_hex(path, bytes_per_record: 16)
  document = to_intel_hex(bytes_per_record: bytes_per_record)
  File.write(path, document)
  path
end
|
|
129
|
+
|
|
130
|
+
private
|
|
131
|
+
|
|
132
|
+
# Encodes one Intel HEX record line.
#
# The line consists of a colon, the payload length, the big-endian 16-bit
# address, the record type, the payload itself and a checksum byte, all
# as upper-case two-digit hex, followed by a newline. The checksum makes
# the low byte of the sum over all record bytes equal zero.
#
# @param address [Integer] the 16-bit record address
# @param record_type [Integer] the Intel HEX record type
# @param data [Array<Integer>] the payload bytes
# @return [String] the encoded record line including the trailing newline
def intel_hex_record(address, record_type, data)
  high = (address >> 8) & 0xff
  low = address & 0xff
  fields = [data.length, high, low, record_type, *data]

  checksum = (0x100 - (fields.sum & 0xff)) & 0xff
  hex = (fields + [checksum]).map { |byte| format('%02X', byte) }.join

  ":#{hex}\n"
end
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
Statement = Struct.new(:line_number, :label, :kind, :name, :argument, :raw)
|
|
146
|
+
|
|
147
|
+
DEFAULT_ORIGIN = CPU::DEFAULT_LOAD_ADDRESS
|
|
148
|
+
BRANCH_MNEMONICS = %i[bcc bcs beq bmi bne bpl bvc bvs].freeze
|
|
149
|
+
DIRECTIVES = %w[.org .byte .word].freeze
|
|
150
|
+
MAX_PASSES = 8
|
|
151
|
+
|
|
152
|
+
# Inverts CPU::OPCODES into a `mnemonic => { mode => opcode }` lookup.
#
# The table is built on a plain Hash. A Hash with an auto-vivifying
# default block would, once frozen, raise FrozenError on `[]` with an
# unknown mnemonic, because the block tries to insert a new entry.
OPCODES_BY_MNEMONIC = CPU::OPCODES.each_with_object({}) do |(opcode, (mnemonic, mode)), hash|
  (hash[mnemonic] ||= {})[mode] = opcode
end.freeze
|
|
155
|
+
|
|
156
|
+
# Turns 6502 assembly text into a {Program}.
#
# The source is parsed into statements, labels and layout are resolved,
# and finally the bytes are emitted.
#
# @param source [String] the source code to assemble
# @return [Program] the assembled program
def assemble(source)
  parsed = parse(source)
  # resolve_layout yields the pair [labels, layout], splatted into place.
  build_program(parsed, *resolve_layout(parsed))
end
|
|
165
|
+
|
|
166
|
+
private
|
|
167
|
+
|
|
168
|
+
# Parses source text into line-oriented statements.
#
# For each line the `;` comment is removed, an optional leading `label:`
# is split off, and the remainder is divided into a name and an argument.
# Names starting with `.` become directives, everything else becomes an
# instruction; names are lower-cased. Lines that are blank after comment
# removal are skipped, and a line carrying only a label yields a statement
# with no kind.
#
# A `;` enclosed in a quoted character literal (for example `lda #';'`)
# is part of the operand and does not start a comment.
#
# @param source [String] the assembly source
# @return [Array<Statement>] the parsed statements
def parse(source)
  source.each_line.with_index(1).filter_map do |line, line_number|
    # Take the longest prefix made of character literals and non-semicolon
    # characters; the first semicolon outside a literal ends the code part.
    stripped = line[/\A(?:'.'|[^;])*/].strip
    next if stripped.empty?

    label = nil
    if (label_match = stripped.match(/\A([A-Za-z_][A-Za-z0-9_]*):/))
      label = label_match[1]
      stripped = label_match.post_match.strip
    end

    raw = line.rstrip

    if stripped.empty?
      Statement.new(line_number: line_number, label: label, raw: raw)
    else
      name, argument = stripped.split(/\s+/, 2)
      Statement.new(
        line_number: line_number,
        label: label,
        kind: name.start_with?('.') ? :directive : :instruction,
        name: name.downcase,
        argument: argument&.strip,
        raw: raw
      )
    end
  end
end
|
|
199
|
+
|
|
200
|
+
# Computes label addresses and per-statement layout.
#
# The statement list is walked repeatedly, at most MAX_PASSES times. A
# pass starts at DEFAULT_ORIGIN with empty label and layout tables, so a
# label is visible only to statements at or after its definition within
# that pass. Addressing modes are selected non-strictly, which lets an
# unknown symbol evaluate to nil instead of raising. The walk stops as
# soon as two consecutive passes give identical results.
#
# @param statements [Array<Statement>] the parsed statements
# @return [Array<(Hash, Hash)>] the label table and the layout keyed by statement index
# @raise [AssemblyError] when no two consecutive passes agree
def resolve_layout(statements)
  last_result = nil

  MAX_PASSES.times do
    labels = {}
    layout = {}
    pc = DEFAULT_ORIGIN

    statements.each_with_index do |statement, index|
      register_label(statement, labels, pc)

      if statement.kind == :directive
        pc = layout_directive(statement, layout, index, labels, pc)
      elsif statement.kind == :instruction
        mode = select_mode(statement, labels, pc, strict: false)
        size = instruction_size(mode)
        layout[index] = { address: pc, mode: mode, size: size }
        pc = (pc + size) & 0xffff
      end
    end

    result = [labels, layout]
    return result if result == last_result

    last_result = result
  end

  raise AssemblyError, "Assembly did not converge after #{MAX_PASSES} passes"
end
|
|
235
|
+
|
|
236
|
+
# Produces the final bytes and groups them into loadable segments.
#
# Emission starts at DEFAULT_ORIGIN. `.org` closes the segment collected
# so far and opens a new one at the given address; `.byte`, `.word` and
# instructions append to the open segment and advance the 16-bit location
# counter. Words are stored low byte first. All expressions are evaluated
# strictly here, so an unknown symbol raises.
#
# @param statements [Array<Statement>] the parsed statements
# @param labels [Hash{String => Integer}] resolved labels
# @param layout [Hash{Integer => Hash}] resolved layout keyed by statement index
# @return [Program] the assembled program
def build_program(statements, labels, layout)
  pc = DEFAULT_ORIGIN
  segment_start = pc
  pending = []
  segments = []

  statements.each_with_index do |statement, index|
    emitted =
      if statement.kind == :instruction
        encode_instruction(statement, layout.fetch(index).fetch(:mode), labels, pc)
      elsif statement.kind == :directive
        case statement.name
        when '.org'
          flush_segment(segments, segment_start, pending)
          pc = evaluate(statement.argument, labels, pc, strict: true) & 0xffff
          segment_start = pc
          pending = []
          nil # .org relocates the counter but emits nothing
        when '.byte'
          split_operands(statement.argument).map do |operand|
            evaluate(operand, labels, pc, strict: true) & 0xff
          end
        when '.word'
          split_operands(statement.argument).flat_map do |operand|
            word = evaluate(operand, labels, pc, strict: true) & 0xffff
            [word & 0xff, word >> 8]
          end
        end
      end

    next unless emitted

    pending.concat(emitted)
    pc = (pc + emitted.length) & 0xffff
  end

  flush_segment(segments, segment_start, pending)

  entry_point = determine_entry_point(statements, labels, layout, segments)
  Program.new(segments: segments, labels: labels, entry_point: entry_point)
end
|
|
282
|
+
|
|
283
|
+
# Binds a statement's label, if it has one, to the current location counter.
#
# @param statement [Statement] the current statement
# @param labels [Hash{String => Integer}] the label table, updated in place
# @param pc [Integer] the current location counter
# @return [void]
# @raise [AssemblyError] when the label is already present in the table
def register_label(statement, labels, pc)
  name = statement.label
  return unless name

  if labels.key?(name)
    raise AssemblyError, format_error(statement, "Duplicate label #{name}")
  end

  labels[name] = pc
end
|
|
296
|
+
|
|
297
|
+
# Accounts for a directive while the layout is being resolved.
#
# `.org` moves the location counter to the evaluated address and occupies
# no space. `.byte` occupies one byte per operand and `.word` two. The
# operands of `.byte` and `.word` are only counted here, not evaluated.
#
# @param statement [Statement] the directive statement
# @param layout [Hash] layout metadata, updated in place at `index`
# @param index [Integer] the statement index
# @param labels [Hash{String => Integer}] labels known so far
# @param pc [Integer] the current location counter
# @return [Integer] the location counter after the directive
# @raise [AssemblyError] when the directive is not listed in DIRECTIVES
def layout_directive(statement, layout, index, labels, pc)
  directive = statement.name
  unless DIRECTIVES.include?(directive)
    raise AssemblyError, format_error(statement, "Unknown directive #{directive}")
  end

  if directive == '.org'
    target = evaluate(statement.argument, labels, pc, strict: true) & 0xffff
    layout[index] = { address: target, size: 0 }
    return target
  end

  bytes_per_operand = directive == '.word' ? 2 : 1
  size = split_operands(statement.argument).length * bytes_per_operand
  layout[index] = { address: pc, size: size }
  (pc + size) & 0xffff
end
|
|
323
|
+
|
|
324
|
+
# Produces the machine-code bytes for one instruction.
#
# The opcode is looked up by mnemonic and addressing mode; the operand is
# evaluated strictly. One-byte operands keep only the low 8 bits,
# two-byte operands are stored low byte first, and branches store the
# signed distance from the address following the two-byte instruction.
#
# @param statement [Statement] the instruction statement
# @param mode [Symbol] the resolved addressing mode
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the instruction address
# @return [Array<Integer>] the encoded instruction bytes
# @raise [AssemblyError] when a branch target is outside -128..127
def encode_instruction(statement, mode, labels, pc)
  opcode = OPCODES_BY_MNEMONIC.fetch(statement.name.to_sym).fetch(mode)
  value = operand_value(statement, labels, pc, strict: true)

  operand_bytes =
    case mode
    when :implied, :accumulator
      []
    when :immediate, :zero_page, :zero_page_x, :zero_page_y, :indirect_x, :indirect_y
      [value & 0xff]
    when :relative
      offset = value - ((pc + 2) & 0xffff)
      unless (-128..127).cover?(offset)
        raise AssemblyError, format_error(statement, "Branch target out of range: #{statement.argument}")
      end

      [offset & 0xff]
    when :absolute, :absolute_x, :absolute_y, :indirect
      [value & 0xff, (value >> 8) & 0xff]
    end

  # A mode outside the lists above yields nil, not a bare opcode.
  operand_bytes && [opcode, *operand_bytes]
end
|
|
352
|
+
|
|
353
|
+
# Determines the addressing mode an instruction will be encoded with.
#
# The operand text is classified first. Bare and indexed operands are
# resolved to a zero-page or absolute form depending on the operand
# value; every other syntax maps directly to a mode, which only needs to
# be supported by the mnemonic.
#
# @param statement [Statement] the instruction statement
# @param labels [Hash{String => Integer}] known labels
# @param pc [Integer] the current instruction address
# @param strict [Boolean] whether unknown labels should raise
# @return [Symbol] the resolved addressing mode
# @raise [AssemblyError] for an unknown mnemonic, an unsupported mode or
#   an unrecognised operand syntax
def select_mode(statement, labels, pc, strict:)
  modes = OPCODES_BY_MNEMONIC.fetch(statement.name.to_sym, nil)
  raise AssemblyError, format_error(statement, "Unknown instruction #{statement.name}") unless modes

  syntax, expression = operand_syntax(statement)
  value = expression && evaluate(expression, labels, pc, strict: strict)

  # Syntax families that exist in a short (zero-page) and long (absolute) form.
  direct_forms = {
    bare: %i[zero_page absolute],
    indexed_x: %i[zero_page_x absolute_x],
    indexed_y: %i[zero_page_y absolute_y]
  }

  if direct_forms.key?(syntax)
    short_mode, long_mode = direct_forms.fetch(syntax)
    choose_direct_mode(statement, modes, value, short_mode, long_mode)
  elsif %i[implied accumulator immediate indirect indirect_x indirect_y relative].include?(syntax)
    ensure_mode!(statement, modes, syntax)
  else
    raise AssemblyError, format_error(statement, "Unsupported operand syntax #{statement.argument.inspect}")
  end
end
|
|
383
|
+
|
|
384
|
+
# Classifies an instruction operand and extracts its expression text.
#
# Checks run in a fixed order: missing operand, the accumulator (`a` in
# either case), branch mnemonics (the whole operand is the target), then
# the immediate, indirect and indexed patterns. Anything left over is a
# bare expression.
#
# @param statement [Statement] the instruction statement
# @return [Array<(Symbol, String, nil)>] syntax kind and expression text
def operand_syntax(statement)
  operand = statement.argument
  return [:implied, nil] if operand.nil? || operand.empty?
  return [:accumulator, nil] if operand.casecmp('a').zero?
  return [:relative, operand] if BRANCH_MNEMONICS.include?(statement.name.to_sym)

  # Order matters: the parenthesised forms must be tried before the plain
  # indexed ones, which would otherwise match them as well.
  patterns = [
    [:immediate, /\A#\s*(.+)\z/],
    [:indirect_x, /\A\(\s*(.+)\s*,\s*x\s*\)\z/i],
    [:indirect_y, /\A\(\s*(.+)\s*\)\s*,\s*y\z/i],
    [:indirect, /\A\(\s*(.+)\s*\)\z/],
    [:indexed_x, /\A(.+)\s*,\s*x\z/i],
    [:indexed_y, /\A(.+)\s*,\s*y\z/i]
  ]

  patterns.each do |kind, pattern|
    found = pattern.match(operand)
    return [kind, found[1]] if found
  end

  [:bare, operand]
end
|
|
412
|
+
|
|
413
|
+
# Computes the numeric value of an instruction operand, if it has one.
#
# Operands without an expression (implied and accumulator forms) give nil.
#
# @param statement [Statement] the instruction statement
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current instruction address
# @param strict [Boolean] whether unknown labels should raise
# @return [Integer, nil] the evaluated operand
def operand_value(statement, labels, pc, strict:)
  expression = operand_syntax(statement)[1]
  expression ? evaluate(expression, labels, pc, strict: strict) : expression
end
|
|
424
|
+
|
|
425
|
+
# Picks the zero-page or absolute variant of a direct addressing mode.
#
# The short form is used when the mnemonic supports it and the value is
# known to fit in a byte. Otherwise the long form is used when available.
# If only the short form exists, it is accepted for an unknown value and
# rejected for a known value that does not fit.
#
# @param statement [Statement] the instruction statement
# @param modes [Hash{Symbol => Integer}] supported modes for the mnemonic
# @param value [Integer, nil] the evaluated operand, nil when unresolved
# @param short_mode [Symbol] the zero-page form
# @param long_mode [Symbol] the absolute form
# @return [Symbol] the selected mode
# @raise [AssemblyError] when neither form is supported, or only the short
#   form is supported and the value does not fit
def choose_direct_mode(statement, modes, value, short_mode, long_mode)
  has_short = modes.key?(short_mode)

  return short_mode if has_short && value && fits_byte?(value)
  return long_mode if modes.key?(long_mode)

  unless has_short
    supported = modes.keys.join(', ')
    raise AssemblyError, format_error(statement, "Unsupported addressing mode for #{statement.name}; supported modes: #{supported}")
  end

  # Only the short form exists: a known value must actually fit it.
  if value && !fits_byte?(value)
    raise AssemblyError,
          format_error(statement, "Operand does not fit #{short_mode}: #{statement.argument}")
  end

  short_mode
end
|
|
453
|
+
|
|
454
|
+
# Confirms that the mnemonic can be encoded with the requested mode.
#
# @param statement [Statement] the instruction statement
# @param modes [Hash{Symbol => Integer}] supported modes
# @param mode [Symbol] the required mode
# @return [Symbol] `mode`, unchanged, when it is supported
# @raise [AssemblyError] when `mode` is not a key of `modes`
def ensure_mode!(statement, modes, mode)
  unless modes.key?(mode)
    raise AssemblyError, format_error(statement, "Unsupported addressing mode #{mode} for #{statement.name}")
  end

  mode
end
|
|
465
|
+
|
|
466
|
+
# Looks up how many bytes an instruction occupies for an addressing mode.
#
# @param mode [Symbol] the addressing mode
# @return [Integer] the instruction size in bytes (opcode included)
# @raise [AssemblyError] when the mode is not recognised
def instruction_size(mode)
  modes_by_size = {
    1 => %i[implied accumulator],
    2 => %i[immediate zero_page zero_page_x zero_page_y indirect_x indirect_y relative],
    3 => %i[absolute absolute_x absolute_y indirect]
  }

  size, = modes_by_size.find { |_bytes, modes| modes.include?(mode) }
  size || raise(AssemblyError, "Unknown instruction size for #{mode}")
end
|
|
482
|
+
|
|
483
|
+
# Splits a comma-separated operand list while preserving nested expressions.
#
# Commas inside parentheses do not separate operands. A quoted character
# literal (`'x'`) is copied as a unit, so `','`, `'('` and `')'` neither
# split the list nor affect parenthesis tracking. Each operand is
# stripped of surrounding whitespace and empty operands are dropped.
#
# @param text [String, nil] the raw operand text
# @return [Array<String>] the individual operands
def split_operands(text)
  return [] if text.nil? || text.empty?

  operands = []
  current = +''
  depth = 0
  index = 0

  while index < text.length
    char = text[index]

    # Same literal shape the tokenizer accepts: quote, any character, quote.
    if char == "'" && text[index + 2] == "'"
      current << text[index, 3]
      index += 3
      next
    end

    if char == ',' && depth.zero?
      operands << current.strip
      current = +''
    else
      depth += 1 if char == '('
      depth -= 1 if char == ')'
      current << char
    end

    index += 1
  end

  operands << current.strip
  operands.reject(&:empty?)
end
|
|
517
|
+
|
|
518
|
+
# Computes the value of an expression from a directive or operand.
#
# The text is tokenized and parsed from the first token. In strict mode
# any token left over after the expression is an error; in non-strict
# mode leftover tokens are ignored.
#
# @param expression [String] the expression text
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Integer, nil] the computed value, nil when unresolved
# @raise [AssemblyError] in strict mode when tokens remain unparsed
def evaluate(expression, labels, pc, strict:)
  tokens = tokenize(expression)
  value, consumed = parse_expression(tokens, 0, labels, pc, strict: strict)

  if strict && consumed < tokens.length
    raise AssemblyError, "Unexpected token #{tokens[consumed]}"
  end

  value
end
|
|
532
|
+
|
|
533
|
+
# Breaks an expression into tokens.
#
# Whitespace is skipped. Recognised tokens are the single characters
# `( ) + - < > *`, `$` hex literals, `%` binary literals, decimal
# numbers, quoted single-character literals, and identifiers.
#
# @param expression [String] the expression to tokenize
# @return [Array<String>] the tokens in source order
# @raise [AssemblyError] for a `$` or `%` without digits, or for any
#   character that cannot start a token
def tokenize(expression)
  tokens = []
  index = 0

  while index < expression.length
    char = expression[index]

    if char.match?(/\s/)
      index += 1
      next
    end

    remainder = expression[index..]
    token =
      case char
      when '(', ')', '+', '-', '<', '>', '*'
        char
      when '$'
        remainder[/\A\$[0-9a-fA-F]+/] ||
          raise(AssemblyError, "Invalid hex literal in #{expression.inspect}")
      when '%'
        remainder[/\A%[01]+/] ||
          raise(AssemblyError, "Invalid binary literal in #{expression.inspect}")
      when /\d/
        remainder[/\A\d+/]
      when "'"
        # Only a quote closed two characters later forms a literal.
        expression[index, 3] if expression[index + 2] == "'"
      when /[A-Za-z_]/
        remainder[/\A[A-Za-z_][A-Za-z0-9_]*/]
      end

    raise AssemblyError, "Unexpected character #{char.inspect} in #{expression.inspect}" unless token

    tokens << token
    index += token.length
  end

  tokens
end
|
|
579
|
+
|
|
580
|
+
# Parses a left-associative chain of `+` and `-` operations.
#
# Parsing stops at the first token that is neither `+` nor `-`; the index
# of that token is returned with the value so the caller can continue.
#
# @param tokens [Array<String>] the token stream
# @param position [Integer] the index of the first token to parse
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Array<(Integer, nil, Integer)>] the value and next token index
def parse_expression(tokens, position, labels, pc, strict:)
  total, cursor = parse_unary(tokens, position, labels, pc, strict: strict)

  while ['+', '-'].include?(tokens[cursor])
    operator = tokens[cursor]
    operand, cursor = parse_unary(tokens, cursor + 1, labels, pc, strict: strict)
    total = nil_value_math(total, operand, operator)
  end

  [total, cursor]
end
|
|
600
|
+
|
|
601
|
+
# Parses a prefix operator, a parenthesised group or a primary value.
#
# Prefix operators: `+` leaves the operand unchanged, `-` negates it,
# `<` keeps the low byte and `>` yields bits 8..15. An unresolved (nil)
# operand stays nil.
#
# @param tokens [Array<String>] the token stream
# @param position [Integer] the index of the token to parse
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Array<(Integer, nil, Integer)>] the value and next token index
# @raise [AssemblyError] when the stream ends early or a `(` is not closed
def parse_unary(tokens, position, labels, pc, strict:)
  token = tokens[position]
  raise AssemblyError, 'Unexpected end of expression' unless token

  if token == '('
    inner, closing = parse_expression(tokens, position + 1, labels, pc, strict: strict)
    raise AssemblyError, 'Missing closing parenthesis' unless tokens[closing] == ')'

    return [inner, closing + 1]
  end

  unless ['+', '-', '<', '>'].include?(token)
    return [parse_primary(token, labels, pc, strict: strict), position + 1]
  end

  operand, following = parse_unary(tokens, position + 1, labels, pc, strict: strict)
  result =
    case token
    when '+' then operand
    when '-' then operand&.-@
    when '<' then operand && (operand & 0xff)
    when '>' then operand && ((operand >> 8) & 0xff)
    end

  [result, following]
end
|
|
634
|
+
|
|
635
|
+
# Resolves a single value token.
#
# Accepted forms, checked in this order: `$` hex literal, `%` binary
# literal, decimal number, quoted character (its code point), `*` (the
# current address), and finally a label name.
#
# @param token [String] the current token
# @param labels [Hash{String => Integer}] resolved labels
# @param pc [Integer] the current assembly address
# @param strict [Boolean] whether unknown labels should raise
# @return [Integer, nil] the resolved value; nil for an unknown symbol in
#   non-strict mode
# @raise [AssemblyError] for an unknown symbol in strict mode
def parse_primary(token, labels, pc, strict:)
  return token[1..].to_i(16) if token.match?(/\A\$[0-9a-fA-F]+\z/)
  return token[1..].to_i(2) if token.match?(/\A%[01]+\z/)
  return token.to_i if token.match?(/\A\d+\z/)
  return token[1].ord if token.match?(/\A'.'\z/)
  return pc if token == '*'
  return labels[token] if labels.key?(token)

  raise AssemblyError, "Unknown symbol #{token}" if strict

  nil
end
|
|
661
|
+
|
|
662
|
+
# Adds or subtracts two values, propagating nil for unresolved operands.
#
# Any operator other than `+` is treated as subtraction.
#
# @param lhs [Integer, nil] the left-hand side
# @param rhs [Integer, nil] the right-hand side
# @param operator [String] `+` or `-`
# @return [Integer, nil] the result, or nil when either side is nil
def nil_value_math(lhs, rhs, operator)
  return nil if [lhs, rhs].any?(&:nil?)

  if operator == '+'
    lhs + rhs
  else
    lhs - rhs
  end
end
|
|
673
|
+
|
|
674
|
+
# Tells whether a value can be stored in one unsigned byte.
#
# @param value [Integer] the value to test
# @return [Boolean] true when `value` lies within `0x00..0xFF`
def fits_byte?(value)
  value >= 0x00 && value <= 0xff
end
|
|
681
|
+
|
|
682
|
+
# Stores the bytes collected so far as a new segment.
#
# Nothing is stored for an empty byte list. The bytes are copied, so the
# caller may keep using its buffer afterwards.
#
# @param segments [Array<Segment>] the collected segments, updated in place
# @param start_address [Integer] the segment origin
# @param bytes [Array<Integer>] the emitted bytes
# @return [void]
def flush_segment(segments, start_address, bytes)
  segments << Segment.new(start_address: start_address, bytes: bytes.dup) unless bytes.empty?
end
|
|
693
|
+
|
|
694
|
+
# Selects the address execution should begin at.
#
# Candidates, in order of preference:
# - the label `start`
# - the layout address of the first instruction statement
# - the start address of the first emitted segment
# - DEFAULT_ORIGIN
#
# @param statements [Array<Statement>] parsed source statements
# @param labels [Hash{String => Integer}] resolved labels
# @param layout [Hash{Integer => Hash}] resolved layout keyed by statement index
# @param segments [Array<Segment>] emitted segments
# @return [Integer] the chosen entry point
def determine_entry_point(statements, labels, layout, segments)
  return labels['start'] if labels.key?('start')

  statements.each_with_index do |statement, index|
    return layout.fetch(index).fetch(:address) if statement.kind == :instruction
  end

  first_segment = segments.first
  (first_segment && first_segment.start_address) || DEFAULT_ORIGIN
end
|
|
715
|
+
|
|
716
|
+
# Prefixes a message with the line number and appends the offending source.
#
# @param statement [Statement] the statement that failed
# @param message [String] the underlying error
# @return [String] `Line N: message (source text)`
def format_error(statement, message)
  source_text = statement.raw.strip
  "Line #{statement.line_number}: #{message} (#{source_text})"
end
|
|
724
|
+
end
|
|
725
|
+
end
|