psx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/psx/cpu.rb ADDED
@@ -0,0 +1,964 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "cop0"
4
+ require_relative "gte"
5
+
6
+ module PSX
7
+ class CPU
8
# Exception type namespaced under the CPU. Nothing in the visible part of
# this file raises it.
class ExecutionError < StandardError
end
9
+
10
+ RESET_VECTOR = 0xBFC0_0000 # BIOS entry point
11
+
12
+ # Physical address of the A/B/C BIOS jump tables in low RAM
13
+ BIOS_A_DISPATCH = 0x000000A0
14
+ BIOS_B_DISPATCH = 0x000000B0
15
+ BIOS_C_DISPATCH = 0x000000C0
16
+
17
+ attr_reader :pc, :regs, :hi, :lo, :memory, :cop0, :gte, :step_cycles
18
+ attr_accessor :tty_handler
19
+
20
# Redirects execution to +value+ (wrapped to 32 bits). The follow-up PC is
# recomputed and any armed branch / delay-slot marker is discarded, so the
# next step starts cleanly at the new address.
def pc=(value)
  target = value & 0xFFFF_FFFF
  @pc = target
  @next_pc = (target + 4) & 0xFFFF_FFFF
  @branch_target = nil
  @next_in_delay_slot = false
end
26
+
27
# Builds a CPU attached to +memory+ and, optionally, an interrupt source.
#
# memory     - object used for all instruction fetches and data accesses.
# interrupts - optional object polled via #pending? by check_interrupts;
#              when nil, interrupts are never checked.
def initialize(memory, interrupts: nil)
  @memory = memory
  @interrupts = interrupts

  # General-purpose registers (R0 is always 0) plus the HI/LO pair.
  @regs = Array.new(32, 0)
  @hi = 0
  @lo = 0

  # Execution starts at the BIOS entry point.
  @pc = RESET_VECTOR
  @next_pc = @pc + 4
  @current_pc = @pc

  # Coprocessors: COP0 and the GTE (COP2).
  @cop0 = COP0.new
  @gte = GTE.new

  # Pending delayed load: destination register (0 means none) and value.
  @load_delay_reg = 0
  @load_delay_value = 0

  # Branch delay slot tracking. @next_in_delay_slot records whether the
  # previous step set up a branch, i.e. whether the *current* step executes
  # a delay-slot instruction. This cannot be inferred from @next_pc vs
  # @pc + 4 because short branches happen to have target == delay_slot + 4;
  # only the branch instruction itself knows.
  @branch_target = nil
  @in_delay_slot = false
  @next_in_delay_slot = false

  # Interrupts are only polled once every @interrupt_check_interval steps.
  @interrupt_check_counter = 0
  @interrupt_check_interval = 64

  # Cycles consumed by the most recent step. Defaults to 1 per instruction;
  # loads bump it to approximate the load-delay slot plus RAM latency. The
  # outer run loop reads this to drive VBlank/timer ticks at roughly the
  # right rate, so BIOS wait loops (e.g. VSync) finish before their
  # 0x8000-iteration timeout.
  @step_cycles = 1
end
67
+
68
# Executes one instruction, or services one intercepted BIOS call.
#
# Sequence: snapshot PC and delay-slot state for exception reporting, poll
# interrupts every @interrupt_check_interval steps, let the optional TTY
# hook swallow a BIOS call, fetch, advance the PC pair, commit the pending
# delayed load, execute, and finally arm any branch the instruction
# requested so it takes effect after the following (delay-slot) instruction.
def step
  @step_cycles = 1

  # Snapshot pre-execute state so an exception raised during this step
  # records the right EPC/BD. The previous step set @next_in_delay_slot iff
  # it armed a branch, which makes *this* instruction a delay slot.
  pc = @pc
  next_pc = @next_pc
  @current_pc = pc
  @in_delay_slot = @next_in_delay_slot
  @next_in_delay_slot = false

  # Poll for interrupts only every N steps, for performance.
  @interrupt_check_counter += 1
  if @interrupt_check_counter >= @interrupt_check_interval
    @interrupt_check_counter = 0
    check_interrupts
    if @pc != pc
      # An exception was taken: this step continues with the vector's first
      # instruction. Re-point the snapshot at it as well; otherwise an
      # exception raised by that instruction would be reported with the
      # interrupted instruction's PC and delay-slot flag.
      pc = @pc
      next_pc = @next_pc
      @current_pc = pc
      @in_delay_slot = false
    end
  end

  # Optional TTY hook: when it handles a BIOS call, PC has already been
  # moved back to the caller and nothing is fetched this step.
  return if @tty_handler && intercept_bios_call(pc)

  instruction = @memory.read32(pc)

  # Advance the PC pair. next_pc may have been redirected by the previous
  # step's branch epilogue.
  @pc = next_pc
  @next_pc = (next_pc + 4) & 0xFFFF_FFFF

  # Commit the pending delayed load (inlined for performance).
  load_reg = @load_delay_reg
  if load_reg != 0
    @regs[load_reg] = @load_delay_value
    @load_delay_reg = 0
  end

  # An all-zero word is a NOP; skip the dispatch entirely.
  execute(instruction) if instruction != 0

  # If the instruction armed a branch, the *next* instruction still runs
  # (delay slot) and control transfers to the target afterwards.
  new_branch = @branch_target
  if new_branch
    @next_pc = new_branch
    @branch_target = nil
    @next_in_delay_slot = true
  end
end
129
+
130
# Returns Disasm's rendering of the instruction word currently at @pc.
def disassemble_current
  Disasm.disassemble(@pc, @memory.read32(@pc))
end
134
+
135
# Renders the register file as text: a header line, eight rows of four
# general-purpose registers, one PC/HI/LO row and one COP0 SR/CAUSE/EPC row,
# joined with newlines.
def dump_registers
  gpr_rows = Array.new(8) do |row|
    Array.new(4) do |col|
      index = row * 4 + col
      format("R%-2d=%08X", index, @regs[index])
    end.join(" ")
  end
  pc_row = format("PC=%08X HI=%08X LO=%08X", @pc, @hi, @lo)
  cop0_row = format("SR=%08X CAUSE=%08X EPC=%08X", @cop0.sr, @cop0.cause, @cop0.epc)
  ["Registers:", *gpr_rows, pc_row, cop0_row].join("\n")
end
145
+
146
+ private
147
+
148
# Mirrors the interrupt source's pending state into COP0 and, when COP0
# reports an interrupt as pending, enters the interrupt exception. Does
# nothing when no interrupt source was supplied.
def check_interrupts
  source = @interrupts
  return unless source

  @cop0.set_hardware_irq(source.pending?)
  exception(COP0::EXC_INT) if @cop0.interrupt_pending?
end
159
+
160
# Intercepts BIOS jump-table calls for PS-EXE testing.
#
# pc - address about to be fetched; only its low 29 bits are compared
#      against the A/B dispatch addresses.
#
# The function number is taken from $t1 (R9) and the first argument from $a0
# (R4). Character calls are forwarded as @tty_handler.call(:char, byte),
# string calls as @tty_handler.call(:str, string).
#
# Returns true when the call was handled, in which case PC has been moved to
# the caller via $ra (R31); false otherwise. The result no longer depends on
# what the handler itself returns: a handler returning nil/false for a string
# call used to make the call look unhandled, so the real BIOS routine ran as
# well.
#
# NOTE(review): besides A(0x3C)/B(0x3D) and A(0x3E)/B(0x3F), the table also
# routes A(0x3D), B(0x3B), A(0x3F) and B(0x3E) to the handler -- confirm
# against a BIOS function list that those are really output calls.
def intercept_bios_call(pc)
  phys = pc & 0x1FFFFFFF
  return false unless phys == BIOS_A_DISPATCH || phys == BIOS_B_DISPATCH

  code = @regs[9] & 0xFF # $t1
  case [phys, code]
  when [BIOS_A_DISPATCH, 0x3C], [BIOS_A_DISPATCH, 0x3D],
       [BIOS_B_DISPATCH, 0x3B], [BIOS_B_DISPATCH, 0x3D]
    @tty_handler.call(:char, @regs[4] & 0xFF)
  when [BIOS_A_DISPATCH, 0x3E], [BIOS_A_DISPATCH, 0x3F],
       [BIOS_B_DISPATCH, 0x3E], [BIOS_B_DISPATCH, 0x3F]
    @tty_handler.call(:str, read_cstring(@regs[4]))
  else
    return false
  end

  # Return to caller: PC = $ra, no branch delay slot.
  @pc = @regs[31] & 0xFFFF_FFFF
  @next_pc = (@pc + 4) & 0xFFFF_FFFF
  @branch_target = nil
  true
end
188
+
189
# Reads a NUL-terminated string from memory starting at +addr+.
#
# At most +max+ bytes are read, so a bad pointer cannot cause a runaway
# scan. Addresses wrap at 32 bits. The terminator is not included in the
# returned String.
def read_cstring(addr, max: 4096)
  text = String.new
  max.times do |offset|
    byte = @memory.read8((addr + offset) & 0xFFFF_FFFF)
    break if byte == 0

    text << byte.chr
  end
  text
end
200
+
201
# Commits the pending delayed load, if any, into the register file and
# clears the pending marker. (#step carries an inlined copy of this logic.)
def apply_load_delay
  return if @load_delay_reg == 0

  @regs[@load_delay_reg] = @load_delay_value
  @load_delay_reg = 0
end
207
+
208
# Writes +value+ (truncated to 32 bits) to general-purpose register +reg+.
# A direct write cancels a pending delayed load aimed at the same register.
# Register 0 is never modified.
def set_reg(reg, value)
  @load_delay_reg = 0 if reg == @load_delay_reg
  return if reg == 0

  @regs[reg] = value & 0xFFFF_FFFF
end
213
+
214
# Schedules a delayed register write: +value+ (truncated to 32 bits) is
# parked and only copied into +reg+ when the pending load is next committed.
def set_reg_delayed(reg, value)
  masked = value & 0xFFFF_FFFF
  @load_delay_reg = reg
  @load_delay_value = masked
end
219
+
220
# Arms a PC-relative branch. +offset+ is the already sign-extended 16-bit
# immediate shifted left by 2; it is applied to @pc, which #step has already
# advanced past the branch instruction. The sum wraps at 32 bits.
def branch(offset)
  destination = @pc + offset
  @branch_target = destination & 0xFFFF_FFFF
end
224
+
225
# Arms an absolute jump to +target+, truncated to 32 bits.
def jump(target)
  destination = target & 0xFFFF_FFFF
  @branch_target = destination
end
228
+
229
# Sign-extends an 8-bit value to its unsigned 32-bit two's-complement form
# (bits 8..31 are set when bit 7 is set).
def sign_extend8(value)
  return value if (value & 0x80) == 0

  value | 0xFFFF_FF00
end
232
+
233
# Sign-extends a 16-bit value to its unsigned 32-bit two's-complement form
# (bits 16..31 are set when bit 15 is set).
def sign_extend16(value)
  return value if (value & 0x8000) == 0

  value | 0xFFFF_0000
end
236
+
237
# Reinterprets an unsigned 32-bit pattern as a signed Ruby Integer: values
# with bit 31 set come back negative, everything else is returned unchanged.
def sign_extend32(value)
  # (value & bit31) << 1 is 0x1_0000_0000 exactly when bit 31 is set.
  value - ((value & 0x8000_0000) << 1)
end
241
+
242
# Primary decode: dispatches on the 6-bit opcode in bits 31..26. Unassigned
# opcodes raise a reserved-instruction exception. (#step filters out the
# all-zero NOP before calling this.)
#
# The arms are kept as integer literals only, one per opcode.
def execute(instruction)
  case (instruction >> 26) & 0x3F
  # Secondary decode tables
  when 0x00 then execute_special(instruction)
  when 0x01 then execute_bcondz(instruction)
  # Jumps and branches
  when 0x02 then op_j(instruction)
  when 0x03 then op_jal(instruction)
  when 0x04 then op_beq(instruction)
  when 0x05 then op_bne(instruction)
  when 0x06 then op_blez(instruction)
  when 0x07 then op_bgtz(instruction)
  # Immediate ALU
  when 0x08 then op_addi(instruction)
  when 0x09 then op_addiu(instruction)
  when 0x0A then op_slti(instruction)
  when 0x0B then op_sltiu(instruction)
  when 0x0C then op_andi(instruction)
  when 0x0D then op_ori(instruction)
  when 0x0E then op_xori(instruction)
  when 0x0F then op_lui(instruction)
  # Coprocessor operations (COP2 is the GTE)
  when 0x10 then execute_cop0(instruction)
  when 0x11 then execute_cop1(instruction)
  when 0x12 then execute_cop2(instruction)
  when 0x13 then execute_cop3(instruction)
  # Loads
  when 0x20 then op_lb(instruction)
  when 0x21 then op_lh(instruction)
  when 0x22 then op_lwl(instruction)
  when 0x23 then op_lw(instruction)
  when 0x24 then op_lbu(instruction)
  when 0x25 then op_lhu(instruction)
  when 0x26 then op_lwr(instruction)
  # Stores
  when 0x28 then op_sb(instruction)
  when 0x29 then op_sh(instruction)
  when 0x2A then op_swl(instruction)
  when 0x2B then op_sw(instruction)
  when 0x2E then op_swr(instruction)
  # Coprocessor loads (LWCn)
  when 0x30 then op_lwcN(instruction, 0)
  when 0x31 then op_lwcN(instruction, 1)
  when 0x32 then op_lwcN(instruction, 2)
  when 0x33 then op_lwcN(instruction, 3)
  # Coprocessor stores (SWCn)
  when 0x38 then op_swcN(instruction, 0)
  when 0x39 then op_swcN(instruction, 1)
  when 0x3A then op_swcN(instruction, 2)
  when 0x3B then op_swcN(instruction, 3)
  else
    exception(COP0::EXC_RI)
  end
end
291
+
292
# Secondary decode for opcode 0x00: dispatches on the 6-bit function field
# in bits 5..0. Unassigned function codes raise a reserved-instruction
# exception.
def execute_special(instruction)
  case instruction & 0x3F
  # Shifts
  when 0x00 then op_sll(instruction)
  when 0x02 then op_srl(instruction)
  when 0x03 then op_sra(instruction)
  when 0x04 then op_sllv(instruction)
  when 0x06 then op_srlv(instruction)
  when 0x07 then op_srav(instruction)
  # Register jumps and traps
  when 0x08 then op_jr(instruction)
  when 0x09 then op_jalr(instruction)
  when 0x0C then op_syscall(instruction)
  when 0x0D then op_break(instruction)
  # HI/LO moves
  when 0x10 then op_mfhi(instruction)
  when 0x11 then op_mthi(instruction)
  when 0x12 then op_mflo(instruction)
  when 0x13 then op_mtlo(instruction)
  # Multiply / divide
  when 0x18 then op_mult(instruction)
  when 0x19 then op_multu(instruction)
  when 0x1A then op_div(instruction)
  when 0x1B then op_divu(instruction)
  # Three-register ALU
  when 0x20 then op_add(instruction)
  when 0x21 then op_addu(instruction)
  when 0x22 then op_sub(instruction)
  when 0x23 then op_subu(instruction)
  when 0x24 then op_and(instruction)
  when 0x25 then op_or(instruction)
  when 0x26 then op_xor(instruction)
  when 0x27 then op_nor(instruction)
  when 0x2A then op_slt(instruction)
  when 0x2B then op_sltu(instruction)
  else
    exception(COP0::EXC_RI)
  end
end
328
+
329
# Opcode 0x01: BLTZ / BGEZ / BLTZAL / BGEZAL.
#
# The rt field selects the variant. Bit 0 chooses the condition (1 = branch
# when rs >= 0, 0 = branch when rs < 0). The return address is written to
# R31 only when rt bits 4..1 equal 1000b. The previous test (rt & 0x10) also
# linked for rt values such as 0x12, clobbering R31 for encodings that are
# plain conditional branches on the R3000A. When linking, R31 is written
# whether or not the branch is taken.
def execute_bcondz(instruction)
  rs = (instruction >> 21) & 0x1F
  rt = (instruction >> 16) & 0x1F
  offset = sign_extend16(instruction & 0xFFFF) << 2

  # Sample rs before the link write so rs == 31 tests the old value.
  negative = (@regs[rs] & 0x8000_0000) != 0
  taken = (rt & 0x01) != 0 ? !negative : negative

  set_reg(31, @next_pc) if (rt & 0x1E) == 0x10
  branch(offset) if taken
end
346
+
347
# Opcode 0x10: COP0 operations (MFC0 / MTC0 / RFE).
#
# COP0 is always available in kernel mode and gated by SR.CU0 in user mode
# (KUc=1); touching it while disabled raises Coprocessor-Unusable with
# coprocessor number 0. Unknown COP0 sub-opcodes are silently ignored
# (verified by ps1-tests/cpu/cop testCop0InvalidOpcode).
def execute_cop0(instruction)
  status = @cop0.sr
  kernel_mode = (status & COP0::SR_KUC) == 0
  unless kernel_mode || (status & COP0::SR_CU0) != 0
    exception(COP0::EXC_CPU, coprocessor: 0)
    return
  end

  case (instruction >> 21) & 0x1F
  when 0x00 then op_mfc0(instruction)
  when 0x04 then op_mtc0(instruction)
  when 0x10 then op_rfe(instruction)
  end
end
367
+
368
# COP1 does not exist on the PSX. With SR.CU1 clear the access raises
# Coprocessor-Unusable for coprocessor 1; with CU1 set the instruction is
# silently ignored.
def execute_cop1(_instruction)
  exception(COP0::EXC_CPU, coprocessor: 1) if (@cop0.sr & COP0::SR_CU1) == 0
end
375
+
376
# COP3 does not exist either and follows the same model as COP1:
# Coprocessor-Unusable for coprocessor 3 when SR.CU3 is clear, otherwise a
# silent no-op.
def execute_cop3(_instruction)
  exception(COP0::EXC_CPU, coprocessor: 3) if (@cop0.sr & COP0::SR_CU3) == 0
end
382
+
383
# Opcode 0x12: COP2 (GTE) operations.
#
# Raises Coprocessor-Unusable for coprocessor 2 when SR.CU2 is clear. With
# bit 25 set, the word is handed to the GTE as a command; otherwise it is a
# register move between the CPU and the GTE (MFC2 / CFC2 / MTC2 / CTC2).
# Moves towards the CPU go through the delayed-load mechanism. Unknown
# register sub-opcodes are silently ignored (testCop2InvalidOpcode).
def execute_cop2(instruction)
  if (@cop0.sr & COP0::SR_CU2) == 0
    exception(COP0::EXC_CPU, coprocessor: 2)
    return
  end

  unless (instruction & 0x0200_0000) == 0
    @gte.execute(instruction)
    return
  end

  cpu_reg = (instruction >> 16) & 0x1F
  gte_reg = (instruction >> 11) & 0x1F

  case (instruction >> 21) & 0x1F
  when 0x00 then set_reg_delayed(cpu_reg, @gte.read_data(gte_reg))    # MFC2
  when 0x02 then set_reg_delayed(cpu_reg, @gte.read_control(gte_reg)) # CFC2
  when 0x04 then @gte.write_data(gte_reg, @regs[cpu_reg])             # MTC2
  when 0x06 then @gte.write_control(gte_reg, @regs[cpu_reg])          # CTC2
  end
end
406
+
407
+ # R-type ALU operations (decode inlined for performance)
408
# SLL: rd = rt << shamt, where shamt is the 5-bit immediate in bits 10..6.
def op_sll(instruction)
  amount = (instruction >> 6) & 0x1F
  source = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, source << amount)
end
414
+
415
# SRL: rd = rt >> shamt (logical; register values are held unsigned).
def op_srl(instruction)
  amount = (instruction >> 6) & 0x1F
  source = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, source >> amount)
end
421
+
422
# SRA: rd = rt >> shamt with sign fill. rt is converted to a signed Integer
# first so Ruby's >> replicates the sign bit.
def op_sra(instruction)
  amount = (instruction >> 6) & 0x1F
  signed = sign_extend32(@regs[(instruction >> 16) & 0x1F])
  set_reg((instruction >> 11) & 0x1F, signed >> amount)
end
429
+
430
# SLLV: rd = rt << (rs & 0x1F).
def op_sllv(instruction)
  amount = @regs[(instruction >> 21) & 0x1F] & 0x1F
  source = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, source << amount)
end
436
+
437
# SRLV: rd = rt >> (rs & 0x1F), logical.
def op_srlv(instruction)
  amount = @regs[(instruction >> 21) & 0x1F] & 0x1F
  source = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, source >> amount)
end
443
+
444
# SRAV: rd = rt >> (rs & 0x1F) with sign fill.
def op_srav(instruction)
  amount = @regs[(instruction >> 21) & 0x1F] & 0x1F
  signed = sign_extend32(@regs[(instruction >> 16) & 0x1F])
  set_reg((instruction >> 11) & 0x1F, signed >> amount)
end
451
+
452
# JR: arms a jump to the address held in rs.
def op_jr(instruction)
  jump(@regs[(instruction >> 21) & 0x1F])
end
456
+
457
# JALR: arms a jump to the address in rs and stores the return address
# (@next_pc, the instruction after the delay slot) in rd.
#
# The target is sampled *before* the link register is written. With the
# previous order, an encoding where rd == rs jumped to the freshly written
# return address instead of the original contents of rs.
def op_jalr(instruction)
  rs = (instruction >> 21) & 0x1F
  rd = (instruction >> 11) & 0x1F
  target = @regs[rs]
  set_reg(rd, @next_pc)
  jump(target)
end
463
+
464
# SYSCALL: enters the system-call exception (COP0::EXC_SYS). The instruction
# word itself is not inspected.
def op_syscall(_instruction)
  exception(COP0::EXC_SYS)
end
467
+
468
# BREAK: enters the breakpoint exception (COP0::EXC_BP). The instruction
# word itself is not inspected.
def op_break(_instruction)
  exception(COP0::EXC_BP)
end
471
+
472
# MFHI: rd = HI.
def op_mfhi(instruction)
  set_reg((instruction >> 11) & 0x1F, @hi)
end
476
+
477
# MTHI: HI = rs.
def op_mthi(instruction)
  @hi = @regs[(instruction >> 21) & 0x1F]
end
481
+
482
# MFLO: rd = LO.
def op_mflo(instruction)
  set_reg((instruction >> 11) & 0x1F, @lo)
end
486
+
487
# MTLO: LO = rs.
def op_mtlo(instruction)
  @lo = @regs[(instruction >> 21) & 0x1F]
end
491
+
492
# MULT: signed 32x32 -> 64-bit multiply of rs and rt; the low word goes to
# LO and the high word to HI.
def op_mult(instruction)
  lhs = sign_extend32(@regs[(instruction >> 21) & 0x1F])
  rhs = sign_extend32(@regs[(instruction >> 16) & 0x1F])
  # Fold the (possibly negative) product into its 64-bit two's-complement
  # pattern, then split it into two words.
  product = (lhs * rhs) & 0xFFFF_FFFF_FFFF_FFFF
  @lo = product & 0xFFFF_FFFF
  @hi = product >> 32
end
501
+
502
# MULTU: unsigned 32x32 -> 64-bit multiply of rs and rt; the low word goes
# to LO and the high word to HI.
def op_multu(instruction)
  product = @regs[(instruction >> 21) & 0x1F] * @regs[(instruction >> 16) & 0x1F]
  @lo = product & 0xFFFF_FFFF
  @hi = (product >> 32) & 0xFFFF_FFFF
end
509
+
510
# DIV: signed division of rs by rt; quotient to LO, remainder to HI.
#
# Special cases handled explicitly:
# * divisor 0: LO = 0xFFFF_FFFF for a non-negative dividend, 1 for a
#   negative one; HI = the dividend.
# * 0x8000_0000 / -1 (overflow): LO = 0x8000_0000, HI = 0.
#
# MIPS division truncates toward zero and the remainder takes the sign of
# the dividend. Ruby's Integer#/ and #% floor instead (-7 / 2 == -4,
# -7 % 2 == 1), which produced wrong results whenever the operand signs
# differed, so the quotient is derived from Integer#remainder here.
def op_div(instruction)
  rs = (instruction >> 21) & 0x1F
  rt = (instruction >> 16) & 0x1F
  num = sign_extend32(@regs[rs])
  den = sign_extend32(@regs[rt])

  if den == 0
    @lo = num >= 0 ? 0xFFFF_FFFF : 1
    @hi = @regs[rs]
  elsif num == -0x8000_0000 && den == -1
    @lo = 0x8000_0000
    @hi = 0
  else
    rem = num.remainder(den)
    # (num - rem) is an exact multiple of den, so this division is exact.
    @lo = ((num - rem) / den) & 0xFFFF_FFFF
    @hi = rem & 0xFFFF_FFFF
  end
end
529
+
530
# DIVU: unsigned division of rs by rt; quotient to LO, remainder to HI.
# Division by zero yields LO = 0xFFFF_FFFF and HI = the dividend.
def op_divu(instruction)
  dividend = @regs[(instruction >> 21) & 0x1F]
  divisor = @regs[(instruction >> 16) & 0x1F]

  if divisor == 0
    @lo = 0xFFFF_FFFF
    @hi = dividend
    return
  end

  @lo = dividend / divisor
  @hi = dividend % divisor
end
544
+
545
# ADD: rd = rs + rt, wrapped to 32 bits.
# TODO: overflow trap (signed overflow is currently not detected).
def op_add(instruction)
  sum = @regs[(instruction >> 21) & 0x1F] + @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, sum)
end
552
+
553
# ADDU: rd = rs + rt, wrapped to 32 bits (never traps).
def op_addu(instruction)
  sum = @regs[(instruction >> 21) & 0x1F] + @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, sum)
end
559
+
560
# SUB: rd = rs - rt, wrapped to 32 bits.
# TODO: overflow trap (signed overflow is currently not detected).
def op_sub(instruction)
  difference = @regs[(instruction >> 21) & 0x1F] - @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, difference)
end
567
+
568
# SUBU: rd = rs - rt, wrapped to 32 bits (never traps).
def op_subu(instruction)
  difference = @regs[(instruction >> 21) & 0x1F] - @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, difference)
end
574
+
575
# AND: rd = rs & rt.
def op_and(instruction)
  lhs = @regs[(instruction >> 21) & 0x1F]
  rhs = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, lhs & rhs)
end
581
+
582
# OR: rd = rs | rt.
def op_or(instruction)
  lhs = @regs[(instruction >> 21) & 0x1F]
  rhs = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, lhs | rhs)
end
588
+
589
# XOR: rd = rs ^ rt.
def op_xor(instruction)
  lhs = @regs[(instruction >> 21) & 0x1F]
  rhs = @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, lhs ^ rhs)
end
595
+
596
# NOR: rd = ~(rs | rt). The complement is negative in Ruby; set_reg
# truncates it back to 32 bits.
def op_nor(instruction)
  combined = @regs[(instruction >> 21) & 0x1F] | @regs[(instruction >> 16) & 0x1F]
  set_reg((instruction >> 11) & 0x1F, ~combined)
end
602
+
603
# SLT: rd = 1 when rs < rt as signed 32-bit values, else 0.
def op_slt(instruction)
  lhs = sign_extend32(@regs[(instruction >> 21) & 0x1F])
  rhs = sign_extend32(@regs[(instruction >> 16) & 0x1F])
  flag = lhs < rhs ? 1 : 0
  set_reg((instruction >> 11) & 0x1F, flag)
end
611
+
612
# SLTU: rd = 1 when rs < rt as unsigned 32-bit values, else 0.
def op_sltu(instruction)
  lhs = @regs[(instruction >> 21) & 0x1F]
  rhs = @regs[(instruction >> 16) & 0x1F]
  flag = lhs < rhs ? 1 : 0
  set_reg((instruction >> 11) & 0x1F, flag)
end
618
+
619
+ # I-type operations (decode inlined for performance)
620
# ADDI: rt = rs + sign-extended 16-bit immediate, wrapped to 32 bits.
# TODO: overflow trap (signed overflow is currently not detected).
def op_addi(instruction)
  addend = sign_extend16(instruction & 0xFFFF)
  set_reg((instruction >> 16) & 0x1F, @regs[(instruction >> 21) & 0x1F] + addend)
end
627
+
628
# ADDIU: rt = rs + sign-extended 16-bit immediate, wrapped to 32 bits.
# Hot path: sign extension and the register write (including cancelling a
# pending delayed load to rt) are inlined instead of calling the helpers.
def op_addiu(instruction)
  dest = (instruction >> 16) & 0x1F
  addend = instruction & 0xFFFF
  addend |= 0xFFFF_0000 if (addend & 0x8000) != 0

  @load_delay_reg = 0 if @load_delay_reg == dest
  return if dest == 0

  @regs[dest] = (@regs[(instruction >> 21) & 0x1F] + addend) & 0xFFFF_FFFF
end
637
+
638
# SLTI: rt = 1 when rs is less than the sign-extended immediate, comparing
# both as signed 32-bit values; otherwise 0.
def op_slti(instruction)
  lhs = sign_extend32(@regs[(instruction >> 21) & 0x1F])
  rhs = sign_extend32(sign_extend16(instruction & 0xFFFF) & 0xFFFF_FFFF)
  flag = lhs < rhs ? 1 : 0
  set_reg((instruction >> 16) & 0x1F, flag)
end
647
+
648
# SLTIU: rt = 1 when rs is less than the immediate, else 0. The immediate is
# sign-extended to 32 bits first and the comparison is then unsigned.
def op_sltiu(instruction)
  limit = sign_extend16(instruction & 0xFFFF) & 0xFFFF_FFFF
  flag = @regs[(instruction >> 21) & 0x1F] < limit ? 1 : 0
  set_reg((instruction >> 16) & 0x1F, flag)
end
654
+
655
# ANDI: rt = rs & zero-extended 16-bit immediate.
def op_andi(instruction)
  mask = instruction & 0xFFFF
  set_reg((instruction >> 16) & 0x1F, @regs[(instruction >> 21) & 0x1F] & mask)
end
661
+
662
# ORI: rt = rs | zero-extended 16-bit immediate.
def op_ori(instruction)
  bits = instruction & 0xFFFF
  set_reg((instruction >> 16) & 0x1F, @regs[(instruction >> 21) & 0x1F] | bits)
end
668
+
669
# XORI: rt = rs ^ zero-extended 16-bit immediate.
def op_xori(instruction)
  bits = instruction & 0xFFFF
  set_reg((instruction >> 16) & 0x1F, @regs[(instruction >> 21) & 0x1F] ^ bits)
end
675
+
676
# LUI: rt = immediate << 16 (the low half-word becomes zero).
def op_lui(instruction)
  upper = (instruction & 0xFFFF) << 16
  set_reg((instruction >> 16) & 0x1F, upper)
end
681
+
682
+ # Branch operations (decode inlined)
683
# BEQ: branches by the sign-extended immediate << 2 when rs == rt.
def op_beq(instruction)
  return unless @regs[(instruction >> 21) & 0x1F] == @regs[(instruction >> 16) & 0x1F]

  branch(sign_extend16(instruction & 0xFFFF) << 2)
end
689
+
690
# BNE: branches by the sign-extended immediate << 2 when rs != rt.
def op_bne(instruction)
  return if @regs[(instruction >> 21) & 0x1F] == @regs[(instruction >> 16) & 0x1F]

  branch(sign_extend16(instruction & 0xFFFF) << 2)
end
696
+
697
# BLEZ: branches by the sign-extended immediate << 2 when rs <= 0 as a
# signed 32-bit value.
def op_blez(instruction)
  return if sign_extend32(@regs[(instruction >> 21) & 0x1F]) > 0

  branch(sign_extend16(instruction & 0xFFFF) << 2)
end
702
+
703
# BGTZ: branches by the sign-extended immediate << 2 when rs > 0 as a signed
# 32-bit value. The sign test, sign extension and target computation are
# inlined rather than going through the helpers.
def op_bgtz(instruction)
  operand = @regs[(instruction >> 21) & 0x1F]
  # Signed "greater than zero": non-zero with bit 31 clear.
  return if operand == 0 || (operand & 0x8000_0000) != 0

  displacement = instruction & 0xFFFF
  displacement |= 0xFFFF_0000 if (displacement & 0x8000) != 0
  @branch_target = (@pc + (displacement << 2)) & 0xFFFF_FFFF
end
713
+
714
+ # Jump operations (decode inlined)
715
# J: arms a jump to the 26-bit target << 2, combined with the top four bits
# of @pc (already advanced past the jump instruction by #step).
def op_j(instruction)
  region = @pc & 0xF000_0000
  jump(region | ((instruction & 0x03FF_FFFF) << 2))
end
719
+
720
# JAL: like J, but first stores the return address (@next_pc, the
# instruction after the delay slot) in R31.
def op_jal(instruction)
  set_reg(31, @next_pc)
  region = @pc & 0xF000_0000
  jump(region | ((instruction & 0x03FF_FFFF) << 2))
end
725
+
726
+ # Load operations (decode inlined). Each load bumps @step_cycles by 1 to
727
+ # approximate the load-delay slot + RAM access cost — without this a tight
728
+ # poll loop runs faster than 1 VBlank period and BIOS VSync times out.
729
# LB: loads the byte at rs + sign-extended immediate, sign-extends it to 32
# bits and schedules it as a delayed load into rt. Adds one cycle to
# @step_cycles.
#
# The effective address is wrapped to 32 bits, as op_lw and op_lbu already
# do. Because sign_extend16 returns the unsigned 32-bit pattern, a negative
# displacement otherwise produced an address above 0xFFFF_FFFF.
def op_lb(instruction)
  rs = (instruction >> 21) & 0x1F
  rt = (instruction >> 16) & 0x1F
  imm = instruction & 0xFFFF
  addr = (@regs[rs] + sign_extend16(imm)) & 0xFFFF_FFFF
  set_reg_delayed(rt, sign_extend8(@memory.read8(addr)))
  @step_cycles += 1
end
738
+
739
# LH: loads the half-word at rs + sign-extended immediate, sign-extends it
# to 32 bits and schedules it as a delayed load into rt. Adds one cycle to
# @step_cycles.
#
# The effective address is wrapped to 32 bits, as op_lw and op_lbu already
# do. Because sign_extend16 returns the unsigned 32-bit pattern, a negative
# displacement otherwise produced an address above 0xFFFF_FFFF.
def op_lh(instruction)
  rs = (instruction >> 21) & 0x1F
  rt = (instruction >> 16) & 0x1F
  imm = instruction & 0xFFFF
  addr = (@regs[rs] + sign_extend16(imm)) & 0xFFFF_FFFF
  set_reg_delayed(rt, sign_extend16(@memory.read16(addr)))
  @step_cycles += 1
end
748
+
749
# LW: loads the word at rs + sign-extended immediate (wrapped to 32 bits)
# and schedules it as a delayed load into rt. Adds two cycles to
# @step_cycles.
def op_lw(instruction)
  base = @regs[(instruction >> 21) & 0x1F]
  address = (base + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  set_reg_delayed((instruction >> 16) & 0x1F, @memory.read32(address))
  @step_cycles += 2
end
757
+
758
# LBU: loads the byte at rs + sign-extended immediate (wrapped to 32 bits)
# and schedules it, without sign extension, as a delayed load into rt. Adds
# one cycle to @step_cycles. Sign extension of the displacement and the
# delayed-load bookkeeping are inlined rather than calling the helpers.
def op_lbu(instruction)
  displacement = instruction & 0xFFFF
  displacement |= 0xFFFF_0000 if (displacement & 0x8000) != 0
  address = (@regs[(instruction >> 21) & 0x1F] + displacement) & 0xFFFF_FFFF

  @load_delay_reg = (instruction >> 16) & 0x1F
  @load_delay_value = @memory.read8(address)
  @step_cycles += 1
end
770
+
771
# LHU: loads the half-word at rs + sign-extended immediate and schedules it,
# without sign extension, as a delayed load into rt. Adds one cycle to
# @step_cycles.
#
# The effective address is wrapped to 32 bits, as op_lw and op_lbu already
# do. Because sign_extend16 returns the unsigned 32-bit pattern, a negative
# displacement otherwise produced an address above 0xFFFF_FFFF.
def op_lhu(instruction)
  rs = (instruction >> 21) & 0x1F
  rt = (instruction >> 16) & 0x1F
  imm = instruction & 0xFFFF
  addr = (@regs[rs] + sign_extend16(imm)) & 0xFFFF_FFFF
  set_reg_delayed(rt, @memory.read16(addr))
  @step_cycles += 1
end
779
+
780
# LWL: unaligned-load helper. Reads the aligned word containing the
# effective address and merges its low-order bytes into the high-order end
# of rt, keeping rt's remaining low bytes. The result is scheduled as a
# delayed load into rt.
#
# With k = address & 3 the merge is
#   (rt & low (3 - k) bytes) | (word << 8 * (3 - k)).
def op_lwl(instruction)
  dest = (instruction >> 16) & 0x1F
  address = (@regs[(instruction >> 21) & 0x1F] + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  word = @memory.read32(address & ~3)

  shift = (3 - (address & 3)) * 8
  kept = @regs[dest] & ((1 << shift) - 1)
  set_reg_delayed(dest, kept | (word << shift))
end
798
+
799
# LWR: unaligned-load helper. Reads the aligned word containing the
# effective address and merges its high-order bytes into the low-order end
# of rt, keeping rt's remaining high bytes. The result is scheduled as a
# delayed load into rt.
#
# With k = address & 3 the merge is (rt & high k bytes) | (word >> 8 * k).
def op_lwr(instruction)
  dest = (instruction >> 16) & 0x1F
  address = (@regs[(instruction >> 21) & 0x1F] + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  word = @memory.read32(address & ~3)

  shift = (address & 3) * 8
  keep_mask = 0xFFFF_FFFF ^ (0xFFFF_FFFF >> shift)
  set_reg_delayed(dest, (@regs[dest] & keep_mask) | (word >> shift))
end
816
+
817
+ # Store operations (decode inlined)
818
# SB: stores rt at rs + sign-extended immediate (wrapped to 32 bits) via
# write8. The full register value is handed to memory unmasked. The
# displacement sign extension is inlined.
def op_sb(instruction)
  displacement = instruction & 0xFFFF
  displacement |= 0xFFFF_0000 if (displacement & 0x8000) != 0
  address = (@regs[(instruction >> 21) & 0x1F] + displacement) & 0xFFFF_FFFF
  @memory.write8(address, @regs[(instruction >> 16) & 0x1F])
end
827
+
828
# SH: stores rt at rs + sign-extended immediate (wrapped to 32 bits) via
# write16. The full register value is handed to memory unmasked.
def op_sh(instruction)
  base = @regs[(instruction >> 21) & 0x1F]
  address = (base + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  @memory.write16(address, @regs[(instruction >> 16) & 0x1F])
end
835
+
836
# SW: stores rt at rs + sign-extended immediate (wrapped to 32 bits) via
# write32.
def op_sw(instruction)
  base = @regs[(instruction >> 21) & 0x1F]
  address = (base + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  @memory.write32(address, @regs[(instruction >> 16) & 0x1F])
end
843
+
844
# SWL: unaligned-store helper. Read-modify-writes the aligned word that
# contains the effective address, replacing its low-order bytes with the
# high-order bytes of rt.
#
# With k = address & 3 the stored word is
#   (word & high (3 - k) bytes) | (rt >> 8 * (3 - k)).
def op_swl(instruction)
  address = (@regs[(instruction >> 21) & 0x1F] + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  word_address = address & ~3
  existing = @memory.read32(word_address)
  source = @regs[(instruction >> 16) & 0x1F]

  shift = (3 - (address & 3)) * 8
  keep_mask = 0xFFFF_FFFF ^ (0xFFFF_FFFF >> shift)
  @memory.write32(word_address, (existing & keep_mask) | (source >> shift))
end
861
+
862
# SWR: unaligned-store helper. Read-modify-writes the aligned word that
# contains the effective address, replacing its high-order bytes with the
# low-order bytes of rt.
#
# With k = address & 3 the stored word is
#   (word & low k bytes) | (rt << 8 * k).
#
# The shifted register value is truncated to 32 bits before the write;
# previously `reg << n` could hand write32 a value wider than a word.
def op_swr(instruction)
  rs = (instruction >> 21) & 0x1F
  rt = (instruction >> 16) & 0x1F
  imm = instruction & 0xFFFF
  addr = (@regs[rs] + sign_extend16(imm)) & 0xFFFF_FFFF
  aligned = addr & ~3
  val = @memory.read32(aligned)
  reg = @regs[rt]

  shift = (addr & 3) * 8
  kept = val & ((1 << shift) - 1)
  @memory.write32(aligned, kept | ((reg << shift) & 0xFFFF_FFFF))
end
879
+
880
# Coprocessor load (LWCn) for coprocessor number +cop+.
#
# When the coprocessor is not usable (see coprocessor_usable?) this raises
# Coprocessor-Unusable for +cop+. For COP2 the word at rs + sign-extended
# immediate is written into GTE data register rt; for the other
# coprocessors an enabled access is a no-op. Note that COP0/1/3 don't
# actually have memory data registers on the PSX -- BIOS catches the
# exception or treats the instruction as harmless.
def op_lwcN(instruction, cop)
  unless coprocessor_usable?(cop)
    exception(COP0::EXC_CPU, coprocessor: cop)
    return
  end
  return unless cop == 2

  base = @regs[(instruction >> 21) & 0x1F]
  address = (base + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  @gte.write_data((instruction >> 16) & 0x1F, @memory.read32(address))
end
898
+
899
# Coprocessor store (SWCn) for coprocessor number +cop+.
#
# When the coprocessor is not usable (see coprocessor_usable?) this raises
# Coprocessor-Unusable for +cop+. For COP2, GTE data register rt is written
# to memory at rs + sign-extended immediate; for the other coprocessors an
# enabled access is a no-op.
def op_swcN(instruction, cop)
  unless coprocessor_usable?(cop)
    exception(COP0::EXC_CPU, coprocessor: cop)
    return
  end
  return unless cop == 2

  base = @regs[(instruction >> 21) & 0x1F]
  address = (base + sign_extend16(instruction & 0xFFFF)) & 0xFFFF_FFFF
  @memory.write32(address, @gte.read_data((instruction >> 16) & 0x1F))
end
912
+
913
# Whether LWCn / SWCn for coprocessor +cop+ (0..3) is allowed, i.e. whether
# the matching CUn bit is set in COP0's status register.
#
# Unlike the COP0 register-move instructions (MFC0/MTC0/RFE), the load/store
# variants require CU0 to be set explicitly even in kernel mode -- this is
# what ps1-tests/cpu/cop testSwc0Disabled exercises.
def coprocessor_usable?(cop)
  enable_bits = [COP0::SR_CU0, COP0::SR_CU1, COP0::SR_CU2, COP0::SR_CU3]
  (@cop0.sr & enable_bits[cop]) != 0
end
926
+
927
+ # COP0 operations
928
# MFC0: reads COP0 register rd and schedules it as a delayed load into rt.
def op_mfc0(instruction)
  cop_value = @cop0.read((instruction >> 11) & 0x1F)
  set_reg_delayed((instruction >> 16) & 0x1F, cop_value)
end
933
+
934
# MTC0: writes rt into COP0 register rd. When the target is the status
# register, the memory object's cache-isolation flag is refreshed from COP0
# afterwards.
def op_mtc0(instruction)
  cop_reg = (instruction >> 11) & 0x1F
  @cop0.write(cop_reg, @regs[(instruction >> 16) & 0x1F])
  return unless cop_reg == COP0::SR

  @memory.cache_isolated = @cop0.cache_isolated?
end
944
+
945
# RFE: delegates to COP0's return-from-exception handling. The instruction
# word itself is not inspected.
def op_rfe(_instruction)
  @cop0.return_from_exception
end
948
+
949
# Enters a CPU exception.
#
# cause       - exception code passed through to COP0.
# bad_addr    - optional address forwarded to COP0.
# coprocessor - optional coprocessor number forwarded to COP0.
#
# COP0 receives the PC and delay-slot flag snapshotted by #step and returns
# the handler address. Execution is redirected there, and any branch armed
# by the faulting instruction is discarded.
def exception(cause, bad_addr: nil, coprocessor: nil)
  context = {
    in_delay_slot: @in_delay_slot,
    bad_addr: bad_addr,
    coprocessor: coprocessor
  }
  handler = @cop0.enter_exception(cause, @current_pc, **context)

  @pc = handler
  @next_pc = handler + 4
  @branch_target = nil
  @next_in_delay_slot = false
end
963
+ end
964
+ end