psx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/psx/dma.rb ADDED
@@ -0,0 +1,406 @@
1
+ # frozen_string_literal: true
2
+
3
module PSX
  # DMA Controller
  #
  # Handles bulk memory transfers between RAM and devices. Registers are
  # accessed through #read / #write using offsets relative to 0x1F801080:
  #
  #   0x00-0x6F  per-channel registers (16 bytes per channel:
  #              +0 base address, +4 block control, +8 channel control)
  #   0x70       DPCR (control / per-channel enable + priority)
  #   0x74       DICR (interrupt register)
  #
  # Transfers are executed by #tick; deferred completions (currently only
  # the SPU channel) are resolved by #tick_cycles.
  class DMA
    # DMA Channels
    MDEC_IN  = 0 # MDEC decoder input
    MDEC_OUT = 1 # MDEC decoder output
    GPU      = 2 # Graphics Processing Unit
    CDROM    = 3 # CD-ROM drive
    SPU      = 4 # Sound Processing Unit
    PIO      = 5 # Expansion port
    OTC      = 6 # Ordering Table Clear

    NUM_CHANNELS = 7

    # Channel control register bits
    CTRL_DIRECTION     = 0x0000_0001 # 0=To RAM, 1=From RAM
    CTRL_STEP          = 0x0000_0002 # 0=Forward (+4), 1=Backward (-4)
    CTRL_CHOPPING      = 0x0000_0100 # Enable chopping
    CTRL_SYNC_MODE     = 0x0000_0600 # Sync mode (bits 9-10)
    CTRL_CHOP_DMA      = 0x0007_0000 # Chopping DMA window (bits 16-18)
    # Bits 20-22. The previous value (0x0700_0000) covered bits 24-26 and
    # therefore overlapped the Start/Busy bit.
    CTRL_CHOP_CPU      = 0x0070_0000 # Chopping CPU window (bits 20-22)
    CTRL_START_BUSY    = 0x0100_0000 # Start/Busy (bit 24)
    CTRL_START_TRIGGER = 0x1000_0000 # Start trigger (bit 28)

    # Sync modes
    SYNC_MANUAL  = 0 # Transfer all at once (burst)
    SYNC_REQUEST = 1 # Sync blocks to DRQ
    SYNC_LINKED  = 2 # Linked list mode (GPU only)

    # DICR bits
    DICR_FORCE_IRQ       = 0x0000_8000 # Force IRQ (bit 15)
    DICR_IRQ_ENABLE      = 0x007F_0000 # Per-channel IRQ enable (bits 16-22)
    DICR_IRQ_MASTER      = 0x0080_0000 # Master IRQ enable (bit 23)
    DICR_IRQ_FLAGS       = 0x7F00_0000 # Per-channel IRQ flags (bits 24-30)
    DICR_IRQ_MASTER_FLAG = 0x8000_0000 # Master IRQ flag (bit 31)

    # DICR bits that software can set directly: bits 0-5, bit 15 (force IRQ)
    # and bits 16-23 (per-channel enables + master enable).
    DICR_WRITABLE = 0x00FF_803F

    # OTC channel control: only bits 24, 28 and 30 are writable.
    OTC_CTRL_WRITABLE = 0x5100_0000

    # Mask applied to every RAM address a transfer touches (word-aligned,
    # wraps inside a 2 MiB window).
    RAM_ADDR_MASK = 0x1F_FFFC

    # State of a single DMA channel: the three memory-mapped registers plus
    # the number of CPU cycles left until a deferred completion fires.
    class Channel
      attr_accessor :base_addr, :block_ctrl, :channel_ctrl, :busy_cycles

      def initialize
        @base_addr = 0
        @block_ctrl = 0
        @channel_ctrl = 0
        @busy_cycles = 0
      end

      # Whether the channel should run a transfer.
      #
      # The channel is active when the Start/Busy bit is set. SyncMode 0
      # (Manual) without a DRQ-driving device (e.g. OTC) also needs the
      # start-trigger bit; callers request that check via +needs_trigger+.
      # Channels with DRQ (GPU, SPU, CDROM, ...) auto-start when BUSY is set.
      #
      # @param needs_trigger [Boolean] also require the start-trigger bit
      # @return [Boolean]
      def active?(needs_trigger: false)
        return false if (@channel_ctrl & CTRL_START_BUSY).zero?
        return true unless needs_trigger

        (@channel_ctrl & CTRL_START_TRIGGER) != 0
      end

      # @return [Symbol] :from_ram when bit 0 is set, :to_ram otherwise
      def direction
        (@channel_ctrl & CTRL_DIRECTION) != 0 ? :from_ram : :to_ram
      end

      # @return [Integer] address increment per word: -4 when bit 1 is set,
      #   +4 otherwise
      def step
        (@channel_ctrl & CTRL_STEP) != 0 ? -4 : 4
      end

      # @return [Integer] sync mode (bits 9-10 of the channel control)
      def sync_mode
        (@channel_ctrl >> 9) & 0x3
      end

      # @return [Integer] low 16 bits of the block control register
      def block_size
        @block_ctrl & 0xFFFF
      end

      # @return [Integer] high 16 bits of the block control register
      def block_count
        (@block_ctrl >> 16) & 0xFFFF
      end

      # On transfer completion: bit 24 (Start/Busy) and bit 28 (Start/
      # Trigger) both clear. Bit 28 is "consumed" by the transfer it
      # triggered. Unused R/W bits (29/30) keep their values.
      # Verified against ps1-tests/dma/otc-test:
      #   testOtcWhichBitsAreHardwiredToZero -- bit 28 stays when no
      #     transfer happens (busy bit wasn't set, finish! isn't called).
      #   testOtcControlBitsAfterTransfer -- bit 28 cleared after
      #     transfer actually ran.
      def finish!
        @channel_ctrl &= ~(CTRL_START_BUSY | CTRL_START_TRIGGER)
      end
    end

    attr_reader :channels, :dpcr, :dicr
    attr_accessor :spu

    # @param interrupts [#request, nil] receives request(Interrupts::IRQ_DMA)
    #   on a rising edge of the DICR master flag; may be nil
    # @param spu [#dma_write_word, #dma_read_word, nil] SPU DMA endpoint
    def initialize(interrupts: nil, spu: nil)
      @interrupts = interrupts
      @spu = spu
      @channels = Array.new(NUM_CHANNELS) { Channel.new }
      # Default priorities 1..7. Bit 3 of every nibble (the per-channel
      # enable checked by #channel_enabled?) is clear, so every channel
      # starts out disabled until software writes DPCR.
      @dpcr = 0x0765_4321
      @dicr = 0
      @master_flag_latched = false # Has IRQ#3 fired for the current rising edge?
      @pending_completions = []    # Channel numbers waiting in #tick_cycles
    end

    # Register read (offset from 0x1F801080).
    #
    # @param offset [Integer]
    # @return [Integer] register value, or 0 for unmapped offsets
    def read(offset)
      if offset < 0x70
        channel = @channels[offset >> 4]
        case offset & 0xF
        when 0x0 then channel.base_addr
        when 0x4 then channel.block_ctrl
        when 0x8 then channel.channel_ctrl
        else 0
        end
      elsif offset == 0x70
        @dpcr
      elsif offset == 0x74
        @dicr
      else
        0
      end
    end

    # Register write (offset from 0x1F801080). Writes to unmapped offsets
    # are ignored.
    #
    # @param offset [Integer]
    # @param value [Integer]
    def write(offset, value)
      if offset < 0x70
        channel_num = offset >> 4
        channel = @channels[channel_num]
        case offset & 0xF
        when 0x0
          channel.base_addr = value & 0x00FF_FFFC # Word-aligned, 24-bit
        when 0x4
          channel.block_ctrl = value
        when 0x8
          # OTC (channel 6) has hard-wired CHCR bits: only bits 24, 28, 30
          # are writable, and bit 1 (Memory Address Step = backward) always
          # reads as 1. Verified against ps1-tests/dma/otc-test.
          channel.channel_ctrl =
            if channel_num == OTC
              (value & OTC_CTRL_WRITABLE) | CTRL_STEP
            else
              value
            end
        end
      elsif offset == 0x70
        @dpcr = value
      elsif offset == 0x74
        write_dicr(value)
      end
    end

    # Apply a software write to DICR.
    #
    #   Bits 0-5:   Unknown/unused
    #   Bit 15:     Force IRQ
    #   Bits 16-22: IRQ enable for channels 0-6
    #   Bit 23:     Master IRQ enable
    #   Bits 24-30: IRQ flags for channels 0-6 (write 1 to acknowledge)
    #   Bit 31:     Master IRQ flag (read-only)
    #
    # @param value [Integer]
    def write_dicr(value)
      # Flags that were NOT acknowledged (written as 0) survive the write.
      surviving_flags = @dicr & DICR_IRQ_FLAGS & ~value

      @dicr = surviving_flags | (value & DICR_WRITABLE)

      # Bit 31 is recomputed from the new state, never taken from +value+.
      update_master_flag
    end

    # Recompute DICR bit 31 and raise the DMA interrupt on its rising edge.
    #
    # PSX semantics: IRQ#3 fires on the rising edge of master-flag (bit 31).
    # The master flag itself is "calculated" from current state. We expose it
    # as DICR bit 31 for software to read, and use an internal latch so the
    # IRQ fires exactly once per rising edge — until the condition goes false
    # again (BIOS acks channel flags or clears master enable).
    def update_master_flag
      force = (@dicr & DICR_FORCE_IRQ) != 0
      master_enable = (@dicr & DICR_IRQ_MASTER) != 0
      flags = (@dicr >> 24) & 0x7F
      enables = (@dicr >> 16) & 0x7F
      master = force || (master_enable && (flags & enables) != 0)

      if master
        @dicr |= DICR_IRQ_MASTER_FLAG
        unless @master_flag_latched
          @master_flag_latched = true
          @interrupts&.request(Interrupts::IRQ_DMA)
        end
      else
        @dicr &= ~DICR_IRQ_MASTER_FLAG
        @master_flag_latched = false
      end
    end

    # Check the DPCR enable bit for channel +n+.
    # Each channel has 4 bits in DPCR; bit 3 of each nibble is the enable.
    #
    # @param n [Integer] channel number
    # @return [Boolean]
    def channel_enabled?(n)
      ((@dpcr >> (n * 4 + 3)) & 1) == 1
    end

    # Record completion of +channel+ in DICR.
    #
    # Per Nocash: per-channel IRQ flag is set on completion only when the
    # corresponding per-channel IRQ enable (bits 16-22) is also set.
    #
    # @param channel [Integer] channel number
    def set_irq_flag(channel)
      return unless ((@dicr >> (16 + channel)) & 1) == 1

      @dicr |= (1 << (24 + channel))
      update_master_flag
    end

    # Advance deferred completions by +cycles+ CPU cycles.
    #
    # Every channel registered through schedule_completion has its
    # busy_cycles reduced; once the counter reaches zero or below the
    # channel is finished (BUSY/trigger cleared) and its IRQ flag is raised.
    # The return value is not meaningful.
    #
    # @param cycles [Integer] elapsed CPU cycles
    def tick_cycles(cycles)
      return if @pending_completions.empty?

      @pending_completions.reject! do |n|
        ch = @channels[n]
        ch.busy_cycles -= cycles
        next false if ch.busy_cycles.positive?

        ch.finish!
        set_irq_flag(n)
        true
      end
    end

    # Execute pending DMA transfers.
    #
    # A channel runs when it is enabled in DPCR and active (see
    # Channel#active?). Channels whose transfer already ran but whose
    # completion is still being timed by #tick_cycles are skipped: their
    # BUSY bit is still set, and running them again would duplicate the
    # data and restart the completion timer.
    #
    # @param memory [#read32, #write32] main RAM
    # @param gpu [#gp0, #read_data, nil] GPU endpoint for channel 2
    # @return [Boolean] true if any transfer was performed
    def tick(memory, gpu: nil)
      performed = false

      NUM_CHANNELS.times do |n|
        next unless channel_enabled?(n)
        next if @pending_completions.include?(n)

        channel = @channels[n]
        # Only OTC needs an explicit manual trigger in SyncMode 0; channels
        # backed by a device (GPU/SPU/...) start on BUSY alone.
        needs_trigger = n == OTC && channel.sync_mode == SYNC_MANUAL
        next unless channel.active?(needs_trigger: needs_trigger)

        ran =
          case n
          when GPU then transfer_gpu(memory, gpu)
          when SPU then transfer_spu(memory)
          when OTC then transfer_otc(memory)
          else false # Other channels can be added as needed
          end
        performed = true if ran
      end

      performed
    end

    private

    # Total number of words for a block transfer: block size times block
    # count, where a size of 0 means 0x10000 words and a count of 0 is
    # treated as 1.
    def transfer_word_count(channel)
      size = channel.block_size
      size = 0x10000 if size.zero?
      count = channel.block_count
      count = 1 if count.zero?
      size * count
    end

    # Dispatch the GPU channel on its sync mode.
    # Returns true when a transfer ran, false for an unsupported sync mode.
    def transfer_gpu(memory, gpu)
      channel = @channels[GPU]

      case channel.sync_mode
      when SYNC_MANUAL, SYNC_REQUEST
        transfer_gpu_block(memory, gpu, channel)
      when SYNC_LINKED
        transfer_gpu_linked_list(memory, gpu, channel)
      else
        false
      end
    end

    # Block transfer to/from GPU. Completes synchronously.
    def transfer_gpu_block(memory, gpu, channel)
      addr = channel.base_addr
      step = channel.step
      total = transfer_word_count(channel)

      if channel.direction == :from_ram
        # RAM -> GPU (send commands)
        total.times do
          gpu&.gp0(memory.read32(addr & RAM_ADDR_MASK))
          addr = (addr + step) & 0xFFFF_FFFF
        end
      else
        # GPU -> RAM (read VRAM); a missing GPU reads as 0
        total.times do
          memory.write32(addr & RAM_ADDR_MASK, gpu&.read_data || 0)
          addr = (addr + step) & 0xFFFF_FFFF
        end
      end

      channel.finish!
      set_irq_flag(GPU)
      true
    end

    # Linked list mode - used for GPU command lists.
    # Each node: [header][data...]
    # Header: bits 0-23 = next pointer (or 0xFFFFFF for end),
    #         bits 24-31 = word count
    #
    # NOTE(review): the walk only stops at a header with bit 23 set; a list
    # without a terminator (or a cyclic one) never returns.
    def transfer_gpu_linked_list(memory, gpu, channel)
      addr = channel.base_addr & RAM_ADDR_MASK

      loop do
        header = memory.read32(addr)
        word_count = header >> 24

        # Send data words to GPU
        word_count.times do |i|
          gpu&.gp0(memory.read32((addr + 4 + i * 4) & RAM_ADDR_MASK))
        end

        # Check for end of list
        break if (header & 0x80_0000) != 0

        # Next node
        addr = header & RAM_ADDR_MASK
      end

      channel.finish!
      set_irq_flag(GPU)
      true
    end

    # Block transfer to/from the SPU. The data is moved immediately; only
    # the completion (BUSY clear + IRQ flag) is deferred.
    def transfer_spu(memory)
      channel = @channels[SPU]
      total = transfer_word_count(channel)
      addr = channel.base_addr
      step = channel.step

      if channel.direction == :from_ram
        total.times do
          @spu&.dma_write_word(memory.read32(addr & RAM_ADDR_MASK))
          addr = (addr + step) & 0xFFFF_FFFF
        end
      else
        total.times do
          # A missing SPU reads as 0
          memory.write32(addr & RAM_ADDR_MASK, @spu ? @spu.dma_read_word : 0)
          addr = (addr + step) & 0xFFFF_FFFF
        end
      end

      # Real SPU DMA is ~16 cycles per word; defer the BUSY clear so that
      # cycle-counting tests (ps1-tests spu/memory-transfer) see a non-zero
      # wait. Data already in place — only the completion flag waits.
      schedule_completion(SPU, total * 16)
      true
    end

    # Ordering Table Clear - special reverse-linked-list generator.
    # Fills memory with a linked list going backwards from the base
    # address; used to initialize GPU ordering tables. Only the block size
    # is used (0 means 0x10000 entries).
    def transfer_otc(memory)
      channel = @channels[OTC]

      addr = channel.base_addr & RAM_ADDR_MASK
      size = channel.block_size
      size = 0x10000 if size.zero?

      # Write linked list entries going backwards
      size.times do |i|
        if i == size - 1
          # Last entry - end marker
          memory.write32(addr, 0x00FF_FFFF)
        else
          # Point to previous address
          memory.write32(addr, (addr - 4) & 0x1F_FFFF)
        end
        addr = (addr - 4) & RAM_ADDR_MASK
      end

      # OTC is RAM-only; finish synchronously so the existing otc-test asserts
      # still hold (they check CHCR after a single write, with no cycle gap).
      channel.finish!
      set_irq_flag(OTC)
      true
    end

    # Mark a channel as still busy for `cycles` more CPU cycles, so callers
    # polling CHCR.BUSY see a non-zero wait. Called by transfers that want to
    # model device-side latency. Resolved by `tick_cycles`.
    def schedule_completion(channel_num, cycles)
      @channels[channel_num].busy_cycles = cycles
      @pending_completions << channel_num unless @pending_completions.include?(channel_num)
    end
  end
end