teek 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +46 -0
  3. data/Rakefile +161 -5
  4. data/ext/teek/extconf.rb +1 -1
  5. data/ext/teek/tcltkbridge.c +3 -0
  6. data/ext/teek/tcltkbridge.h +3 -0
  7. data/ext/teek/tkeventsource.c +195 -0
  8. data/ext/teek/tkphoto.c +169 -5
  9. data/ext/teek/tkwin.c +84 -0
  10. data/lib/teek/background_ractor4x.rb +32 -4
  11. data/lib/teek/photo.rb +232 -0
  12. data/lib/teek/version.rb +1 -1
  13. data/lib/teek.rb +3 -1
  14. data/sample/optcarrot/vendor/optcarrot/apu.rb +856 -0
  15. data/sample/optcarrot/vendor/optcarrot/config.rb +257 -0
  16. data/sample/optcarrot/vendor/optcarrot/cpu.rb +1162 -0
  17. data/sample/optcarrot/vendor/optcarrot/driver.rb +144 -0
  18. data/sample/optcarrot/vendor/optcarrot/mapper/cnrom.rb +14 -0
  19. data/sample/optcarrot/vendor/optcarrot/mapper/mmc1.rb +105 -0
  20. data/sample/optcarrot/vendor/optcarrot/mapper/mmc3.rb +153 -0
  21. data/sample/optcarrot/vendor/optcarrot/mapper/uxrom.rb +14 -0
  22. data/sample/optcarrot/vendor/optcarrot/nes.rb +105 -0
  23. data/sample/optcarrot/vendor/optcarrot/opt.rb +168 -0
  24. data/sample/optcarrot/vendor/optcarrot/pad.rb +92 -0
  25. data/sample/optcarrot/vendor/optcarrot/palette.rb +65 -0
  26. data/sample/optcarrot/vendor/optcarrot/ppu.rb +1468 -0
  27. data/sample/optcarrot/vendor/optcarrot/rom.rb +143 -0
  28. data/sample/optcarrot/vendor/optcarrot.rb +14 -0
  29. data/sample/optcarrot.rb +354 -0
  30. data/sample/paint/assets/bucket.png +0 -0
  31. data/sample/paint/assets/cursor.png +0 -0
  32. data/sample/paint/assets/eraser.png +0 -0
  33. data/sample/paint/assets/pencil.png +0 -0
  34. data/sample/paint/assets/spray.png +0 -0
  35. data/sample/paint/layer.rb +255 -0
  36. data/sample/paint/layer_manager.rb +179 -0
  37. data/sample/paint/paint_demo.rb +837 -0
  38. data/sample/paint/sparse_pixel_buffer.rb +202 -0
  39. data/sample/sdl2_demo.rb +318 -0
  40. metadata +29 -1
@@ -0,0 +1,1468 @@
1
+ require_relative "opt"
2
+
3
+ module Optcarrot
4
+ # PPU implementation (video output)
5
+ class PPU
6
+ # clock/timing constants (stolen from Nestopia)
7
+ RP2C02_CC = 4
8
+ RP2C02_HACTIVE = RP2C02_CC * 256
9
+ RP2C02_HBLANK = RP2C02_CC * 85
10
+ RP2C02_HSYNC = RP2C02_HACTIVE + RP2C02_HBLANK
11
+ RP2C02_VACTIVE = 240
12
+ RP2C02_VSLEEP = 1
13
+ RP2C02_VINT = 20
14
+ RP2C02_VDUMMY = 1
15
+ RP2C02_VBLANK = RP2C02_VSLEEP + RP2C02_VINT + RP2C02_VDUMMY
16
+ RP2C02_VSYNC = RP2C02_VACTIVE + RP2C02_VBLANK
17
+ RP2C02_HVSYNCBOOT = RP2C02_VACTIVE * RP2C02_HSYNC + RP2C02_CC * 312
18
+ RP2C02_HVINT = RP2C02_VINT * RP2C02_HSYNC
19
+ RP2C02_HVSYNC_0 = RP2C02_VSYNC * RP2C02_HSYNC
20
+ RP2C02_HVSYNC_1 = RP2C02_VSYNC * RP2C02_HSYNC - RP2C02_CC
21
+
22
+ # special scanlines
23
+ SCANLINE_HDUMMY = -1 # pre-render scanline
24
+ SCANLINE_VBLANK = 240 # post-render scanline
25
+
26
+ # special horizontal clocks
27
+ HCLOCK_DUMMY = 341
28
+ HCLOCK_VBLANK_0 = 681
29
+ HCLOCK_VBLANK_1 = 682
30
+ HCLOCK_VBLANK_2 = 684
31
+ HCLOCK_BOOT = 685
32
+ DUMMY_FRAME = [RP2C02_HVINT / RP2C02_CC - HCLOCK_DUMMY, RP2C02_HVINT, RP2C02_HVSYNC_0]
33
+ BOOT_FRAME = [RP2C02_HVSYNCBOOT / RP2C02_CC - HCLOCK_BOOT, RP2C02_HVSYNCBOOT, RP2C02_HVSYNCBOOT]
34
+
35
+ # constants related to OAM (sprite)
36
+ SP_PIXEL_POSITIONS = {
37
+ 0 => [3, 7, 2, 6, 1, 5, 0, 4], # normal
38
+ 1 => [4, 0, 5, 1, 6, 2, 7, 3], # flip
39
+ }
40
+
41
+ # A look-up table mapping: (two pattern bytes * attr) -> eight pixels
42
+ # TILE_LUT[attr][high_byte * 0x100 + low_byte] = [pixels] * 8
43
+ TILE_LUT = [0x0, 0x4, 0x8, 0xc].map do |attr|
44
+ (0..7).map do |j|
45
+ (0...0x10000).map do |i|
46
+ clr = i[15 - j] * 2 + i[7 - j]
47
+ clr != 0 ? attr | clr : 0
48
+ end
49
+ end.transpose
50
+ # Super dirty hack: This Array#transpose reduces page-faults.
51
+ # It might generate cache-friendly memory layout...
52
+ end
53
+
54
# Compact description of the object: only the class name is shown, none of
# the (very large) internal tables.
def inspect
  format("#<%s>", self.class)
end
57
+
58
+ ###########################################################################
59
+ # initialization
60
+
61
# Wires the PPU to its collaborators and builds the initial state.
#
# conf    - configuration object; +load_ppu+, +opt_ppu+ and +loglevel+ are read here
# cpu     - CPU object stored in @cpu
# palette - indexable color table; entry 0 pre-fills the output color cache
#
# NOTE(security): when +conf.load_ppu+ is set, the named file is read and
# handed to +eval+ verbatim. It must only ever point at trusted code.
def initialize(conf, cpu, palette)
  @conf = conf
  @cpu = cpu
  @palette = palette

  if @conf.load_ppu
    eval(File.read(@conf.load_ppu))
  elsif @conf.opt_ppu
    eval(OptimizedCodeBuilder.new(@conf.loglevel, @conf.opt_ppu).build, nil, "(generated PPU core)")
  end

  # two physical nametable banks, referenced through four logical slots
  @nmt_mem = Array.new(2) { [0xff] * 0x400 }
  @nmt_ref = @nmt_mem.values_at(0, 1, 0, 1)

  @output_pixels = []
  @output_color = Array.new(0x20, @palette[0]) # palette size is 0x20

  reset(mapping: false)
  setup_lut
end
81
+
82
# Puts the PPU back into its power-on state.
#
# opt - options hash; +:mapping+ (default true) controls whether the register
#       handlers are (re-)registered with the CPU via +add_mappings+.
def reset(opt = {})
  if opt.fetch(:mapping, true)
    # registers $2000-$2007, registered with a step of 8 up to $3fff
    register_handlers = [
      %i[peek_2xxx poke_2000],
      %i[peek_2xxx poke_2001],
      %i[peek_2002 poke_2xxx],
      %i[peek_2xxx poke_2003],
      %i[peek_2004 poke_2004],
      %i[peek_2xxx poke_2005],
      %i[peek_2xxx poke_2006],
      %i[peek_2007 poke_2007],
    ]
    register_handlers.each_with_index do |(reader, writer), reg|
      @cpu.add_mappings((0x2000 + reg).step(0x3fff, 8), method(reader), method(writer))
    end
    @cpu.add_mappings(0x3000, method(:peek_3000), method(:poke_2000))
    @cpu.add_mappings(0x4014, method(:peek_4014), method(:poke_4014))
  end

  @palette_ram = [
    0x3f, 0x01, 0x00, 0x01, 0x00, 0x02, 0x02, 0x0d,
    0x08, 0x10, 0x08, 0x24, 0x00, 0x00, 0x04, 0x2c,
    0x09, 0x01, 0x34, 0x03, 0x00, 0x04, 0x00, 0x14,
    0x08, 0x3a, 0x00, 0x02, 0x00, 0x20, 0x2c, 0x08,
  ]
  @coloring = 0x3f # not monochrome
  @emphasis = 0
  update_output_color

  @run = true

  # clocks
  @hclk = HCLOCK_BOOT
  @vclk = 0
  @hclk_target = FOREVER_CLOCK

  # CPU-facing latches
  @io_latch = 0
  @io_buffer = 0xe8 # garbage
  @regs_oam = 0

  # misc flags
  @vram_addr_inc = 1 # 1 or 32
  @need_nmi = false
  @pattern_end = 0x0ff0
  @any_show = false # == @bg_show || @sp_show
  @sp_overflow = false
  @sp_zero_hit = false
  @vblank = false
  @vblanking = false

  # nametable-facing latches
  @io_addr = 0
  @io_pattern = 0

  @a12_monitor = nil
  @a12_state = nil

  # current position within the frame
  @odd_frame = false
  @scanline = SCANLINE_VBLANK

  # scroll registers
  @scroll_toggle = false
  @scroll_latch = 0
  @scroll_xfine = 0
  @scroll_addr_5_14 = 0
  @scroll_addr_0_4 = 0
  @name_io_addr = 0x2000 # == (@scroll_addr_0_4 | @scroll_addr_5_14) & 0x0fff | 0x2000

  # background state
  @bg_enabled = false
  @bg_show = false
  @bg_show_edge = false
  @bg_pixels = Array.new(16, 0)
  @bg_pattern_base = 0 # == 0 or 0x1000
  @bg_pattern_base_15 = 0 # == @bg_pattern_base[12] << 15
  @bg_pattern = 0
  @bg_pattern_lut = TILE_LUT[0]
  # invariant:
  #   @bg_pattern_lut_fetched == TILE_LUT[
  #     @nmt_ref[@io_addr >> 10 & 3][@io_addr & 0x03ff] >>
  #       ((@scroll_addr_0_4 & 0x2) | (@scroll_addr_5_14[6] * 0x4)) & 3
  #   ]
  @bg_pattern_lut_fetched = TILE_LUT[0]

  # sprite state
  @sp_enabled = false
  @sp_active = false # == @sp_visible && @sp_enabled
  @sp_show = false
  @sp_show_edge = false

  # sprite settings written through the CPU interface
  @sp_base = 0
  @sp_height = 8

  # OAM fetcher
  @sp_phase = 0
  @sp_ram = Array.new(0x100, 0xff) # 0x100 bytes of OAM, filled with 0xff garbage
  @sp_index = 0
  @sp_addr = 0
  @sp_latch = 0

  # per-line sprite buffers: 8 sprites when the limit is on, 32 otherwise,
  # four bytes each
  @sp_limit = (@conf.sprite_limit ? 8 : 32) * 4
  @sp_buffer = Array.new(@sp_limit, 0)
  @sp_buffered = 0
  @sp_visible = false
  @sp_map = Array.new(264) # [[behind?, zero?, color]]
  @sp_map_buffer = Array.new(264) { [false, false, 0] } # preallocation for @sp_map
  @sp_zero_in_line = false
end
190
+
191
# Rebuilds the 0x20-entry output color cache from palette RAM, applying the
# current monochrome mask (@coloring) and emphasis bits (@emphasis).
def update_output_color
  0x20.times do |slot|
    masked = @palette_ram[slot] & @coloring
    @output_color[slot] = @palette[masked | @emphasis]
  end
end
196
+
197
# Builds the name and attribute look-up tables from the current nametable
# references, together with @lut_update, a reverse index
# (bank -> cell -> [name entries, attr entries]) that poke_2007 uses to patch
# the tables when a nametable byte changes.
def setup_lut
  @lut_update = {}.compare_by_identity

  # returns the dependant list for (bank, cell); slot 0 = name LUT, 1 = attr LUT
  dependants = lambda do |bank, cell, slot|
    per_bank = (@lut_update[bank] ||= [])
    per_cell = (per_bank[cell] ||= [nil, nil])
    per_cell[slot] ||= []
  end

  @name_lut = Array.new(0x10000) do |key|
    bank = @nmt_ref[(key >> 10) & 3]
    cell = key & 0x03ff
    fixed = ((key >> 12) & 7) | (key[15] << 12)
    dependants.call(bank, cell, 0) << [key, fixed]
    (bank[cell] << 4) | fixed
  end

  shared = {}
  @attr_lut = Array.new(0x8000) do |key|
    io_addr = 0x23c0 | (key & 0x0c00) | ((key >> 4) & 0x0038) | ((key >> 2) & 0x0007)
    bank = @nmt_ref[(io_addr >> 10) & 3]
    cell = io_addr & 0x03ff
    shift = (key & 2) | ((key >> 4) & 4)
    # keys with the same address/shift share one mutable entry object
    entry = (shared[[io_addr, shift]] ||= [io_addr, TILE_LUT[(bank[cell] >> shift) & 3], shift])
    dependants.call(bank, cell, 1) << entry
    entry
  end.freeze
  shared.each_value { |entry| entry.uniq!(&:object_id) }
end
221
+
222
+ ###########################################################################
223
+ # other APIs
224
+
225
+ attr_reader :output_pixels
226
+
227
# Installs the pattern (CHR) memory.
#
# mem      - indexable byte store used for pattern fetches
# writable - whether poke_2007 may write into it
def set_chr_mem(mem, writable)
  @chr_mem, @chr_mem_writable = mem, writable
  writable
end
231
+
232
+ NMT_TABLE = {
233
+ horizontal: [0, 0, 1, 1],
234
+ vertical: [0, 1, 0, 1],
235
+ four_screen: [0, 1, 2, 3],
236
+ first: [0, 0, 0, 0],
237
+ second: [1, 1, 1, 1],
238
+ }
239
# Switches nametable mirroring.
#
# mode - a key of NMT_TABLE (e.g. :horizontal, :vertical)
#
# The PPU is brought up to date first; the look-up tables are rebuilt only if
# at least one of the four logical slots ends up on a different bank.
def nametables=(mode)
  update(RP2C02_CC)
  bank_numbers = NMT_TABLE[mode]
  wanted = (0..3).map { |slot| @nmt_mem[bank_numbers[slot]] }
  unchanged = wanted.each_with_index.all? { |bank, slot| @nmt_ref[slot].equal?(bank) }
  return if unchanged

  wanted.each_with_index { |bank, slot| @nmt_ref[slot] = bank }
  setup_lut
end
249
+
250
# Brings the PPU up to the CPU's current time.
#
# data_setup - extra clocks added on top of the value returned by @cpu.update
def update(data_setup)
  cpu_clock = @cpu.update
  sync(data_setup + cpu_clock)
end
253
+
254
# Prepares a new frame: empties the pixel buffer, flips the odd/even frame
# flag and loads the clock triple (vclk, hclk target, CPU frame clock) from
# DUMMY_FRAME or, if the PPU is not at HCLOCK_DUMMY, from BOOT_FRAME.
def setup_frame
  @output_pixels.clear
  @odd_frame = !@odd_frame
  frame = @hclk == HCLOCK_DUMMY ? DUMMY_FRAME : BOOT_FRAME
  @vclk = frame[0]
  @hclk_target = frame[1]
  @cpu.next_frame_clock = frame[2]
  frame
end
259
+
260
# Finishes the current frame: runs the PPU to the end if it still has a
# finite clock target, then pads the pixel buffer to 256x240 entries with
# palette entry 15 (black).
def vsync
  unless @hclk_target == FOREVER_CLOCK
    @hclk_target = FOREVER_CLOCK
    run
  end
  missing = 256 * 240 - @output_pixels.size
  @output_pixels.concat(Array.new(missing, @palette[15])) if missing > 0 # fill black
  nil
end
267
+
268
# Registers the object to be notified (through +a12_signaled+) whenever the
# PPU sees address line A12 go from low to high.
def monitor_a12_rising_edge(monitor)
  @a12_monitor = monitor
end
271
+
272
+ ###########################################################################
273
+ # helpers
274
+
275
# Advances the VRAM address after a $2007 access by the configured increment
# (1 or 32), then refreshes the derived nametable address.
def update_vram_addr
  if @vram_addr_inc != 32
    # +1: bump the low five bits, carrying into the upper part on wrap
    if @scroll_addr_0_4 >= 0x1f
      @scroll_addr_0_4 = 0
      @scroll_addr_5_14 += 0x20
    else
      @scroll_addr_0_4 += 1
    end
  elsif !active?
    @scroll_addr_5_14 += 0x20
  elsif @scroll_addr_5_14 & 0x7000 != 0x7000
    # while rendering: bits 12-14 are not yet saturated, so bump them
    @scroll_addr_5_14 += 0x1000
  else
    # while rendering: bits 12-14 wrap and the row bits (5-9) advance
    @scroll_addr_5_14 &= 0x0fff
    row = @scroll_addr_5_14 & 0x03e0
    if row == 0x03a0
      @scroll_addr_5_14 ^= 0x0800
    elsif row == 0x03e0
      @scroll_addr_5_14 &= 0x7c00
    else
      @scroll_addr_5_14 += 0x20
    end
  end
  update_scroll_address_line
end
299
+
300
# Recomputes the cached nametable address from the scroll registers and, when
# an A12 monitor is installed, reports a low-to-high transition of the A12
# state derived from @scroll_addr_5_14.
def update_scroll_address_line
  @name_io_addr = ((@scroll_addr_0_4 | @scroll_addr_5_14) & 0x0fff) | 0x2000
  return unless @a12_monitor

  a12_high = (@scroll_addr_5_14 & 0x3000) == 0x1000
  @a12_monitor.a12_signaled(@cpu.current_clock) if a12_high && !@a12_state
  @a12_state = a12_high
end
308
+
309
# Whether the PPU is currently rendering: the scanline is not the vblank
# line and background or sprites are shown.
def active?
  return false if @scanline == SCANLINE_VBLANK

  @any_show
end
312
+
313
# Runs the PPU until +elapsed+ if that lies beyond the current target.
# The target is stored as elapsed / RP2C02_CC minus the clocks already
# accounted for in @vclk.
def sync(elapsed)
  if elapsed > @hclk_target
    @hclk_target = elapsed / RP2C02_CC - @vclk
    run
  end
end
318
+
319
# Re-derives the two cached values that must always agree with the scroll
# registers and nametable contents: @name_io_addr and
# @bg_pattern_lut_fetched. Called by #run before the main-loop fiber resumes.
+ def make_sure_invariants
320
+ @name_io_addr = (@scroll_addr_0_4 | @scroll_addr_5_14) & 0x0fff | 0x2000
321
+ @bg_pattern_lut_fetched = TILE_LUT[
322
+ @nmt_ref[@io_addr >> 10 & 3][@io_addr & 0x03ff] >> ((@scroll_addr_0_4 & 0x2) | (@scroll_addr_5_14[6] * 0x4)) & 3
323
+ ]
324
+ end
325
+
326
# Value actually stored for an OAM write of +data+:
# 0xff while rendering is active, +data+ with bits 2-4 cleared when the low
# two bits of the OAM address are 2, and +data+ unchanged otherwise.
def io_latch_mask(data)
  return 0xff if active?
  return data & 0xe3 if (@regs_oam & 0x03) == 0x02

  data
end
335
+
336
+ ###########################################################################
337
+ # mapped memory handlers
338
+
339
+ # PPUCTRL
340
# Write handler for $2000 (PPUCTRL).
#
# Decodes +data+ bit by bit: bits 0-1 go into bits 10-11 of the scroll latch,
# bit 2 selects the VRAM increment (1 or 32), bits 3/4 the sprite/background
# pattern base, bit 5 the sprite height (8 or 16) and bit 7 the NMI enable.
# Enabling NMI while the vblank flag is already set raises an NMI, provided
# it still falls before RP2C02_HVINT.
def poke_2000(_addr, data)
  update(RP2C02_CC)
  nmi_was_enabled = @need_nmi

  @io_latch = data
  @scroll_latch = (@scroll_latch & 0x73ff) | ((data & 0x03) << 10)
  @vram_addr_inc = data[2].zero? ? 1 : 32
  @sp_base = data[3] << 12
  @bg_pattern_base = data[4] << 12
  @bg_pattern_base_15 = data[4] << 15
  @sp_height = data[5].zero? ? 8 : 16
  @need_nmi = data[7] == 1
  @pattern_end = @sp_base.zero? && @sp_height != 16 ? 0x0ff0 : 0x1ff0

  return unless @need_nmi && @vblank && !nmi_was_enabled

  nmi_clock = @cpu.current_clock + RP2C02_CC
  @cpu.do_nmi(nmi_clock) if nmi_clock < RP2C02_HVINT
end
360
+
361
+ # PPUMASK
362
# Write handler for $2001 (PPUMASK).
#
# Decodes the show/edge flags for background and sprites, the monochrome
# mask and the emphasis bits. If any visibility flag changed, the enabled
# flags are refreshed (the edge variant when @hclk is below 8 or at/after
# 248); if coloring or emphasis changed, the output color cache is rebuilt.
def poke_2001(_addr, data)
  update(RP2C02_CC)
  visibility_before = [@bg_show, @bg_show_edge, @sp_show, @sp_show_edge]
  tint_before = [@coloring, @emphasis]
  was_rendering = @any_show

  @io_latch = data
  @bg_show = data[3] == 1
  @bg_show_edge = data[1] == 1 && @bg_show
  @sp_show = data[4] == 1
  @sp_show_edge = data[2] == 1 && @sp_show
  @any_show = @bg_show || @sp_show
  @coloring = data[0] == 1 ? 0x30 : 0x3f # 0x30: monochrome
  @emphasis = (data & 0xe0) << 1

  if visibility_before != [@bg_show, @bg_show_edge, @sp_show, @sp_show_edge]
    if (8...248).cover?(@hclk)
      update_enabled_flags
    else
      update_enabled_flags_edge
    end
    update_scroll_address_line if was_rendering && !@any_show
  end

  update_output_color if tint_before != [@coloring, @emphasis]
end
392
+
393
+ # PPUSTATUS
394
# Read handler for $2002 (PPUSTATUS).
#
# Returns the low five bits of the I/O latch combined with the vblank (0x80),
# sprite-zero-hit (0x40) and sprite-overflow (0x20) flags. Reading also
# resets the scroll write toggle and clears both vblank flags.
def peek_2002(_addr)
  update(RP2C02_CC)
  flags = (@vblank ? 0x80 : 0) | (@sp_zero_hit ? 0x40 : 0) | (@sp_overflow ? 0x20 : 0)
  @io_latch = (@io_latch & 0x1f) | flags
  @scroll_toggle = false
  @vblank = false
  @vblanking = false
  @io_latch
end
405
+
406
+ # OAMADDR
407
# Write handler for $2003 (OAMADDR): stores +data+ as the OAM address and in
# the I/O latch.
def poke_2003(_addr, data)
  update(RP2C02_CC)
  @io_latch = data
  @regs_oam = data
end
411
+
412
+ # OAMDATA (write)
413
# Write handler for $2004 (OAMDATA): stores the masked value at the current
# OAM address, mirrors it into the I/O latch and advances the address
# (wrapping at 0x100).
def poke_2004(_addr, data)
  update(RP2C02_CC)
  stored = io_latch_mask(data)
  @sp_ram[@regs_oam] = stored
  @io_latch = stored
  @regs_oam = @regs_oam.succ & 0xff
end
418
+
419
+ # OAMDATA (read)
420
# Read handler for $2004 (OAMDATA).
#
# When nothing is shown, or the CPU clock is at least 341*240 PPU clocks past
# the point 341*241 PPU clocks before the next frame clock, OAM is read
# directly at the current address. Otherwise the PPU is synced and the
# sprite-evaluation latch is returned.
def peek_2004(_addr)
  in_rendering_window =
    @any_show &&
    @cpu.current_clock - (@cpu.next_frame_clock - (341 * 241) * RP2C02_CC) < (341 * 240) * RP2C02_CC
  if in_rendering_window
    update(RP2C02_CC)
    @io_latch = @sp_latch
  else
    @io_latch = @sp_ram[@regs_oam]
  end
end
428
+
429
+ # PPUSCROLL
430
# Write handler for $2005 (PPUSCROLL).
#
# Writes alternate via @scroll_toggle. The first write of a pair stores
# data >> 3 in the low five bits of the scroll latch and re-aligns
# @bg_pixels for the new fine X; the second write fills latch bits 5-9 and
# 12-14 from +data+.
def poke_2005(_addr, data)
  update(RP2C02_CC)
  @io_latch = data
  @scroll_toggle = !@scroll_toggle
  if !@scroll_toggle
    vertical_bits = ((data << 2) | (data << 12)) & 0x73e0
    @scroll_latch = (@scroll_latch & 0x0c1f) | vertical_bits
  else
    @scroll_latch = (@scroll_latch & 0x7fe0) | (data >> 3)
    new_xfine = 8 - (data & 0x7)
    @bg_pixels.rotate!(@scroll_xfine - new_xfine)
    @scroll_xfine = new_xfine
  end
end
443
+
444
+ # PPUADDR
445
# Write handler for $2006 (PPUADDR).
#
# Writes alternate via @scroll_toggle. The first write of a pair sets latch
# bits 8-13 from the low six bits of +data+; the second sets the low byte and
# copies the whole latch into the live scroll address registers.
def poke_2006(_addr, data)
  update(RP2C02_CC)
  @io_latch = data
  @scroll_toggle = !@scroll_toggle
  if !@scroll_toggle
    @scroll_latch = (@scroll_latch & 0x7f00) | data
    @scroll_addr_0_4 = @scroll_latch & 0x001f
    @scroll_addr_5_14 = @scroll_latch & 0x7fe0
    update_scroll_address_line
  else
    @scroll_latch = (@scroll_latch & 0x00ff) | ((data & 0x3f) << 8)
  end
end
458
+
459
+ # PPUDATA (write)
460
# Write handler for $2007 (PPUDATA).
#
# The target is the current VRAM address (which is then advanced):
# * $3f00-$3fff: palette RAM and the output color cache; entries whose index
#   is a multiple of 4 are mirrored between the two halves of the palette.
# * $2000 and up (after masking to 14 bits): nametable memory; dependent
#   name/attribute LUT entries are patched through @lut_update.
# * below $2000: pattern memory, only if it is writable.
def poke_2007(_addr, data)
  update(RP2C02_CC * 4)
  vram = @scroll_addr_0_4 | @scroll_addr_5_14
  update_vram_addr
  @io_latch = data
  if (vram & 0x3f00) == 0x3f00
    slot = vram & 0x1f
    color = @palette[data & @coloring | @emphasis]
    @palette_ram[slot] = data
    @output_color[slot] = color
    if (slot & 3).zero?
      mirror = slot ^ 0x10
      @palette_ram[mirror] = data
      @output_color[mirror] = color
    end
  else
    vram &= 0x3fff
    if vram < 0x2000
      @chr_mem[vram] = data if @chr_mem_writable
    else
      bank = @nmt_ref[(vram >> 10) & 0x3]
      cell = vram & 0x03ff
      if bank[cell] != data
        bank[cell] = data

        name_deps, attr_deps = @lut_update[bank][cell]
        name_deps.each { |key, fixed| @name_lut[key] = data << 4 | fixed } if name_deps
        attr_deps.each { |entry| entry[1] = TILE_LUT[data >> entry[2] & 3] } if attr_deps
      end
    end
  end
end
491
+
492
+ # PPUDATA (read)
493
# Read handler for $2007 (PPUDATA).
#
# Palette addresses ($3f00-$3fff) return palette RAM masked by @coloring;
# every other address returns the previously buffered byte. In both cases the
# read buffer is then refilled from nametable or pattern memory and the VRAM
# address is advanced.
def peek_2007(_addr)
  update(RP2C02_CC)
  vram = (@scroll_addr_0_4 | @scroll_addr_5_14) & 0x3fff
  update_vram_addr
  @io_latch =
    if (vram & 0x3f00) == 0x3f00
      @palette_ram[vram & 0x1f] & @coloring
    else
      @io_buffer
    end
  @io_buffer = vram < 0x2000 ? @chr_mem[vram] : @nmt_ref[(vram >> 10) & 0x3][vram & 0x3ff]
  @io_latch
end
501
+
502
# Write handler for a register with no write behaviour of its own: the value
# only lands in the I/O latch.
def poke_2xxx(_addr, data)
  @io_latch = data
end
505
+
506
# Read handler for a register with no read behaviour of its own: returns the
# current I/O latch.
def peek_2xxx(_addr)
  @io_latch
end
509
+
510
# Read handler for $3000: syncs the PPU with the CPU, then returns the I/O
# latch.
def peek_3000(_addr)
  update(RP2C02_CC)
  @io_latch
end
514
+
515
+ # OAMDMA
516
+ def poke_4014(_addr, data) # DMA
517
+ @cpu.steal_clocks(CPU::CLK_1) if @cpu.odd_clock?
518
+ update(RP2C02_CC)
519
+ @cpu.steal_clocks(CPU::CLK_1)
520
+ data <<= 8
521
+ if @regs_oam == 0 && data < 0x2000 && (!@any_show || @cpu.current_clock <= RP2C02_HVINT - CPU::CLK_1 * 512)
522
+ @cpu.steal_clocks(CPU::CLK_1 * 512)
523
+ @cpu.sprite_dma(data & 0x7ff, @sp_ram)
524
+ @io_latch = @sp_ram[0xff]
525
+ else
526
+ begin
527
+ @io_latch = @cpu.fetch(data)
528
+ data += 1
529
+ @cpu.steal_clocks(CPU::CLK_1)
530
+ update(RP2C02_CC)
531
+ @cpu.steal_clocks(CPU::CLK_1)
532
+ @io_latch = io_latch_mask(@io_latch)
533
+ @sp_ram[@regs_oam] = @io_latch
534
+ @regs_oam = (@regs_oam + 1) & 0xff
535
+ end while data & 0xff != 0
536
+ end
537
+ end
538
+
539
# Read handler for $4014: always returns the constant 0x40.
def peek_4014(_addr)
  0x40
end
542
+
543
+ ###########################################################################
544
+ # helper methods for PPU#run
545
+
546
+ # NOTE: These methods will be adhocly-inlined. Keep compatibility with
547
+ # OptimizedCodeBuilder (e.g., do not change the parameter names blindly).
548
+
549
# Puts +exp+ on the PPU address bus (@io_addr) and lets update_address_line
# report a possible A12 rising edge. Does nothing unless rendering is enabled.
# The parameter name is part of the inlining contract noted above.
+ def open_pattern(exp)
550
+ return unless @any_show
551
+ @io_addr = exp
552
+ update_address_line
553
+ end
554
+
555
# Computes the pattern-memory address of the row of the buffered sprite that
# falls on the current scanline.
#
# buffer_idx - offset into @sp_buffer of the sprite's four bytes
#
# Returns the address; the low three bits select the row within the tile.
+ def open_sprite(buffer_idx)
556
+ flip_v = @sp_buffer[buffer_idx + 2][7] # OAM byte2 bit7: "Flip vertically" flag
557
+ tmp = (@scanline - @sp_buffer[buffer_idx]) ^ (flip_v * 0xf)
558
+ byte1 = @sp_buffer[buffer_idx + 1]
559
+ addr = @sp_height == 16 ? ((byte1 & 0x01) << 12) | ((byte1 & 0xfe) << 4) | (tmp[3] * 0x10) : @sp_base | byte1 << 4
560
+ addr | (tmp & 7)
561
+ end
562
+
563
# Draws one eight-pixel sprite row into the per-line sprite map.
#
# pat0, pat1 - the two pattern bytes fetched for this row
# buffer_idx - offset into @sp_buffer of the sprite's four bytes
#
# Pixels with color 0 and positions already taken in @sp_map are skipped, so
# a sprite loaded earlier keeps its pixels. Each written cell is a
# preallocated [behind, zero-hit, color] triple from @sp_map_buffer.
+ def load_sprite(pat0, pat1, buffer_idx)
564
+ byte2 = @sp_buffer[buffer_idx + 2]
565
+ pos = SP_PIXEL_POSITIONS[byte2[6]] # OAM byte2 bit6: "Flip horizontally" flag
566
+ pat = (pat0 >> 1 & 0x55) | (pat1 & 0xaa) | ((pat0 & 0x55) | (pat1 << 1 & 0xaa)) << 8
567
+ x_base = @sp_buffer[buffer_idx + 3]
568
+ palette_base = 0x10 + ((byte2 & 3) << 2) # OAM byte2 bit0-1: Palette
# on the first call after @sp_visible was reset, empty the map; Array#clear
# returns the (truthy) array, which also sets the flag
569
+ @sp_visible ||= @sp_map.clear
570
+ 8.times do |dx|
571
+ x = x_base + dx
572
+ clr = (pat >> (pos[dx] * 2)) & 3
573
+ next if @sp_map[x] || clr == 0
574
+ @sp_map[x] = sprite = @sp_map_buffer[x]
575
+ # sprite[0]: behind flag, sprite[1]: zero hit flag, sprite[2]: color
576
+ sprite[0] = byte2[5] == 1 # OAM byte2 bit5: "Behind background" flag
577
+ sprite[1] = buffer_idx == 0 && @sp_zero_in_line
578
+ sprite[2] = palette_base + clr
579
+ end
580
+ @sp_active = @sp_enabled
581
+ end
582
+
583
# If an A12 monitor is installed, tracks bit 12 of @io_addr and calls
# a12_signaled with the current PPU time ((@vclk + @hclk) * RP2C02_CC) when
# the bit goes from low to high.
+ def update_address_line
584
+ if @a12_monitor
585
+ a12_state = @io_addr[12] == 1
586
+ @a12_monitor.a12_signaled((@vclk + @hclk) * RP2C02_CC) if !@a12_state && a12_state
587
+ @a12_state = a12_state
588
+ end
589
+ end
590
+
591
+ ###########################################################################
592
+ # actions for PPU#run
593
+
594
# Puts the cached nametable address (@name_io_addr) on the address bus.
# Does nothing unless rendering is enabled.
+ def open_name
595
+ return unless @any_show
596
+ @io_addr = @name_io_addr
597
+ update_address_line
598
+ end
599
+
600
# Looks up @io_pattern for the current scroll position in @name_lut; the
# index combines both scroll address parts with @bg_pattern_base_15.
# Does nothing unless rendering is enabled.
+ def fetch_name
601
+ return unless @any_show
602
+ @io_pattern = @name_lut[@scroll_addr_0_4 + @scroll_addr_5_14 + @bg_pattern_base_15]
603
+ end
604
+
605
# Looks up the attribute LUT entry for the current scroll position, taking
# its first two fields as the new bus address and the pre-selected tile LUT
# (the trailing comma discards the third field).
# Does nothing unless rendering is enabled.
+ def open_attr
606
+ return unless @any_show
607
+ @io_addr, @bg_pattern_lut_fetched, = @attr_lut[@scroll_addr_0_4 + @scroll_addr_5_14]
608
+ update_address_line
609
+ end
610
+
611
# Makes the tile LUT selected by the last open_attr the active one.
# Does nothing unless rendering is enabled.
+ def fetch_attr
612
+ return unless @any_show
613
+ @bg_pattern_lut = @bg_pattern_lut_fetched
614
+ # raise unless @bg_pattern_lut_fetched ==
615
+ # @nmt_ref[@io_addr >> 10 & 3][@io_addr & 0x03ff] >>
616
+ # ((@scroll_addr_0_4 & 0x2) | (@scroll_addr_5_14[6] * 0x4)) & 3
617
+ end
618
+
619
# Reads the first background pattern byte from pattern memory at the current
# bus address, replacing @bg_pattern. Does nothing unless rendering is enabled.
+ def fetch_bg_pattern_0
620
+ return unless @any_show
621
+ @bg_pattern = @chr_mem[@io_addr & 0x1fff]
622
+ end
623
+
624
# Reads the second background pattern byte and merges it into @bg_pattern as
# the high byte, giving the 16-bit index used with the tile LUT.
# Does nothing unless rendering is enabled.
+ def fetch_bg_pattern_1
625
+ return unless @any_show
626
+ @bg_pattern |= @chr_mem[@io_addr & 0x1fff] * 0x100
627
+ end
628
+
629
# Advances the low five scroll bits by one. On wrap-around from 0x1f, bit 10
# of the upper scroll part is toggled. @name_io_addr is adjusted in step so
# the cache stays valid. Does nothing unless rendering is enabled.
+ def scroll_clock_x
630
+ return unless @any_show
631
+ if @scroll_addr_0_4 < 0x001f
632
+ @scroll_addr_0_4 += 1
633
+ @name_io_addr += 1 # make cache consistent
634
+ else
635
+ @scroll_addr_0_4 = 0
636
+ @scroll_addr_5_14 ^= 0x0400
637
+ @name_io_addr ^= 0x041f # make cache consistent
638
+ end
639
+ end
640
+
641
# Reloads the horizontal part of the scroll address (bits 0-4 and bit 10)
# from the scroll latch, leaving the other bits untouched.
# Does nothing unless rendering is enabled.
+ def scroll_reset_x
642
+ return unless @any_show
643
+ @scroll_addr_0_4 = @scroll_latch & 0x001f
644
+ @scroll_addr_5_14 = (@scroll_addr_5_14 & 0x7be0) | (@scroll_latch & 0x0400)
645
+ @name_io_addr = (@scroll_addr_0_4 | @scroll_addr_5_14) & 0x0fff | 0x2000 # make cache consistent
646
+ end
647
+
648
# Advances the vertical scroll position by one line.
#
# Bits 12-14 are incremented until they reach 7; then they are cleared and
# the row bits (5-9) advance. Row value 0x03a0 wraps to 0 and toggles bit 11;
# row value 0x03e0 wraps to 0 without the toggle.
# Does nothing unless rendering is enabled.
+ def scroll_clock_y
649
+ return unless @any_show
650
+ if @scroll_addr_5_14 & 0x7000 != 0x7000
651
+ @scroll_addr_5_14 += 0x1000
652
+ else
653
+ mask = @scroll_addr_5_14 & 0x03e0
654
+ # rubocop:disable Style/CaseLikeIf
655
+ if mask == 0x03a0
656
+ @scroll_addr_5_14 ^= 0x0800
657
+ @scroll_addr_5_14 &= 0x0c00
658
+ elsif mask == 0x03e0
659
+ @scroll_addr_5_14 &= 0x0c00
660
+ else
661
+ @scroll_addr_5_14 = (@scroll_addr_5_14 & 0x0fe0) + 32
662
+ end
663
+ # rubocop:enable Style/CaseLikeIf
664
+ end
665
+
666
+ @name_io_addr = (@scroll_addr_0_4 | @scroll_addr_5_14) & 0x0fff | 0x2000 # make cache consistent
667
+ end
668
+
669
# Writes the eight pixels of the fetched tile into @bg_pixels at the fine-X
# offset, without shifting the buffer first.
# Does nothing unless rendering is enabled.
+ def preload_tiles
670
+ return unless @any_show
671
+ @bg_pixels[@scroll_xfine, 8] = @bg_pattern_lut[@bg_pattern]
672
+ end
673
+
674
# Rotates @bg_pixels by eight positions and writes the eight pixels of the
# fetched tile at the fine-X offset.
# Does nothing unless rendering is enabled.
+ def load_tiles
675
+ return unless @any_show
676
+ @bg_pixels.rotate!(8)
677
+ @bg_pixels[@scroll_xfine, 8] = @bg_pattern_lut[@bg_pattern]
678
+ end
679
+
680
# First half of a sprite-evaluation step: latches the OAM byte at @sp_addr.
# Does nothing unless rendering is enabled.
+ def evaluate_sprites_even
681
+ return unless @any_show
682
+ @sp_latch = @sp_ram[@sp_addr]
683
+ end
684
+
685
# Second half of a sprite-evaluation step: dispatches on @sp_phase to the
# matching evaluate_sprites_odd_phase_N handler. Phase 1 is stored as nil so
# the most common case needs only a truthiness test. Any other truthy value
# without a matching `when` (such as 0) falls through the case and does nothing.
# Does nothing unless rendering is enabled.
+ def evaluate_sprites_odd
686
+ return unless @any_show
687
+
688
+ # we first check phase 1 since it is the most-likely case
689
+ if @sp_phase # nil represents phase 1
690
+ # the second most-likely case is phase 9
691
+ if @sp_phase == 9
692
+ evaluate_sprites_odd_phase_9
693
+ else
694
+ # other cases are relatively rare
695
+ case @sp_phase
696
+ # when 1 then evaluate_sprites_odd_phase_1
697
+ # when 9 then evaluate_sprites_odd_phase_9
698
+ when 2 then evaluate_sprites_odd_phase_2
699
+ when 3 then evaluate_sprites_odd_phase_3
700
+ when 4 then evaluate_sprites_odd_phase_4
701
+ when 5 then evaluate_sprites_odd_phase_5
702
+ when 6 then evaluate_sprites_odd_phase_6
703
+ when 7 then evaluate_sprites_odd_phase_7
704
+ when 8 then evaluate_sprites_odd_phase_8
705
+ end
706
+ end
707
+ else
708
+ evaluate_sprites_odd_phase_1
709
+ end
710
+ end
711
+
712
# Phase 1: treats the latched byte as a sprite's Y position and tests whether
# the sprite covers the current scanline. On a hit the byte is stored in
# @sp_buffer and phase 2 begins; otherwise the OAM address moves on, and
# after the 64th entry evaluation switches to phase 9.
+ def evaluate_sprites_odd_phase_1
713
+ @sp_index += 1
714
+ if @sp_latch <= @scanline && @scanline < @sp_latch + @sp_height
715
+ @sp_addr += 1
716
+ @sp_phase = 2
717
+ @sp_buffer[@sp_buffered] = @sp_latch
718
+ elsif @sp_index == 64
719
+ @sp_addr = 0
720
+ @sp_phase = 9
721
+ elsif @sp_index == 2
722
+ @sp_addr = 8
723
+ else
724
+ @sp_addr += 4
725
+ end
726
+ end
727
+
728
# Phase 2: copies the latched byte into the second slot of the sprite being
# buffered and moves on to phase 3.
+ def evaluate_sprites_odd_phase_2
729
+ @sp_addr += 1
730
+ @sp_phase = 3
731
+ @sp_buffer[@sp_buffered + 1] = @sp_latch
732
+ end
733
+
734
# Phase 3: copies the latched byte into the third slot of the sprite being
# buffered and moves on to phase 4.
+ def evaluate_sprites_odd_phase_3
735
+ @sp_addr += 1
736
+ @sp_phase = 4
737
+ @sp_buffer[@sp_buffered + 2] = @sp_latch
738
+ end
739
+
740
# Phase 4: stores the fourth byte, completing one buffered sprite.
# Afterwards evaluation returns to phase 1 (nil), or goes to phase 5 once the
# buffer holds @sp_limit bytes, or to phase 9 if this was the 64th OAM entry.
# @sp_zero_in_line is set when the sprite just completed was the first entry.
+ def evaluate_sprites_odd_phase_4
741
+ @sp_buffer[@sp_buffered + 3] = @sp_latch
742
+ @sp_buffered += 4
743
+ if @sp_index != 64
744
+ @sp_phase = @sp_buffered != @sp_limit ? nil : 5
745
+ if @sp_index != 2
746
+ @sp_addr += 1
747
+ @sp_zero_in_line ||= @sp_index == 1
748
+ else
749
+ @sp_addr = 8
750
+ end
751
+ else
752
+ @sp_addr = 0
753
+ @sp_phase = 9
754
+ end
755
+ end
756
+
757
# Phase 5 (entered when the sprite buffer is full): if the latched byte puts
# a sprite on the current scanline, the overflow flag is set and phase 6
# begins. Otherwise the OAM address advances by four entries-bytes plus one
# within-entry step; once it has wrapped to 5 or below, phase 9 begins.
+ def evaluate_sprites_odd_phase_5
758
+ if @sp_latch <= @scanline && @scanline < @sp_latch + @sp_height
759
+ @sp_phase = 6
760
+ @sp_addr = (@sp_addr + 1) & 0xff
761
+ @sp_overflow = true
762
+ else
763
+ @sp_addr = ((@sp_addr + 4) & 0xfc) + ((@sp_addr + 1) & 3)
764
+ if @sp_addr <= 5
765
+ @sp_phase = 9
766
+ @sp_addr &= 0xfc
767
+ end
768
+ end
769
+ end
770
+
771
# Phase 6: advances the OAM address by one (wrapping at 0x100) and moves on
# to phase 7.
+ def evaluate_sprites_odd_phase_6
772
+ @sp_phase = 7
773
+ @sp_addr = (@sp_addr + 1) & 0xff
774
+ end
775
+
776
# Phase 7: advances the OAM address by one (wrapping at 0x100) and moves on
# to phase 8.
+ def evaluate_sprites_odd_phase_7
777
+ @sp_phase = 8
778
+ @sp_addr = (@sp_addr + 1) & 0xff
779
+ end
780
+
781
# Phase 8: advances the OAM address by one, bumps it once more if it landed
# on the last byte of an entry, aligns it down to a four-byte boundary and
# moves on to phase 9.
+ def evaluate_sprites_odd_phase_8
782
+ @sp_phase = 9
783
+ @sp_addr = (@sp_addr + 1) & 0xff
784
+ @sp_addr += 1 if @sp_addr & 3 == 3
785
+ @sp_addr &= 0xfc
786
+ end
787
+
788
# Phase 9: only steps the OAM address forward by four (wrapping at 0x100);
# the phase itself is left unchanged.
+ def evaluate_sprites_odd_phase_9
789
+ @sp_addr = (@sp_addr + 4) & 0xff
790
+ end
791
+
792
# Loads the buffered sprites beyond the first eight (buffer offset 32 and
# up), which can only exist when the buffer limit is larger than 32 bytes.
# Pattern bytes are read straight from @chr_mem; rows whose two pattern bytes
# are both zero are skipped. Does nothing unless rendering is enabled.
+ def load_extended_sprites
793
+ return unless @any_show
794
+ if 32 < @sp_buffered
795
+ buffer_idx = 32
796
+ begin
797
+ addr = open_sprite(buffer_idx)
798
+ pat0 = @chr_mem[addr]
799
+ pat1 = @chr_mem[addr | 8]
800
+ load_sprite(pat0, pat1, buffer_idx) if pat0 != 0 || pat1 != 0
801
+ buffer_idx += 4
802
+ end while buffer_idx != @sp_buffered
803
+ end
804
+ end
805
+
806
# Produces one output pixel for column @hclk and appends its color to
# @output_pixels.
#
# While rendering is enabled the background pixel (0 if the background is
# disabled) is combined with the sprite map: a sprite wins over a transparent
# background pixel (value divisible by 4), and over an opaque one unless its
# "behind" flag is set. An opaque background pixel under a sprite-zero pixel
# sets the zero-hit flag, except in column 255.
# While rendering is disabled the pixel is the low scroll bits if the scroll
# address points into $3f00-$3fff, else 0, and the matching @bg_pixels slot
# is cleared.
+ def render_pixel
807
+ if @any_show
808
+ pixel = @bg_enabled ? @bg_pixels[@hclk % 8] : 0
809
+ if @sp_active && (sprite = @sp_map[@hclk])
810
+ if pixel % 4 == 0
811
+ pixel = sprite[2]
812
+ else
813
+ @sp_zero_hit = true if sprite[1] && @hclk != 255
814
+ pixel = sprite[2] unless sprite[0]
815
+ end
816
+ end
817
+ else
818
+ pixel = @scroll_addr_5_14 & 0x3f00 == 0x3f00 ? @scroll_addr_0_4 : 0
819
+ @bg_pixels[@hclk % 8] = 0
820
+ end
821
+ @output_pixels << @output_color[pixel]
822
+ end
823
+
824
+ # just a placeholder; used for batch_render_pixels optimization
825
# Intentionally empty in the default core.
+ def batch_render_eight_pixels
826
+ end
827
+
828
# Boot step of the main loop: sets the vblank flag, positions the horizontal
# clock at HCLOCK_DUMMY and removes any clock target.
+ def boot
829
+ @vblank = true
830
+ @hclk = HCLOCK_DUMMY
831
+ @hclk_target = FOREVER_CLOCK
832
+ end
833
+
834
# First vblank step: marks vblank as pending (@vblanking) and advances the
# clock to HCLOCK_VBLANK_1. The visible @vblank flag is not set yet.
+ def vblank_0
835
+ @vblanking = true
836
+ @hclk = HCLOCK_VBLANK_1
837
+ end
838
+
839
# Second vblank step: promotes a still-pending vblank to the visible flag
# (a $2002 read in between clears @vblanking and thereby suppresses it),
# resets the per-line sprite visibility flags and advances the clock to
# HCLOCK_VBLANK_2.
+ def vblank_1
840
+ @vblank ||= @vblanking
841
+ @vblanking = false
842
+ @sp_visible = false
843
+ @sp_active = false
844
+ @hclk = HCLOCK_VBLANK_2
845
+ end
846
+
847
# Final vblank step: promotes a pending vblank once more, parks the clock at
# HCLOCK_DUMMY with no target, and raises an NMI at the next frame clock if
# NMIs are enabled and the vblank flag is set.
+ def vblank_2
848
+ @vblank ||= @vblanking
849
+ @vblanking = false
850
+ @hclk = HCLOCK_DUMMY
851
+ @hclk_target = FOREVER_CLOCK
852
+ @cpu.do_nmi(@cpu.next_frame_clock) if @need_nmi && @vblank
853
+ end
854
+
855
# Copies the plain "show" settings into the live enable flags and recomputes
# @sp_active. Does nothing unless rendering is enabled.
+ def update_enabled_flags
856
+ return unless @any_show
857
+ @bg_enabled = @bg_show
858
+ @sp_enabled = @sp_show
859
+ @sp_active = @sp_enabled && @sp_visible
860
+ end
861
+
862
# Copies the "show at the edge" settings into the live enable flags and
# recomputes @sp_active. Unlike update_enabled_flags this runs even when
# rendering is disabled.
+ def update_enabled_flags_edge
863
+ @bg_enabled = @bg_show_edge
864
+ @sp_enabled = @sp_show_edge
865
+ @sp_active = @sp_enabled && @sp_visible
866
+ end
867
+
868
+ ###########################################################################
869
+ # default core
870
+
871
# Emits one debug line describing the span the PPU is about to run.
#
# scanline    - current scanline
# hclk        - current horizontal clock
# hclk_target - clock to run to
#
# A clock equal to FOREVER_CLOCK is printed as "forever".
def debug_logging(scanline, hclk, hclk_target)
  from, to = [hclk, hclk_target].map { |clk| clk == FOREVER_CLOCK ? "forever" : clk }

  @conf.debug("ppu: scanline #{ scanline }, hclk #{ from }->#{ to }")
end
877
+
878
# Resumes the main-loop fiber (created on first use) until it yields.
#
# When the fiber yields a falsy value (it reached the clock target), the
# target is re-expressed as an absolute clock, (@vclk + @hclk) * RP2C02_CC.
# When it yields a truthy value (wait_frame yields true; the finished fiber
# returns :done), the target is left as the loop set it.
+ def run
879
+ @fiber ||= Fiber.new do
880
+ main_loop
881
+ :done
882
+ end
883
+
884
+ debug_logging(@scanline, @hclk, @hclk_target) if @conf.loglevel >= 3
885
+
886
+ make_sure_invariants
887
+
888
+ @hclk_target = (@vclk + @hclk) * RP2C02_CC unless @fiber.resume
889
+ end
890
+
891
# Shuts the PPU down: clears the run flag and, if a main-loop fiber exists,
# resumes it once so that it can run to completion, then drops it.
#
# Raises RuntimeError if the fiber does not finish with :done.
def dispose
  @run = false
  if !@fiber.nil? && @fiber.resume != :done
    raise 'PPU Fiber should have finished'
  end
  @fiber = nil
end
896
+
897
# Suspends the main-loop fiber, yielding true. #run treats a truthy yield as
# "do not recompute the clock target".
+ def wait_frame
898
+ Fiber.yield true
899
+ end
900
+
901
# Suspends the main-loop fiber if the clock target has already been reached,
# without advancing the horizontal clock.
+ def wait_zero_clocks
902
+ Fiber.yield if @hclk_target <= @hclk
903
+ end
904
+
905
# Advances the horizontal clock by one and suspends the main-loop fiber if
# the clock target has been reached.
+ def wait_one_clock
906
+ @hclk += 1
907
+ Fiber.yield if @hclk_target <= @hclk
908
+ end
909
+
910
# Advances the horizontal clock by two and suspends the main-loop fiber if
# the clock target has been reached.
+ def wait_two_clocks
911
+ @hclk += 2
912
+ Fiber.yield if @hclk_target <= @hclk
913
+ end
914
+
915
+ ### main-loop structure
916
+ #
917
+ # # wait for boot
918
+ # clk_685
919
+ #
920
+ # loop do
921
+ # # pre-render scanline
922
+ # clk_341, clk_342, ..., clk_659
923
+ # while true
924
+ # # visible scanline (not shown)
925
+ # clk_320, clk_321, ..., clk_337
926
+ #
927
+ # # increment scanline
928
+ # clk_338
929
+ # break if @scanline == 240
930
+ #
931
+ # # visible scanline (shown)
932
+ # clk_0, clk_1, ..., clk_319
933
+ # end
934
+ #
935
+ # post-render scanline (vblank)
936
+ # do_681,682,684
937
+ # end
938
+ #
939
+ # This method definition also serves as a template for OptimizedCodeBuilder.
940
+ # Comments like "when NNN" are markers for the purpose.
941
+ #
942
+ # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/AbcSize, Style/SoleNestedConditional
943
+ def main_loop
944
+ # when 685
945
+
946
+ # wait for boot
947
+ boot
948
+ wait_frame
949
+
950
+ while @run
951
+ # pre-render scanline
952
+
953
+ 341.step(589, 8) do
954
+ # when 341, 349, ..., 589
955
+ if @hclk == 341
956
+ @sp_overflow = @sp_zero_hit = @vblanking = @vblank = false
957
+ @scanline = SCANLINE_HDUMMY
958
+ end
959
+ open_name
960
+ wait_two_clocks
961
+
962
+ # when 343, 351, ..., 591
963
+ open_attr
964
+ wait_two_clocks
965
+
966
+ # when 345, 353, ..., 593
967
+ open_pattern(@bg_pattern_base)
968
+ wait_two_clocks
969
+
970
+ # when 347, 355, ..., 595
971
+ open_pattern(@io_addr | 8)
972
+ wait_two_clocks
973
+ end
974
+
975
+ 597.step(653, 8) do
976
+ # when 597, 605, ..., 653
977
+ if @any_show
978
+ if @hclk == 645
979
+ @scroll_addr_0_4 = @scroll_latch & 0x001f
980
+ @scroll_addr_5_14 = @scroll_latch & 0x7fe0
981
+ @name_io_addr = (@scroll_addr_0_4 | @scroll_addr_5_14) & 0x0fff | 0x2000 # make cache consistent
982
+ end
983
+ end
984
+ open_name
985
+ wait_two_clocks
986
+
987
+ # when 599, 607, ..., 655
988
+ # Nestopia uses open_name here?
989
+ open_attr
990
+ wait_two_clocks
991
+
992
+ # when 601, 609, ..., 657
993
+ open_pattern(@pattern_end)
994
+ wait_two_clocks
995
+
996
+ # when 603, 611, ..., 659
997
+ open_pattern(@io_addr | 8)
998
+ if @hclk == 659
999
+ @hclk = 320
1000
+ @vclk += HCLOCK_DUMMY
1001
+ @hclk_target -= HCLOCK_DUMMY
1002
+ else
1003
+ wait_two_clocks
1004
+ end
1005
+ wait_zero_clocks
1006
+ end
1007
+
1008
+ while true
1009
+ # visible scanline (not shown)
1010
+
1011
+ # when 320
1012
+ load_extended_sprites
1013
+ open_name
1014
+ @sp_latch = @sp_ram[0] if @any_show
1015
+ @sp_buffered = 0
1016
+ @sp_zero_in_line = false
1017
+ @sp_index = 0
1018
+ @sp_phase = 0
1019
+ wait_one_clock
1020
+
1021
+ # when 321
1022
+ fetch_name
1023
+ wait_one_clock
1024
+
1025
+ # when 322
1026
+ open_attr
1027
+ wait_one_clock
1028
+
1029
+ # when 323
1030
+ fetch_attr
1031
+ scroll_clock_x
1032
+ wait_one_clock
1033
+
1034
+ # when 324
1035
+ open_pattern(@io_pattern)
1036
+ wait_one_clock
1037
+
1038
+ # when 325
1039
+ fetch_bg_pattern_0
1040
+ wait_one_clock
1041
+
1042
+ # when 326
1043
+ open_pattern(@io_pattern | 8)
1044
+ wait_one_clock
1045
+
1046
+ # when 327
1047
+ fetch_bg_pattern_1
1048
+ wait_one_clock
1049
+
1050
+ # when 328
1051
+ preload_tiles
1052
+ open_name
1053
+ wait_one_clock
1054
+
1055
+ # when 329
1056
+ fetch_name
1057
+ wait_one_clock
1058
+
1059
+ # when 330
1060
+ open_attr
1061
+ wait_one_clock
1062
+
1063
+ # when 331
1064
+ fetch_attr
1065
+ scroll_clock_x
1066
+ wait_one_clock
1067
+
1068
+ # when 332
1069
+ open_pattern(@io_pattern)
1070
+ wait_one_clock
1071
+
1072
+ # when 333
1073
+ fetch_bg_pattern_0
1074
+ wait_one_clock
1075
+
1076
+ # when 334
1077
+ open_pattern(@io_pattern | 8)
1078
+ wait_one_clock
1079
+
1080
+ # when 335
1081
+ fetch_bg_pattern_1
1082
+ wait_one_clock
1083
+
1084
+ # when 336
1085
+ open_name
1086
+ wait_one_clock
1087
+
1088
+ # when 337
1089
+ if @any_show
1090
+ update_enabled_flags_edge
1091
+ @cpu.next_frame_clock = RP2C02_HVSYNC_1 if @scanline == SCANLINE_HDUMMY && @odd_frame
1092
+ end
1093
+ wait_one_clock
1094
+
1095
+ # when 338
1096
+ open_name
1097
+ @scanline += 1
1098
+ if @scanline != SCANLINE_VBLANK
1099
+ if @any_show
1100
+ line = @scanline != 0 || !@odd_frame ? 341 : 340
1101
+ else
1102
+ update_enabled_flags_edge
1103
+ line = 341
1104
+ end
1105
+ @hclk = 0
1106
+ @vclk += line
1107
+ @hclk_target = @hclk_target <= line ? 0 : @hclk_target - line
1108
+ else
1109
+ @hclk = HCLOCK_VBLANK_0
1110
+ wait_zero_clocks
1111
+ break
1112
+ end
1113
+ wait_zero_clocks
1114
+
1115
+ # visible scanline (shown)
1116
+ 0.step(248, 8) do
1117
+ # when 0, 8, ..., 248
1118
+ if @any_show
1119
+ if @hclk == 64
1120
+ @sp_addr = @regs_oam & 0xf8 # SP_OFFSET_TO_0_1
1121
+ @sp_phase = nil
1122
+ @sp_latch = 0xff
1123
+ end
1124
+ load_tiles
1125
+ batch_render_eight_pixels
1126
+ evaluate_sprites_even if @hclk >= 64
1127
+ open_name
1128
+ end
1129
+ render_pixel
1130
+ wait_one_clock
1131
+
1132
+ # when 1, 9, ..., 249
1133
+ if @any_show
1134
+ fetch_name
1135
+ evaluate_sprites_odd if @hclk >= 64
1136
+ end
1137
+ render_pixel
1138
+ wait_one_clock
1139
+
1140
+ # when 2, 10, ..., 250
1141
+ if @any_show
1142
+ evaluate_sprites_even if @hclk >= 64
1143
+ open_attr
1144
+ end
1145
+ render_pixel
1146
+ wait_one_clock
1147
+
1148
+ # when 3, 11, ..., 251
1149
+ if @any_show
1150
+ fetch_attr
1151
+ evaluate_sprites_odd if @hclk >= 64
1152
+ scroll_clock_y if @hclk == 251
1153
+ scroll_clock_x
1154
+ end
1155
+ render_pixel
1156
+ wait_one_clock
1157
+
1158
+ # when 4, 12, ..., 252
1159
+ if @any_show
1160
+ evaluate_sprites_even if @hclk >= 64
1161
+ open_pattern(@io_pattern)
1162
+ end
1163
+ render_pixel
1164
+ wait_one_clock
1165
+
1166
+ # when 5, 13, ..., 253
1167
+ if @any_show
1168
+ fetch_bg_pattern_0
1169
+ evaluate_sprites_odd if @hclk >= 64
1170
+ end
1171
+ render_pixel
1172
+ wait_one_clock
1173
+
1174
+ # when 6, 14, ..., 254
1175
+ if @any_show
1176
+ evaluate_sprites_even if @hclk >= 64
1177
+ open_pattern(@io_pattern | 8)
1178
+ end
1179
+ render_pixel
1180
+ wait_one_clock
1181
+
1182
+ # when 7, 15, ..., 255
1183
+ if @any_show
1184
+ fetch_bg_pattern_1
1185
+ evaluate_sprites_odd if @hclk >= 64
1186
+ end
1187
+ render_pixel
1188
+ # rubocop:disable Style/NestedModifier, Style/IfUnlessModifierOfIfUnless:
1189
+ update_enabled_flags if @hclk != 255 if @any_show
1190
+ # rubocop:enable Style/NestedModifier, Style/IfUnlessModifierOfIfUnless:
1191
+ wait_one_clock
1192
+ end
1193
+
1194
+ 256.step(312, 8) do
1195
+ # rubocop:disable Style/IdenticalConditionalBranches
1196
+ if @hclk == 256
1197
+ # when 256
1198
+ open_name
1199
+ @sp_latch = 0xff if @any_show
1200
+ wait_one_clock
1201
+
1202
+ # when 257
1203
+ scroll_reset_x
1204
+ @sp_visible = false
1205
+ @sp_active = false
1206
+ wait_one_clock
1207
+ else
1208
+ # when 264, 272, ..., 312
1209
+ open_name
1210
+ wait_two_clocks
1211
+ end
1212
+ # rubocop:enable Style/IdenticalConditionalBranches
1213
+
1214
+ # when 258, 266, ..., 314
1215
+ # Nestopia uses open_name here?
1216
+ open_attr
1217
+ wait_two_clocks
1218
+
1219
+ # when 260, 268, ..., 316
1220
+ if @any_show
1221
+ buffer_idx = (@hclk - 260) / 2
1222
+ open_pattern(buffer_idx >= @sp_buffered ? @pattern_end : open_sprite(buffer_idx))
1223
+ # rubocop:disable Style/NestedModifier, Style/IfUnlessModifierOfIfUnless:
1224
+ @regs_oam = 0 if @scanline == 238 if @hclk == 316
1225
+ # rubocop:enable Style/NestedModifier, Style/IfUnlessModifierOfIfUnless:
1226
+ end
1227
+ wait_one_clock
1228
+
1229
+ # when 261, 269, ..., 317
1230
+ if @any_show
1231
+ @io_pattern = @chr_mem[@io_addr & 0x1fff] if (@hclk - 261) / 2 < @sp_buffered
1232
+ end
1233
+ wait_one_clock
1234
+
1235
+ # when 262, 270, ..., 318
1236
+ open_pattern(@io_addr | 8)
1237
+ wait_one_clock
1238
+
1239
+ # when 263, 271, ..., 319
1240
+ if @any_show
1241
+ buffer_idx = (@hclk - 263) / 2
1242
+ if buffer_idx < @sp_buffered
1243
+ pat0 = @io_pattern
1244
+ pat1 = @chr_mem[@io_addr & 0x1fff]
1245
+ load_sprite(pat0, pat1, buffer_idx) if pat0 != 0 || pat1 != 0
1246
+ end
1247
+ end
1248
+ wait_one_clock
1249
+ end
1250
+ end
1251
+
1252
+ # post-render scanline (vblank)
1253
+
1254
+ # when 681
1255
+ vblank_0
1256
+ wait_zero_clocks
1257
+
1258
+ # when 682
1259
+ vblank_1
1260
+ wait_zero_clocks
1261
+
1262
+ # when 684
1263
+ vblank_2
1264
+ wait_frame
1265
+ end
1266
+ end
1267
+ # rubocop:enable Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/AbcSize, Style/SoleNestedConditional
1268
+
1269
+ ###########################################################################
1270
+ # optimized core generator
1271
+ class OptimizedCodeBuilder
1272
+ include CodeOptimizationHelper
1273
+
1274
+ OPTIONS = [
1275
+ :method_inlining, :ivar_localization,
1276
+ :split_show_mode, :split_a12_checks, :clock_specialization,
1277
+ :fastpath, :batch_render_pixels, :oneline,
1278
+ ]
1279
+
1280
+ def build
1281
+ depends(:ivar_localization, :method_inlining)
1282
+ depends(:batch_render_pixels, :fastpath)
1283
+
1284
+ mdefs = parse_method_definitions(__FILE__)
1285
+ handlers = parse_clock_handlers(mdefs[:main_loop].body)
1286
+
1287
+ handlers = specialize_clock_handlers(handlers) if @clock_specialization
1288
+ if @fastpath
1289
+ handlers = add_fastpath(handlers) do |fastpath, hclk|
1290
+ @batch_render_pixels ? batch_render_pixels(fastpath, hclk) : fastpath
1291
+ end
1292
+ end
1293
+ code = build_loop(handlers)
1294
+ code = ppu_expand_methods(code, mdefs) if @method_inlining
1295
+
1296
+ if @split_show_mode
1297
+ code, code_no_show = split_mode(code, "@any_show")
1298
+ if @split_a12_checks
1299
+ code, code_no_a12 = split_mode(code, "@a12_monitor")
1300
+ code = branch("@a12_monitor", code, code_no_a12)
1301
+ end
1302
+ code = branch("@any_show", code, code_no_show)
1303
+ end
1304
+
1305
+ code = gen(
1306
+ mdefs[:make_sure_invariants].body,
1307
+ code,
1308
+ "@hclk_target = (@vclk + @hclk) * RP2C02_CC"
1309
+ )
1310
+
1311
+ code = localize_instance_variables(code) if @ivar_localization
1312
+
1313
+ code = gen(
1314
+ "def self.run",
1315
+ *(@loglevel >= 3 ? [" debug_logging(@scanline, @hclk, @hclk_target)"] : []),
1316
+ indent(2, code),
1317
+ "end",
1318
+ )
1319
+
1320
+ code = oneline(code) if @oneline
1321
+
1322
+ code
1323
+ end
1324
+
1325
+ COMMANDS = {
1326
+ wait_zero_clocks: "",
1327
+ wait_one_clock: "@hclk += 1\n",
1328
+ wait_two_clocks: "@hclk += 2\n",
1329
+ wait_frame: "return\n",
1330
+ }
1331
+
1332
+ # extracts the actions for each clock from PPU#main_loop
1333
+ def parse_clock_handlers(main_loop)
1334
+ handlers = {}
1335
+ main_loop.scan(/^( *)# when (.*)\n((?:\1.*\n|\n)*?\1wait_.*\n)/) do |indent, hclks, body|
1336
+ body = indent(-indent.size, body)
1337
+ body = body.gsub(/^( *)break\n/, "")
1338
+ body = expand_methods(body, COMMANDS)
1339
+ if hclks =~ /^(\d+), (\d+), \.\.\., (\d+)$/
1340
+ first, second, last = $1.to_i, $2.to_i, $3.to_i
1341
+ first.step(last, second - first) do |hclk|
1342
+ handlers[hclk] = body
1343
+ end
1344
+ else
1345
+ handlers[hclks.to_i] = body
1346
+ end
1347
+ end
1348
+ handlers
1349
+ end
1350
+
1351
+ # split clock handlers that contain a branch depending on the clock
1352
+ def specialize_clock_handlers(handlers)
1353
+ handlers.each do |hclk, handler|
1354
+ # pre-calculate clock conditions like `@hclk == 64`, replacing them with `true` or `false`
1355
+ handler = handler.gsub(/@hclk (==|>=|!=) (\d+)/) { hclk.send($1.to_sym, $2.to_i) }
1356
+
1357
+ # remove disabled branches like `if false ... end`
1358
+ handlers[hclk] = remove_trivial_branches(handler)
1359
+ end
1360
+ end
1361
+
1362
+ # add a fastpath that runs eight consecutive clock handlers at once when `@hclk + 8 <= @hclk_target`
1363
+ def add_fastpath(handlers)
1364
+ handlers.each do |hclk, handler|
1365
+ next unless hclk % 8 == 0 && hclk < 256
1366
+ fastpath = gen(*(0..7).map {|i| handlers[hclk + i] })
1367
+ fastpath = yield fastpath, hclk
1368
+ handlers[hclk] = branch("@hclk + 8 <= @hclk_target", fastpath, handler)
1369
+ end
1370
+ end
1371
+
1372
+ # replace eight `render_pixel` calls with one optimized batch version
1373
+ def batch_render_pixels(fastpath, hclk)
1374
+ fastpath = expand_methods(fastpath, render_pixel: gen(
1375
+ "unless @any_show",
1376
+ " @bg_pixels[@hclk % 8] = 0",
1377
+ " @output_pixels << @output_color[@scroll_addr_5_14 & 0x3f00 == 0x3f00 ? @scroll_addr_0_4 : 0]",
1378
+ "end",
1379
+ ))
1380
+ expand_methods(fastpath, batch_render_eight_pixels: gen(
1381
+ "# batch-version of render_pixel",
1382
+ "if @any_show",
1383
+ " if @sp_active",
1384
+ " if @bg_enabled",
1385
+ *(0..7).flat_map do |i|
1386
+ [
1387
+ " pixel#{ i } = @bg_pixels[#{ i }]",
1388
+ " if sprite = @sp_map[@hclk#{ i != 0 ? " + #{ i }" : "" }]",
1389
+ " if pixel#{ i } % 4 == 0",
1390
+ " pixel#{ i } = sprite[2]",
1391
+ " else",
1392
+ *(hclk + i == 255 ? [] : [" @sp_zero_hit = true if sprite[1]"]),
1393
+ " pixel#{ i } = sprite[2] unless sprite[0]",
1394
+ " end",
1395
+ " end",
1396
+ ]
1397
+ end,
1398
+ " @output_pixels << " + (0..7).map {|n| "@output_color[pixel#{ n }]" } * " << ",
1399
+ " else",
1400
+ *(0..7).map do |i|
1401
+ " pixel#{ i } = (sprite = @sp_map[@hclk #{ i != 0 ? " + #{ i }" : "" }]) ? sprite[2] : 0"
1402
+ end,
1403
+ " @output_pixels << " + (0..7).map {|n| "@output_color[pixel#{ n }]" } * " << ",
1404
+ " end",
1405
+ " else",
1406
+ " if @bg_enabled # this is the true hot-spot",
1407
+ " @output_pixels << " + (0..7).map {|n| "@output_color[@bg_pixels[#{ n }]]" } * " << ",
1408
+ " else",
1409
+ " clr = @output_color[0]",
1410
+ " @output_pixels << " + ["clr"] * 8 * " << ",
1411
+ " end",
1412
+ " end",
1413
+ "end",
1414
+ ))
1415
+ end
1416
+
1417
+ # remove all newlines (this will reduce `trace` instructions)
1418
+ def oneline(code)
1419
+ code.gsub(/^ *|#.*/, "").gsub("[\n", "[").gsub(/\n *\]/, "]").tr("\n", ";")
1420
+ end
1421
+
1422
+ # inline method calls
1423
+ def ppu_expand_methods(code, mdefs)
1424
+ code = expand_inline_methods(code, :open_sprite, mdefs[:open_sprite])
1425
+
1426
+ # twice is enough
1427
+ expand_methods(expand_methods(code, mdefs), mdefs)
1428
+ end
1429
+
1430
+ # create two versions of the same code by evaluating easy branches
1431
+ # CAUTION: the condition must be invariant during PPU#run
1432
+ def split_mode(code, cond)
1433
+ %w(true false).map do |bool|
1434
+ rebuild_loop(remove_trivial_branches(replace_cond_var(code, cond, bool)))
1435
+ end
1436
+ end
1437
+
1438
+ # generate the main loop code (a `case @hclk` dispatch inside a `while` loop)
1439
+ def build_loop(handlers)
1440
+ clauses = {}
1441
+ handlers.sort.each do |hclk, handler|
1442
+ (clauses[handler] ||= []) << hclk
1443
+ end
1444
+
1445
+ gen(
1446
+ "while @hclk_target > @hclk",
1447
+ " case @hclk",
1448
+ *clauses.invert.sort.map do |hclks, handler|
1449
+ " when #{ hclks * ", " }\n" + indent(4, handler)
1450
+ end,
1451
+ " end",
1452
+ "end",
1453
+ )
1454
+ end
1455
+
1456
+ # deconstruct a loop, unify handlers, and re-generate a new loop
1457
+ def rebuild_loop(code)
1458
+ handlers = {}
1459
+ code.scan(/^ when ((?:\d+, )*\d+)\n((?: .*\n|\n)*)/) do |hclks, handler|
1460
+ hclks.split(", ").each do |hclk|
1461
+ handlers[hclk.to_i] = indent(-4, handler)
1462
+ end
1463
+ end
1464
+ build_loop(handlers)
1465
+ end
1466
+ end
1467
+ end
1468
+ end