nyxis 1.0.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/nxs.rb +959 -18
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 21f182240eeb38df31bb91d81c52a41df156a234b82e61454b49c34f49de5ac8
|
|
4
|
+
data.tar.gz: ce6fca2da3243e57488f081ee10302cf780575591b2947d11a7becc85ad4feea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 27d92d5aad9aa3f661c610666361cd420e337b1955891abd006d02ec899f4b94e7040c5baeb4614b2d71efe457bb6728806a2c7e7f0e6ccda09075416a0286d7
|
|
7
|
+
data.tar.gz: d5f7df6269b0fe00e8ae6d6512cc67eed7ba33891654b51833f8519a627ac29136f90cd6e0bcceae3cf01db0ef025166bbc84e836a00cf6bf489eafb744ea4d5
|
data/nxs.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'pattern'
|
|
4
|
+
|
|
3
5
|
# NXS Reader — .nxb parser (Ruby 3.x, stdlib only).
|
|
4
6
|
#
|
|
5
7
|
# Implements Nyxis v1.1 binary wire format.
|
|
@@ -22,8 +24,233 @@
|
|
|
22
24
|
module Nxs
|
|
23
25
|
# Four-byte magic numbers. Each trailing comment is the ASCII spelling of the hex value.
MAGIC_FILE = 0x4E595842 # NYXB
MAGIC_OBJ = 0x4E59584F # NYXO
MAGIC_LIST = 0x4E59584C # NYXL
# NOTE(review): the bytes 4E 58 53 50 spell "NXSP", not "NYXP" (which would be 0x4E595850).
# Confirm against the spec whether the value or the old comment was intended.
MAGIC_PAGE = 0x4E585350 # NXSP
MAGIC_FOOTER = 0x2153584E # NXS! when the four bytes are read low byte first

# Bits of the preamble flags word; COLUMNAR and PAX select the layout.
FLAG_COLUMNAR = 0x0001
FLAG_PAX = 0x0004
FLAG_SCHEMA = 0x0002

# Footer sizes per layout and tail-entry sizes for the columnar / PAX tails.
FOOTER_ROW_BYTES = 12
FOOTER_COL_BYTES = 20
FOOTER_PAX_BYTES = 28
COL_TAIL_ENTRY_BYTES = 20
PAX_TAIL_ENTRY_BYTES = 28

# Adaptive prefetch (phase 1) — spec §6–§8.4
DEFAULT_PAGE_SIZE = 65_536
DEFAULT_MAX_PAGES = 64
DEFAULT_COALESCE_GAP_PAGES = 1
DEFAULT_PREFETCH_DEPTH = 4
EAGER_THRESHOLD_MB = 10
LAZY_THRESHOLD_MB = 50

# Integer codes for the access hint given when a reader is opened.
HINT_UNKNOWN = 0
HINT_SEQUENTIAL = 1
HINT_RANDOM = 2
HINT_FULL = 3
HINT_PARTIAL = 4

# Symbol spellings accepted in place of the integer hint codes.
HINT_SYMBOLS = {
  unknown: HINT_UNKNOWN,
  sequential: HINT_SEQUENTIAL,
  random: HINT_RANDOM,
  full: HINT_FULL,
  partial: HINT_PARTIAL
}.freeze
|
|
61
|
+
|
|
62
|
+
# Convert an open hint to its integer code.
# Integers pass through untouched; anything else is looked up in HINT_SYMBOLS
# and falls back to HINT_UNKNOWN when it is not a known key.
def self.normalize_hint(hint)
  case hint
  when Integer then hint
  else HINT_SYMBOLS.fetch(hint, HINT_UNKNOWN)
  end
end
|
|
67
|
+
|
|
68
|
+
# Initial prefetch strategy from open hint and file size (spec §5.1).
#
# The size is reduced to whole mebibytes by integer division before comparing.
# Returns 'eager' for a FULL hint on a file within EAGER_THRESHOLD_MB,
# 'lazy' for a file above LAZY_THRESHOLD_MB, and 'adaptive' otherwise.
def self.initial_strategy(hint, file_size)
  code = normalize_hint(hint)
  megabytes = file_size / (1024 * 1024)

  if code == HINT_FULL && megabytes <= EAGER_THRESHOLD_MB
    'eager'
  elsif megabytes > LAZY_THRESHOLD_MB
    'lazy'
  else
    'adaptive'
  end
end
|
|
77
|
+
|
|
78
|
+
# Row-layout data sector as [start, length].
# The sector starts at byte 32; its length is zero unless tail_start lies
# after byte 32 and no further than file_size.
def self.row_data_sector(tail_start, file_size)
  header_end = 32
  usable = tail_start > header_end && tail_start <= file_size
  [header_end, usable ? tail_start - header_end : 0]
end
|
|
87
|
+
|
|
88
|
+
# Collapse page indices into contiguous fetch spans.
#
# Indices are de-duplicated and sorted; neighbours whose difference is at most
# gap_pages share a span. Each span is a hash with :page_start, :page_end
# (inclusive), :byte_start and :byte_length, the last two scaled by page_size.
def self.coalesce_page_indices(indices, gap_pages, page_size = DEFAULT_PAGE_SIZE)
  return [] if indices.empty?

  runs = indices.uniq.sort.slice_when { |prev, succ| succ - prev > gap_pages }
  runs.map do |run|
    first_page = run.first
    last_page = run.last
    {
      page_start: first_page,
      page_end: last_page,
      byte_start: first_page * page_size,
      byte_length: (last_page - first_page + 1) * page_size
    }
  end
end
|
|
109
|
+
|
|
110
|
+
# Trim each range's :byte_length so it does not run past file_size.
# Ranges left with no bytes are dropped; the input hashes are not modified.
def self.clamp_page_ranges(ranges, file_size)
  ranges.each_with_object([]) do |range, kept|
    length = [range[:byte_length], file_size - range[:byte_start]].min
    kept << range.merge(byte_length: length) if length.positive?
  end
end
|
|
119
|
+
|
|
120
|
+
# Page index of every record in start_index..end_index.
# The block maps a record index to its byte offset; the result keeps one entry
# per record, so duplicates are possible.
def self.page_indices_for_viewport(start_index, end_index, page_size, &record_offset)
  pages = []
  (start_index..end_index).each do |record|
    pages << record_offset.call(record) / page_size
  end
  pages
end
|
|
123
|
+
|
|
124
|
+
# LRU page cache with optional pinning (spec §6).
#
# Pages are keyed by page index. Every hit or insert stamps the entry with a
# counter; eviction removes the unpinned entry with the smallest stamp.
# The class does no locking of its own.
class PageCache
  attr_reader :max_pages, :page_size, :hits, :misses

  # max_pages caps the number of cached pages; a value <= 0 disables caching.
  def initialize(max_pages = DEFAULT_MAX_PAGES, page_size = DEFAULT_PAGE_SIZE)
    @max_pages = max_pages
    @page_size = page_size
    @pages = {}
    @clock = 0
    @hits = 0
    @misses = 0
  end

  # True when the page is cached. Does not affect recency or the hit counters.
  def has?(page_index)
    @pages.key?(page_index)
  end

  # Return the cached data for page_index, or nil. Updates hit/miss counters
  # and marks the page as most recently used on a hit.
  def get(page_index)
    entry = @pages[page_index]
    unless entry
      @misses += 1
      return nil
    end
    @clock += 1
    entry[:last_used] = @clock
    @hits += 1
    entry[:data]
  end

  # Store data under page_index, evicting least-recently-used unpinned pages
  # as needed.
  #
  # Returns the stored entry, or nil when nothing was stored: either caching is
  # disabled or the cache is full of pinned pages. In the latter case the
  # page is skipped instead of retrying eviction forever.
  def set(page_index, data, pinned: false)
    return if @max_pages <= 0

    # Overwriting an existing page does not grow the cache, so nothing needs evicting.
    unless @pages.key?(page_index)
      while @pages.size >= @max_pages
        return unless evict_one?
      end
    end
    @clock += 1
    @pages[page_index] = { data: data, last_used: @clock, pinned: pinned }
  end

  # Mark the given pages as pinned; indices that are not cached are ignored.
  def pin_pages(page_indices)
    page_indices.each do |p|
      entry = @pages[p]
      entry[:pinned] = true if entry
    end
  end

  # Clear the pinned mark on every cached page.
  def unpin_all
    @pages.each_value { |entry| entry[:pinned] = false }
  end

  # Snapshot of cache occupancy and hit/miss counters.
  def stats
    bytes = @pages.each_value.sum { |e| e[:data].bytesize }
    {
      pages_cached: @pages.size,
      pages_max: @max_pages,
      memory_used_bytes: bytes,
      cache_hits: @hits,
      cache_misses: @misses
    }
  end

  private

  # Remove the least-recently-used unpinned page.
  # Returns false when every cached page is pinned (nothing removed).
  def evict_one?
    victim = nil
    oldest = nil
    @pages.each do |idx, entry|
      next if entry[:pinned]

      if oldest.nil? || entry[:last_used] < oldest
        oldest = entry[:last_used]
        victim = idx
      end
    end
    return false unless victim

    @pages.delete(victim)
    true
  end
end
|
|
202
|
+
|
|
203
|
+
# In-flight page fetch deduplication for concurrent prefetch_viewport calls.
#
# The first caller of #with for a page index (the leader) runs the block; callers
# that arrive while it is still running wait and receive the leader's result or
# its error. Completion is signalled by closing the entry's queue, which wakes
# every waiter rather than just one.
class InFlightMap
  Entry = Struct.new(:queue, :data, :error)

  def initialize
    @mu = Mutex.new
    @map = {}
  end

  # True while a fetch for page_index is registered.
  def has?(page_index)
    @mu.synchronize { @map.key?(page_index) }
  end

  # Block until the in-flight fetch for page_index finishes.
  # Returns nil immediately when no fetch is registered; re-raises the
  # leader's error if it failed.
  def wait(page_index)
    entry = @mu.synchronize { @map[page_index] }
    return nil unless entry

    await(entry)
  end

  # Run the block as leader, or wait for the current leader's outcome.
  # Returns the block's value; raises whatever the leader's block raised.
  def with(page_index)
    leader = false
    entry = @mu.synchronize do
      existing = @map[page_index]
      next existing if existing

      leader = true
      @map[page_index] = Entry.new(Queue.new)
    end
    # Wait on the entry captured under the lock; looking it up again could miss
    # it if the leader finishes in between.
    return await(entry) unless leader

    begin
      entry.data = yield
    rescue StandardError => e
      entry.error = e
      raise
    ensure
      @mu.synchronize { @map.delete(page_index) if @map[page_index].equal?(entry) }
      # Closing wakes all current waiters and lets later pops return at once.
      entry.queue.close
    end
  end

  private

  # Wait for the entry's queue to be closed, then surface its outcome.
  def await(entry)
    entry.queue.pop
    raise entry.error if entry.error

    entry.data
  end
end
|
|
27
254
|
|
|
28
255
|
class NxsError < StandardError
|
|
29
256
|
attr_reader :code
|
|
@@ -37,9 +264,15 @@ module Nxs
|
|
|
37
264
|
# ── Reader ──────────────────────────────────────────────────────────────────
|
|
38
265
|
|
|
39
266
|
class Reader
|
|
40
|
-
attr_reader :keys, :record_count
|
|
41
|
-
|
|
42
|
-
def initialize(bytes)
|
|
267
|
+
attr_reader :keys, :record_count, :layout
|
|
268
|
+
|
|
269
|
+
def initialize(bytes, **options)
|
|
270
|
+
hint = options.fetch(:hint, HINT_UNKNOWN)
|
|
271
|
+
max_pages = options.fetch(:max_pages, DEFAULT_MAX_PAGES)
|
|
272
|
+
page_size = options.fetch(:page_size, DEFAULT_PAGE_SIZE)
|
|
273
|
+
coalesce_gap_pages = options.fetch(:coalesce_gap_pages, DEFAULT_COALESCE_GAP_PAGES)
|
|
274
|
+
prefetch_depth = options.fetch(:prefetch_depth, DEFAULT_PREFETCH_DEPTH)
|
|
275
|
+
fetch_range = options.fetch(:fetch_range, nil)
|
|
43
276
|
@data = bytes.b # force binary encoding
|
|
44
277
|
sz = @data.bytesize
|
|
45
278
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'file too small') if sz < 32
|
|
@@ -51,12 +284,14 @@ module Nxs
|
|
|
51
284
|
raise NxsError.new('ERR_BAD_MAGIC', 'footer magic mismatch') if footer != MAGIC_FOOTER
|
|
52
285
|
|
|
53
286
|
# Preamble: Version(2) + Flags(2) + DictHash(8) + TailPtr(8) + Reserved(8)
|
|
54
|
-
@flags
|
|
55
|
-
|
|
56
|
-
|
|
287
|
+
@flags = @data.unpack1('@6 S<')
|
|
288
|
+
preamble_tail = @data.unpack1('@16 Q<')
|
|
289
|
+
@tail_ptr = preamble_tail
|
|
290
|
+
layout_flags = @flags & (FLAG_COLUMNAR | FLAG_PAX)
|
|
291
|
+
if @tail_ptr.zero? && layout_flags.zero?
|
|
57
292
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'stream footer') if sz < 44
|
|
58
293
|
|
|
59
|
-
@tail_ptr = @data.unpack1("@#{sz -
|
|
294
|
+
@tail_ptr = @data.unpack1("@#{sz - FOOTER_ROW_BYTES}Q<")
|
|
60
295
|
end
|
|
61
296
|
|
|
62
297
|
@dict_hash = @data.unpack1('@8 Q<')
|
|
@@ -71,24 +306,67 @@ module Nxs
|
|
|
71
306
|
raise NxsError.new('ERR_DICT_MISMATCH', 'schema hash mismatch') if computed != @dict_hash
|
|
72
307
|
end
|
|
73
308
|
|
|
74
|
-
|
|
75
|
-
@
|
|
76
|
-
|
|
309
|
+
@col_buf_off = []
|
|
310
|
+
@col_buf_len = []
|
|
311
|
+
parse_layout_tail!(preamble_tail)
|
|
312
|
+
init_column_prefetch!(fetch_range: fetch_range)
|
|
313
|
+
init_prefetch!(
|
|
314
|
+
hint: hint,
|
|
315
|
+
max_pages: max_pages,
|
|
316
|
+
page_size: page_size,
|
|
317
|
+
coalesce_gap_pages: coalesce_gap_pages,
|
|
318
|
+
prefetch_depth: prefetch_depth,
|
|
319
|
+
fetch_range: fetch_range
|
|
320
|
+
)
|
|
77
321
|
end
|
|
78
322
|
|
|
79
|
-
# O(1) record lookup —
|
|
323
|
+
# O(1) record lookup — row tail-index or columnar/PAX record index.
#
# Raises NxsError (ERR_OUT_OF_BOUNDS) when i is outside [0, record_count).
# For row layout the access is reported to on_access and the object is built
# from the absolute offset stored in the tail index; other layouts build the
# object from the record index itself.
def record(i)
  if i < 0 || i >= @record_count
    raise NxsError.new('ERR_OUT_OF_BOUNDS', "record #{i} out of [0, #{@record_count})")
  end

  if @layout == :row
    on_access(i)
    Object.new(self, @data.unpack1("@#{@tail_start + i * 10 + 2}Q<"))
  else
    Object.new(self, i, i)
  end
end
|
|
89
335
|
|
|
90
|
-
#
|
|
336
|
+
# Prefetch one column buffer (columnar layout only; §7.4).
#
# Raises NxsError for a non-columnar layout, an unknown key, or a column range
# that is negative or (for local data) past the end of the buffer.
# The fetch callback runs outside the column mutex; the warmed flag is checked
# again afterwards so only one caller records the fetch.
def prefetch_column(key)
  raise NxsError.new('ERR_LAYOUT', 'prefetch_column requires columnar layout') if @layout != :columnar

  slot = @key_index[key]
  raise NxsError.new('ERR_KEY_NOT_FOUND', "key #{key.inspect} not in schema") unless slot

  plan = @col_mu.synchronize do
    next nil if @col_warmed[slot]

    start = @col_buf_off[slot].to_i
    size = @col_buf_len[slot].to_i
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if start.negative? || size.negative?
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if !@col_remote_fetch && start + size > @data.bytesize

    [start, size, @col_fetch_range]
  end
  return unless plan

  start, size, fetcher = plan
  blob = fetcher.call(start, size)
  @col_mu.synchronize do
    unless @col_warmed[slot]
      # Keep a private copy only when the bytes are not fully present in @data.
      @col_overlay[slot] = blob if start + blob.bytesize > @data.bytesize
      @col_warmed[slot] = true
      @col_fetches += 1
    end
  end
end
|
|
365
|
+
|
|
366
|
+
# Sum f64 column — columnar/PAX buffer path or row scan.
|
|
91
367
|
def sum_f64(key)
|
|
368
|
+
return col_sum_f64(key) if @layout != :row
|
|
369
|
+
|
|
92
370
|
slot = @key_index[key]
|
|
93
371
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot
|
|
94
372
|
|
|
@@ -106,6 +384,68 @@ module Nxs
|
|
|
106
384
|
sum
|
|
107
385
|
end
|
|
108
386
|
|
|
387
|
+
# Columnar/PAX f64 sum (row layout delegates to sum_f64).
#
# Raises NxsError (ERR_OUT_OF_BOUNDS) for an unknown key. For columnar data the
# values are added in record order, skipping records whose bitmap bit is clear
# and any value that would extend past the end of the value buffer.
def col_sum_f64(key)
  slot = @key_index[key]
  raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot

  case @layout
  when :row then return sum_f64(key)
  when :pax then return pax_sum_f64(slot)
  end

  bitmap, values = col_field_parts(slot)
  total = 0.0
  @record_count.times do |rec|
    next unless col_bit(bitmap, rec)

    byte_off = rec * 8
    total += values.unpack1("@#{byte_off}E") if byte_off + 8 <= values.bytesize
  end
  total
end
|
|
408
|
+
|
|
409
|
+
# Raw value bytes for a fixed-width column (columnar/PAX).
#
# Raises NxsError (ERR_LAYOUT) on row layout. Returns nil when the key is not
# in the schema, when the column is variable-length, or when the column sector
# cannot be read.
def col_buffer(key)
  raise NxsError.new('ERR_LAYOUT', 'col_buffer requires columnar or PAX layout') if @layout == :row

  slot = @key_index[key]
  return nil unless slot
  return nil if var_sigil?(@key_sigils[slot])

  # Only sector-read failures map to nil. A method-wide rescue would also
  # swallow the layout error raised above.
  begin
    _bitmap, values = col_field_parts(slot)
    values
  rescue NxsError
    nil
  end
end
|
|
422
|
+
|
|
423
|
+
# Null bitmap + u32 offsets + values for var-length columns (columnar only).
#
# Returns a hash with :bitmap, :offsets, :values and :count (the record count).
# Raises NxsError for a non-columnar layout, an unknown key, or a column whose
# sigil is not variable-length.
def col_var_buffer(key)
  raise NxsError.new('ERR_LAYOUT', 'col_var_buffer is columnar-only') if @layout != :columnar

  slot = @key_index[key]
  raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") if !slot
  raise NxsError.new('ERR_UNSUPPORTED_FIELD_TYPE', key) if !var_sigil?(@key_sigils[slot])

  bitmap, offsets, values = col_var_parts(slot)
  { bitmap: bitmap, offsets: offsets, values: values, count: @record_count }
end
|
|
434
|
+
|
|
435
|
+
# String value of a var-length column for one record (columnar/PAX).
#
# Returns nil when the key is unknown, the layout is row, record_index is
# outside [0, record_count), the column's sigil is not 0x22, the column
# parts cannot be located, or the record's bitmap bit is clear.
def col_get_str(key, record_index)
  slot = @key_index[key]
  return nil unless slot && @layout != :row
  # A negative index would otherwise address the bitmap and offset table from
  # the wrong end instead of being rejected.
  return nil unless record_index >= 0 && record_index < @record_count
  return nil unless @key_sigils[slot] == 0x22

  bm, offsets, values, ok = col_var_parts_at(record_index, slot)
  return nil unless ok

  # PAX bitmaps are indexed by the record's position inside its page.
  bit_idx = @layout == :pax ? pax_find_page(record_index)&.[](:local) : record_index
  return nil if bit_idx.nil? || !col_bit(bm, bit_idx)

  var_str_at(offsets, values, bit_idx)
end
|
|
448
|
+
|
|
109
449
|
def min_f64(key)
|
|
110
450
|
slot = @key_index[key]
|
|
111
451
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot
|
|
@@ -178,6 +518,8 @@ module Nxs
|
|
|
178
518
|
t_idx = 0
|
|
179
519
|
|
|
180
520
|
loop do
|
|
521
|
+
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'bitmask overrun on corrupt input') if p >= data.bytesize
|
|
522
|
+
|
|
181
523
|
b = data.getbyte(p)
|
|
182
524
|
p += 1
|
|
183
525
|
bits = b & 0x7F
|
|
@@ -203,8 +545,556 @@ module Nxs
|
|
|
203
545
|
end
|
|
204
546
|
end
|
|
205
547
|
|
|
548
|
+
# rubocop:disable Metrics/ParameterLists -- prefetch open options mirror Go OpenOptions
# Initialise prefetch state: mutexes, page cache, in-flight map, pattern
# detector, strategy and the byte-range fetch callback.
#
# Non-positive page_size and prefetch_depth fall back to their defaults.
# When no fetch_range is supplied, ranges are served from the in-memory
# buffer and a range that is negative or past its end raises NxsError.
# Starts the background eager fetch when the layout is row and the initial
# strategy is 'eager'.
def init_prefetch!(hint:, max_pages:, page_size:, coalesce_gap_pages:, prefetch_depth:, fetch_range:)
  @prefetch_mu = Mutex.new
  @cache_mu = Mutex.new
  @prefetch_hint = Nxs.normalize_hint(hint)
  # Page indices are computed as byte offset / page size, so a zero page size
  # would raise ZeroDivisionError on the first access.
  @prefetch_page_size = page_size.positive? ? page_size : DEFAULT_PAGE_SIZE
  @prefetch_depth = prefetch_depth.positive? ? prefetch_depth : DEFAULT_PREFETCH_DEPTH
  @coalesce_gap_pages = coalesce_gap_pages
  @page_cache = PageCache.new(max_pages, @prefetch_page_size)
  @in_flight = InFlightMap.new
  @fetches_issued = 0
  @detector = AccessPatternDetector.new
  @prefetch_strategy = Nxs.initial_strategy(@prefetch_hint, @data.bytesize)
  @prefetch_pattern = PATTERN_UNKNOWN
  @eager_started = false
  @eager_complete = false
  @eager_cancel = false
  @eager_thread = nil
  @closed = false
  @prefetch_paused = false
  @fetch_range = fetch_range || lambda do |byte_start, byte_length|
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'fetch range out of bounds') if byte_start.negative?

    end_ = byte_start + byte_length
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'fetch range out of bounds') if end_ > @data.bytesize

    @data[byte_start, byte_length]
  end
  start_eager_background! if @layout == :row && @prefetch_strategy == 'eager'
end
# rubocop:enable Metrics/ParameterLists
|
|
579
|
+
|
|
580
|
+
# Block until eager / background prefetch completes (spec §8).
# Returns nil when no background thread was started.
def warmup
  worker = @prefetch_mu.synchronize { @eager_thread }
  return nil if worker.nil?

  worker.join
end
|
|
585
|
+
|
|
586
|
+
# Stop scheduling speculative and eager prefetch (§8.1).
# Sets the paused flag under the prefetch mutex.
def pause_prefetch
  @prefetch_mu.synchronize do
    @prefetch_paused = true
  end
end
|
|
590
|
+
|
|
591
|
+
# Re-enable speculative prefetch after pause_prefetch.
# Clears the paused flag under the prefetch mutex.
def resume_prefetch
  @prefetch_mu.synchronize do
    @prefetch_paused = false
  end
end
|
|
595
|
+
|
|
596
|
+
# Cancel in-flight eager prefetch and wait for the background thread.
# Marks the reader closed and sets the cancel flag under the prefetch mutex,
# then joins the thread outside the lock.
def close
  worker = @prefetch_mu.synchronize do
    @closed = true
    @eager_cancel = true
    @eager_thread
  end
  worker&.join
end
|
|
606
|
+
|
|
607
|
+
# Record an access to a row-layout record and drive prefetch from it.
#
# Does nothing for other layouts, for an empty file, or while the reader is
# closed or paused. Otherwise it feeds the detector, may switch the strategy
# to eager, touches the page cache entry for the record's page, and, outside
# the prefetch mutex, starts the eager thread or a speculative prefetch.
def on_access(index)
  return if @layout != :row
  return if @record_count.zero?

  upgraded, eager_mode, sequential = @prefetch_mu.synchronize do
    return if @closed || @prefetch_paused

    @detector.observe(index)
    @prefetch_pattern = @detector.pattern
    upgrade = maybe_upgrade_to_eager!
    if eager_complete? || @prefetch_strategy == 'eager'
      [upgrade, true, false]
    else
      page = record_byte_offset(index) / @prefetch_page_size
      @cache_mu.synchronize { @page_cache.get(page) }
      [upgrade, false, @prefetch_strategy == 'adaptive' && @detector.pattern == PATTERN_SEQUENTIAL]
    end
  end

  start_eager_background! if upgraded
  return if eager_mode

  speculative_prefetch! if sequential
end
|
|
633
|
+
|
|
634
|
+
# Read the 64-bit little-endian value stored for record i in the tail index:
# entries are 10 bytes apart and the value sits 2 bytes into each entry.
def record_byte_offset(i)
  entry_pos = @tail_start + (i * 10) + 2
  @data.unpack1("@#{entry_pos}Q<")
end
|
|
637
|
+
|
|
638
|
+
# Prefetch pages for records [start_index, end_index] (row layout only).
#
# Returns self. Other layouts are a no-op. Raises NxsError (ERR_OUT_OF_BOUNDS)
# unless 0 <= start_index <= end_index < record_count. Pages that are neither
# cached nor in flight are fetched as coalesced ranges while holding the cache
# mutex; the viewport's pages are then pinned and all pins cleared again.
def prefetch_viewport(start_index, end_index)
  return self if @layout != :row

  n = @record_count
  unless start_index.between?(0, end_index) && end_index < n
    raise NxsError.new(
      'ERR_OUT_OF_BOUNDS',
      "prefetch_viewport [#{start_index}, #{end_index}] out of [0, #{n})"
    )
  end

  @cache_mu.synchronize do
    bytes_per_page = @prefetch_page_size
    pages = Nxs.page_indices_for_viewport(start_index, end_index, bytes_per_page) do |i|
      record_byte_offset(i)
    end
    absent = pages.uniq.reject { |pg| @page_cache.has?(pg) || @in_flight.has?(pg) }
    unless absent.empty?
      spans = Nxs.clamp_page_ranges(
        Nxs.coalesce_page_indices(absent, @coalesce_gap_pages, bytes_per_page),
        @data.bytesize
      )
      spans.each { |span| fetch_coalesced_range_unlocked!(span) }
    end
    @page_cache.pin_pages(pages)
    @page_cache.unpin_all
  end
  self
end
|
|
672
|
+
|
|
673
|
+
# Page-cache statistics merged with prefetch counters.
#
# Adds :fetches_issued, :column_fetches_issued, :strategy and :pattern to the
# hash returned by the page cache. The column mutex and counter exist only for
# columnar readers, so other layouts report 0 column fetches.
def cache_stats
  stats = @page_cache.stats
  col_fetches = @col_mu ? @col_mu.synchronize { @col_fetches } : 0
  strategy, pattern = @prefetch_mu.synchronize do
    [@prefetch_strategy, @detector.pattern]
  end
  stats.merge(
    fetches_issued: @fetches_issued,
    column_fetches_issued: col_fetches,
    strategy: strategy,
    pattern: pattern
  )
end
|
|
686
|
+
|
|
206
687
|
private
|
|
207
688
|
|
|
689
|
+
# Whether the strategy is 'eager' and the eager pass has been marked complete.
def eager_complete?
  return false unless @prefetch_strategy == 'eager'

  @eager_complete
end
|
|
692
|
+
|
|
693
|
+
# Switch the strategy from 'adaptive' to 'eager' when every condition holds:
# prefetch is not paused, the detector reports a sequential pattern with at
# least UPGRADE_SEQUENTIAL_THRESHOLD runs, and the file is within
# EAGER_THRESHOLD_MB whole mebibytes. Returns true on upgrade, nil otherwise.
def maybe_upgrade_to_eager!
  eligible = !@prefetch_paused &&
             @prefetch_strategy == 'adaptive' &&
             @detector.pattern == PATTERN_SEQUENTIAL &&
             @detector.sequential_runs >= UPGRADE_SEQUENTIAL_THRESHOLD &&
             @data.bytesize / (1024 * 1024) <= EAGER_THRESHOLD_MB
  return unless eligible

  @prefetch_strategy = 'eager'
  true
end
|
|
703
|
+
|
|
704
|
+
# Fetch the pages of the records the detector predicts will be read next.
# Skipped while paused or when nothing is predicted; pages already cached or
# in flight are left out before the remaining pages are coalesced and fetched.
def speculative_prefetch!
  return if @prefetch_mu.synchronize { @prefetch_paused }

  upcoming = @prefetch_mu.synchronize { @detector.predict_next(@prefetch_depth, @record_count) }
  return if upcoming.empty?

  bytes_per_page = @prefetch_page_size
  wanted = @cache_mu.synchronize do
    upcoming
      .map { |rec| record_byte_offset(rec) / bytes_per_page }
      .reject { |page| @page_cache.has?(page) || @in_flight.has?(page) }
      .uniq
  end
  return if wanted.empty?

  spans = Nxs.clamp_page_ranges(
    Nxs.coalesce_page_indices(wanted, @coalesce_gap_pages, bytes_per_page),
    @data.bytesize
  )
  spans.each { |span| fetch_coalesced_range!(span) }
end
|
|
726
|
+
|
|
727
|
+
# Start the one-off background thread that fetches the whole row data sector.
# Does nothing unless the strategy is 'eager', and at most once per reader;
# skipped while paused. An empty data sector is marked complete immediately
# without starting a thread.
def start_eager_background!
  return if @prefetch_strategy != 'eager'

  @prefetch_mu.synchronize do
    return if @prefetch_paused || @eager_started

    @eager_started = true
    origin, span = Nxs.row_data_sector(@tail_start, @data.bytesize)
    if span.zero?
      @eager_complete = true
      nil
    else
      @eager_thread = Thread.new { run_eager_background(origin, span) }
    end
  end
end
|
|
742
|
+
|
|
743
|
+
# Body of the eager background thread: fetch every page overlapping
# [sector_start, sector_start + sector_len), clamped to the buffer size.
# The cancel flag is checked before starting and before each range; the
# complete flag is set at the end only if cancel was not requested.
def run_eager_background(sector_start, sector_len)
  stop_byte = [sector_start + sector_len, @data.bytesize].min
  return if sector_start >= stop_byte

  bytes_per_page = @prefetch_page_size
  all_pages = ((sector_start / bytes_per_page)..((stop_byte - 1) / bytes_per_page)).to_a
  return if @prefetch_mu.synchronize { @eager_cancel }

  absent = @cache_mu.synchronize do
    all_pages.reject { |page| @page_cache.has?(page) || @in_flight.has?(page) }
  end
  if absent.empty?
    @prefetch_mu.synchronize { @eager_complete = true unless @eager_cancel }
    return
  end

  spans = Nxs.clamp_page_ranges(
    Nxs.coalesce_page_indices(absent, @coalesce_gap_pages, bytes_per_page),
    @data.bytesize
  )
  spans.each do |span|
    break if @prefetch_mu.synchronize { @eager_cancel }

    fetch_coalesced_range!(span)
  end
  @prefetch_mu.synchronize { @eager_complete = true unless @eager_cancel }
end
|
|
773
|
+
|
|
774
|
+
# Fetch one coalesced page range while holding the cache mutex.
def fetch_coalesced_range!(page_range)
  @cache_mu.synchronize do
    fetch_coalesced_range_unlocked!(page_range)
  end
end
|
|
777
|
+
|
|
778
|
+
# Fetch the bytes of one coalesced range and split them into cache pages.
# Does not take the cache mutex itself. Pages already cached are left alone,
# the last page may be shorter than the page size, and pages that start past
# the end of the fetched bytes are skipped.
def fetch_coalesced_range_unlocked!(page_range)
  range_start = page_range[:byte_start]
  blob = fetch_range_bytes!(range_start, page_range[:byte_length])
  bytes_per_page = @prefetch_page_size
  (page_range[:page_start]..page_range[:page_end]).each do |page|
    next if @page_cache.has?(page)

    offset_in_blob = page * bytes_per_page - range_start
    available = [bytes_per_page, blob.bytesize - offset_in_blob].min
    @page_cache.set(page, blob[offset_in_blob, available]) if available.positive?
  end
end
|
|
791
|
+
|
|
792
|
+
# Count one issued fetch and return whatever the fetch callback yields for the
# given byte range.
def fetch_range_bytes!(byte_start, byte_length)
  @fetches_issued = @fetches_issued + 1
  @fetch_range.(byte_start, byte_length)
end
|
|
796
|
+
|
|
797
|
+
# Decide the layout from the preamble flags and parse the matching tail.
#
# Raises NxsError when both layout flags are set or when columnar is combined
# with a zero preamble tail pointer. Columnar and PAX defer to their footer
# parsers. For row layout a zero preamble tail pointer means the pointer is
# read from the footer (FOOTER_ROW_BYTES before the end); the record count is
# the u32 at the tail pointer and the tail index begins right after it.
def parse_layout_tail!(preamble_tail)
  columnar = (@flags & FLAG_COLUMNAR) != 0
  pax = (@flags & FLAG_PAX) != 0
  raise NxsError.new('ERR_INVALID_FLAGS', 'columnar and PAX both set') if columnar && pax
  raise NxsError.new('ERR_INCOMPATIBLE_FLAGS', 'columnar with TailPtr=0') if columnar && preamble_tail.zero?

  if columnar
    @layout = :columnar
    parse_columnar_footer!
    return
  end
  if pax
    @layout = :pax
    parse_pax_footer!
    return
  end

  @layout = :row
  total = @data.bytesize
  if preamble_tail.zero?
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'streamable footer') if total < 44

    @tail_ptr = @data.unpack1("@#{total - FOOTER_ROW_BYTES}Q<")
  end
  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'tail index') if @tail_ptr + 4 > total

  @record_count = @data.unpack1("@#{@tail_ptr}L<")
  @tail_start = @tail_ptr + 4
end
|
|
827
|
+
|
|
828
|
+
# Parse the columnar footer and per-column tail entries.
#
# The footer occupies the last FOOTER_COL_BYTES bytes and starts with two
# little-endian u64 values: the tail pointer and the record count. The tail
# holds one COL_TAIL_ENTRY_BYTES entry per schema key: a u16 field id at
# offset 0, then the column's u64 byte offset at +4 and u64 length at +12.
# Raises NxsError (ERR_OUT_OF_BOUNDS) for a short file, an entry past the end,
# or a field id outside the schema.
def parse_columnar_footer!
  total = @data.bytesize
  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'columnar footer') if total < FOOTER_COL_BYTES

  @tail_ptr, @record_count = @data.unpack("@#{total - FOOTER_COL_BYTES}Q<Q<")
  @tail_start = @tail_ptr
  key_count = @keys.length
  @col_buf_off = Array.new(key_count)
  @col_buf_len = Array.new(key_count)
  key_count.times do |n|
    entry_at = @tail_start + n * COL_TAIL_ENTRY_BYTES
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'columnar tail entry') if entry_at + COL_TAIL_ENTRY_BYTES > total

    fid, buf_off, buf_len = @data.unpack("@#{entry_at}S<x2Q<Q<")
    raise NxsError.new('ERR_OUT_OF_BOUNDS', "invalid field ID #{fid}") if fid >= key_count

    @col_buf_off[fid] = buf_off
    @col_buf_len[fid] = buf_len
  end
end
|
|
850
|
+
|
|
851
|
+
# Parse the PAX footer and page table, then verify each page's magic.
#
# The footer occupies the last FOOTER_PAX_BYTES bytes: u64 tail pointer, u64
# record count, u32 page count, u32 page size hint. Each PAX_TAIL_ENTRY_BYTES
# tail entry holds u32 page index, u64 first record, u32 record count, u64
# byte offset and u32 byte length. Raises NxsError for a short file, an entry
# past the end, or a page whose first four bytes are not MAGIC_PAGE.
def parse_pax_footer!
  total = @data.bytesize
  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'PAX footer') if total < FOOTER_PAX_BYTES

  @tail_ptr, @record_count, @page_count, @page_size_hint =
    @data.unpack("@#{total - FOOTER_PAX_BYTES}Q<Q<L<L<")
  @tail_start = @tail_ptr
  @page_index = []
  @page_rec_start = []
  @page_rec_count = []
  @page_offset = []
  @page_length = []

  @page_count.times do |n|
    entry_at = @tail_start + n * PAX_TAIL_ENTRY_BYTES
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'PAX tail entry') if entry_at + PAX_TAIL_ENTRY_BYTES > total

    index, rec_start, rec_count, offset, length = @data.unpack("@#{entry_at}L<Q<L<Q<L<")
    @page_index << index
    @page_rec_start << rec_start
    @page_rec_count << rec_count
    @page_offset << offset
    @page_length << length
  end

  @page_count.times do |n|
    at = @page_offset[n]
    magic_ok = at <= total && at + 4 <= total && @data.unpack1("@#{at}L<") == MAGIC_PAGE
    raise NxsError.new('ERR_INVALID_PAGE_MAGIC', 'PAX page magic mismatch') unless magic_ok
  end
end
|
|
885
|
+
|
|
886
|
+
# Size in bytes of a null bitmap for n records: one bit per record, rounded up
# to whole bytes and then up to a multiple of 8 bytes.
def null_bitmap_bytes(n)
  whole_bytes = (n + 7).div(8)
  (whole_bytes + 7).div(8) * 8
end
|
|
890
|
+
|
|
891
|
+
# rubocop:disable Naming/PredicateMethod -- mirrors C col_bit naming
# True when bit rec of the bitmap is set; bits are numbered from the low bit
# of byte 0.
def col_bit(bm, rec)
  byte_index, bit_index = rec.divmod(8)
  bm.getbyte(byte_index)[bit_index] == 1
end
# rubocop:enable Naming/PredicateMethod
|
|
896
|
+
|
|
897
|
+
# True for the two sigils treated as variable-length: 0x22 and 0x3C
# (ASCII '"' and '<').
def var_sigil?(sig)
  case sig
  when 0x22, 0x3C then true
  else false
  end
end
|
|
900
|
+
|
|
901
|
+
# Byte size of an offset table for rc records: rc + 1 entries of 4 bytes.
# Raises NxsError (ERR_OUT_OF_BOUNDS) when that exceeds the whole buffer.
def var_off_bytes_len(rc)
  table_bytes = 4 * (rc + 1)
  return table_bytes unless table_bytes > @data.bytesize

  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var offsets overflow')
end
|
|
907
|
+
|
|
908
|
+
# Total byte length of one field sector that starts at sector_off and holds rc
# records.
#
# Fixed-width fields: null bitmap plus 8 bytes per record. Variable-length
# fields: null bitmap, offset table, and as many value bytes as the final
# offset-table entry says. Raises NxsError when the offset table or the values
# would run past the end of the buffer.
def field_sector_len(sector_off, rc, sigil)
  bitmap_size = null_bitmap_bytes(rc)
  if var_sigil?(sigil)
    offsets_size = var_off_bytes_len(rc)
    offsets_end = sector_off + bitmap_size + offsets_size
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var offsets') if offsets_end > @data.bytesize

    # The last u32 of the offset table is the end of the value bytes.
    values_size = @data.unpack1("@#{offsets_end - 4}L<")
    size = bitmap_size + offsets_size + values_size
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var values') if sector_off + size > @data.bytesize

    size
  else
    bitmap_size + rc * 8
  end
end
|
|
921
|
+
|
|
922
|
+
# Slice one string out of a variable-length column.
# The offset table holds little-endian u32 entries; entries record_index and
# record_index + 1 delimit the value. Returns nil when the table is too short
# or the pair is inverted or ends past the value bytes; otherwise the slice
# tagged as UTF-8.
def var_str_at(offsets, values, record_index)
  return nil if offsets.bytesize < (record_index + 2) * 4

  from, upto = offsets.unpack("@#{record_index * 4}L<L<")
  return nil if upto < from || upto > values.bytesize

  values[from...upto].force_encoding('UTF-8')
end
|
|
933
|
+
|
|
934
|
+
# Split a column sector into [null bitmap, remaining bytes].
# The bitmap length is derived from the record count; raises NxsError
# (ERR_OUT_OF_BOUNDS) when the sector is shorter than the bitmap.
def col_field_parts(slot)
  raw = column_sector(slot)
  bitmap_size = null_bitmap_bytes(@record_count)
  if raw.bytesize < bitmap_size
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'null bitmap')
  end

  bitmap = raw[0, bitmap_size]
  payload = raw[bitmap_size..]
  [bitmap, payload]
end
|
|
941
|
+
|
|
942
|
+
# Set up per-column prefetch state; a no-op unless the layout is columnar.
# Creates the column mutex, the warmed/overlay maps and the fetch counter, and
# records whether a caller-supplied fetch callback is in use. Without one,
# column bytes are sliced from the in-memory buffer.
def init_column_prefetch!(fetch_range: nil)
  return if @layout != :columnar

  @col_mu = Mutex.new
  @col_warmed = {}
  @col_overlay = {}
  @col_fetches = 0
  @col_remote_fetch = !fetch_range.nil?
  buffer = @data
  local_reader = lambda do |off, len|
    buffer[off, len]
  end
  @col_fetch_range = fetch_range || local_reader
end
|
|
953
|
+
|
|
954
|
+
# Returns the raw bytes of the column sector for key +slot+.
#
# When prefetch state exists (@col_warmed is set) and the slot is marked warmed
# with a non-empty overlay, the first `length` bytes of that overlay are
# returned; otherwise the bytes are sliced straight out of @data.
#
# Raises NxsError (ERR_OUT_OF_BOUNDS) for a slot outside the column tables or a
# column range that ends past the buffer.
def column_sector(slot)
  slot_known = !slot.negative? && slot < @col_buf_off.length
  raise NxsError.new('ERR_OUT_OF_BOUNDS', "key slot #{slot}") unless slot_known

  start = @col_buf_off[slot].to_i
  span = @col_buf_len[slot].to_i

  if @col_warmed
    # The overlay tables are guarded by @col_mu; returning from inside the
    # block still releases the mutex.
    @col_mu.synchronize do
      cached = @col_overlay[slot]
      usable = @col_warmed[slot] && cached && !cached.empty?
      return cached[0, span] if usable
    end
  end

  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if start + span > @data.bytesize

  @data[start, span]
end
|
|
969
|
+
|
|
970
|
+
# Splits the variable-length column for +slot+ into its three parts.
#
# Returns [null_bitmap, offset_table, value_bytes], with the offset table sized
# for @record_count records.
# Raises NxsError (ERR_OUT_OF_BOUNDS) when the bytes after the bitmap are too
# few to hold the offset table.
def col_var_parts(slot)
  bitmap, rest = col_field_parts(slot)
  table_len = var_off_bytes_len(@record_count)
  if rest.bytesize < table_len
    raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var offsets')
  end

  [bitmap, rest[0, table_len], rest[table_len..]]
end
|
|
977
|
+
|
|
978
|
+
# Locates the variable-length column parts that cover record +rec+ of +slot+.
#
# Returns [null_bitmap, offset_table, value_bytes, true] on success, or
# [nil, nil, nil, false] when the slot is unknown or not variable-length, the
# layout is neither :columnar nor :pax, or (PAX) no page or sector covers the
# record. For :columnar the parts span the whole column; for :pax they belong
# to the page containing +rec+.
def col_var_parts_at(rec, slot)
  unavailable = [nil, nil, nil, false]
  slot_known = !slot.negative? && slot < @key_sigils.length
  return unavailable unless slot_known && var_sigil?(@key_sigils[slot])

  case @layout
  when :columnar
    bitmap, table, payload = col_var_parts(slot)
    [bitmap, table, payload, true]
  when :pax
    hit = pax_find_page(rec)
    return unavailable unless hit

    page = hit[:page]
    bitmap, rest = page_field_parts(page, slot)
    return unavailable unless bitmap

    # The offset table is sized by the page's own record count.
    table_len = var_off_bytes_len(@page_rec_count[page])
    return unavailable if rest.bytesize < table_len

    [bitmap, rest[0, table_len], rest[table_len..], true]
  else
    unavailable
  end
end
|
|
1000
|
+
|
|
1001
|
+
# Fetches the raw 8-byte cell stored for record +rec+ in fixed-width field
# +slot+.
#
# Returns an 8-byte String, or nil when the slot is a variable-length field,
# the record index is out of range, the record's null-bitmap bit is clear, the
# cell would extend past the column bytes, no PAX page/sector covers the
# record, or the layout is neither :columnar nor :pax.
# Errors raised by col_field_parts (for example an unknown slot in the columnar
# layout) propagate unchanged.
def col_numeric_bytes(rec, slot)
  return nil if slot >= 0 && slot < @key_sigils.length && var_sigil?(@key_sigils[slot])

  if @layout == :columnar
    bm, vals = col_field_parts(slot)
    # Reject negative indexes explicitly: Ruby's negative indexing would
    # otherwise wrap around and hand back a cell from the end of the column.
    return nil if rec.negative? || rec >= @record_count || !col_bit(bm, rec)

    off = rec * 8
    return nil if off + 8 > vals.bytesize

    return vals[off, 8]
  end

  if @layout == :pax
    loc = pax_find_page(rec)
    return nil unless loc

    bm, vals = page_field_parts(loc[:page], slot)
    return nil unless bm && col_bit(bm, loc[:local])

    # Cells are addressed by the record's position inside its page.
    off = loc[:local] * 8
    return nil if off + 8 > vals.bytesize

    return vals[off, 8]
  end

  nil
end
|
|
1027
|
+
|
|
1028
|
+
# Binary-searches the page tables for the page whose record range
# [start, start + count) contains +rec+.
#
# Returns { page: page_index, local: index_within_page }, or nil when there
# are no pages or no page covers the record.
def pax_find_page(rec)
  return nil if @page_count.zero?

  low = 0
  high = @page_count - 1
  until low > high
    probe = low + (high - low) / 2
    first = @page_rec_start[probe]
    if rec < first
      high = probe - 1
    elsif rec < first + @page_rec_count[probe]
      return { page: probe, local: rec - first }
    else
      low = probe + 1
    end
  end
  nil
end
|
|
1047
|
+
|
|
1048
|
+
# Returns the raw bytes of field +slot+ inside PAX page +pi+.
#
# The page must start with the MAGIC_PAGE u32 and a 24-byte header whose u16 at
# byte 20 is the page's field count. Field sectors follow the header back to
# back, so the sectors of all earlier slots are measured and skipped first.
#
# Returns nil when the header does not fit in @data, the magic does not match,
# the slot is outside the page's field count, the field count exceeds the
# known keys, or the sector would end past the buffer. Errors raised by
# field_sector_len propagate unchanged.
def page_field_sector(pi, slot)
  page_start = @page_offset[pi].to_i
  header_end = page_start + 24
  return nil if header_end > @data.bytesize
  return nil unless @data.unpack1("@#{page_start}L<") == MAGIC_PAGE

  field_count = @data.unpack1("@#{page_start + 20}S<")
  return nil if slot.negative? || slot >= field_count || field_count > @key_sigils.length

  records = @page_rec_count[pi]
  # Slots without a recorded sigil are measured as sigil 0x3D.
  sigil_of = ->(idx) { idx < @key_sigils.length ? @key_sigils[idx] : 0x3D }

  cursor = (0...slot).reduce(header_end) do |pos, idx|
    pos + field_sector_len(pos, records, sigil_of.call(idx))
  end

  span = field_sector_len(cursor, records, sigil_of.call(slot))
  return nil if cursor + span > @data.bytesize

  @data[cursor, span]
end
|
|
1068
|
+
|
|
1069
|
+
# Splits the sector of field +slot+ in PAX page +pi+ into its leading parts.
#
# Returns [null_bitmap, rest], where the bitmap is sized for the page's record
# count, or [nil, nil] when the sector is unavailable or shorter than the
# bitmap.
def page_field_parts(pi, slot)
  missing = [nil, nil]
  field = page_field_sector(pi, slot)
  return missing if field.nil? || field == false

  bitmap_len = null_bitmap_bytes(@page_rec_count[pi])
  return missing unless field.bytesize >= bitmap_len

  [field[0, bitmap_len], field[bitmap_len..]]
end
|
|
1078
|
+
|
|
1079
|
+
# Sums field +slot+ across every PAX page, reading each present cell as a
# little-endian double.
#
# Pages whose sector is unavailable, records whose null-bitmap bit is clear,
# and cells that would extend past the sector bytes are skipped. Returns 0.0
# when nothing contributes.
def pax_sum_f64(slot)
  total = 0.0
  @page_count.times do |page|
    bitmap, cells = page_field_parts(page, slot)
    next unless bitmap

    (0...@page_rec_count[page]).each do |row|
      next unless col_bit(bitmap, row)

      pos = row * 8
      total += cells.unpack1("@#{pos}E") if pos + 8 <= cells.bytesize
    end
  end
  total
end
|
|
1097
|
+
|
|
208
1098
|
def read_schema(offset)
|
|
209
1099
|
key_count = @data.unpack1("@#{offset}S<")
|
|
210
1100
|
offset += 2
|
|
@@ -438,13 +1328,19 @@ module Nxs
|
|
|
438
1328
|
# ── Object ───────────────────────────────────────────────────────────────────
|
|
439
1329
|
|
|
440
1330
|
class Object
|
|
441
|
-
def initialize(reader, offset)
|
|
442
|
-
@reader
|
|
443
|
-
@offset
|
|
444
|
-
@
|
|
1331
|
+
# Builds a lazy view over one record served by +reader+.
#
# reader       - the reader the accessors delegate to (its key_index, layout,
#                data and column helpers are used by the getters below).
# offset       - position of this object; obj_at_nyxo? probes reader.data at
#                this byte offset for the NYXO magic.
# record_index - optional record number; when nil, #record_index falls back to
#                +offset+.
def initialize(reader, offset, record_index = nil)
  @reader = reader
  @offset = offset
  @record_index = record_index
  # Nothing is parsed up front; parse_header returns immediately once @parsed
  # is truthy.
  @parsed = false
end
|
|
446
1337
|
|
|
447
1338
|
def get_str(key)
|
|
1339
|
+
slot = @reader.key_index[key]
|
|
1340
|
+
return nil unless slot
|
|
1341
|
+
|
|
1342
|
+
return @reader.col_get_str(key, record_index) if uses_columnar_field_access?
|
|
1343
|
+
|
|
448
1344
|
off = field_offset(key)
|
|
449
1345
|
return nil unless off
|
|
450
1346
|
|
|
@@ -453,6 +1349,16 @@ module Nxs
|
|
|
453
1349
|
end
|
|
454
1350
|
|
|
455
1351
|
def get_i64(key)
|
|
1352
|
+
slot = @reader.key_index[key]
|
|
1353
|
+
return nil unless slot
|
|
1354
|
+
|
|
1355
|
+
if uses_columnar_field_access?
|
|
1356
|
+
cell = @reader.send(:col_numeric_bytes, record_index, slot)
|
|
1357
|
+
return nil unless cell
|
|
1358
|
+
|
|
1359
|
+
return cell.unpack1('q<')
|
|
1360
|
+
end
|
|
1361
|
+
|
|
456
1362
|
off = field_offset(key)
|
|
457
1363
|
return nil unless off
|
|
458
1364
|
|
|
@@ -460,6 +1366,16 @@ module Nxs
|
|
|
460
1366
|
end
|
|
461
1367
|
|
|
462
1368
|
def get_f64(key)
|
|
1369
|
+
slot = @reader.key_index[key]
|
|
1370
|
+
return nil unless slot
|
|
1371
|
+
|
|
1372
|
+
if uses_columnar_field_access?
|
|
1373
|
+
cell = @reader.send(:col_numeric_bytes, record_index, slot)
|
|
1374
|
+
return nil unless cell
|
|
1375
|
+
|
|
1376
|
+
return cell.unpack1('E')
|
|
1377
|
+
end
|
|
1378
|
+
|
|
463
1379
|
off = field_offset(key)
|
|
464
1380
|
return nil unless off
|
|
465
1381
|
|
|
@@ -467,6 +1383,16 @@ module Nxs
|
|
|
467
1383
|
end
|
|
468
1384
|
|
|
469
1385
|
def get_bool(key)
|
|
1386
|
+
slot = @reader.key_index[key]
|
|
1387
|
+
return nil unless slot
|
|
1388
|
+
|
|
1389
|
+
if uses_columnar_field_access?
|
|
1390
|
+
cell = @reader.send(:col_numeric_bytes, record_index, slot)
|
|
1391
|
+
return nil unless cell
|
|
1392
|
+
|
|
1393
|
+
return cell.getbyte(0) != 0
|
|
1394
|
+
end
|
|
1395
|
+
|
|
470
1396
|
off = field_offset(key)
|
|
471
1397
|
return nil unless off
|
|
472
1398
|
|
|
@@ -475,6 +1401,21 @@ module Nxs
|
|
|
475
1401
|
|
|
476
1402
|
private
|
|
477
1403
|
|
|
1404
|
+
# Record number used for columnar/PAX lookups: the explicit index given at
# construction, or @offset when none was supplied.
def record_index
  return @offset if @record_index.nil?

  @record_index
end
|
|
1407
|
+
|
|
1408
|
+
# True when the reader's buffer holds the NYXO object magic (little-endian u32)
# at @offset; false when fewer than four bytes remain there.
def obj_at_nyxo?
  buffer = @reader.data
  magic_fits = @offset + 4 <= buffer.bytesize
  magic_fits && buffer.unpack1("@#{@offset}L<") == MAGIC_OBJ
end
|
|
1413
|
+
|
|
1414
|
+
# Columnar/PAX top-level records use record index; nested NYXO blobs use row paths.
|
|
1415
|
+
def uses_columnar_field_access?
  # Row-layout readers always go through the per-object field table.
  return false if @reader.layout == :row

  # In the other layouts, an object that actually sits on a NYXO blob is still
  # read through the row path.
  !obj_at_nyxo?
end
|
|
1418
|
+
|
|
478
1419
|
# Parse the object header (lazy — only on first field access).
|
|
479
1420
|
def parse_header
|
|
480
1421
|
return if @parsed
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: nyxis
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Micael Malta
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
Pure-Ruby reader for NXB files produced by the NXS compiler. Provides
|