nexus-standard 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +101 -0
  4. data/nxs.rb +327 -0
  5. metadata +51 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 995c29cd7138de91cb54cc1466b42115597d2575fb0d4422e0ab8020a3f42817
4
+ data.tar.gz: 93514584b431ceead1fdfbdb59332657b134dfd1351c944776c0b24c85ab7103
5
+ SHA512:
6
+ metadata.gz: d9cef581fd5964f14dada1b0230c180c0085ee2b0844ba5639794e3ca1a1c22a6482b21edd07733095a6fe93cbb1bbaf3f53308e1982c53625b9bf1a89ba1203
7
+ data.tar.gz: f958845efc6bc1be795aef861acb578cfdc7c67f38ac886e4c84a2fa71d0335f3347e533a702b9b1ba08ea55cb0172e8ecfcf6ad505e49aca9c4f195474d643b
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Micael Malta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,101 @@
1
+ # NXS — Ruby
2
+
3
+ Zero-copy `.nxb` reader for Ruby 3.x. Pure-Ruby implementation with an optional C extension for hot-path columnar scans. No gems required.
4
+
5
+ ## Requirements
6
+
7
+ Ruby 3.0+. The C extension requires a C compiler and Ruby headers (`ruby-dev` / `ruby-devel`).
8
+
9
+ ## Read a file
10
+
11
+ ```ruby
12
+ require_relative "nxs"
13
+
14
+ bytes = File.binread("data.nxb")
15
+ reader = Nxs::Reader.new(bytes)
16
+
17
+ puts reader.record_count # instant — read from tail-index, no parse pass
18
+ obj = reader.record(42) # O(1) seek
19
+ puts obj.get_str("username")
20
+ puts obj.get_f64("score")
21
+ puts obj.get_bool("active")
22
+ puts obj.get_i64("id")
23
+ ```
24
+
25
+ ## Columnar reducers
26
+
27
+ ```ruby
28
+ total = reader.sum_f64("score")
29
+ low = reader.min_f64("score")
30
+ high = reader.max_f64("score")
31
+ ages = reader.sum_i64("age")
32
+ ```
33
+
34
+ ## C extension (hot path)
35
+
36
+ Build once:
37
+
38
+ ```bash
39
+ bash ext/build.sh
40
+ ```
41
+
42
+ ```ruby
43
+ require_relative "ext/nxs/nxs_ext" # loads Nxs::CReader and Nxs::CObject
44
+
45
+ reader = Nxs::CReader.new(bytes)
46
+ puts reader.record(42).get_str("username")
47
+ puts reader.sum_f64("score") # 6.78 ms at 1M records vs 942 ms pure Ruby
48
+ ```
49
+
50
+ At 1M records the C extension is **139× faster** than pure Ruby for `sum_f64`, and **5.6× faster** than `JSON.parse`.
51
+
52
+ ## Write a file
53
+
54
+ ```ruby
55
+ require_relative "nxs_writer"
56
+
57
+ schema = Nxs::Schema.new(["id", "username", "score", "active"])
58
+ w = Nxs::Writer.new(schema)
59
+
60
+ w.begin_object
61
+ w.write_i64(0, 42)
62
+ w.write_str(1, "alice")
63
+ w.write_f64(2, 9.5)
64
+ w.write_bool(3, true)
65
+ w.end_object
66
+
67
+ data = w.finish # binary String (encoding ASCII-8BIT)
68
+
69
+ # Convenience: write from an array of hashes
70
+ data2 = Nxs::Writer.from_records(
71
+ ["id", "username", "score"],
72
+ [{ "id" => 1, "username" => "bob", "score" => 8.2 }]
73
+ )
74
+ ```
75
+
76
+ ## Tests
77
+
78
+ ```bash
79
+ ruby test.rb ../js/fixtures # 22 tests
80
+ ```
81
+
82
+ ## Benchmarks
83
+
84
+ ```bash
85
+ ruby bench.rb ../js/fixtures # pure Ruby vs JSON
86
+ ruby bench_c.rb ../js/fixtures # C extension vs JSON
87
+ ```
88
+
89
+ ## Files
90
+
91
+ | File | Purpose |
92
+ | :--- | :--- |
93
+ | `nxs.rb` | Pure-Ruby reader (`Nxs::Reader`, `Nxs::Object`) |
94
+ | `nxs_writer.rb` | Pure-Ruby writer (`Nxs::Schema`, `Nxs::Writer`) |
95
+ | `ext/nxs/nxs_ext.c` | C extension source (`Nxs::CReader`, `Nxs::CObject`) |
96
+ | `ext/nxs/extconf.rb` | Extension build configuration |
97
+ | `ext/build.sh` | Compiles the C extension |
98
+
99
+ ---
100
+
101
+ For the format specification see [`SPEC.md`](../SPEC.md). For cross-language examples see [`GETTING_STARTED.md`](../GETTING_STARTED.md).
data/nxs.rb ADDED
@@ -0,0 +1,327 @@
1
+ # frozen_string_literal: true
2
+
3
+ # NXS Reader — .nxb parser (Ruby 3.x, stdlib only).
4
+ #
5
+ # Implements Nexus Standard v1.0 binary wire format.
6
+ #
7
+ # Usage:
8
+ # buf = File.binread("data.nxb")
9
+ # reader = Nxs::Reader.new(buf)
10
+ # reader.record_count # => Integer
11
+ # reader.keys # => Array<String>
12
+ # obj = reader.record(42) # => Nxs::Object
13
+ # obj.get_str("username") # => String | nil
14
+ # obj.get_i64("id") # => Integer | nil
15
+ # obj.get_f64("score") # => Float | nil
16
+ # obj.get_bool("active") # => true/false | nil
17
+ # reader.sum_f64("score") # => Float
18
+ # reader.min_f64("score") # => Float | nil
19
+ # reader.max_f64("score") # => Float | nil
20
+ # reader.sum_i64("id") # => Integer
21
+
22
module Nxs
  MAGIC_FILE = 0x4E585342 # NXSB
  MAGIC_OBJ = 0x4E58534F # NXSO
  MAGIC_FOOTER = 0x2153584E # NXS!
  FLAG_SCHEMA = 0x0002

  # Error raised for every malformed-input condition detected by the reader.
  # `code` carries the machine-readable identifier (e.g. 'ERR_BAD_MAGIC');
  # the exception message is "<code>: <msg>".
  class NxsError < StandardError
    attr_reader :code

    def initialize(code, msg)
      super("#{code}: #{msg}")
      @code = code
    end
  end

  # Bounds-checked little helper around String#unpack1.
  #
  # Reads one value described by `directive` (a pack directive such as 'Q<')
  # that occupies `width` bytes starting at byte `offset` of `data`.
  # Raises NxsError('ERR_OUT_OF_BOUNDS') instead of returning nil or raising
  # ArgumentError when the read would fall outside the buffer.
  # Internal helper shared by Reader and Object.
  def self.unpack_at(data, offset, directive, width)
    if offset.negative? || offset + width > data.bytesize
      raise NxsError.new('ERR_OUT_OF_BOUNDS', "cannot read #{width} bytes at offset #{offset}")
    end

    data.unpack1("@#{offset}#{directive}")
  end

  # ── Reader ──────────────────────────────────────────────────────────────────

  # Parses the file header, the optional schema and the tail-index header of a
  # .nxb buffer, then serves records and columnar reductions from it.
  class Reader
    # Size in bytes of one tail-index entry: u16 KeyID + u64 AbsoluteOffset.
    TAIL_ENTRY_SIZE = 10

    # Multipliers and 64-bit mask used by #murmur3_64 (these are the
    # MurmurHash3 fmix64 constants).
    MURMUR_C1 = 0xFF51AFD7ED558CCD
    MURMUR_C2 = 0xC4CEB9FE1A85EC53
    MURMUR_MASK = 0xFFFFFFFFFFFFFFFF

    attr_reader :keys, :record_count

    # Exposed for Nxs::Object, which reads field values straight from the buffer.
    attr_reader :data, :key_index

    # bytes - String holding a complete .nxb file.
    #
    # Raises NxsError with code:
    #   ERR_OUT_OF_BOUNDS  - buffer shorter than 32 bytes, or the tail pointer /
    #                        entry count point outside the buffer
    #   ERR_BAD_MAGIC      - leading or trailing magic number is wrong
    #   ERR_DICT_MISMATCH  - schema bytes do not hash to the header's DictHash
    def initialize(bytes)
      @data = bytes.b # force binary encoding
      sz = @data.bytesize
      raise NxsError.new('ERR_OUT_OF_BOUNDS', 'file too small') if sz < 32

      magic = @data.unpack1('L<')
      raise NxsError.new('ERR_BAD_MAGIC', "expected NXSB, got 0x#{magic.to_s(16)}") if magic != MAGIC_FILE

      footer = @data.unpack1("@#{sz - 4}L<")
      raise NxsError.new('ERR_BAD_MAGIC', 'footer magic mismatch') if footer != MAGIC_FOOTER

      # Preamble after the magic:
      # Version(2) + Flags(2) + DictHash(8) + TailPtr(8) + Reserved(8)
      @flags = @data.unpack1('@6 S<')
      @dict_hash = @data.unpack1('@8 Q<')
      @tail_ptr = @data.unpack1('@16 Q<')

      # Schema (when Flags bit 1 set)
      @keys = []
      @key_sigils = []
      @key_index = {}
      if @flags & FLAG_SCHEMA != 0
        schema_end = read_schema(32)
        computed = murmur3_64(@data[32...schema_end].bytes)
        raise NxsError.new('ERR_DICT_MISMATCH', 'schema hash mismatch') if computed != @dict_hash
      end

      # Tail-index: u32 EntryCount followed by fixed-size entries. Validate both
      # the pointer and the claimed entry count up front so that later lookups
      # never read past the end of the buffer.
      if @tail_ptr + 4 > sz
        raise NxsError.new('ERR_OUT_OF_BOUNDS', "tail pointer #{@tail_ptr} outside file of #{sz} bytes")
      end

      @record_count = @data.unpack1("@#{@tail_ptr}L<")
      @tail_start = @tail_ptr + 4
      return unless @tail_start + @record_count * TAIL_ENTRY_SIZE > sz

      raise NxsError.new('ERR_OUT_OF_BOUNDS', "tail-index claims #{@record_count} entries but file is too short")
    end

    # O(1) record lookup — reads one 10-byte tail-index entry.
    #
    # Returns an Nxs::Object positioned at record `i`.
    # Raises NxsError('ERR_OUT_OF_BOUNDS') when `i` is not in [0, record_count).
    def record(i)
      if i < 0 || i >= @record_count
        raise NxsError.new('ERR_OUT_OF_BOUNDS', "record #{i} out of [0, #{@record_count})")
      end

      # Skip the u16 KeyID; the u64 that follows is the object's absolute offset.
      abs_offset = @data.unpack1("@#{@tail_start + i * TAIL_ENTRY_SIZE + 2}Q<")
      Object.new(self, abs_offset)
    end

    # Sum of the f64 field `key` over all records that contain it (0.0 if none).
    # Raises NxsError('ERR_OUT_OF_BOUNDS') when `key` is not in the schema.
    def sum_f64(key)
      total = 0.0
      each_value(key, 'E', 8) { |v| total += v }
      total
    end

    # Smallest f64 value of field `key`, or nil when no record contains it.
    # Raises NxsError('ERR_OUT_OF_BOUNDS') when `key` is not in the schema.
    def min_f64(key)
      lowest = nil
      each_value(key, 'E', 8) { |v| lowest = v if lowest.nil? || v < lowest }
      lowest
    end

    # Largest f64 value of field `key`, or nil when no record contains it.
    # Raises NxsError('ERR_OUT_OF_BOUNDS') when `key` is not in the schema.
    def max_f64(key)
      highest = nil
      each_value(key, 'E', 8) { |v| highest = v if highest.nil? || v > highest }
      highest
    end

    # Sum of the i64 field `key` over all records that contain it (0 if none).
    # Raises NxsError('ERR_OUT_OF_BOUNDS') when `key` is not in the schema.
    def sum_i64(key)
      total = 0
      each_value(key, 'q<', 8) { |v| total += v }
      total
    end

    # Walk the LEB128 bitmask from obj_offset+8, count set bits before `slot`,
    # and return the absolute byte offset of the field value (or nil if absent).
    # Used by both the bulk reducers and Nxs::Object.
    #
    # Each bitmask byte contributes 7 presence bits (low bit first); the high
    # bit marks that another bitmask byte follows. The u16 offset table starts
    # right after the last bitmask byte and holds one entry per present field,
    # each relative to obj_offset.
    #
    # Raises NxsError('ERR_OUT_OF_BOUNDS') when the bitmask or the offset table
    # runs past the end of `data`.
    def _scan_offset(data, obj_offset, slot)
      pos = obj_offset + 8 # skip Magic(4) + Length(4)
      if obj_offset.negative? || pos >= data.bytesize
        raise NxsError.new('ERR_OUT_OF_BOUNDS', "object at #{obj_offset} lies outside the buffer")
      end

      cur = 0   # schema slot the current bit corresponds to
      t_idx = 0 # number of present fields seen before `slot`

      loop do
        b = data.getbyte(pos)
        raise NxsError.new('ERR_OUT_OF_BOUNDS', 'bitmask runs past end of buffer') if b.nil?

        pos += 1
        bits = b & 0x7F
        7.times do |bit|
          if cur == slot
            # field absent if bit is 0
            return nil if bits[bit].zero?

            # pos is already past this bitmask byte; drain remaining continuation bytes
            while (b & 0x80) != 0
              b = data.getbyte(pos)
              raise NxsError.new('ERR_OUT_OF_BOUNDS', 'bitmask runs past end of buffer') if b.nil?

              pos += 1
            end
            # pos now points to the offset table
            rel = Nxs.unpack_at(data, pos + t_idx * 2, 'S<', 2)
            return obj_offset + rel
          end
          t_idx += 1 if bits[bit] == 1
          cur += 1
        end
        # All 7 bits consumed and no continuation byte: the slot is beyond the
        # bitmask, so the field is absent.
        return nil if (b & 0x80).zero?
      end
    end

    private

    # Yield the value of field `key` for every record that contains it.
    #
    # directive/width describe the value's pack directive and byte size.
    # Shared loop behind the sum/min/max reducers. A format string is built per
    # record, so this is not allocation-free.
    def each_value(key, directive, width)
      slot = @key_index[key]
      raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot

      data = @data
      entry = @tail_start + 2 # position of the first entry's u64 offset
      remaining = @record_count
      while remaining.positive?
        abs = data.unpack1("@#{entry}Q<")
        off = _scan_offset(data, abs, slot)
        yield Nxs.unpack_at(data, off, directive, width) if off
        entry += TAIL_ENTRY_SIZE
        remaining -= 1
      end
    end

    # Parse the schema that starts at byte `offset`:
    # u16 KeyCount, KeyCount sigil bytes, then KeyCount NUL-terminated UTF-8
    # key names, padded to an 8-byte boundary.
    #
    # Fills @key_sigils, @keys and @key_index and returns the offset just past
    # the padding. Raises NxsError('ERR_OUT_OF_BOUNDS') on a truncated schema.
    def read_schema(offset)
      key_count = @data.unpack1("@#{offset}S<")
      raise NxsError.new('ERR_OUT_OF_BOUNDS', 'truncated schema header') if key_count.nil?

      offset += 2

      sigils = @data[offset, key_count]
      if sigils.nil? || sigils.bytesize < key_count
        raise NxsError.new('ERR_OUT_OF_BOUNDS', 'truncated schema sigil table')
      end

      @key_sigils = sigils.bytes
      offset += key_count

      # Null-terminated UTF-8 strings in StringPool. Search the buffer in place
      # rather than slicing off the rest of the file first.
      key_count.times do |slot|
        term = @data.index("\x00", offset)
        raise NxsError.new('ERR_OUT_OF_BOUNDS', "unterminated schema key #{slot}") unless term

        name = @data[offset...term].force_encoding('UTF-8')
        @keys << name
        @key_index[name] = slot
        offset = term + 1
      end

      # Pad to 8-byte boundary
      offset + (8 - offset % 8) % 8
    end

    # 64-bit hash of an Array of byte values; compared against the header's
    # DictHash. Bytes are consumed in little-endian 8-byte chunks (the final
    # chunk may be shorter), and the total length is mixed in at the end.
    def murmur3_64(bytes)
      h = 0x93681D6255313A99 # seed
      bytes.each_slice(8) do |chunk|
        k = 0
        chunk.each_with_index { |byte, j| k |= byte << (j * 8) }
        k = (k * MURMUR_C1) & MURMUR_MASK
        k ^= k >> 33
        h ^= k
        h = (h * MURMUR_C2) & MURMUR_MASK
        h ^= h >> 33
      end
      h ^= bytes.length
      h ^= h >> 33
      h = (h * MURMUR_C1) & MURMUR_MASK
      h ^= h >> 33
      h
    end
  end

  # ── Object ───────────────────────────────────────────────────────────────────

  # Lazy view of one record inside a Reader's buffer. Nothing is decoded until a
  # getter is called. Note: inside the Nxs namespace the bare constant `Object`
  # refers to this class, not to ::Object.
  class Object
    # reader - the Nxs::Reader that owns the buffer
    # offset - absolute byte offset of the object's NXSO magic
    def initialize(reader, offset)
      @reader = reader
      @offset = offset
      @header_checked = false
    end

    # String value of `key` (UTF-8 tagged), or nil when the key is unknown or
    # the field is absent. The value is a u32 byte length followed by the bytes.
    def get_str(key)
      off = field_offset(key)
      return nil unless off

      bytes = @reader.data
      len = Nxs.unpack_at(bytes, off, 'L<', 4)
      if off + 4 + len > bytes.bytesize
        raise NxsError.new('ERR_OUT_OF_BOUNDS', "string of #{len} bytes at #{off} exceeds buffer")
      end

      bytes[off + 4, len].force_encoding('UTF-8')
    end

    # Signed 64-bit integer value of `key`, or nil when unknown/absent.
    def get_i64(key)
      off = field_offset(key)
      return nil unless off

      Nxs.unpack_at(@reader.data, off, 'q<', 8)
    end

    # Float (little-endian f64) value of `key`, or nil when unknown/absent.
    def get_f64(key)
      off = field_offset(key)
      return nil unless off

      Nxs.unpack_at(@reader.data, off, 'E', 8)
    end

    # Boolean value of `key` (any non-zero byte is true), or nil when
    # unknown/absent.
    def get_bool(key)
      off = field_offset(key)
      return nil unless off

      Nxs.unpack_at(@reader.data, off, 'C', 1) != 0
    end

    private

    # Check the object header once, on first field access.
    # Raises NxsError('ERR_BAD_MAGIC') when the bytes at @offset are not NXSO,
    # or NxsError('ERR_OUT_OF_BOUNDS') when @offset is outside the buffer.
    def verify_header
      return if @header_checked

      magic = Nxs.unpack_at(@reader.data, @offset, 'L<', 4)
      raise NxsError.new('ERR_BAD_MAGIC', "expected NXSO at #{@offset}") if magic != MAGIC_OBJ

      @header_checked = true
    end

    # Return the absolute byte offset of the field for `key`, or nil.
    def field_offset(key)
      slot = @reader.key_index[key]
      return nil unless slot

      verify_header
      # Delegate to Reader's scan logic (same implementation, avoids duplication)
      @reader._scan_offset(@reader.data, @offset, slot)
    end
  end
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nexus-standard
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Micael Malta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-05-01 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |
14
+ Pure-Ruby reader for NXB files produced by the NXS compiler. Provides
15
+ in-place access to typed records held in an in-memory buffer, with O(1)
16
+ random access via the tail-index.
17
+ email:
18
+ - micael@example.com
19
+ executables: []
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - LICENSE
24
+ - README.md
25
+ - nxs.rb
26
+ homepage: https://github.com/micaelmalta/nexus-standard
27
+ licenses:
28
+ - MIT
29
+ metadata:
30
+ source_code_uri: https://github.com/micaelmalta/nexus-standard
31
+ changelog_uri: https://github.com/micaelmalta/nexus-standard/releases
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - "."
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '3.0'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubygems_version: 3.5.22
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Zero-copy reader for the Nexus Standard (NXS) binary format
51
+ test_files: []