archive_r_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +56 -0
  3. data/README.md +103 -0
  4. data/ext/archive_r/archive_r_ext.cc +910 -0
  5. data/ext/archive_r/extconf.rb +90 -0
  6. data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
  7. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
  8. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
  9. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
  10. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
  15. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
  16. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
  17. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
  18. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
  19. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
  20. data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
  21. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
  22. data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
  23. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
  24. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
  25. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
  26. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
  27. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
  28. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
  29. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
  30. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
  31. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
  32. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
  33. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
  34. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
  35. data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
  36. data/lib/archive_r.rb +80 -0
  37. metadata +112 -0
@@ -0,0 +1,910 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #include "archive_r/data_stream.h"
5
+ #include "archive_r/entry.h"
6
+ #include "archive_r/entry_fault.h"
7
+ #include "archive_r/path_hierarchy_utils.h"
8
+ #include "archive_r/traverser.h"
9
+ #include <cctype>
10
+ #include <cstring>
11
+ #include <memory>
12
+ #include <ruby.h>
13
+ #include <stdexcept>
14
+ #include <string>
15
+ #include <utility>
16
+ #include <variant>
17
+ #include <vector>
18
+ #include <limits>
19
+
20
+ using namespace archive_r;
21
+
22
// Ruby module and class references
static VALUE mArchive_r;  // module Archive_r
static VALUE cTraverser;  // class Archive_r::Traverser
static VALUE cEntry;      // class Archive_r::Entry

// Method-name IDs, interned once in Init_archive_r().
static ID rb_id_read_method;
static ID rb_id_rewind_method;
static ID rb_id_seek_method;
static ID rb_id_tell_method;
static ID rb_id_eof_method;
static ID rb_id_call_method;

struct RubyCallbackHolder;
// Keeps the registered stream-factory proc GC-pinned for the life of the
// process; cleared in archive_r_cleanup() / register_stream_factory(nil).
static std::shared_ptr<RubyCallbackHolder> g_stream_factory_callback;
34
+
35
+ // Helper: Convert Ruby string to C++ string
36
+ static std::string rb_string_to_cpp(VALUE rb_str) {
37
+ Check_Type(rb_str, T_STRING);
38
+ return std::string(RSTRING_PTR(rb_str), RSTRING_LEN(rb_str));
39
+ }
40
+
41
// Process-exit hook (installed via rb_set_end_proc in Init_archive_r):
// drops the globally registered callbacks so no pinned Ruby VALUEs remain
// after VM teardown.
static void archive_r_cleanup(VALUE) {
  register_fault_callback(FaultCallback{});      // clear the fault hook
  set_root_stream_factory(RootStreamFactory{});  // restore default factory
  g_stream_factory_callback.reset();             // release the pinned proc
}
46
+
47
// Helper: Convert C++ string to Ruby string (length-based, so embedded NULs survive)
static VALUE cpp_string_to_rb(const std::string &str) { return rb_str_new(str.c_str(), str.length()); }
49
+
50
+ static VALUE path_entry_to_rb(const PathEntry &entry) {
51
+ if (entry.is_single()) {
52
+ return cpp_string_to_rb(entry.single_value());
53
+ }
54
+ if (entry.is_multi_volume()) {
55
+ const auto &parts = entry.multi_volume_parts().values;
56
+ VALUE array = rb_ary_new_capa(parts.size());
57
+ for (const auto &part : parts) {
58
+ rb_ary_push(array, cpp_string_to_rb(part));
59
+ }
60
+ return array;
61
+ }
62
+ VALUE array = rb_ary_new_capa(entry.nested_nodes().size());
63
+ for (const auto &child : entry.nested_nodes()) {
64
+ rb_ary_push(array, path_entry_to_rb(child));
65
+ }
66
+ return array;
67
+ }
68
+
69
+ static VALUE path_hierarchy_to_rb(const PathHierarchy &hierarchy) {
70
+ VALUE array = rb_ary_new_capa(hierarchy.size());
71
+ for (const auto &component : hierarchy) {
72
+ rb_ary_push(array, path_entry_to_rb(component));
73
+ }
74
+ return array;
75
+ }
76
+
77
// Pins a Ruby callable against GC for as long as this holder is alive.
// rb_gc_register_address() makes &proc_value a GC root, so the proc survives
// even though it is referenced only from C++ (callbacks stored in globals
// or lambdas). The destructor removes the root.
struct RubyCallbackHolder {
  explicit RubyCallbackHolder(VALUE proc)
      : proc_value(proc) {
    rb_gc_register_address(&proc_value);
  }

  ~RubyCallbackHolder() { rb_gc_unregister_address(&proc_value); }

  VALUE proc_value;  // the pinned Ruby callable
};
87
+
88
// IDataStream implementation backed by an arbitrary Ruby IO-like object.
// The object must respond to #rewind (and callers ensure #read); #seek,
// #tell and #eof? are used opportunistically when the object provides them.
// The wrapped VALUE is GC-pinned for the stream's lifetime.
//
// NOTE(review): rb_funcall here can raise a Ruby exception, which longjmps
// through the C++ caller of read()/rewind()/at_end() — presumably accepted
// by this binding's design; confirm against the traverser's error handling.
class RubyIOStream : public IDataStream {
public:
  RubyIOStream(VALUE io, PathHierarchy hierarchy)
      : _io(io)
      , _hierarchy(std::move(hierarchy))
      , _at_end(false)
      , _seekable(rb_respond_to(io, rb_id_seek_method))
      , _tellable(rb_respond_to(io, rb_id_tell_method))
      , _rewindable(rb_respond_to(io, rb_id_rewind_method))
      , _has_eof(rb_respond_to(io, rb_id_eof_method)) {
    // Validate before GC registration to avoid resource leak on validation failure
    // (rb_raise longjmps out of the constructor, so registering first would
    // leave a dangling GC root).
    if (!_rewindable) {
      rb_raise(rb_eTypeError, "stream factory IO must respond to #read and #rewind");
    }
    rb_gc_register_address(&_io);
  }

  ~RubyIOStream() override { rb_gc_unregister_address(&_io); }

  // Read up to `size` bytes into `buffer` via IO#read.
  // Returns bytes copied; 0 means EOF (IO#read -> nil) or an empty chunk.
  ssize_t read(void *buffer, size_t size) override {
    if (size == 0) {
      return 0;
    }

    VALUE result = rb_funcall(_io, rb_id_read_method, 1, SIZET2NUM(size));
    if (NIL_P(result)) {
      // IO#read returns nil at end of stream.
      _at_end = true;
      return 0;
    }
    Check_Type(result, T_STRING);
    const ssize_t bytes_read = static_cast<ssize_t>(RSTRING_LEN(result));
    if (bytes_read > 0) {
      std::memcpy(buffer, RSTRING_PTR(result), static_cast<size_t>(bytes_read));
      return bytes_read;
    }

    // Empty (non-nil) chunk: consult #eof? when available to decide whether
    // this is a true end-of-stream.
    if (_has_eof) {
      VALUE eof_val = rb_funcall(_io, rb_id_eof_method, 0);
      _at_end = RTEST(eof_val);
    }
    return 0;
  }

  // Rewind to the start of the stream and clear the cached EOF flag.
  void rewind() override {
    if (!_rewindable) {
      rb_raise(rb_eRuntimeError, "IO object does not respond to #rewind");
    }
    rb_funcall(_io, rb_id_rewind_method, 0);
    _at_end = false;
  }

  // Prefer the IO's own #eof? when present; otherwise use the flag derived
  // from previous read() results.
  bool at_end() const override {
    if (_has_eof) {
      VALUE eof_val = rb_funcall(_io, rb_id_eof_method, 0);
      return RTEST(eof_val);
    }
    return _at_end;
  }

  // Delegate to IO#seek; returns -1 when the IO is not seekable.
  int64_t seek(int64_t offset, int whence) override {
    if (!_seekable) {
      return -1;
    }
    VALUE result = rb_funcall(_io, rb_id_seek_method, 2, LL2NUM(offset), INT2NUM(whence));
    _at_end = false;  // position changed; EOF must be re-detected
    return NUM2LL(result);
  }

  // Delegate to IO#tell; returns -1 when the position cannot be queried.
  int64_t tell() const override {
    if (!_tellable) {
      return -1;
    }
    VALUE result = rb_funcall(_io, rb_id_tell_method, 0);
    return NUM2LL(result);
  }

  bool can_seek() const override { return _seekable; }

  PathHierarchy source_hierarchy() const override { return _hierarchy; }

private:
  VALUE _io;                 // GC-pinned Ruby IO-like object
  PathHierarchy _hierarchy;  // hierarchy this stream was opened for
  mutable bool _at_end;      // cached EOF when the IO has no #eof?
  bool _seekable;
  bool _tellable;
  bool _rewindable;
  bool _has_eof;
};
177
+
178
+ static VALUE entry_fault_to_rb(const EntryFault &fault) {
179
+ VALUE hash = rb_hash_new();
180
+ static ID id_message = rb_intern("message");
181
+ static ID id_errno = rb_intern("errno");
182
+ static ID id_hierarchy = rb_intern("hierarchy");
183
+ static ID id_path = rb_intern("path");
184
+
185
+ rb_hash_aset(hash, ID2SYM(id_message), cpp_string_to_rb(fault.message));
186
+ rb_hash_aset(hash, ID2SYM(id_errno), INT2NUM(fault.errno_value));
187
+ rb_hash_aset(hash, ID2SYM(id_hierarchy), path_hierarchy_to_rb(fault.hierarchy));
188
+ std::string path_string = fault.hierarchy.empty() ? std::string() : hierarchy_display(fault.hierarchy);
189
+ rb_hash_aset(hash, ID2SYM(id_path), cpp_string_to_rb(path_string));
190
+ return hash;
191
+ }
192
+
193
// Build a FaultCallback that forwards each EntryFault to a Ruby callable.
// Returns an empty callback when `callable` is nil (clears the hook).
// Raises TypeError when the callable does not respond to #call.
static FaultCallback make_ruby_fault_callback(VALUE callable) {
  if (NIL_P(callable)) {
    return {};
  }

  static ID id_call = rb_intern("call");
  if (!rb_respond_to(callable, id_call)) {
    rb_raise(rb_eTypeError, "fault callback must respond to #call");
  }

  // The holder GC-pins the callable; the returned lambda shares ownership.
  auto holder = std::make_shared<RubyCallbackHolder>(callable);

  return [holder](const EntryFault &fault) {
    // Payload struct lets the captureless trampoline below (convertible to
    // the C function pointer rb_protect requires) reach the arguments.
    struct InvokePayload {
      std::shared_ptr<RubyCallbackHolder> holder;
      VALUE fault_hash;
    } payload{ holder, entry_fault_to_rb(fault) };

    auto invoke = [](VALUE data) -> VALUE {
      auto *info = reinterpret_cast<InvokePayload *>(data);
      static ID id_call_inner = rb_intern("call");
      return rb_funcall(info->holder->proc_value, id_call_inner, 1, info->fault_hash);
    };

    // rb_protect captures any Ruby exception raised by the user callback...
    int state = 0;
    rb_protect(invoke, reinterpret_cast<VALUE>(&payload), &state);
    if (state != 0) {
      // ...and rb_jump_tag re-raises it. NOTE(review): this longjmps through
      // the C++ frames that invoked the fault callback, skipping their
      // destructors — confirm the traverser tolerates this.
      rb_jump_tag(state);
    }
    return;
  };
}
225
+
226
+ // Helper: Convert Ruby hash options into TraverserOptions
227
+ static void populate_traverser_options(VALUE opts, TraverserOptions &options) {
228
+ if (NIL_P(opts)) {
229
+ return;
230
+ }
231
+
232
+ Check_Type(opts, T_HASH);
233
+
234
+ static ID id_passphrases = rb_intern("passphrases");
235
+ static ID id_formats = rb_intern("formats");
236
+ static ID id_metadata_keys = rb_intern("metadata_keys");
237
+ static ID id_descend_archives = rb_intern("descend_archives");
238
+
239
+ VALUE passphrases_val = rb_hash_aref(opts, ID2SYM(id_passphrases));
240
+ if (!NIL_P(passphrases_val)) {
241
+ Check_Type(passphrases_val, T_ARRAY);
242
+ long len = RARRAY_LEN(passphrases_val);
243
+ options.passphrases.reserve(len);
244
+ for (long i = 0; i < len; ++i) {
245
+ VALUE item = rb_ary_entry(passphrases_val, i);
246
+ options.passphrases.push_back(rb_string_to_cpp(StringValue(item)));
247
+ }
248
+ }
249
+
250
+ VALUE formats_val = rb_hash_aref(opts, ID2SYM(id_formats));
251
+ if (!NIL_P(formats_val)) {
252
+ Check_Type(formats_val, T_ARRAY);
253
+ long len = RARRAY_LEN(formats_val);
254
+ options.formats.reserve(len);
255
+ for (long i = 0; i < len; ++i) {
256
+ VALUE item = rb_ary_entry(formats_val, i);
257
+ options.formats.push_back(rb_string_to_cpp(StringValue(item)));
258
+ }
259
+ }
260
+
261
+ VALUE metadata_val = rb_hash_aref(opts, ID2SYM(id_metadata_keys));
262
+ if (!NIL_P(metadata_val)) {
263
+ Check_Type(metadata_val, T_ARRAY);
264
+ long len = RARRAY_LEN(metadata_val);
265
+ options.metadata_keys.reserve(len);
266
+ for (long i = 0; i < len; ++i) {
267
+ VALUE item = rb_ary_entry(metadata_val, i);
268
+ options.metadata_keys.push_back(rb_string_to_cpp(StringValue(item)));
269
+ }
270
+ }
271
+
272
+ VALUE descend_val = rb_hash_aref(opts, ID2SYM(id_descend_archives));
273
+ if (!NIL_P(descend_val)) {
274
+ options.descend_archives = RTEST(descend_val);
275
+ }
276
+ }
277
+
278
+ static PathEntry rb_value_to_path_entry(VALUE value);
279
+
280
+ static PathHierarchy rb_value_to_path_hierarchy(VALUE value) {
281
+ if (RB_TYPE_P(value, T_STRING)) {
282
+ return make_single_path(rb_string_to_cpp(value));
283
+ }
284
+
285
+ VALUE array = rb_check_array_type(value);
286
+ if (NIL_P(array)) {
287
+ rb_raise(rb_eTypeError, "path hierarchy must be a String or Array");
288
+ }
289
+
290
+ const long length = RARRAY_LEN(array);
291
+ if (length == 0) {
292
+ rb_raise(rb_eArgError, "path hierarchy cannot be empty");
293
+ }
294
+
295
+ PathHierarchy hierarchy;
296
+ hierarchy.reserve(static_cast<size_t>(length));
297
+ for (long i = 0; i < length; ++i) {
298
+ VALUE component = rb_ary_entry(array, i);
299
+ hierarchy.emplace_back(rb_value_to_path_entry(component));
300
+ }
301
+
302
+ return hierarchy;
303
+ }
304
+
305
+ static PathEntry rb_value_to_path_entry(VALUE value) {
306
+ if (RB_TYPE_P(value, T_STRING)) {
307
+ return PathEntry::single(rb_string_to_cpp(value));
308
+ }
309
+
310
+ VALUE array = rb_check_array_type(value);
311
+ if (NIL_P(array)) {
312
+ rb_raise(rb_eTypeError, "PathEntry must be String or Array");
313
+ }
314
+
315
+ const long length = RARRAY_LEN(array);
316
+ if (length == 0) {
317
+ rb_raise(rb_eArgError, "PathEntry array cannot be empty");
318
+ }
319
+
320
+ bool all_strings = true;
321
+ for (long i = 0; i < length; ++i) {
322
+ VALUE element = rb_ary_entry(array, i);
323
+ if (!RB_TYPE_P(element, T_STRING)) {
324
+ all_strings = false;
325
+ break;
326
+ }
327
+ }
328
+
329
+ if (all_strings) {
330
+ std::vector<std::string> parts;
331
+ parts.reserve(static_cast<size_t>(length));
332
+ for (long i = 0; i < length; ++i) {
333
+ parts.emplace_back(rb_string_to_cpp(rb_ary_entry(array, i)));
334
+ }
335
+ return PathEntry::multi_volume(std::move(parts));
336
+ }
337
+
338
+ PathEntry::NodeList nodes;
339
+ nodes.reserve(static_cast<size_t>(length));
340
+ for (long i = 0; i < length; ++i) {
341
+ nodes.emplace_back(rb_value_to_path_entry(rb_ary_entry(array, i)));
342
+ }
343
+ return PathEntry::nested(std::move(nodes));
344
+ }
345
+
346
+ // Helper: Convert Ruby path argument into vector of PathHierarchy
347
+ static std::vector<PathHierarchy> rb_paths_to_hierarchies(VALUE paths) {
348
+ if (RB_TYPE_P(paths, T_STRING)) {
349
+ std::vector<PathHierarchy> result;
350
+ result.emplace_back(make_single_path(rb_string_to_cpp(paths)));
351
+ return result;
352
+ }
353
+
354
+ VALUE array = rb_check_array_type(paths);
355
+ if (NIL_P(array)) {
356
+ rb_raise(rb_eTypeError, "paths must be a String or an Array");
357
+ }
358
+
359
+ const long length = RARRAY_LEN(array);
360
+ if (length == 0) {
361
+ rb_raise(rb_eArgError, "paths cannot be empty");
362
+ }
363
+
364
+ std::vector<PathHierarchy> result;
365
+ result.reserve(static_cast<size_t>(length));
366
+ for (long i = 0; i < length; ++i) {
367
+ VALUE item = rb_ary_entry(array, i);
368
+ result.emplace_back(rb_value_to_path_hierarchy(item));
369
+ }
370
+
371
+ return result;
372
+ }
373
+
374
+ static std::shared_ptr<IDataStream> stream_from_ruby_value(VALUE value, const PathHierarchy &requested) {
375
+ if (NIL_P(value)) {
376
+ return nullptr;
377
+ }
378
+
379
+ if (!rb_respond_to(value, rb_id_read_method)) {
380
+ rb_raise(rb_eTypeError, "stream factory result must respond to #read");
381
+ }
382
+
383
+ return std::make_shared<RubyIOStream>(value, requested);
384
+ }
385
+
386
// Helper: Convert EntryMetadataValue to Ruby object.
// Maps each variant alternative onto a natural Ruby type: nil, true/false,
// Integer, String (binary-safe), and Hash/Array for structured values.
static VALUE metadata_value_to_rb(const EntryMetadataValue &value) {
  struct Visitor {
    VALUE operator()(std::monostate) const { return Qnil; }
    VALUE operator()(bool v) const { return v ? Qtrue : Qfalse; }
    VALUE operator()(int64_t v) const { return LL2NUM(v); }
    VALUE operator()(uint64_t v) const { return ULL2NUM(v); }
    VALUE operator()(const std::string & v) const { return cpp_string_to_rb(v); }
    // Raw bytes -> binary String ("" for an empty buffer).
    VALUE operator()(const std::vector<uint8_t> &v) const {
      if (v.empty()) {
        return rb_str_new(nullptr, 0);
      }
      return rb_str_new(reinterpret_cast<const char *>(v.data()), v.size());
    }
    // Timestamp -> { seconds:, nanoseconds: }
    VALUE operator()(const EntryMetadataTime & v) const {
      VALUE hash = rb_hash_new();
      rb_hash_aset(hash, ID2SYM(rb_intern("seconds")), LL2NUM(v.seconds));
      rb_hash_aset(hash, ID2SYM(rb_intern("nanoseconds")), INT2NUM(v.nanoseconds));
      return hash;
    }
    // Device numbers -> { major:, minor: }
    VALUE operator()(const EntryMetadataDeviceNumbers & v) const {
      VALUE hash = rb_hash_new();
      rb_hash_aset(hash, ID2SYM(rb_intern("major")), ULL2NUM(v.major));
      rb_hash_aset(hash, ID2SYM(rb_intern("minor")), ULL2NUM(v.minor));
      return hash;
    }
    // File flags -> { set:, clear: }
    VALUE operator()(const EntryMetadataFileFlags & v) const {
      VALUE hash = rb_hash_new();
      rb_hash_aset(hash, ID2SYM(rb_intern("set")), ULL2NUM(v.set));
      rb_hash_aset(hash, ID2SYM(rb_intern("clear")), ULL2NUM(v.clear));
      return hash;
    }
    // Extended attributes -> [{ name:, value: }, ...]
    VALUE operator()(const std::vector<EntryMetadataXattr> &vec) const {
      VALUE array = rb_ary_new_capa(vec.size());
      for (const auto &item : vec) {
        VALUE hash = rb_hash_new();
        rb_hash_aset(hash, ID2SYM(rb_intern("name")), cpp_string_to_rb(item.name));
        if (item.value.empty()) {
          rb_hash_aset(hash, ID2SYM(rb_intern("value")), rb_str_new(nullptr, 0));
        } else {
          rb_hash_aset(hash, ID2SYM(rb_intern("value")), rb_str_new(reinterpret_cast<const char *>(item.value.data()), item.value.size()));
        }
        rb_ary_push(array, hash);
      }
      return array;
    }
    // Sparse layout -> [{ offset:, length: }, ...]
    VALUE operator()(const std::vector<EntryMetadataSparseChunk> &vec) const {
      VALUE array = rb_ary_new_capa(vec.size());
      for (const auto &item : vec) {
        VALUE hash = rb_hash_new();
        rb_hash_aset(hash, ID2SYM(rb_intern("offset")), LL2NUM(item.offset));
        rb_hash_aset(hash, ID2SYM(rb_intern("length")), LL2NUM(item.length));
        rb_ary_push(array, hash);
      }
      return array;
    }
    // Digests -> [{ algorithm:, value: }, ...]
    VALUE operator()(const std::vector<EntryMetadataDigest> &vec) const {
      VALUE array = rb_ary_new_capa(vec.size());
      for (const auto &item : vec) {
        VALUE hash = rb_hash_new();
        rb_hash_aset(hash, ID2SYM(rb_intern("algorithm")), cpp_string_to_rb(item.algorithm));
        if (item.value.empty()) {
          rb_hash_aset(hash, ID2SYM(rb_intern("value")), rb_str_new(nullptr, 0));
        } else {
          rb_hash_aset(hash, ID2SYM(rb_intern("value")), rb_str_new(reinterpret_cast<const char *>(item.value.data()), item.value.size()));
        }
        rb_ary_push(array, hash);
      }
      return array;
    }
  } visitor;

  return std::visit(visitor, value);
}
472
+
473
// Archive_r.register_stream_factory(callable = nil) { |hierarchy| io or nil }
// Installs a global factory that can supply IO objects for requested root
// paths; passing nil (and no block) restores the default factory. Returns nil.
static VALUE archive_r_register_stream_factory(int argc, VALUE *argv, VALUE self) {
  VALUE callable = Qnil;
  rb_scan_args(argc, argv, "01", &callable);

  if (!NIL_P(callable) && rb_block_given_p()) {
    rb_raise(rb_eArgError, "provide callable argument or block, not both");
  }

  if (NIL_P(callable) && rb_block_given_p()) {
    callable = rb_block_proc();
  }

  // nil -> reset to the built-in factory and drop the pinned proc.
  if (NIL_P(callable)) {
    set_root_stream_factory(RootStreamFactory{});
    g_stream_factory_callback.reset();
    return Qnil;
  }

  if (!rb_respond_to(callable, rb_id_call_method)) {
    rb_raise(rb_eTypeError, "stream factory must respond to #call");
  }

  // GC-pin the callable; shared by the factory lambda and the global below.
  auto holder = std::make_shared<RubyCallbackHolder>(callable);

  RootStreamFactory factory = [holder](const PathHierarchy &hierarchy) -> std::shared_ptr<IDataStream> {
    // Payload + captureless trampoline so rb_protect (which takes a C
    // function pointer) can call back into Ruby safely.
    struct FactoryPayload {
      std::shared_ptr<RubyCallbackHolder> holder;
      VALUE arg;
    } payload{ holder, path_hierarchy_to_rb(hierarchy) };

    auto invoke = [](VALUE data) -> VALUE {
      auto *info = reinterpret_cast<FactoryPayload *>(data);
      return rb_funcall(info->holder->proc_value, rb_id_call_method, 1, info->arg);
    };

    int state = 0;
    VALUE result = rb_protect(invoke, reinterpret_cast<VALUE>(&payload), &state);
    if (state != 0) {
      // Re-raise the Ruby exception. NOTE(review): this longjmps through the
      // C++ frames that requested the stream — confirm callers tolerate it.
      rb_jump_tag(state);
    }

    return stream_from_ruby_value(result, hierarchy);
  };

  set_root_stream_factory(factory);
  g_stream_factory_callback = holder;  // keep the proc pinned globally
  return Qnil;
}
521
+
522
+ static VALUE archive_r_on_fault(int argc, VALUE *argv, VALUE self) {
523
+ VALUE callback = Qnil;
524
+ rb_scan_args(argc, argv, "01", &callback);
525
+
526
+ if (!NIL_P(callback) && rb_block_given_p()) {
527
+ rb_raise(rb_eArgError, "provide callable argument or block, not both");
528
+ }
529
+
530
+ if (NIL_P(callback) && rb_block_given_p()) {
531
+ callback = rb_block_proc();
532
+ }
533
+
534
+ FaultCallback cb = make_ruby_fault_callback(callback);
535
+ register_fault_callback(std::move(cb));
536
+ return self;
537
+ }
538
+
539
+ //=============================================================================
540
+ // Entry class
541
+ //=============================================================================
542
+
543
// EntryWrapper: pairs a borrowed pointer to the iterator-owned Entry with a
// privately owned deep copy. entry_invalidate() nulls the borrowed pointer
// when iteration advances; the copy keeps read-only accessors working after
// the Ruby block has returned.
struct EntryWrapper {
  Entry *entry_ref;                   // borrowed; valid only while iterating
  std::unique_ptr<Entry> entry_copy;  // owned snapshot for later reads

  EntryWrapper(Entry *ref, std::unique_ptr<Entry> copy)
      : entry_ref(ref)
      , entry_copy(std::move(copy)) {}
};
552
+
553
// Data_Wrap_Struct free hook: deletes the wrapper and, through it, the owned
// Entry snapshot; the live Entry is owned by the traverser's iterator.
static void entry_free(void *ptr) { delete static_cast<EntryWrapper *>(ptr); }
555
+
556
+ // Wrap Entry pointer in EntryWrapper (does not copy Entry)
557
+ static VALUE entry_wrap(Entry &entry) {
558
+ std::unique_ptr<Entry> copy(new Entry(entry));
559
+ EntryWrapper *wrapper = new EntryWrapper(&entry, std::move(copy));
560
+ return Data_Wrap_Struct(cEntry, nullptr, entry_free, wrapper);
561
+ }
562
+
563
+ static VALUE entry_invalidate(VALUE entry_obj) {
564
+ EntryWrapper *wrapper;
565
+ Data_Get_Struct(entry_obj, EntryWrapper, wrapper);
566
+ if (wrapper) {
567
+ wrapper->entry_ref = nullptr;
568
+ }
569
+ return Qnil;
570
+ }
571
+
572
+ static EntryWrapper *entry_get_wrapper(VALUE self) {
573
+ EntryWrapper *wrapper;
574
+ Data_Get_Struct(self, EntryWrapper, wrapper);
575
+ if (!wrapper) {
576
+ rb_raise(rb_eRuntimeError, "Invalid Entry handle");
577
+ }
578
+ return wrapper;
579
+ }
580
+
581
+ // Helper to fetch Entry for read operations, falling back to preserved copy
582
+ static Entry *entry_for_read(VALUE self) {
583
+ EntryWrapper *wrapper = entry_get_wrapper(self);
584
+ if (wrapper->entry_ref) {
585
+ return wrapper->entry_ref;
586
+ }
587
+ if (wrapper->entry_copy) {
588
+ return wrapper->entry_copy.get();
589
+ }
590
+ rb_raise(rb_eRuntimeError, "Entry data is no longer available");
591
+ return nullptr;
592
+ }
593
+
594
+ // Helper to fetch live Entry pointer required for mutating operations
595
+ static Entry *entry_for_live(VALUE self) {
596
+ EntryWrapper *wrapper = entry_get_wrapper(self);
597
+ if (!wrapper->entry_ref) {
598
+ rb_raise(rb_eRuntimeError, "Entry is no longer valid");
599
+ }
600
+ return wrapper->entry_ref;
601
+ }
602
+
603
+ // Entry#path -> String
604
+ static VALUE entry_path(VALUE self) {
605
+ Entry *entry = entry_for_read(self);
606
+ const std::string entry_path_str = entry->path();
607
+ return cpp_string_to_rb(entry->path());
608
+ }
609
+
610
+ // Entry#path_hierarchy -> Array
611
+ static VALUE entry_path_hierarchy(VALUE self) {
612
+ Entry *entry = entry_for_read(self);
613
+ return path_hierarchy_to_rb(entry->path_hierarchy());
614
+ }
615
+
616
+ // Entry#name -> String
617
+ static VALUE entry_name(VALUE self) {
618
+ Entry *entry = entry_for_read(self);
619
+ return cpp_string_to_rb(entry->name());
620
+ }
621
+
622
+ // Entry#size -> Integer
623
+ static VALUE entry_size(VALUE self) {
624
+ Entry *entry = entry_for_read(self);
625
+ return LONG2NUM(entry->size());
626
+ }
627
+
628
+ // Entry#file? -> Boolean
629
+ static VALUE entry_is_file(VALUE self) {
630
+ Entry *entry = entry_for_read(self);
631
+ return entry->is_file() ? Qtrue : Qfalse;
632
+ }
633
+
634
+ // Entry#directory? -> Boolean
635
+ static VALUE entry_is_directory(VALUE self) {
636
+ Entry *entry = entry_for_read(self);
637
+ return entry->is_directory() ? Qtrue : Qfalse;
638
+ }
639
+
640
+ // Entry#depth -> Integer
641
+ static VALUE entry_depth(VALUE self) {
642
+ Entry *entry = entry_for_read(self);
643
+ return INT2NUM(entry->depth());
644
+ }
645
+
646
+ // Entry#set_descent(enabled) -> self
647
+ static VALUE entry_set_descent(VALUE self, VALUE enabled) {
648
+ Entry *entry = entry_for_live(self);
649
+ entry->set_descent(RTEST(enabled));
650
+ return self;
651
+ }
652
+
653
// Entry#set_multi_volume_group(base_name, order: :natural) -> nil
// Marks this (live) entry as part of a multi-volume group under `base_name`.
// The :order option — Symbol or String, case-insensitive — selects part
// ordering: :natural (default) or :given.
static VALUE entry_set_multi_volume_group(int argc, VALUE *argv, VALUE self) {
  VALUE base_name_val;
  VALUE options_val = Qnil;

  rb_scan_args(argc, argv, "11", &base_name_val, &options_val);

  MultiVolumeGroupOptions options;
  if (!NIL_P(options_val)) {
    VALUE hash = rb_check_hash_type(options_val);
    if (NIL_P(hash)) {
      rb_raise(rb_eTypeError, "options must be a Hash");
    }

    static ID id_order = rb_intern("order");
    VALUE order_val = rb_hash_aref(hash, ID2SYM(id_order));
    if (!NIL_P(order_val)) {
      // Accept either a Symbol or anything convertible to String.
      std::string order_str;
      if (SYMBOL_P(order_val)) {
        order_str = rb_id2name(SYM2ID(order_val));
      } else {
        order_str = rb_string_to_cpp(StringValue(order_val));
      }
      // Case-insensitive compare via ASCII lowercasing.
      for (char &ch : order_str) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
      }
      if (order_str == "given") {
        options.ordering = PathEntry::Parts::Ordering::Given;
      } else if (order_str == "natural") {
        options.ordering = PathEntry::Parts::Ordering::Natural;
      } else {
        rb_raise(rb_eArgError, "order must be :natural or :given");
      }
    }
  }

  Entry *entry = entry_for_live(self);
  try {
    entry->set_multi_volume_group(rb_string_to_cpp(StringValue(base_name_val)), options);
  } catch (const std::exception &e) {
    // Translate the C++ failure into a Ruby RuntimeError.
    rb_raise(rb_eRuntimeError, "Failed to set multi-volume group: %s", e.what());
  }
  return Qnil;
}
697
+
698
+ // Entry#read(length = nil) -> String
699
+ static VALUE entry_read(int argc, VALUE *argv, VALUE self) {
700
+ VALUE length_val = Qnil;
701
+ rb_scan_args(argc, argv, "01", &length_val);
702
+
703
+ bool bounded_read = false;
704
+ size_t requested_size = 0;
705
+ if (!NIL_P(length_val)) {
706
+ long long length_long = NUM2LL(length_val);
707
+ if (length_long == 0) {
708
+ return rb_str_new("", 0);
709
+ } else if (length_long > 0) {
710
+ const auto max_allowed = std::numeric_limits<size_t>::max();
711
+ if (static_cast<unsigned long long>(length_long) > max_allowed) {
712
+ rb_raise(rb_eRangeError, "requested length exceeds platform limits");
713
+ }
714
+ requested_size = static_cast<size_t>(length_long);
715
+ bounded_read = true;
716
+ }
717
+ // Negative values fall through to the streaming path (full read)
718
+ }
719
+
720
+ Entry *entry = entry_for_read(self);
721
+ const std::string entry_path_str = entry->path();
722
+
723
+ try {
724
+ if (bounded_read) {
725
+ std::vector<uint8_t> buffer(requested_size);
726
+ const ssize_t bytes_read = entry->read(buffer.data(), buffer.size());
727
+ if (bytes_read < 0) {
728
+ rb_raise(rb_eRuntimeError, "Failed to read entry payload at %s", entry_path_str.c_str());
729
+ }
730
+ return rb_str_new(reinterpret_cast<const char *>(buffer.data()), static_cast<long>(bytes_read));
731
+ }
732
+
733
+ std::string aggregate;
734
+ const uint64_t reported_size = entry->size();
735
+ if (reported_size > 0 && reported_size <= static_cast<uint64_t>(std::numeric_limits<size_t>::max())) {
736
+ aggregate.reserve(static_cast<size_t>(reported_size));
737
+ }
738
+
739
+ std::vector<uint8_t> chunk(64 * 1024);
740
+ while (true) {
741
+ const ssize_t bytes_read = entry->read(chunk.data(), chunk.size());
742
+ if (bytes_read < 0) {
743
+ rb_raise(rb_eRuntimeError, "Failed to read entry payload at %s", entry_path_str.c_str());
744
+ }
745
+ if (bytes_read == 0) {
746
+ break;
747
+ }
748
+ aggregate.append(reinterpret_cast<const char *>(chunk.data()), static_cast<size_t>(bytes_read));
749
+ }
750
+
751
+ return rb_str_new(aggregate.data(), static_cast<long>(aggregate.size()));
752
+ } catch (const std::exception &e) {
753
+ rb_raise(rb_eRuntimeError, "Failed to read entry at %s: %s", entry_path_str.c_str(), e.what());
754
+ return Qnil;
755
+ }
756
+ }
757
+
758
+ // Entry#metadata -> Hash
759
+ static VALUE entry_metadata(VALUE self) {
760
+ Entry *entry = entry_for_read(self);
761
+ VALUE hash = rb_hash_new();
762
+ const EntryMetadataMap &metadata = entry->metadata();
763
+ for (const auto &kv : metadata) {
764
+ rb_hash_aset(hash, cpp_string_to_rb(kv.first), metadata_value_to_rb(kv.second));
765
+ }
766
+ return hash;
767
+ }
768
+
769
+ // Entry#metadata_value(key) -> Object or nil
770
+ static VALUE entry_metadata_value(VALUE self, VALUE key) {
771
+ Entry *entry = entry_for_read(self);
772
+ std::string key_str = rb_string_to_cpp(StringValue(key));
773
+ const EntryMetadataValue *value = entry->find_metadata(key_str);
774
+ if (!value) {
775
+ return Qnil;
776
+ }
777
+ return metadata_value_to_rb(*value);
778
+ }
779
+
780
+ //=============================================================================
781
+ // Traverser class
782
+ //=============================================================================
783
+
784
// Data_Wrap_Struct free hook for Traverser (delete on NULL is a no-op, so
// objects from traverser_allocate that were never initialized are safe).
static void traverser_free(void *ptr) { delete static_cast<Traverser *>(ptr); }
786
+
787
// Wrap an owned Traverser pointer; GC frees it via traverser_free.
static VALUE traverser_wrap(Traverser *traverser) { return Data_Wrap_Struct(cTraverser, nullptr, traverser_free, traverser); }
789
+
790
+ // Get Traverser pointer from Ruby object
791
+ static Traverser *traverser_unwrap(VALUE self) {
792
+ Traverser *traverser;
793
+ Data_Get_Struct(self, Traverser, traverser);
794
+ return traverser;
795
+ }
796
+
797
// Traverser allocation: wraps a NULL pointer; #initialize installs the real
// Traverser (traverser_free tolerates NULL if it never does).
static VALUE traverser_allocate(VALUE klass) { return Data_Wrap_Struct(klass, nullptr, traverser_free, nullptr); }
799
+
800
+ // Traverser.new(paths, passphrases: [], formats: [], metadata_keys: []) -> Traverser
801
+ static VALUE traverser_initialize(int argc, VALUE *argv, VALUE self) {
802
+ VALUE paths;
803
+ VALUE opts = Qnil;
804
+ rb_scan_args(argc, argv, "11", &paths, &opts);
805
+
806
+ try {
807
+ std::vector<PathHierarchy> path_list = rb_paths_to_hierarchies(paths);
808
+ TraverserOptions options;
809
+ populate_traverser_options(opts, options);
810
+ Traverser *traverser = new Traverser(std::move(path_list), options);
811
+ DATA_PTR(self) = traverser;
812
+ return self;
813
+ } catch (const std::exception &e) {
814
+ rb_raise(rb_eRuntimeError, "Failed to open archive: %s", e.what());
815
+ return Qnil;
816
+ }
817
+ }
818
+
819
// Traverser#each { |entry| ... } -> Enumerator
// rb_ensure body callback: yields the wrapped Entry to the caller's block.
static VALUE yield_entry(VALUE entry_obj) {
  rb_yield(entry_obj);
  return Qnil;
}
824
+
825
// Traverser#each: yields each Entry to the block, or returns an Enumerator
// when no block is given. Returns self after a full traversal.
static VALUE traverser_each(VALUE self) {
  Traverser *traverser = traverser_unwrap(self);

  // If no block given, return Enumerator
  if (!rb_block_given_p()) {
    return rb_funcall(self, rb_intern("to_enum"), 1, ID2SYM(rb_intern("each")));
  }

  try {
    for (auto it = traverser->begin(); it != traverser->end(); ++it) {
      Entry &entry = *it;
      VALUE rb_entry = entry_wrap(entry);
      // rb_ensure guarantees entry_invalidate runs even if the block raises
      // or breaks, so the wrapper never keeps a stale live pointer.
      // NOTE(review): a raising block longjmps through this C++ loop,
      // skipping the iterator's destructor — confirm that is acceptable.
      rb_ensure(yield_entry, rb_entry, entry_invalidate, rb_entry);
    }
  } catch (const std::exception &e) {
    rb_raise(rb_eRuntimeError, "Error during traversal: %s", e.what());
  }

  return self;
}
845
+
846
// Helper for Traverser.open cleanup (rb_ensure's ensure-callback).
static VALUE traverser_close_helper(VALUE arg) {
  // Nothing to do - Traverser cleanup is automatic (GC runs traverser_free)
  return Qnil;
}
851
+
852
+ // Traverser.open(path, opts = {}) { |traverser| ... } -> result of block
853
+ static VALUE traverser_s_open(int argc, VALUE *argv, VALUE klass) {
854
+ VALUE traverser = rb_class_new_instance(argc, argv, klass);
855
+
856
+ if (rb_block_given_p()) {
857
+ return rb_ensure(rb_yield, traverser, traverser_close_helper, traverser);
858
+ }
859
+
860
+ return traverser;
861
+ }
862
+
863
+ //=============================================================================
864
+ // Module initialization
865
+ //=============================================================================
866
+
867
+ extern "C" void Init_archive_r() {
868
+ // Define module Archive_r
869
+ mArchive_r = rb_define_module("Archive_r");
870
+
871
+ rb_id_read_method = rb_intern("read");
872
+ rb_id_rewind_method = rb_intern("rewind");
873
+ rb_id_seek_method = rb_intern("seek");
874
+ rb_id_tell_method = rb_intern("tell");
875
+ rb_id_eof_method = rb_intern("eof?");
876
+ rb_id_call_method = rb_intern("call");
877
+
878
+ // Define Entry class
879
+ cEntry = rb_define_class_under(mArchive_r, "Entry", rb_cObject);
880
+ rb_undef_alloc_func(cEntry);
881
+
882
+ rb_define_method(cEntry, "path", RUBY_METHOD_FUNC(entry_path), 0);
883
+ rb_define_method(cEntry, "path_hierarchy", RUBY_METHOD_FUNC(entry_path_hierarchy), 0);
884
+ rb_define_method(cEntry, "name", RUBY_METHOD_FUNC(entry_name), 0);
885
+ rb_define_method(cEntry, "size", RUBY_METHOD_FUNC(entry_size), 0);
886
+ rb_define_method(cEntry, "file?", RUBY_METHOD_FUNC(entry_is_file), 0);
887
+ rb_define_method(cEntry, "directory?", RUBY_METHOD_FUNC(entry_is_directory), 0);
888
+ rb_define_method(cEntry, "depth", RUBY_METHOD_FUNC(entry_depth), 0);
889
+ rb_define_method(cEntry, "set_descent", RUBY_METHOD_FUNC(entry_set_descent), 1);
890
+ rb_define_method(cEntry, "set_multi_volume_group", RUBY_METHOD_FUNC(entry_set_multi_volume_group), -1);
891
+ rb_define_method(cEntry, "read", RUBY_METHOD_FUNC(entry_read), -1);
892
+ rb_define_method(cEntry, "metadata", RUBY_METHOD_FUNC(entry_metadata), 0);
893
+ rb_define_method(cEntry, "metadata_value", RUBY_METHOD_FUNC(entry_metadata_value), 1);
894
+
895
+ // Define Traverser class
896
+ cTraverser = rb_define_class_under(mArchive_r, "Traverser", rb_cObject);
897
+
898
+ rb_define_alloc_func(cTraverser, traverser_allocate);
899
+ rb_define_method(cTraverser, "initialize", RUBY_METHOD_FUNC(traverser_initialize), -1);
900
+ rb_define_method(cTraverser, "each", RUBY_METHOD_FUNC(traverser_each), 0);
901
+ rb_define_singleton_method(cTraverser, "open", RUBY_METHOD_FUNC(traverser_s_open), -1);
902
+
903
+ // Make Traverser enumerable
904
+ rb_include_module(cTraverser, rb_mEnumerable);
905
+
906
+ rb_define_module_function(mArchive_r, "on_fault", RUBY_METHOD_FUNC(archive_r_on_fault), -1);
907
+ rb_define_module_function(mArchive_r, "register_stream_factory", RUBY_METHOD_FUNC(archive_r_register_stream_factory), -1);
908
+
909
+ rb_set_end_proc(archive_r_cleanup, Qnil);
910
+ }