archive_r_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37):
  1. checksums.yaml +7 -0
  2. data/LICENSE +56 -0
  3. data/README.md +103 -0
  4. data/ext/archive_r/archive_r_ext.cc +910 -0
  5. data/ext/archive_r/extconf.rb +90 -0
  6. data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
  7. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
  8. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
  9. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
  10. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
  15. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
  16. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
  17. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
  18. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
  19. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
  20. data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
  21. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
  22. data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
  23. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
  24. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
  25. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
  26. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
  27. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
  28. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
  29. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
  30. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
  31. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
  32. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
  33. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
  34. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
  35. data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
  36. data/lib/archive_r.rb +80 -0
  37. metadata +112 -0
@@ -0,0 +1,54 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/path_hierarchy.h"
7
+ #include "archive_type.h"
8
+ #include "archive_stack_cursor.h"
9
+ #include "entry_fault_error.h"
10
+ #include "multi_volume_manager.h"
11
+ #include <limits>
12
+ #include <memory>
13
+ #include <string>
14
+ #include <sys/types.h>
15
+ #include <unordered_set>
16
+
17
+ namespace archive_r {
18
+
19
+ class ArchiveStackOrchestrator {
20
+ public:
21
+ explicit ArchiveStackOrchestrator(const ArchiveOption &options = {});
22
+ ArchiveStackOrchestrator(const ArchiveStackOrchestrator &) = delete;
23
+ ArchiveStackOrchestrator &operator=(const ArchiveStackOrchestrator &) = delete;
24
+
25
+ ~ArchiveStackOrchestrator();
26
+
27
+ void open_root_hierarchy(const PathHierarchy &root_hierarchy);
28
+
29
+ bool advance(bool descend_request = true);
30
+ const std::string &current_entryname();
31
+
32
+ size_t depth() const;
33
+ PathHierarchy current_entry_hierarchy();
34
+ bool synchronize_to_hierarchy(const PathHierarchy &path_hierarchy);
35
+
36
+ StreamArchive *current_archive();
37
+ ssize_t read_head(void *buff, size_t len);
38
+
39
+ const std::unordered_set<std::string> &metadata_keys() const { return _metadata_keys; }
40
+ const ArchiveOption &options() const { return _archive_options; }
41
+
42
+ void mark_entry_as_multi_volume(const PathHierarchy &entry_path, const std::string &base_name, PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural);
43
+ bool descend_pending_multi_volumes();
44
+
45
+ private:
46
+ ArchiveOption _archive_options;
47
+ std::unordered_set<std::string> _metadata_keys;
48
+ ArchiveStackCursor _head;
49
+ MultiVolumeManager _multi_volume_manager;
50
+
51
+ void dispatch_fault(EntryFault fault);
52
+ };
53
+
54
+ } // namespace archive_r
@@ -0,0 +1,552 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #include "archive_type.h"
5
+ #include <cstdio>
6
+ #include <cstdlib>
7
+ #include <cstring>
8
+ #include <stdexcept>
9
+ #include <string>
10
+ #include <typeinfo>
11
+ #include <unordered_map>
12
+
13
+ namespace archive_r {
14
+
15
+ namespace {
16
+
17
+ [[noreturn]] void throw_archive_fault(const std::string &message, struct archive *ar = nullptr) {
18
+ const int err = ar ? archive_errno(ar) : 0;
19
+ throw make_entry_fault_error(message, {}, err);
20
+ }
21
+
22
+ } // namespace
23
+
24
+ void archive_deleter::operator()(struct archive *a) const {
25
+ if (a) {
26
+ archive_read_close(a);
27
+ archive_read_free(a);
28
+ }
29
+ }
30
+
31
+ static void set_passphrases(struct archive *ar, const std::vector<std::string> &passphrases) {
32
+ size_t index = 0;
33
+ for (const auto &passphrase : passphrases) {
34
+ if (archive_read_add_passphrase(ar, passphrase.c_str()) != ARCHIVE_OK) {
35
+ char buf[1024];
36
+ std::snprintf(buf, sizeof(buf), "Failed to add passphrase (index %zu): %s", index, archive_error_string(ar));
37
+ throw_archive_fault(buf, ar);
38
+ }
39
+ ++index;
40
+ }
41
+ }
42
+
43
+ static void configure_formats(struct archive *ar, const std::vector<std::string> &format_names) {
44
+ if (format_names.empty()) {
45
+ archive_read_support_format_all(ar);
46
+ return;
47
+ }
48
+
49
+ using FormatHandler = int (*)(struct archive *);
50
+ static const std::unordered_map<std::string, FormatHandler> kFormatHandlers = {
51
+ { "7zip", archive_read_support_format_7zip }, { "ar", archive_read_support_format_ar }, { "cab", archive_read_support_format_cab }, { "cpio", archive_read_support_format_cpio },
52
+ { "empty", archive_read_support_format_empty }, { "iso9660", archive_read_support_format_iso9660 }, { "lha", archive_read_support_format_lha }, { "mtree", archive_read_support_format_mtree },
53
+ { "rar", archive_read_support_format_rar }, { "raw", archive_read_support_format_raw }, { "tar", archive_read_support_format_tar }, { "warc", archive_read_support_format_warc },
54
+ { "xar", archive_read_support_format_xar }, { "zip", archive_read_support_format_zip },
55
+ };
56
+
57
+ for (const auto &format : format_names) {
58
+ auto it = kFormatHandlers.find(format);
59
+ if (it == kFormatHandlers.end()) {
60
+ char buf[1024];
61
+ std::snprintf(buf, sizeof(buf), "Unsupported archive format specified (%s)", format.c_str());
62
+ throw_archive_fault(buf);
63
+ }
64
+
65
+ int r = it->second(ar);
66
+ if (r != ARCHIVE_OK) {
67
+ char buf[1024];
68
+ std::snprintf(buf, sizeof(buf), "Failed to enable format (%s): %s", format.c_str(), archive_error_string(ar));
69
+ throw_archive_fault(buf, ar);
70
+ }
71
+ }
72
+ }
73
+
74
+ static std::string format_archive_error(struct archive *ar, const std::string &prefix) {
75
+ std::string message = prefix;
76
+
77
+ if (!ar) {
78
+ return message;
79
+ }
80
+
81
+ if (const char *err = archive_error_string(ar)) {
82
+ if (*err) {
83
+ message += ": ";
84
+ message += err;
85
+ }
86
+ }
87
+
88
+ const int code = archive_errno(ar);
89
+ if (code != 0) {
90
+ message += " (libarchive errno=";
91
+ message += std::to_string(code);
92
+ message += ')';
93
+ }
94
+
95
+ return message;
96
+ }
97
+
98
+ archive_ptr new_read_archive_common(const std::vector<std::string> &passphrases, const std::vector<std::string> &format_names, open_delegate archive_open) {
99
+ archive_ptr ar(archive_read_new());
100
+ if (!ar) {
101
+ throw_archive_fault("archive_read_new failed.");
102
+ }
103
+
104
+ configure_formats(ar.get(), format_names);
105
+ archive_read_support_filter_all(ar.get());
106
+ set_passphrases(ar.get(), passphrases);
107
+
108
+ if (archive_open(ar.get()) != ARCHIVE_OK) {
109
+ char buf[1024];
110
+ std::snprintf(buf, sizeof(buf), "archive_read_open failed: (%d)%s", archive_errno(ar.get()), archive_error_string(ar.get()));
111
+ throw_archive_fault(buf, ar.get());
112
+ }
113
+
114
+ return ar;
115
+ }
116
+
117
+ Archive::Archive()
118
+ : _ar(nullptr)
119
+ , current_entry(nullptr)
120
+ , _at_eof(false)
121
+ , _current_entry_content_ready(false) {}
122
+
123
+ Archive::~Archive() { close_archive(); }
124
+
125
+ void Archive::close_archive() {
126
+ if (_ar) {
127
+ archive_read_close(_ar);
128
+ archive_read_free(_ar);
129
+ _ar = nullptr;
130
+ }
131
+ current_entry = nullptr;
132
+ current_entryname.clear();
133
+ _current_entry_content_ready = false;
134
+ }
135
+
136
+ void Archive::rewind() {
137
+ close_archive();
138
+ current_entryname.clear();
139
+ current_entry = nullptr;
140
+ _at_eof = false;
141
+ open_archive();
142
+ _current_entry_content_ready = false;
143
+ }
144
+
145
+ bool Archive::skip_to_next_header() {
146
+ if (!_ar) {
147
+ throw std::logic_error("Archive handle is not initialized");
148
+ }
149
+
150
+ if (_at_eof) {
151
+ current_entry = nullptr;
152
+ current_entryname.clear();
153
+ _current_entry_content_ready = false;
154
+ return false;
155
+ }
156
+
157
+ const int r = archive_read_next_header(_ar, &current_entry);
158
+
159
+ if (r == ARCHIVE_EOF) {
160
+ _at_eof = true;
161
+ current_entry = nullptr;
162
+ current_entryname.clear();
163
+ _current_entry_content_ready = false;
164
+ return false;
165
+ }
166
+
167
+ if (r == ARCHIVE_FAILED || r == ARCHIVE_FATAL || r == ARCHIVE_RETRY) {
168
+ const std::string message = format_archive_error(_ar, "Failed to read next header");
169
+ _at_eof = true;
170
+ current_entry = nullptr;
171
+ current_entryname.clear();
172
+ _current_entry_content_ready = false;
173
+ raise_archive_error(message);
174
+ }
175
+
176
+ const char *name = archive_entry_pathname(current_entry);
177
+ if (name == nullptr) {
178
+ throw make_entry_fault_error("Failed to retrieve entry pathname (archive_entry_pathname returned null)", {}, 0);
179
+ }
180
+ current_entryname = std::string(name);
181
+ _current_entry_content_ready = true;
182
+ return true;
183
+ }
184
+
185
+ bool Archive::skip_data() {
186
+ if (!_ar) {
187
+ throw std::logic_error("Archive handle is not initialized");
188
+ }
189
+
190
+ int r = archive_read_data_skip(_ar);
191
+ if (r != ARCHIVE_OK) {
192
+ raise_archive_error(format_archive_error(_ar, "Failed to skip data"));
193
+ }
194
+ _current_entry_content_ready = false;
195
+ return true;
196
+ }
197
+
198
+ bool Archive::skip_to_entry(const std::string &entryname) {
199
+
200
+ if (current_entryname == entryname && _current_entry_content_ready) {
201
+ return true;
202
+ }
203
+
204
+ if (_at_eof) {
205
+ rewind();
206
+ }
207
+
208
+ const std::string start_position = current_entryname;
209
+
210
+ if (search_forward_until_eof(entryname)) {
211
+ return true;
212
+ }
213
+
214
+ if (start_position.empty()) {
215
+ return false;
216
+ }
217
+
218
+ rewind();
219
+
220
+ return search_until_position(entryname, start_position);
221
+ }
222
+
223
+ bool Archive::skip_to_eof() {
224
+ while (!_at_eof) {
225
+ skip_to_next_header();
226
+ }
227
+ return true;
228
+ }
229
+
230
+ bool Archive::search_forward_until_eof(const std::string &entryname) {
231
+ while (skip_to_next_header()) {
232
+ if (current_entryname == entryname) {
233
+ return true;
234
+ }
235
+ if (!skip_data()) {
236
+ return false;
237
+ }
238
+ }
239
+ return false;
240
+ }
241
+
242
+ bool Archive::search_until_position(const std::string &entryname, const std::string &stop_position) {
243
+ while (skip_to_next_header()) {
244
+ if (current_entryname == entryname) {
245
+ return true;
246
+ }
247
+ if (current_entryname == stop_position) {
248
+ break;
249
+ }
250
+ if (!skip_data()) {
251
+ return false;
252
+ }
253
+ }
254
+ return false;
255
+ }
256
+
257
+ ssize_t Archive::read_current(void *buff, size_t len) {
258
+ if (!_ar) {
259
+ throw std::logic_error("Archive handle is not initialized");
260
+ }
261
+
262
+ const ssize_t bytes_read = archive_read_data(_ar, buff, len);
263
+ if (bytes_read < 0) {
264
+ raise_archive_error(format_archive_error(_ar, "Failed to read data"));
265
+ }
266
+
267
+ _current_entry_content_ready = false;
268
+ return bytes_read;
269
+ }
270
+
271
+ uint64_t Archive::current_entry_size() const {
272
+ if (!current_entry) {
273
+ return 0;
274
+ }
275
+ return archive_entry_size(current_entry);
276
+ }
277
+
278
+ mode_t Archive::current_entry_filetype() const {
279
+ if (!current_entry) {
280
+ return 0;
281
+ }
282
+ return archive_entry_filetype(current_entry);
283
+ }
284
+
285
+ EntryMetadataMap Archive::current_entry_metadata(const std::unordered_set<std::string> &allowed_keys) const {
286
+ EntryMetadataMap metadata;
287
+ if (!current_entry || allowed_keys.empty()) {
288
+ return metadata;
289
+ }
290
+
291
+ auto wants = [&allowed_keys](const std::string &key) { return allowed_keys.find(key) != allowed_keys.end(); };
292
+
293
+ const char *pathname_utf8 = archive_entry_pathname_utf8(current_entry);
294
+ if (pathname_utf8 && *pathname_utf8 && wants("pathname")) {
295
+ metadata["pathname"] = std::string(pathname_utf8);
296
+ } else {
297
+ const char *pathname = archive_entry_pathname(current_entry);
298
+ if (pathname && *pathname && wants("pathname")) {
299
+ metadata["pathname"] = std::string(pathname);
300
+ }
301
+ }
302
+
303
+ if (const char *sourcepath = archive_entry_sourcepath(current_entry)) {
304
+ if (wants("sourcepath")) {
305
+ metadata["sourcepath"] = std::string(sourcepath);
306
+ }
307
+ }
308
+
309
+ if (const char *symlink_utf8 = archive_entry_symlink_utf8(current_entry)) {
310
+ if (wants("symlink")) {
311
+ metadata["symlink"] = std::string(symlink_utf8);
312
+ }
313
+ }
314
+
315
+ if (const char *hardlink_utf8 = archive_entry_hardlink_utf8(current_entry)) {
316
+ if (wants("hardlink")) {
317
+ metadata["hardlink"] = std::string(hardlink_utf8);
318
+ }
319
+ } else if (const char *hardlink = archive_entry_hardlink(current_entry)) {
320
+ if (wants("hardlink")) {
321
+ metadata["hardlink"] = std::string(hardlink);
322
+ }
323
+ }
324
+
325
+ if (const char *uname_utf8 = archive_entry_uname_utf8(current_entry)) {
326
+ if (wants("uname")) {
327
+ metadata["uname"] = std::string(uname_utf8);
328
+ }
329
+ } else if (const char *uname = archive_entry_uname(current_entry)) {
330
+ if (wants("uname")) {
331
+ metadata["uname"] = std::string(uname);
332
+ }
333
+ }
334
+
335
+ if (const char *gname_utf8 = archive_entry_gname_utf8(current_entry)) {
336
+ if (wants("gname")) {
337
+ metadata["gname"] = std::string(gname_utf8);
338
+ }
339
+ } else if (const char *gname = archive_entry_gname(current_entry)) {
340
+ if (wants("gname")) {
341
+ metadata["gname"] = std::string(gname);
342
+ }
343
+ }
344
+
345
+ if (wants("uid")) {
346
+ bool has_uid = archive_entry_uname(current_entry) != nullptr;
347
+ if (!has_uid) {
348
+ has_uid = archive_entry_uid(current_entry) != 0;
349
+ }
350
+ if (has_uid) {
351
+ metadata["uid"] = static_cast<int64_t>(archive_entry_uid(current_entry));
352
+ }
353
+ }
354
+
355
+ if (wants("gid")) {
356
+ bool has_gid = archive_entry_gname(current_entry) != nullptr;
357
+ if (!has_gid) {
358
+ has_gid = archive_entry_gid(current_entry) != 0;
359
+ }
360
+ if (has_gid) {
361
+ metadata["gid"] = static_cast<int64_t>(archive_entry_gid(current_entry));
362
+ }
363
+ }
364
+
365
+ if (wants("perm")) {
366
+ metadata["perm"] = static_cast<uint64_t>(archive_entry_perm(current_entry));
367
+ }
368
+
369
+ if (wants("mode")) {
370
+ metadata["mode"] = static_cast<uint64_t>(archive_entry_mode(current_entry));
371
+ }
372
+
373
+ if (wants("filetype")) {
374
+ metadata["filetype"] = static_cast<uint64_t>(archive_entry_filetype(current_entry));
375
+ }
376
+
377
+ if (archive_entry_size_is_set(current_entry) && wants("size")) {
378
+ metadata["size"] = static_cast<uint64_t>(archive_entry_size(current_entry));
379
+ }
380
+
381
+ if (archive_entry_dev_is_set(current_entry) && wants("dev")) {
382
+ metadata["dev"] = EntryMetadataDeviceNumbers{ static_cast<uint64_t>(archive_entry_devmajor(current_entry)), static_cast<uint64_t>(archive_entry_devminor(current_entry)) };
383
+ }
384
+
385
+ if (wants("rdev")) {
386
+ const dev_t rdev = archive_entry_rdev(current_entry);
387
+ if (rdev != 0) {
388
+ metadata["rdev"] = EntryMetadataDeviceNumbers{ static_cast<uint64_t>(archive_entry_rdevmajor(current_entry)), static_cast<uint64_t>(archive_entry_rdevminor(current_entry)) };
389
+ }
390
+ }
391
+
392
+ if (archive_entry_ino_is_set(current_entry)) {
393
+ if (wants("ino")) {
394
+ metadata["ino"] = static_cast<uint64_t>(archive_entry_ino(current_entry));
395
+ }
396
+ if (wants("ino64")) {
397
+ metadata["ino64"] = static_cast<uint64_t>(archive_entry_ino64(current_entry));
398
+ }
399
+ }
400
+
401
+ if (wants("nlink")) {
402
+ metadata["nlink"] = static_cast<uint64_t>(archive_entry_nlink(current_entry));
403
+ }
404
+
405
+ if (const char *strmode = archive_entry_strmode(current_entry)) {
406
+ if (wants("strmode")) {
407
+ metadata["strmode"] = std::string(strmode);
408
+ }
409
+ }
410
+
411
+ auto record_time = [&metadata, &wants](const char *key, bool is_set, time_t seconds, long nanoseconds) {
412
+ if (!wants(key) || !is_set) {
413
+ return;
414
+ }
415
+ metadata[key] = EntryMetadataTime{ static_cast<int64_t>(seconds), static_cast<int32_t>(nanoseconds) };
416
+ };
417
+
418
+ record_time("atime", archive_entry_atime_is_set(current_entry) != 0, archive_entry_atime(current_entry), archive_entry_atime_nsec(current_entry));
419
+ record_time("birthtime", archive_entry_birthtime_is_set(current_entry) != 0, archive_entry_birthtime(current_entry), archive_entry_birthtime_nsec(current_entry));
420
+ record_time("ctime", archive_entry_ctime_is_set(current_entry) != 0, archive_entry_ctime(current_entry), archive_entry_ctime_nsec(current_entry));
421
+ record_time("mtime", archive_entry_mtime_is_set(current_entry) != 0, archive_entry_mtime(current_entry), archive_entry_mtime_nsec(current_entry));
422
+
423
+ unsigned long fflags_set = 0;
424
+ unsigned long fflags_clear = 0;
425
+ archive_entry_fflags(current_entry, &fflags_set, &fflags_clear);
426
+ if ((fflags_set != 0 || fflags_clear != 0) && wants("fflags")) {
427
+ metadata["fflags"] = EntryMetadataFileFlags{ static_cast<uint64_t>(fflags_set), static_cast<uint64_t>(fflags_clear) };
428
+ }
429
+
430
+ if (const char *fflags_text = archive_entry_fflags_text(current_entry)) {
431
+ if (wants("fflags_text")) {
432
+ metadata["fflags_text"] = std::string(fflags_text);
433
+ }
434
+ }
435
+
436
+ auto store_encryption_flag = [&metadata, &wants](const char *key, int value) {
437
+ if (!wants(key) || value < 0) {
438
+ return;
439
+ }
440
+ metadata[key] = (value != 0);
441
+ };
442
+
443
+ store_encryption_flag("is_data_encrypted", archive_entry_is_data_encrypted(current_entry));
444
+ store_encryption_flag("is_metadata_encrypted", archive_entry_is_metadata_encrypted(current_entry));
445
+ store_encryption_flag("is_encrypted", archive_entry_is_encrypted(current_entry));
446
+
447
+ if (wants("symlink_type")) {
448
+ const int symlink_type = archive_entry_symlink_type(current_entry);
449
+ if (symlink_type != 0) {
450
+ metadata["symlink_type"] = static_cast<int64_t>(symlink_type);
451
+ }
452
+ }
453
+
454
+ ssize_t acl_length = 0;
455
+ char *acl_text = wants("acl_text") ? archive_entry_acl_to_text(current_entry, &acl_length, ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA) : nullptr;
456
+ if (acl_text) {
457
+ if (acl_length >= 0) {
458
+ metadata["acl_text"] = std::string(acl_text, static_cast<size_t>(acl_length));
459
+ } else {
460
+ metadata["acl_text"] = std::string(acl_text);
461
+ }
462
+ std::free(acl_text);
463
+ }
464
+
465
+ if (wants("acl_types")) {
466
+ const int acl_types = archive_entry_acl_types(current_entry);
467
+ if (acl_types != 0) {
468
+ metadata["acl_types"] = static_cast<int64_t>(acl_types);
469
+ }
470
+ }
471
+
472
+ const int xattr_count = archive_entry_xattr_count(current_entry);
473
+ if (xattr_count > 0 && wants("xattr")) {
474
+ std::vector<EntryMetadataXattr> xattrs;
475
+ xattrs.reserve(static_cast<size_t>(xattr_count));
476
+ archive_entry_xattr_reset(current_entry);
477
+ const char *name = nullptr;
478
+ const void *value = nullptr;
479
+ size_t size = 0;
480
+ while (archive_entry_xattr_next(current_entry, &name, &value, &size) == ARCHIVE_OK) {
481
+ EntryMetadataXattr xattr;
482
+ if (name) {
483
+ xattr.name = name;
484
+ }
485
+ if (value && size > 0) {
486
+ const auto *begin = static_cast<const uint8_t *>(value);
487
+ xattr.value.assign(begin, begin + size);
488
+ }
489
+ xattrs.push_back(std::move(xattr));
490
+ }
491
+ if (!xattrs.empty()) {
492
+ metadata["xattr"] = std::move(xattrs);
493
+ }
494
+ }
495
+
496
+ const int sparse_count = archive_entry_sparse_count(current_entry);
497
+ if (sparse_count > 0 && wants("sparse")) {
498
+ std::vector<EntryMetadataSparseChunk> sparse_regions;
499
+ sparse_regions.reserve(static_cast<size_t>(sparse_count));
500
+ archive_entry_sparse_reset(current_entry);
501
+ la_int64_t offset = 0;
502
+ la_int64_t length = 0;
503
+ while (archive_entry_sparse_next(current_entry, &offset, &length) == ARCHIVE_OK) {
504
+ sparse_regions.push_back(EntryMetadataSparseChunk{ static_cast<int64_t>(offset), static_cast<int64_t>(length) });
505
+ }
506
+ if (!sparse_regions.empty()) {
507
+ metadata["sparse"] = std::move(sparse_regions);
508
+ }
509
+ }
510
+
511
+ if (wants("mac_metadata")) {
512
+ size_t mac_metadata_size = 0;
513
+ const void *mac_metadata = archive_entry_mac_metadata(current_entry, &mac_metadata_size);
514
+ if (mac_metadata && mac_metadata_size > 0) {
515
+ const auto *begin = static_cast<const uint8_t *>(mac_metadata);
516
+ metadata["mac_metadata"] = std::vector<uint8_t>(begin, begin + mac_metadata_size);
517
+ }
518
+ }
519
+
520
+ static constexpr struct {
521
+ int type;
522
+ const char *name;
523
+ size_t length;
524
+ } kDigestDescriptors[] = { { ARCHIVE_ENTRY_DIGEST_MD5, "md5", 16 }, { ARCHIVE_ENTRY_DIGEST_RMD160, "rmd160", 20 }, { ARCHIVE_ENTRY_DIGEST_SHA1, "sha1", 20 },
525
+ { ARCHIVE_ENTRY_DIGEST_SHA256, "sha256", 32 }, { ARCHIVE_ENTRY_DIGEST_SHA384, "sha384", 48 }, { ARCHIVE_ENTRY_DIGEST_SHA512, "sha512", 64 } };
526
+
527
+ if (wants("digests")) {
528
+ std::vector<EntryMetadataDigest> digests;
529
+ for (const auto &descriptor : kDigestDescriptors) {
530
+ const unsigned char *digest = archive_entry_digest(current_entry, descriptor.type);
531
+ if (!digest) {
532
+ continue;
533
+ }
534
+ EntryMetadataDigest digest_entry;
535
+ digest_entry.algorithm = descriptor.name;
536
+ digest_entry.value.assign(digest, digest + descriptor.length);
537
+ digests.push_back(std::move(digest_entry));
538
+ }
539
+ if (!digests.empty()) {
540
+ metadata["digests"] = std::move(digests);
541
+ }
542
+ }
543
+
544
+ return metadata;
545
+ }
546
+
547
+ [[noreturn]] void Archive::raise_archive_error(const std::string &message) {
548
+ const int err = _ar ? archive_errno(_ar) : 0;
549
+ throw make_entry_fault_error(message, {}, err);
550
+ }
551
+
552
+ } // namespace archive_r
@@ -0,0 +1,76 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/entry_metadata.h"
7
+ #include "entry_fault_error.h"
8
+ #include <archive.h>
9
+ #include <archive_entry.h>
10
+ #include <functional>
11
+ #include <memory>
12
+ #include <stdexcept>
13
+ #include <string>
14
+ #include <sys/types.h>
15
+ #include <unordered_set>
16
+ #include <vector>
17
+
18
+ namespace archive_r {
19
+
20
/// Deleter functor for libarchive handles; used as the deleter type of
/// archive_ptr.
struct archive_deleter {
  /// Releases @p a.
  void operator()(struct archive *a) const;
};
23
+
24
+ using archive_ptr = std::unique_ptr<struct archive, archive_deleter>;
25
+ using open_delegate = std::function<int(struct archive *ar)>;
26
+
27
/// Options applied when archives are opened and entries are inspected.
struct ArchiveOption {
  /// Passphrases for encrypted archives.
  std::vector<std::string> passphrases;

  /// Specific format names to enable (empty = all).
  std::vector<std::string> formats;

  /// Metadata keys to capture (empty = none).
  std::vector<std::string> metadata_keys;
};
32
+
33
+ archive_ptr new_read_archive_common(const std::vector<std::string> &passphrases, const std::vector<std::string> &format_names, open_delegate archive_open);
34
+
35
+ struct Archive {
36
+ Archive();
37
+ virtual ~Archive();
38
+
39
+ Archive(const Archive &) = delete;
40
+ Archive &operator=(const Archive &) = delete;
41
+
42
+ virtual void open_archive() = 0;
43
+ virtual void close_archive();
44
+ virtual void rewind();
45
+
46
+ bool skip_to_next_header();
47
+ bool skip_data();
48
+ bool skip_to_entry(const std::string &entryname);
49
+ bool skip_to_eof();
50
+
51
+ std::string current_entryname;
52
+ struct archive_entry *current_entry;
53
+ ssize_t read_current(void *buff, size_t len);
54
+
55
+ // Get current entry metadata
56
+ uint64_t current_entry_size() const;
57
+ mode_t current_entry_filetype() const;
58
+ EntryMetadataMap current_entry_metadata(const std::unordered_set<std::string> &allowed_keys) const;
59
+
60
+ struct archive *_ar;
61
+ bool _at_eof;
62
+
63
+ protected:
64
+ bool _current_entry_content_ready;
65
+
66
+ public:
67
+ bool current_entry_content_ready() const { return _current_entry_content_ready; }
68
+
69
+ private:
70
+ [[noreturn]] void raise_archive_error(const std::string &message);
71
+
72
+ bool search_forward_until_eof(const std::string &entryname);
73
+ bool search_until_position(const std::string &entryname, const std::string &stop_position);
74
+ };
75
+
76
+ } // namespace archive_r