archive_r_ruby 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/{LICENSE → LICENSE.txt} +77 -77
  3. data/README.md +103 -103
  4. data/ext/archive_r/Makefile +48 -45
  5. data/ext/archive_r/archive_r-x64-mingw-ucrt.def +2 -0
  6. data/ext/archive_r/archive_r_ext.cc +1106 -1106
  7. data/ext/archive_r/archive_r_ext.o +0 -0
  8. data/ext/archive_r/extconf.rb +120 -120
  9. data/ext/archive_r/mkmf.log +23 -18
  10. data/ext/archive_r/vendor/archive_r/LICENSE.txt +77 -77
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +52 -52
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +166 -166
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -34
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -56
  15. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -46
  16. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -109
  17. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -37
  18. data/ext/archive_r/vendor/archive_r/include/archive_r/platform_compat.h +19 -19
  19. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -122
  20. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -330
  21. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +97 -97
  22. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -162
  23. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -54
  24. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -552
  25. data/ext/archive_r/vendor/archive_r/src/archive_type.h +77 -77
  26. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -35
  27. data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -253
  28. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -26
  29. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -54
  30. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -32
  31. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +57 -57
  32. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -81
  33. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -41
  34. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -199
  35. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -151
  36. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -304
  37. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -120
  38. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +295 -295
  39. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -46
  40. data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -314
  41. data/lib/archive_r.rb +105 -105
  42. metadata +11 -8
  43. data/ext/archive_r/archive_r.bundle +0 -0
@@ -1,552 +1,552 @@
1
- // SPDX-License-Identifier: MIT
2
- // Copyright (c) 2025 archive_r Team
3
-
4
- #include "archive_type.h"
5
- #include <cstdio>
6
- #include <cstdlib>
7
- #include <cstring>
8
- #include <stdexcept>
9
- #include <string>
10
- #include <typeinfo>
11
- #include <unordered_map>
12
-
13
- namespace archive_r {
14
-
15
- namespace {
16
-
17
- [[noreturn]] void throw_archive_fault(const std::string &message, struct archive *ar = nullptr) {
18
- const int err = ar ? archive_errno(ar) : 0;
19
- throw make_entry_fault_error(message, {}, err);
20
- }
21
-
22
- } // namespace
23
-
24
- void archive_deleter::operator()(struct archive *a) const {
25
- if (a) {
26
- archive_read_close(a);
27
- archive_read_free(a);
28
- }
29
- }
30
-
31
- static void set_passphrases(struct archive *ar, const std::vector<std::string> &passphrases) {
32
- size_t index = 0;
33
- for (const auto &passphrase : passphrases) {
34
- if (archive_read_add_passphrase(ar, passphrase.c_str()) != ARCHIVE_OK) {
35
- char buf[1024];
36
- std::snprintf(buf, sizeof(buf), "Failed to add passphrase (index %zu): %s", index, archive_error_string(ar));
37
- throw_archive_fault(buf, ar);
38
- }
39
- ++index;
40
- }
41
- }
42
-
43
- static void configure_formats(struct archive *ar, const std::vector<std::string> &format_names) {
44
- if (format_names.empty()) {
45
- archive_read_support_format_all(ar);
46
- return;
47
- }
48
-
49
- using FormatHandler = int (*)(struct archive *);
50
- static const std::unordered_map<std::string, FormatHandler> kFormatHandlers = {
51
- { "7zip", archive_read_support_format_7zip }, { "ar", archive_read_support_format_ar }, { "cab", archive_read_support_format_cab }, { "cpio", archive_read_support_format_cpio },
52
- { "empty", archive_read_support_format_empty }, { "iso9660", archive_read_support_format_iso9660 }, { "lha", archive_read_support_format_lha }, { "mtree", archive_read_support_format_mtree },
53
- { "rar", archive_read_support_format_rar }, { "raw", archive_read_support_format_raw }, { "tar", archive_read_support_format_tar }, { "warc", archive_read_support_format_warc },
54
- { "xar", archive_read_support_format_xar }, { "zip", archive_read_support_format_zip },
55
- };
56
-
57
- for (const auto &format : format_names) {
58
- auto it = kFormatHandlers.find(format);
59
- if (it == kFormatHandlers.end()) {
60
- char buf[1024];
61
- std::snprintf(buf, sizeof(buf), "Unsupported archive format specified (%s)", format.c_str());
62
- throw_archive_fault(buf);
63
- }
64
-
65
- int r = it->second(ar);
66
- if (r != ARCHIVE_OK) {
67
- char buf[1024];
68
- std::snprintf(buf, sizeof(buf), "Failed to enable format (%s): %s", format.c_str(), archive_error_string(ar));
69
- throw_archive_fault(buf, ar);
70
- }
71
- }
72
- }
73
-
74
- static std::string format_archive_error(struct archive *ar, const std::string &prefix) {
75
- std::string message = prefix;
76
-
77
- if (!ar) {
78
- return message;
79
- }
80
-
81
- if (const char *err = archive_error_string(ar)) {
82
- if (*err) {
83
- message += ": ";
84
- message += err;
85
- }
86
- }
87
-
88
- const int code = archive_errno(ar);
89
- if (code != 0) {
90
- message += " (libarchive errno=";
91
- message += std::to_string(code);
92
- message += ')';
93
- }
94
-
95
- return message;
96
- }
97
-
98
- archive_ptr new_read_archive_common(const std::vector<std::string> &passphrases, const std::vector<std::string> &format_names, open_delegate archive_open) {
99
- archive_ptr ar(archive_read_new());
100
- if (!ar) {
101
- throw_archive_fault("archive_read_new failed.");
102
- }
103
-
104
- configure_formats(ar.get(), format_names);
105
- archive_read_support_filter_all(ar.get());
106
- set_passphrases(ar.get(), passphrases);
107
-
108
- if (archive_open(ar.get()) != ARCHIVE_OK) {
109
- char buf[1024];
110
- std::snprintf(buf, sizeof(buf), "archive_read_open failed: (%d)%s", archive_errno(ar.get()), archive_error_string(ar.get()));
111
- throw_archive_fault(buf, ar.get());
112
- }
113
-
114
- return ar;
115
- }
116
-
117
- Archive::Archive()
118
- : _ar(nullptr)
119
- , current_entry(nullptr)
120
- , _at_eof(false)
121
- , _current_entry_content_ready(false) {}
122
-
123
- Archive::~Archive() { close_archive(); }
124
-
125
- void Archive::close_archive() {
126
- if (_ar) {
127
- archive_read_close(_ar);
128
- archive_read_free(_ar);
129
- _ar = nullptr;
130
- }
131
- current_entry = nullptr;
132
- current_entryname.clear();
133
- _current_entry_content_ready = false;
134
- }
135
-
136
- void Archive::rewind() {
137
- close_archive();
138
- current_entryname.clear();
139
- current_entry = nullptr;
140
- _at_eof = false;
141
- open_archive();
142
- _current_entry_content_ready = false;
143
- }
144
-
145
- bool Archive::skip_to_next_header() {
146
- if (!_ar) {
147
- throw std::logic_error("Archive handle is not initialized");
148
- }
149
-
150
- if (_at_eof) {
151
- current_entry = nullptr;
152
- current_entryname.clear();
153
- _current_entry_content_ready = false;
154
- return false;
155
- }
156
-
157
- const int r = archive_read_next_header(_ar, &current_entry);
158
-
159
- if (r == ARCHIVE_EOF) {
160
- _at_eof = true;
161
- current_entry = nullptr;
162
- current_entryname.clear();
163
- _current_entry_content_ready = false;
164
- return false;
165
- }
166
-
167
- if (r == ARCHIVE_FAILED || r == ARCHIVE_FATAL || r == ARCHIVE_RETRY) {
168
- const std::string message = format_archive_error(_ar, "Failed to read next header");
169
- _at_eof = true;
170
- current_entry = nullptr;
171
- current_entryname.clear();
172
- _current_entry_content_ready = false;
173
- raise_archive_error(message);
174
- }
175
-
176
- const char *name = archive_entry_pathname(current_entry);
177
- if (name == nullptr) {
178
- throw make_entry_fault_error("Failed to retrieve entry pathname (archive_entry_pathname returned null)", {}, 0);
179
- }
180
- current_entryname = std::string(name);
181
- _current_entry_content_ready = true;
182
- return true;
183
- }
184
-
185
- bool Archive::skip_data() {
186
- if (!_ar) {
187
- throw std::logic_error("Archive handle is not initialized");
188
- }
189
-
190
- int r = archive_read_data_skip(_ar);
191
- if (r != ARCHIVE_OK) {
192
- raise_archive_error(format_archive_error(_ar, "Failed to skip data"));
193
- }
194
- _current_entry_content_ready = false;
195
- return true;
196
- }
197
-
198
- bool Archive::skip_to_entry(const std::string &entryname) {
199
-
200
- if (current_entryname == entryname && _current_entry_content_ready) {
201
- return true;
202
- }
203
-
204
- if (_at_eof) {
205
- rewind();
206
- }
207
-
208
- const std::string start_position = current_entryname;
209
-
210
- if (search_forward_until_eof(entryname)) {
211
- return true;
212
- }
213
-
214
- if (start_position.empty()) {
215
- return false;
216
- }
217
-
218
- rewind();
219
-
220
- return search_until_position(entryname, start_position);
221
- }
222
-
223
- bool Archive::skip_to_eof() {
224
- while (!_at_eof) {
225
- skip_to_next_header();
226
- }
227
- return true;
228
- }
229
-
230
- bool Archive::search_forward_until_eof(const std::string &entryname) {
231
- while (skip_to_next_header()) {
232
- if (current_entryname == entryname) {
233
- return true;
234
- }
235
- if (!skip_data()) {
236
- return false;
237
- }
238
- }
239
- return false;
240
- }
241
-
242
- bool Archive::search_until_position(const std::string &entryname, const std::string &stop_position) {
243
- while (skip_to_next_header()) {
244
- if (current_entryname == entryname) {
245
- return true;
246
- }
247
- if (current_entryname == stop_position) {
248
- break;
249
- }
250
- if (!skip_data()) {
251
- return false;
252
- }
253
- }
254
- return false;
255
- }
256
-
257
- ssize_t Archive::read_current(void *buff, size_t len) {
258
- if (!_ar) {
259
- throw std::logic_error("Archive handle is not initialized");
260
- }
261
-
262
- const ssize_t bytes_read = archive_read_data(_ar, buff, len);
263
- if (bytes_read < 0) {
264
- raise_archive_error(format_archive_error(_ar, "Failed to read data"));
265
- }
266
-
267
- _current_entry_content_ready = false;
268
- return bytes_read;
269
- }
270
-
271
- uint64_t Archive::current_entry_size() const {
272
- if (!current_entry) {
273
- return 0;
274
- }
275
- return archive_entry_size(current_entry);
276
- }
277
-
278
- mode_t Archive::current_entry_filetype() const {
279
- if (!current_entry) {
280
- return 0;
281
- }
282
- return archive_entry_filetype(current_entry);
283
- }
284
-
285
- EntryMetadataMap Archive::current_entry_metadata(const std::unordered_set<std::string> &allowed_keys) const {
286
- EntryMetadataMap metadata;
287
- if (!current_entry || allowed_keys.empty()) {
288
- return metadata;
289
- }
290
-
291
- auto wants = [&allowed_keys](const std::string &key) { return allowed_keys.find(key) != allowed_keys.end(); };
292
-
293
- const char *pathname_utf8 = archive_entry_pathname_utf8(current_entry);
294
- if (pathname_utf8 && *pathname_utf8 && wants("pathname")) {
295
- metadata["pathname"] = std::string(pathname_utf8);
296
- } else {
297
- const char *pathname = archive_entry_pathname(current_entry);
298
- if (pathname && *pathname && wants("pathname")) {
299
- metadata["pathname"] = std::string(pathname);
300
- }
301
- }
302
-
303
- if (const char *sourcepath = archive_entry_sourcepath(current_entry)) {
304
- if (wants("sourcepath")) {
305
- metadata["sourcepath"] = std::string(sourcepath);
306
- }
307
- }
308
-
309
- if (const char *symlink_utf8 = archive_entry_symlink_utf8(current_entry)) {
310
- if (wants("symlink")) {
311
- metadata["symlink"] = std::string(symlink_utf8);
312
- }
313
- }
314
-
315
- if (const char *hardlink_utf8 = archive_entry_hardlink_utf8(current_entry)) {
316
- if (wants("hardlink")) {
317
- metadata["hardlink"] = std::string(hardlink_utf8);
318
- }
319
- } else if (const char *hardlink = archive_entry_hardlink(current_entry)) {
320
- if (wants("hardlink")) {
321
- metadata["hardlink"] = std::string(hardlink);
322
- }
323
- }
324
-
325
- if (const char *uname_utf8 = archive_entry_uname_utf8(current_entry)) {
326
- if (wants("uname")) {
327
- metadata["uname"] = std::string(uname_utf8);
328
- }
329
- } else if (const char *uname = archive_entry_uname(current_entry)) {
330
- if (wants("uname")) {
331
- metadata["uname"] = std::string(uname);
332
- }
333
- }
334
-
335
- if (const char *gname_utf8 = archive_entry_gname_utf8(current_entry)) {
336
- if (wants("gname")) {
337
- metadata["gname"] = std::string(gname_utf8);
338
- }
339
- } else if (const char *gname = archive_entry_gname(current_entry)) {
340
- if (wants("gname")) {
341
- metadata["gname"] = std::string(gname);
342
- }
343
- }
344
-
345
- if (wants("uid")) {
346
- bool has_uid = archive_entry_uname(current_entry) != nullptr;
347
- if (!has_uid) {
348
- has_uid = archive_entry_uid(current_entry) != 0;
349
- }
350
- if (has_uid) {
351
- metadata["uid"] = static_cast<int64_t>(archive_entry_uid(current_entry));
352
- }
353
- }
354
-
355
- if (wants("gid")) {
356
- bool has_gid = archive_entry_gname(current_entry) != nullptr;
357
- if (!has_gid) {
358
- has_gid = archive_entry_gid(current_entry) != 0;
359
- }
360
- if (has_gid) {
361
- metadata["gid"] = static_cast<int64_t>(archive_entry_gid(current_entry));
362
- }
363
- }
364
-
365
- if (wants("perm")) {
366
- metadata["perm"] = static_cast<uint64_t>(archive_entry_perm(current_entry));
367
- }
368
-
369
- if (wants("mode")) {
370
- metadata["mode"] = static_cast<uint64_t>(archive_entry_mode(current_entry));
371
- }
372
-
373
- if (wants("filetype")) {
374
- metadata["filetype"] = static_cast<uint64_t>(archive_entry_filetype(current_entry));
375
- }
376
-
377
- if (archive_entry_size_is_set(current_entry) && wants("size")) {
378
- metadata["size"] = static_cast<uint64_t>(archive_entry_size(current_entry));
379
- }
380
-
381
- if (archive_entry_dev_is_set(current_entry) && wants("dev")) {
382
- metadata["dev"] = EntryMetadataDeviceNumbers{ static_cast<uint64_t>(archive_entry_devmajor(current_entry)), static_cast<uint64_t>(archive_entry_devminor(current_entry)) };
383
- }
384
-
385
- if (wants("rdev")) {
386
- const dev_t rdev = archive_entry_rdev(current_entry);
387
- if (rdev != 0) {
388
- metadata["rdev"] = EntryMetadataDeviceNumbers{ static_cast<uint64_t>(archive_entry_rdevmajor(current_entry)), static_cast<uint64_t>(archive_entry_rdevminor(current_entry)) };
389
- }
390
- }
391
-
392
- if (archive_entry_ino_is_set(current_entry)) {
393
- if (wants("ino")) {
394
- metadata["ino"] = static_cast<uint64_t>(archive_entry_ino(current_entry));
395
- }
396
- if (wants("ino64")) {
397
- metadata["ino64"] = static_cast<uint64_t>(archive_entry_ino64(current_entry));
398
- }
399
- }
400
-
401
- if (wants("nlink")) {
402
- metadata["nlink"] = static_cast<uint64_t>(archive_entry_nlink(current_entry));
403
- }
404
-
405
- if (const char *strmode = archive_entry_strmode(current_entry)) {
406
- if (wants("strmode")) {
407
- metadata["strmode"] = std::string(strmode);
408
- }
409
- }
410
-
411
- auto record_time = [&metadata, &wants](const char *key, bool is_set, time_t seconds, long nanoseconds) {
412
- if (!wants(key) || !is_set) {
413
- return;
414
- }
415
- metadata[key] = EntryMetadataTime{ static_cast<int64_t>(seconds), static_cast<int32_t>(nanoseconds) };
416
- };
417
-
418
- record_time("atime", archive_entry_atime_is_set(current_entry) != 0, archive_entry_atime(current_entry), archive_entry_atime_nsec(current_entry));
419
- record_time("birthtime", archive_entry_birthtime_is_set(current_entry) != 0, archive_entry_birthtime(current_entry), archive_entry_birthtime_nsec(current_entry));
420
- record_time("ctime", archive_entry_ctime_is_set(current_entry) != 0, archive_entry_ctime(current_entry), archive_entry_ctime_nsec(current_entry));
421
- record_time("mtime", archive_entry_mtime_is_set(current_entry) != 0, archive_entry_mtime(current_entry), archive_entry_mtime_nsec(current_entry));
422
-
423
- unsigned long fflags_set = 0;
424
- unsigned long fflags_clear = 0;
425
- archive_entry_fflags(current_entry, &fflags_set, &fflags_clear);
426
- if ((fflags_set != 0 || fflags_clear != 0) && wants("fflags")) {
427
- metadata["fflags"] = EntryMetadataFileFlags{ static_cast<uint64_t>(fflags_set), static_cast<uint64_t>(fflags_clear) };
428
- }
429
-
430
- if (const char *fflags_text = archive_entry_fflags_text(current_entry)) {
431
- if (wants("fflags_text")) {
432
- metadata["fflags_text"] = std::string(fflags_text);
433
- }
434
- }
435
-
436
- auto store_encryption_flag = [&metadata, &wants](const char *key, int value) {
437
- if (!wants(key) || value < 0) {
438
- return;
439
- }
440
- metadata[key] = (value != 0);
441
- };
442
-
443
- store_encryption_flag("is_data_encrypted", archive_entry_is_data_encrypted(current_entry));
444
- store_encryption_flag("is_metadata_encrypted", archive_entry_is_metadata_encrypted(current_entry));
445
- store_encryption_flag("is_encrypted", archive_entry_is_encrypted(current_entry));
446
-
447
- if (wants("symlink_type")) {
448
- const int symlink_type = archive_entry_symlink_type(current_entry);
449
- if (symlink_type != 0) {
450
- metadata["symlink_type"] = static_cast<int64_t>(symlink_type);
451
- }
452
- }
453
-
454
- ssize_t acl_length = 0;
455
- char *acl_text = wants("acl_text") ? archive_entry_acl_to_text(current_entry, &acl_length, ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA) : nullptr;
456
- if (acl_text) {
457
- if (acl_length >= 0) {
458
- metadata["acl_text"] = std::string(acl_text, static_cast<size_t>(acl_length));
459
- } else {
460
- metadata["acl_text"] = std::string(acl_text);
461
- }
462
- std::free(acl_text);
463
- }
464
-
465
- if (wants("acl_types")) {
466
- const int acl_types = archive_entry_acl_types(current_entry);
467
- if (acl_types != 0) {
468
- metadata["acl_types"] = static_cast<int64_t>(acl_types);
469
- }
470
- }
471
-
472
- const int xattr_count = archive_entry_xattr_count(current_entry);
473
- if (xattr_count > 0 && wants("xattr")) {
474
- std::vector<EntryMetadataXattr> xattrs;
475
- xattrs.reserve(static_cast<size_t>(xattr_count));
476
- archive_entry_xattr_reset(current_entry);
477
- const char *name = nullptr;
478
- const void *value = nullptr;
479
- size_t size = 0;
480
- while (archive_entry_xattr_next(current_entry, &name, &value, &size) == ARCHIVE_OK) {
481
- EntryMetadataXattr xattr;
482
- if (name) {
483
- xattr.name = name;
484
- }
485
- if (value && size > 0) {
486
- const auto *begin = static_cast<const uint8_t *>(value);
487
- xattr.value.assign(begin, begin + size);
488
- }
489
- xattrs.push_back(std::move(xattr));
490
- }
491
- if (!xattrs.empty()) {
492
- metadata["xattr"] = std::move(xattrs);
493
- }
494
- }
495
-
496
- const int sparse_count = archive_entry_sparse_count(current_entry);
497
- if (sparse_count > 0 && wants("sparse")) {
498
- std::vector<EntryMetadataSparseChunk> sparse_regions;
499
- sparse_regions.reserve(static_cast<size_t>(sparse_count));
500
- archive_entry_sparse_reset(current_entry);
501
- la_int64_t offset = 0;
502
- la_int64_t length = 0;
503
- while (archive_entry_sparse_next(current_entry, &offset, &length) == ARCHIVE_OK) {
504
- sparse_regions.push_back(EntryMetadataSparseChunk{ static_cast<int64_t>(offset), static_cast<int64_t>(length) });
505
- }
506
- if (!sparse_regions.empty()) {
507
- metadata["sparse"] = std::move(sparse_regions);
508
- }
509
- }
510
-
511
- if (wants("mac_metadata")) {
512
- size_t mac_metadata_size = 0;
513
- const void *mac_metadata = archive_entry_mac_metadata(current_entry, &mac_metadata_size);
514
- if (mac_metadata && mac_metadata_size > 0) {
515
- const auto *begin = static_cast<const uint8_t *>(mac_metadata);
516
- metadata["mac_metadata"] = std::vector<uint8_t>(begin, begin + mac_metadata_size);
517
- }
518
- }
519
-
520
- static constexpr struct {
521
- int type;
522
- const char *name;
523
- size_t length;
524
- } kDigestDescriptors[] = { { ARCHIVE_ENTRY_DIGEST_MD5, "md5", 16 }, { ARCHIVE_ENTRY_DIGEST_RMD160, "rmd160", 20 }, { ARCHIVE_ENTRY_DIGEST_SHA1, "sha1", 20 },
525
- { ARCHIVE_ENTRY_DIGEST_SHA256, "sha256", 32 }, { ARCHIVE_ENTRY_DIGEST_SHA384, "sha384", 48 }, { ARCHIVE_ENTRY_DIGEST_SHA512, "sha512", 64 } };
526
-
527
- if (wants("digests")) {
528
- std::vector<EntryMetadataDigest> digests;
529
- for (const auto &descriptor : kDigestDescriptors) {
530
- const unsigned char *digest = archive_entry_digest(current_entry, descriptor.type);
531
- if (!digest) {
532
- continue;
533
- }
534
- EntryMetadataDigest digest_entry;
535
- digest_entry.algorithm = descriptor.name;
536
- digest_entry.value.assign(digest, digest + descriptor.length);
537
- digests.push_back(std::move(digest_entry));
538
- }
539
- if (!digests.empty()) {
540
- metadata["digests"] = std::move(digests);
541
- }
542
- }
543
-
544
- return metadata;
545
- }
546
-
547
- [[noreturn]] void Archive::raise_archive_error(const std::string &message) {
548
- const int err = _ar ? archive_errno(_ar) : 0;
549
- throw make_entry_fault_error(message, {}, err);
550
- }
551
-
552
- } // namespace archive_r
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #include "archive_type.h"
5
+ #include <cstdio>
6
+ #include <cstdlib>
7
+ #include <cstring>
8
+ #include <stdexcept>
9
+ #include <string>
10
+ #include <typeinfo>
11
+ #include <unordered_map>
12
+
13
+ namespace archive_r {
14
+
15
+ namespace {
16
+
17
+ [[noreturn]] void throw_archive_fault(const std::string &message, struct archive *ar = nullptr) {
18
+ const int err = ar ? archive_errno(ar) : 0;
19
+ throw make_entry_fault_error(message, {}, err);
20
+ }
21
+
22
+ } // namespace
23
+
24
+ void archive_deleter::operator()(struct archive *a) const {
25
+ if (a) {
26
+ archive_read_close(a);
27
+ archive_read_free(a);
28
+ }
29
+ }
30
+
31
+ static void set_passphrases(struct archive *ar, const std::vector<std::string> &passphrases) {
32
+ size_t index = 0;
33
+ for (const auto &passphrase : passphrases) {
34
+ if (archive_read_add_passphrase(ar, passphrase.c_str()) != ARCHIVE_OK) {
35
+ char buf[1024];
36
+ std::snprintf(buf, sizeof(buf), "Failed to add passphrase (index %zu): %s", index, archive_error_string(ar));
37
+ throw_archive_fault(buf, ar);
38
+ }
39
+ ++index;
40
+ }
41
+ }
42
+
43
+ static void configure_formats(struct archive *ar, const std::vector<std::string> &format_names) {
44
+ if (format_names.empty()) {
45
+ archive_read_support_format_all(ar);
46
+ return;
47
+ }
48
+
49
+ using FormatHandler = int (*)(struct archive *);
50
+ static const std::unordered_map<std::string, FormatHandler> kFormatHandlers = {
51
+ { "7zip", archive_read_support_format_7zip }, { "ar", archive_read_support_format_ar }, { "cab", archive_read_support_format_cab }, { "cpio", archive_read_support_format_cpio },
52
+ { "empty", archive_read_support_format_empty }, { "iso9660", archive_read_support_format_iso9660 }, { "lha", archive_read_support_format_lha }, { "mtree", archive_read_support_format_mtree },
53
+ { "rar", archive_read_support_format_rar }, { "raw", archive_read_support_format_raw }, { "tar", archive_read_support_format_tar }, { "warc", archive_read_support_format_warc },
54
+ { "xar", archive_read_support_format_xar }, { "zip", archive_read_support_format_zip },
55
+ };
56
+
57
+ for (const auto &format : format_names) {
58
+ auto it = kFormatHandlers.find(format);
59
+ if (it == kFormatHandlers.end()) {
60
+ char buf[1024];
61
+ std::snprintf(buf, sizeof(buf), "Unsupported archive format specified (%s)", format.c_str());
62
+ throw_archive_fault(buf);
63
+ }
64
+
65
+ int r = it->second(ar);
66
+ if (r != ARCHIVE_OK) {
67
+ char buf[1024];
68
+ std::snprintf(buf, sizeof(buf), "Failed to enable format (%s): %s", format.c_str(), archive_error_string(ar));
69
+ throw_archive_fault(buf, ar);
70
+ }
71
+ }
72
+ }
73
+
74
+ static std::string format_archive_error(struct archive *ar, const std::string &prefix) {
75
+ std::string message = prefix;
76
+
77
+ if (!ar) {
78
+ return message;
79
+ }
80
+
81
+ if (const char *err = archive_error_string(ar)) {
82
+ if (*err) {
83
+ message += ": ";
84
+ message += err;
85
+ }
86
+ }
87
+
88
+ const int code = archive_errno(ar);
89
+ if (code != 0) {
90
+ message += " (libarchive errno=";
91
+ message += std::to_string(code);
92
+ message += ')';
93
+ }
94
+
95
+ return message;
96
+ }
97
+
98
+ archive_ptr new_read_archive_common(const std::vector<std::string> &passphrases, const std::vector<std::string> &format_names, open_delegate archive_open) {
99
+ archive_ptr ar(archive_read_new());
100
+ if (!ar) {
101
+ throw_archive_fault("archive_read_new failed.");
102
+ }
103
+
104
+ configure_formats(ar.get(), format_names);
105
+ archive_read_support_filter_all(ar.get());
106
+ set_passphrases(ar.get(), passphrases);
107
+
108
+ if (archive_open(ar.get()) != ARCHIVE_OK) {
109
+ char buf[1024];
110
+ std::snprintf(buf, sizeof(buf), "archive_read_open failed: (%d)%s", archive_errno(ar.get()), archive_error_string(ar.get()));
111
+ throw_archive_fault(buf, ar.get());
112
+ }
113
+
114
+ return ar;
115
+ }
116
+
117
+ Archive::Archive()
118
+ : _ar(nullptr)
119
+ , current_entry(nullptr)
120
+ , _at_eof(false)
121
+ , _current_entry_content_ready(false) {}
122
+
123
+ Archive::~Archive() { close_archive(); }
124
+
125
+ void Archive::close_archive() {
126
+ if (_ar) {
127
+ archive_read_close(_ar);
128
+ archive_read_free(_ar);
129
+ _ar = nullptr;
130
+ }
131
+ current_entry = nullptr;
132
+ current_entryname.clear();
133
+ _current_entry_content_ready = false;
134
+ }
135
+
136
+ void Archive::rewind() {
137
+ close_archive();
138
+ current_entryname.clear();
139
+ current_entry = nullptr;
140
+ _at_eof = false;
141
+ open_archive();
142
+ _current_entry_content_ready = false;
143
+ }
144
+
145
+ bool Archive::skip_to_next_header() {
146
+ if (!_ar) {
147
+ throw std::logic_error("Archive handle is not initialized");
148
+ }
149
+
150
+ if (_at_eof) {
151
+ current_entry = nullptr;
152
+ current_entryname.clear();
153
+ _current_entry_content_ready = false;
154
+ return false;
155
+ }
156
+
157
+ const int r = archive_read_next_header(_ar, &current_entry);
158
+
159
+ if (r == ARCHIVE_EOF) {
160
+ _at_eof = true;
161
+ current_entry = nullptr;
162
+ current_entryname.clear();
163
+ _current_entry_content_ready = false;
164
+ return false;
165
+ }
166
+
167
+ if (r == ARCHIVE_FAILED || r == ARCHIVE_FATAL || r == ARCHIVE_RETRY) {
168
+ const std::string message = format_archive_error(_ar, "Failed to read next header");
169
+ _at_eof = true;
170
+ current_entry = nullptr;
171
+ current_entryname.clear();
172
+ _current_entry_content_ready = false;
173
+ raise_archive_error(message);
174
+ }
175
+
176
+ const char *name = archive_entry_pathname(current_entry);
177
+ if (name == nullptr) {
178
+ throw make_entry_fault_error("Failed to retrieve entry pathname (archive_entry_pathname returned null)", {}, 0);
179
+ }
180
+ current_entryname = std::string(name);
181
+ _current_entry_content_ready = true;
182
+ return true;
183
+ }
184
+
185
+ bool Archive::skip_data() {
186
+ if (!_ar) {
187
+ throw std::logic_error("Archive handle is not initialized");
188
+ }
189
+
190
+ int r = archive_read_data_skip(_ar);
191
+ if (r != ARCHIVE_OK) {
192
+ raise_archive_error(format_archive_error(_ar, "Failed to skip data"));
193
+ }
194
+ _current_entry_content_ready = false;
195
+ return true;
196
+ }
197
+
198
+ bool Archive::skip_to_entry(const std::string &entryname) {
199
+
200
+ if (current_entryname == entryname && _current_entry_content_ready) {
201
+ return true;
202
+ }
203
+
204
+ if (_at_eof) {
205
+ rewind();
206
+ }
207
+
208
+ const std::string start_position = current_entryname;
209
+
210
+ if (search_forward_until_eof(entryname)) {
211
+ return true;
212
+ }
213
+
214
+ if (start_position.empty()) {
215
+ return false;
216
+ }
217
+
218
+ rewind();
219
+
220
+ return search_until_position(entryname, start_position);
221
+ }
222
+
223
+ bool Archive::skip_to_eof() {
224
+ while (!_at_eof) {
225
+ skip_to_next_header();
226
+ }
227
+ return true;
228
+ }
229
+
230
+ bool Archive::search_forward_until_eof(const std::string &entryname) {
231
+ while (skip_to_next_header()) {
232
+ if (current_entryname == entryname) {
233
+ return true;
234
+ }
235
+ if (!skip_data()) {
236
+ return false;
237
+ }
238
+ }
239
+ return false;
240
+ }
241
+
242
+ bool Archive::search_until_position(const std::string &entryname, const std::string &stop_position) {
243
+ while (skip_to_next_header()) {
244
+ if (current_entryname == entryname) {
245
+ return true;
246
+ }
247
+ if (current_entryname == stop_position) {
248
+ break;
249
+ }
250
+ if (!skip_data()) {
251
+ return false;
252
+ }
253
+ }
254
+ return false;
255
+ }
256
+
257
+ ssize_t Archive::read_current(void *buff, size_t len) {
258
+ if (!_ar) {
259
+ throw std::logic_error("Archive handle is not initialized");
260
+ }
261
+
262
+ const ssize_t bytes_read = archive_read_data(_ar, buff, len);
263
+ if (bytes_read < 0) {
264
+ raise_archive_error(format_archive_error(_ar, "Failed to read data"));
265
+ }
266
+
267
+ _current_entry_content_ready = false;
268
+ return bytes_read;
269
+ }
270
+
271
+ uint64_t Archive::current_entry_size() const {
272
+ if (!current_entry) {
273
+ return 0;
274
+ }
275
+ return archive_entry_size(current_entry);
276
+ }
277
+
278
+ mode_t Archive::current_entry_filetype() const {
279
+ if (!current_entry) {
280
+ return 0;
281
+ }
282
+ return archive_entry_filetype(current_entry);
283
+ }
284
+
285
+ EntryMetadataMap Archive::current_entry_metadata(const std::unordered_set<std::string> &allowed_keys) const {
286
+ EntryMetadataMap metadata;
287
+ if (!current_entry || allowed_keys.empty()) {
288
+ return metadata;
289
+ }
290
+
291
+ auto wants = [&allowed_keys](const std::string &key) { return allowed_keys.find(key) != allowed_keys.end(); };
292
+
293
+ const char *pathname_utf8 = archive_entry_pathname_utf8(current_entry);
294
+ if (pathname_utf8 && *pathname_utf8 && wants("pathname")) {
295
+ metadata["pathname"] = std::string(pathname_utf8);
296
+ } else {
297
+ const char *pathname = archive_entry_pathname(current_entry);
298
+ if (pathname && *pathname && wants("pathname")) {
299
+ metadata["pathname"] = std::string(pathname);
300
+ }
301
+ }
302
+
303
+ if (const char *sourcepath = archive_entry_sourcepath(current_entry)) {
304
+ if (wants("sourcepath")) {
305
+ metadata["sourcepath"] = std::string(sourcepath);
306
+ }
307
+ }
308
+
309
+ if (const char *symlink_utf8 = archive_entry_symlink_utf8(current_entry)) {
310
+ if (wants("symlink")) {
311
+ metadata["symlink"] = std::string(symlink_utf8);
312
+ }
313
+ }
314
+
315
+ if (const char *hardlink_utf8 = archive_entry_hardlink_utf8(current_entry)) {
316
+ if (wants("hardlink")) {
317
+ metadata["hardlink"] = std::string(hardlink_utf8);
318
+ }
319
+ } else if (const char *hardlink = archive_entry_hardlink(current_entry)) {
320
+ if (wants("hardlink")) {
321
+ metadata["hardlink"] = std::string(hardlink);
322
+ }
323
+ }
324
+
325
+ if (const char *uname_utf8 = archive_entry_uname_utf8(current_entry)) {
326
+ if (wants("uname")) {
327
+ metadata["uname"] = std::string(uname_utf8);
328
+ }
329
+ } else if (const char *uname = archive_entry_uname(current_entry)) {
330
+ if (wants("uname")) {
331
+ metadata["uname"] = std::string(uname);
332
+ }
333
+ }
334
+
335
+ if (const char *gname_utf8 = archive_entry_gname_utf8(current_entry)) {
336
+ if (wants("gname")) {
337
+ metadata["gname"] = std::string(gname_utf8);
338
+ }
339
+ } else if (const char *gname = archive_entry_gname(current_entry)) {
340
+ if (wants("gname")) {
341
+ metadata["gname"] = std::string(gname);
342
+ }
343
+ }
344
+
345
+ if (wants("uid")) {
346
+ bool has_uid = archive_entry_uname(current_entry) != nullptr;
347
+ if (!has_uid) {
348
+ has_uid = archive_entry_uid(current_entry) != 0;
349
+ }
350
+ if (has_uid) {
351
+ metadata["uid"] = static_cast<int64_t>(archive_entry_uid(current_entry));
352
+ }
353
+ }
354
+
355
+ if (wants("gid")) {
356
+ bool has_gid = archive_entry_gname(current_entry) != nullptr;
357
+ if (!has_gid) {
358
+ has_gid = archive_entry_gid(current_entry) != 0;
359
+ }
360
+ if (has_gid) {
361
+ metadata["gid"] = static_cast<int64_t>(archive_entry_gid(current_entry));
362
+ }
363
+ }
364
+
365
+ if (wants("perm")) {
366
+ metadata["perm"] = static_cast<uint64_t>(archive_entry_perm(current_entry));
367
+ }
368
+
369
+ if (wants("mode")) {
370
+ metadata["mode"] = static_cast<uint64_t>(archive_entry_mode(current_entry));
371
+ }
372
+
373
+ if (wants("filetype")) {
374
+ metadata["filetype"] = static_cast<uint64_t>(archive_entry_filetype(current_entry));
375
+ }
376
+
377
+ if (archive_entry_size_is_set(current_entry) && wants("size")) {
378
+ metadata["size"] = static_cast<uint64_t>(archive_entry_size(current_entry));
379
+ }
380
+
381
+ if (archive_entry_dev_is_set(current_entry) && wants("dev")) {
382
+ metadata["dev"] = EntryMetadataDeviceNumbers{ static_cast<uint64_t>(archive_entry_devmajor(current_entry)), static_cast<uint64_t>(archive_entry_devminor(current_entry)) };
383
+ }
384
+
385
+ if (wants("rdev")) {
386
+ const dev_t rdev = archive_entry_rdev(current_entry);
387
+ if (rdev != 0) {
388
+ metadata["rdev"] = EntryMetadataDeviceNumbers{ static_cast<uint64_t>(archive_entry_rdevmajor(current_entry)), static_cast<uint64_t>(archive_entry_rdevminor(current_entry)) };
389
+ }
390
+ }
391
+
392
+ if (archive_entry_ino_is_set(current_entry)) {
393
+ if (wants("ino")) {
394
+ metadata["ino"] = static_cast<uint64_t>(archive_entry_ino(current_entry));
395
+ }
396
+ if (wants("ino64")) {
397
+ metadata["ino64"] = static_cast<uint64_t>(archive_entry_ino64(current_entry));
398
+ }
399
+ }
400
+
401
+ if (wants("nlink")) {
402
+ metadata["nlink"] = static_cast<uint64_t>(archive_entry_nlink(current_entry));
403
+ }
404
+
405
+ if (const char *strmode = archive_entry_strmode(current_entry)) {
406
+ if (wants("strmode")) {
407
+ metadata["strmode"] = std::string(strmode);
408
+ }
409
+ }
410
+
411
+ auto record_time = [&metadata, &wants](const char *key, bool is_set, time_t seconds, long nanoseconds) {
412
+ if (!wants(key) || !is_set) {
413
+ return;
414
+ }
415
+ metadata[key] = EntryMetadataTime{ static_cast<int64_t>(seconds), static_cast<int32_t>(nanoseconds) };
416
+ };
417
+
418
+ record_time("atime", archive_entry_atime_is_set(current_entry) != 0, archive_entry_atime(current_entry), archive_entry_atime_nsec(current_entry));
419
+ record_time("birthtime", archive_entry_birthtime_is_set(current_entry) != 0, archive_entry_birthtime(current_entry), archive_entry_birthtime_nsec(current_entry));
420
+ record_time("ctime", archive_entry_ctime_is_set(current_entry) != 0, archive_entry_ctime(current_entry), archive_entry_ctime_nsec(current_entry));
421
+ record_time("mtime", archive_entry_mtime_is_set(current_entry) != 0, archive_entry_mtime(current_entry), archive_entry_mtime_nsec(current_entry));
422
+
423
+ unsigned long fflags_set = 0;
424
+ unsigned long fflags_clear = 0;
425
+ archive_entry_fflags(current_entry, &fflags_set, &fflags_clear);
426
+ if ((fflags_set != 0 || fflags_clear != 0) && wants("fflags")) {
427
+ metadata["fflags"] = EntryMetadataFileFlags{ static_cast<uint64_t>(fflags_set), static_cast<uint64_t>(fflags_clear) };
428
+ }
429
+
430
+ if (const char *fflags_text = archive_entry_fflags_text(current_entry)) {
431
+ if (wants("fflags_text")) {
432
+ metadata["fflags_text"] = std::string(fflags_text);
433
+ }
434
+ }
435
+
436
+ auto store_encryption_flag = [&metadata, &wants](const char *key, int value) {
437
+ if (!wants(key) || value < 0) {
438
+ return;
439
+ }
440
+ metadata[key] = (value != 0);
441
+ };
442
+
443
+ store_encryption_flag("is_data_encrypted", archive_entry_is_data_encrypted(current_entry));
444
+ store_encryption_flag("is_metadata_encrypted", archive_entry_is_metadata_encrypted(current_entry));
445
+ store_encryption_flag("is_encrypted", archive_entry_is_encrypted(current_entry));
446
+
447
+ if (wants("symlink_type")) {
448
+ const int symlink_type = archive_entry_symlink_type(current_entry);
449
+ if (symlink_type != 0) {
450
+ metadata["symlink_type"] = static_cast<int64_t>(symlink_type);
451
+ }
452
+ }
453
+
454
+ ssize_t acl_length = 0;
455
+ char *acl_text = wants("acl_text") ? archive_entry_acl_to_text(current_entry, &acl_length, ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA) : nullptr;
456
+ if (acl_text) {
457
+ if (acl_length >= 0) {
458
+ metadata["acl_text"] = std::string(acl_text, static_cast<size_t>(acl_length));
459
+ } else {
460
+ metadata["acl_text"] = std::string(acl_text);
461
+ }
462
+ std::free(acl_text);
463
+ }
464
+
465
+ if (wants("acl_types")) {
466
+ const int acl_types = archive_entry_acl_types(current_entry);
467
+ if (acl_types != 0) {
468
+ metadata["acl_types"] = static_cast<int64_t>(acl_types);
469
+ }
470
+ }
471
+
472
+ const int xattr_count = archive_entry_xattr_count(current_entry);
473
+ if (xattr_count > 0 && wants("xattr")) {
474
+ std::vector<EntryMetadataXattr> xattrs;
475
+ xattrs.reserve(static_cast<size_t>(xattr_count));
476
+ archive_entry_xattr_reset(current_entry);
477
+ const char *name = nullptr;
478
+ const void *value = nullptr;
479
+ size_t size = 0;
480
+ while (archive_entry_xattr_next(current_entry, &name, &value, &size) == ARCHIVE_OK) {
481
+ EntryMetadataXattr xattr;
482
+ if (name) {
483
+ xattr.name = name;
484
+ }
485
+ if (value && size > 0) {
486
+ const auto *begin = static_cast<const uint8_t *>(value);
487
+ xattr.value.assign(begin, begin + size);
488
+ }
489
+ xattrs.push_back(std::move(xattr));
490
+ }
491
+ if (!xattrs.empty()) {
492
+ metadata["xattr"] = std::move(xattrs);
493
+ }
494
+ }
495
+
496
+ const int sparse_count = archive_entry_sparse_count(current_entry);
497
+ if (sparse_count > 0 && wants("sparse")) {
498
+ std::vector<EntryMetadataSparseChunk> sparse_regions;
499
+ sparse_regions.reserve(static_cast<size_t>(sparse_count));
500
+ archive_entry_sparse_reset(current_entry);
501
+ la_int64_t offset = 0;
502
+ la_int64_t length = 0;
503
+ while (archive_entry_sparse_next(current_entry, &offset, &length) == ARCHIVE_OK) {
504
+ sparse_regions.push_back(EntryMetadataSparseChunk{ static_cast<int64_t>(offset), static_cast<int64_t>(length) });
505
+ }
506
+ if (!sparse_regions.empty()) {
507
+ metadata["sparse"] = std::move(sparse_regions);
508
+ }
509
+ }
510
+
511
+ if (wants("mac_metadata")) {
512
+ size_t mac_metadata_size = 0;
513
+ const void *mac_metadata = archive_entry_mac_metadata(current_entry, &mac_metadata_size);
514
+ if (mac_metadata && mac_metadata_size > 0) {
515
+ const auto *begin = static_cast<const uint8_t *>(mac_metadata);
516
+ metadata["mac_metadata"] = std::vector<uint8_t>(begin, begin + mac_metadata_size);
517
+ }
518
+ }
519
+
520
+ static constexpr struct {
521
+ int type;
522
+ const char *name;
523
+ size_t length;
524
+ } kDigestDescriptors[] = { { ARCHIVE_ENTRY_DIGEST_MD5, "md5", 16 }, { ARCHIVE_ENTRY_DIGEST_RMD160, "rmd160", 20 }, { ARCHIVE_ENTRY_DIGEST_SHA1, "sha1", 20 },
525
+ { ARCHIVE_ENTRY_DIGEST_SHA256, "sha256", 32 }, { ARCHIVE_ENTRY_DIGEST_SHA384, "sha384", 48 }, { ARCHIVE_ENTRY_DIGEST_SHA512, "sha512", 64 } };
526
+
527
+ if (wants("digests")) {
528
+ std::vector<EntryMetadataDigest> digests;
529
+ for (const auto &descriptor : kDigestDescriptors) {
530
+ const unsigned char *digest = archive_entry_digest(current_entry, descriptor.type);
531
+ if (!digest) {
532
+ continue;
533
+ }
534
+ EntryMetadataDigest digest_entry;
535
+ digest_entry.algorithm = descriptor.name;
536
+ digest_entry.value.assign(digest, digest + descriptor.length);
537
+ digests.push_back(std::move(digest_entry));
538
+ }
539
+ if (!digests.empty()) {
540
+ metadata["digests"] = std::move(digests);
541
+ }
542
+ }
543
+
544
+ return metadata;
545
+ }
546
+
547
+ [[noreturn]] void Archive::raise_archive_error(const std::string &message) {
548
+ const int err = _ar ? archive_errno(_ar) : 0;
549
+ throw make_entry_fault_error(message, {}, err);
550
+ }
551
+
552
+ } // namespace archive_r