archive_r_ruby 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -21
  3. data/NOTICE +116 -116
  4. data/README.md +106 -106
  5. data/VERSION +1 -1
  6. data/ext/archive_r/archive_r_ext.cc +1098 -1098
  7. data/ext/archive_r/extconf.rb +125 -125
  8. data/ext/archive_r/vendor/archive_r/LICENSE +21 -21
  9. data/ext/archive_r/vendor/archive_r/NOTICE +116 -116
  10. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +42 -42
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +180 -180
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -34
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -56
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -46
  15. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +92 -92
  16. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +36 -36
  17. data/ext/archive_r/vendor/archive_r/include/archive_r/platform_compat.h +34 -34
  18. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +156 -156
  19. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +300 -300
  20. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +110 -110
  21. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +161 -161
  22. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +53 -53
  23. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +545 -545
  24. data/ext/archive_r/vendor/archive_r/src/archive_type.h +77 -77
  25. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -35
  26. data/ext/archive_r/vendor/archive_r/src/entry.cc +238 -238
  27. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -26
  28. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -54
  29. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -32
  30. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +56 -56
  31. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +76 -76
  32. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +39 -39
  33. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +208 -208
  34. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +127 -127
  35. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +251 -251
  36. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +109 -109
  37. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +294 -294
  38. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -46
  39. data/ext/archive_r/vendor/archive_r/src/traverser.cc +295 -295
  40. data/lib/archive_r.rb +120 -120
  41. metadata +3 -6
@@ -1,92 +1,92 @@
1
- // SPDX-License-Identifier: MIT
2
- // Copyright (c) 2025 archive_r Team
3
-
4
- #pragma once
5
-
6
- #include <cstddef>
7
- #include <stdexcept>
8
- #include <string>
9
- #include <utility>
10
- #include <variant>
11
- #include <vector>
12
-
13
- namespace archive_r {
14
-
15
- /**
16
- * @brief Represents a single component within a logical path hierarchy.
17
- *
18
- * A component can be one of three shapes:
19
- * - single string value (most common)
20
- * - multi-volume part list (split archives that share a common base name)
21
- */
22
- class PathEntry {
23
- public:
24
- struct Parts {
25
- std::vector<std::string> values;
26
- enum class Ordering { Natural, Given } ordering = Ordering::Natural;
27
- };
28
-
29
- PathEntry() = default;
30
-
31
- explicit PathEntry(std::string value)
32
- : _value(std::move(value)) {}
33
-
34
- explicit PathEntry(Parts parts)
35
- : _value(std::move(parts)) {}
36
-
37
- static PathEntry single(std::string entry) { return PathEntry(std::move(entry)); }
38
-
39
- static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
40
- Parts parts{ std::move(entries), ordering };
41
- if (parts.values.empty()) {
42
- throw std::invalid_argument("multi-volume parts cannot be empty");
43
- }
44
- return PathEntry(std::move(parts));
45
- }
46
-
47
- bool is_single() const { return std::holds_alternative<std::string>(_value); }
48
- bool is_multi_volume() const { return std::holds_alternative<Parts>(_value); }
49
- const std::string &single_value() const { return std::get<std::string>(_value); }
50
- const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
51
- Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
52
-
53
- private:
54
- std::variant<std::string, Parts> _value;
55
- };
56
-
57
- using PathHierarchy = std::vector<PathEntry>;
58
-
59
- /**
60
- * Compare two entries using the ordering enforced throughout archive_r.
61
- *
62
- * Ordering rules:
63
- * 1. Entry categories are ordered single < multi-volume.
64
- * 2. Single entries compare by string value.
65
- * 3. Multi-volume entries first compare their ordering flag (Natural < Given),
66
- * then compare corresponding part names lexicographically, finally by list length.
67
- */
68
- int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
69
-
70
- /** Compare complete hierarchies lexicographically using compare_entries on each level. */
71
- int compare_hierarchies(const PathHierarchy &lhs, const PathHierarchy &rhs);
72
-
73
- /** Shorthand equality helpers for entries and hierarchies. */
74
- bool entries_equal(const PathEntry &lhs, const PathEntry &rhs);
75
- bool hierarchies_equal(const PathHierarchy &lhs, const PathHierarchy &rhs);
76
-
77
- /** Strict-weak-order functor suitable for associative containers. */
78
- struct PathHierarchyLess {
79
- bool operator()(const PathHierarchy &lhs, const PathHierarchy &rhs) const;
80
- };
81
-
82
- /** Build a hierarchy containing a single leaf component. */
83
- PathHierarchy make_single_path(const std::string &root);
84
-
85
- /** Append helpers for single and multi-volume components. */
86
- void append_single(PathHierarchy &hierarchy, std::string value);
87
- void append_multi_volume(PathHierarchy &hierarchy, std::vector<std::string> parts, PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural);
88
-
89
- /** Extract prefix/slice helpers. */
90
- PathHierarchy pathhierarchy_prefix_until(const PathHierarchy &hierarchy, size_t inclusive_index);
91
- PathHierarchy parent_hierarchy(const PathHierarchy &hierarchy);
92
- } // namespace archive_r
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include <cstddef>
7
+ #include <stdexcept>
8
+ #include <string>
9
+ #include <utility>
10
+ #include <variant>
11
+ #include <vector>
12
+
13
+ namespace archive_r {
14
+
15
+ /**
16
+ * @brief Represents a single component within a logical path hierarchy.
17
+ *
18
+ * A component can be one of two shapes:
19
+ * - single string value (most common)
20
+ * - multi-volume part list (split archives that share a common base name)
21
+ */
22
+ class PathEntry {
23
+ public:
24
+ struct Parts {
25
+ std::vector<std::string> values;
26
+ enum class Ordering { Natural, Given } ordering = Ordering::Natural;
27
+ };
28
+
29
+ PathEntry() = default;
30
+
31
+ explicit PathEntry(std::string value)
32
+ : _value(std::move(value)) {}
33
+
34
+ explicit PathEntry(Parts parts)
35
+ : _value(std::move(parts)) {}
36
+
37
+ static PathEntry single(std::string entry) { return PathEntry(std::move(entry)); }
38
+
39
+ static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
40
+ Parts parts{ std::move(entries), ordering };
41
+ if (parts.values.empty()) {
42
+ throw std::invalid_argument("multi-volume parts cannot be empty");
43
+ }
44
+ return PathEntry(std::move(parts));
45
+ }
46
+
47
+ bool is_single() const { return std::holds_alternative<std::string>(_value); }
48
+ bool is_multi_volume() const { return std::holds_alternative<Parts>(_value); }
49
+ const std::string &single_value() const { return std::get<std::string>(_value); }
50
+ const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
51
+ Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
52
+
53
+ private:
54
+ std::variant<std::string, Parts> _value;
55
+ };
56
+
57
+ using PathHierarchy = std::vector<PathEntry>;
58
+
59
+ /**
60
+ * Compare two entries using the ordering enforced throughout archive_r.
61
+ *
62
+ * Ordering rules:
63
+ * 1. Entry categories are ordered single < multi-volume.
64
+ * 2. Single entries compare by string value.
65
+ * 3. Multi-volume entries first compare their ordering flag (Natural < Given),
66
+ * then compare corresponding part names lexicographically, finally by list length.
67
+ */
68
+ int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
69
+
70
+ /** Compare complete hierarchies lexicographically using compare_entries on each level. */
71
+ int compare_hierarchies(const PathHierarchy &lhs, const PathHierarchy &rhs);
72
+
73
+ /** Shorthand equality helpers for entries and hierarchies. */
74
+ bool entries_equal(const PathEntry &lhs, const PathEntry &rhs);
75
+ bool hierarchies_equal(const PathHierarchy &lhs, const PathHierarchy &rhs);
76
+
77
+ /** Strict-weak-order functor suitable for associative containers. */
78
+ struct PathHierarchyLess {
79
+ bool operator()(const PathHierarchy &lhs, const PathHierarchy &rhs) const;
80
+ };
81
+
82
+ /** Build a hierarchy containing a single leaf component. */
83
+ PathHierarchy make_single_path(const std::string &root);
84
+
85
+ /** Append helpers for single and multi-volume components. */
86
+ void append_single(PathHierarchy &hierarchy, std::string value);
87
+ void append_multi_volume(PathHierarchy &hierarchy, std::vector<std::string> parts, PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural);
88
+
89
+ /** Extract prefix/slice helpers. */
90
+ PathHierarchy pathhierarchy_prefix_until(const PathHierarchy &hierarchy, size_t inclusive_index);
91
+ PathHierarchy parent_hierarchy(const PathHierarchy &hierarchy);
92
+ } // namespace archive_r
@@ -1,36 +1,36 @@
1
- // SPDX-License-Identifier: MIT
2
- // Copyright (c) 2025 archive_r Team
3
-
4
- #pragma once
5
-
6
- #include "archive_r/path_hierarchy.h"
7
- #include <cstddef>
8
- #include <string>
9
- #include <vector>
10
-
11
- namespace archive_r {
12
-
13
- /** Return pointer to the Nth single value of an entry (nullptr if absent). */
14
- const std::string *path_entry_component_at(const PathEntry &entry, std::size_t index);
15
-
16
- /** Convenience helpers for multi-volume PathHierarchy nodes. */
17
- std::size_t pathhierarchy_volume_size(const PathHierarchy &logical);
18
- std::string pathhierarchy_volume_entry_name(const PathHierarchy &logical, std::size_t index);
19
- bool pathhierarchy_is_multivolume(const PathHierarchy &hierarchy);
20
- PathHierarchy pathhierarchy_select_single_part(const PathHierarchy &logical, std::size_t index);
21
-
22
- /** Combine sibling hierarchies that differ only by their terminal part list. */
23
- PathHierarchy merge_multi_volume_sources(const std::vector<PathHierarchy> &sources);
24
-
25
- /** Sort hierarchies using PathHierarchyLess semantics. */
26
- void sort_hierarchies(std::vector<PathHierarchy> &hierarchies);
27
-
28
- /** Render helpers converting entries to flattened strings for diagnostics. */
29
- bool flatten_entry_to_string(const PathEntry &entry, std::string &output);
30
- bool entry_name_from_component(const PathEntry &entry, std::string &output);
31
-
32
- /** Human readable pretty-printers used in logging and debug output. */
33
- std::string path_entry_display(const PathEntry &entry);
34
- std::string hierarchy_display(const PathHierarchy &hierarchy);
35
-
36
- } // namespace archive_r
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/path_hierarchy.h"
7
+ #include <cstddef>
8
+ #include <string>
9
+ #include <vector>
10
+
11
+ namespace archive_r {
12
+
13
+ /** Return pointer to the Nth single value of an entry (nullptr if absent). */
14
+ const std::string *path_entry_component_at(const PathEntry &entry, std::size_t index);
15
+
16
+ /** Convenience helpers for multi-volume PathHierarchy nodes. */
17
+ std::size_t pathhierarchy_volume_size(const PathHierarchy &logical);
18
+ std::string pathhierarchy_volume_entry_name(const PathHierarchy &logical, std::size_t index);
19
+ bool pathhierarchy_is_multivolume(const PathHierarchy &hierarchy);
20
+ PathHierarchy pathhierarchy_select_single_part(const PathHierarchy &logical, std::size_t index);
21
+
22
+ /** Combine sibling hierarchies that differ only by their terminal part list. */
23
+ PathHierarchy merge_multi_volume_sources(const std::vector<PathHierarchy> &sources);
24
+
25
+ /** Sort hierarchies using PathHierarchyLess semantics. */
26
+ void sort_hierarchies(std::vector<PathHierarchy> &hierarchies);
27
+
28
+ /** Render helpers converting entries to flattened strings for diagnostics. */
29
+ bool flatten_entry_to_string(const PathEntry &entry, std::string &output);
30
+ bool entry_name_from_component(const PathEntry &entry, std::string &output);
31
+
32
+ /** Human readable pretty-printers used in logging and debug output. */
33
+ std::string path_entry_display(const PathEntry &entry);
34
+ std::string hierarchy_display(const PathHierarchy &hierarchy);
35
+
36
+ } // namespace archive_r
@@ -1,34 +1,34 @@
1
- // SPDX-License-Identifier: MIT
2
- // Copyright (c) 2025 archive_r Team
3
-
4
- #pragma once
5
-
6
- #include <sys/types.h>
7
-
8
- #if defined(_WIN32)
9
- #include <BaseTsd.h>
10
- #include <sys/stat.h>
11
- #if !defined(_SSIZE_T_DEFINED)
12
- using ssize_t = SSIZE_T;
13
- #define _SSIZE_T_DEFINED
14
- #endif
15
- #if !defined(_MODE_T_DEFINED)
16
- using mode_t = unsigned short; // MSVC does not expose POSIX mode_t by default
17
- #define _MODE_T_DEFINED
18
- #endif
19
- #endif
20
-
21
- namespace archive_r {
22
-
23
- // Expose POSIX-like types within the archive_r namespace.
24
- // - On POSIX platforms, ssize_t/mode_t come from <sys/types.h>.
25
- // - On Windows, platform_compat provides fallback definitions above.
26
- #if defined(_WIN32)
27
- using ssize_t = SSIZE_T;
28
- using mode_t = unsigned short;
29
- #else
30
- using ssize_t = ::ssize_t;
31
- using mode_t = ::mode_t;
32
- #endif
33
-
34
- } // namespace archive_r
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include <sys/types.h>
7
+
8
+ #if defined(_WIN32)
9
+ #include <BaseTsd.h>
10
+ #include <sys/stat.h>
11
+ #if !defined(_SSIZE_T_DEFINED)
12
+ using ssize_t = SSIZE_T;
13
+ #define _SSIZE_T_DEFINED
14
+ #endif
15
+ #if !defined(_MODE_T_DEFINED)
16
+ using mode_t = unsigned short; // MSVC does not expose POSIX mode_t by default
17
+ #define _MODE_T_DEFINED
18
+ #endif
19
+ #endif
20
+
21
+ namespace archive_r {
22
+
23
+ // Expose POSIX-like types within the archive_r namespace.
24
+ // - On POSIX platforms, ssize_t/mode_t come from <sys/types.h>.
25
+ // - On Windows, platform_compat provides fallback definitions above.
26
+ #if defined(_WIN32)
27
+ using ssize_t = SSIZE_T;
28
+ using mode_t = unsigned short;
29
+ #else
30
+ using ssize_t = ::ssize_t;
31
+ using mode_t = ::mode_t;
32
+ #endif
33
+
34
+ } // namespace archive_r
@@ -1,156 +1,156 @@
1
- // SPDX-License-Identifier: MIT
2
- // Copyright (c) 2025 archive_r Team
3
-
4
- #pragma once
5
-
6
- #include "archive_r/entry_fault.h"
7
- #include "archive_r/path_hierarchy.h"
8
- #include "entry.h"
9
- #include <memory>
10
- #include <string>
11
- #include <vector>
12
-
13
- namespace archive_r {
14
-
15
- struct TraverserOptions {
16
- std::vector<std::string> passphrases; ///< Passphrases for encrypted archives
17
- std::vector<std::string> formats; ///< Specific archive formats to enable (empty = all)
18
- std::vector<std::string> metadata_keys; ///< Metadata keys to capture for entries
19
- bool descend_archives = true; ///< Whether to descend into archives by default
20
- };
21
-
22
- /**
23
- * @brief Iterator-based traversal for archives and directories
24
- *
25
- * Traverser provides a unified iterator-based interface for traversing
26
- * entries within archives and directories, including support for nested
27
- * archives and automatic descent.
28
- *
29
- * Supports multiple archive formats via libarchive (tar, zip, gzip, etc.)
30
- * and filesystem directories.
31
- *
32
- * Uses std::filesystem for directory traversal and ArchiveStackOrchestrator for archives.
33
- * @see Entry, ArchiveStackOrchestrator
34
- *
35
- * \par Inputs
36
- * - The input list must not be empty, and each PathHierarchy must not be empty.
37
- * Violations throw std::invalid_argument.
38
- * - For the common single-root case, prefer make_single_path("...") or
39
- * Traverser(const std::string&, ...).
40
- *
41
- * \par How Roots Are Interpreted
42
- * - If the root hierarchy is exactly one single path and it refers to a directory,
43
- * Traverser enumerates it using std::filesystem::recursive_directory_iterator.
44
- * - Otherwise, Traverser attempts archive traversal using libarchive.
45
- *
46
- * \par Error Model (Exceptions vs Faults)
47
- * - Invalid arguments are reported via exceptions (std::invalid_argument).
48
- * - Recoverable data / I/O errors during archive traversal are reported via the
49
- * global fault callback (EntryFault) and traversal continues.
50
- * - Directory traversal uses std::filesystem iterators; filesystem exceptions
51
- * (e.g. std::filesystem::filesystem_error) may be thrown and are not converted
52
- * to faults.
53
- *
54
- * \par Iterator Semantics
55
- * - Traverser::Iterator is an input iterator (single-pass).
56
- * - Dereferencing the end iterator throws std::logic_error.
57
- *
58
- * Usage:
59
- * Traverser traverser({make_single_path("archive.tar.gz")}); // or directory path
60
- * for (Entry& entry : traverser) {
61
- * // Process entry
62
- * }
63
- *
64
- * @note Thread Safety
65
- * Traverser instances are not thread-safe. To use the traverser in a
66
- * multi-threaded environment, create a separate Traverser instance for each
67
- * thread. Do not share a single instance across multiple threads.
68
- */
69
- class Traverser {
70
- public:
71
- /**
72
- * @brief Construct traverser for archives or directories
73
- * @param paths Paths to archive files or directories
74
- *
75
- * Provide one or more paths to traverse. Single-path traversal can be
76
- * achieved by passing a container with one element:
77
- * Traverser traverser({make_single_path("archive.tar.gz")});
78
- *
79
- * @throws std::invalid_argument if paths is empty or contains an empty hierarchy
80
- */
81
- explicit Traverser(std::vector<PathHierarchy> paths, TraverserOptions options = {});
82
-
83
- /**
84
- * @brief Construct traverser for a single hierarchy
85
- */
86
- explicit Traverser(PathHierarchy path, TraverserOptions options = {});
87
-
88
- /**
89
- * @brief Construct traverser for a single archive or directory path
90
- */
91
- explicit Traverser(const std::string &path, TraverserOptions options = {});
92
-
93
- ~Traverser();
94
-
95
- // Non-copyable
96
- Traverser(const Traverser &) = delete;
97
- Traverser &operator=(const Traverser &) = delete;
98
-
99
- // ========================================================================
100
- // Iterator API
101
- // ========================================================================
102
-
103
- /**
104
- * @brief Forward iterator for traversing entries
105
- *
106
- * Satisfies InputIterator requirements:
107
- * - Move-only (non-copyable)
108
- * - Equality comparable
109
- * - Dereferenceable (returns Entry&)
110
- * - Incrementable
111
- */
112
- class Iterator {
113
- public:
114
- using iterator_category = std::input_iterator_tag;
115
- using value_type = Entry;
116
- using difference_type = std::ptrdiff_t;
117
- using pointer = Entry *;
118
- using reference = Entry &;
119
-
120
- reference operator*();
121
- pointer operator->();
122
- Iterator &operator++();
123
- bool operator==(const Iterator &other) const;
124
- bool operator!=(const Iterator &other) const;
125
-
126
- ~Iterator();
127
- Iterator(const Iterator &) = delete;
128
- Iterator &operator=(const Iterator &) = delete;
129
- Iterator(Iterator &&) noexcept;
130
- Iterator &operator=(Iterator &&) noexcept;
131
-
132
- private:
133
- friend class Traverser;
134
- class Impl;
135
- std::unique_ptr<Impl> _impl;
136
- explicit Iterator(std::unique_ptr<Impl> impl);
137
- };
138
-
139
- /**
140
- * @brief Get iterator to first entry
141
- * @return Iterator pointing to first entry
142
- */
143
- Iterator begin();
144
-
145
- /**
146
- * @brief Get end iterator
147
- * @return End iterator (sentinel)
148
- */
149
- Iterator end();
150
-
151
- private:
152
- std::vector<PathHierarchy> _initial_paths; ///< Initial paths provided to constructor
153
- TraverserOptions _options; ///< Options controlling archive handling
154
- };
155
-
156
- } // namespace archive_r
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/entry_fault.h"
7
+ #include "archive_r/path_hierarchy.h"
8
+ #include "entry.h"
9
+ #include <memory>
10
+ #include <string>
11
+ #include <vector>
12
+
13
+ namespace archive_r {
14
+
15
+ struct TraverserOptions {
16
+ std::vector<std::string> passphrases; ///< Passphrases for encrypted archives
17
+ std::vector<std::string> formats; ///< Specific archive formats to enable (empty = all)
18
+ std::vector<std::string> metadata_keys; ///< Metadata keys to capture for entries
19
+ bool descend_archives = true; ///< Whether to descend into archives by default
20
+ };
21
+
22
+ /**
23
+ * @brief Iterator-based traversal for archives and directories
24
+ *
25
+ * Traverser provides a unified iterator-based interface for traversing
26
+ * entries within archives and directories, including support for nested
27
+ * archives and automatic descent.
28
+ *
29
+ * Supports multiple archive formats via libarchive (tar, zip, gzip, etc.)
30
+ * and filesystem directories.
31
+ *
32
+ * Uses std::filesystem for directory traversal and ArchiveStackOrchestrator for archives.
33
+ * @see Entry, ArchiveStackOrchestrator
34
+ *
35
+ * \par Inputs
36
+ * - The input list must not be empty, and each PathHierarchy must not be empty.
37
+ * Violations throw std::invalid_argument.
38
+ * - For the common single-root case, prefer make_single_path("...") or
39
+ * Traverser(const std::string&, ...).
40
+ *
41
+ * \par How Roots Are Interpreted
42
+ * - If the root hierarchy is exactly one single path and it refers to a directory,
43
+ * Traverser enumerates it using std::filesystem::recursive_directory_iterator.
44
+ * - Otherwise, Traverser attempts archive traversal using libarchive.
45
+ *
46
+ * \par Error Model (Exceptions vs Faults)
47
+ * - Invalid arguments are reported via exceptions (std::invalid_argument).
48
+ * - Recoverable data / I/O errors during archive traversal are reported via the
49
+ * global fault callback (EntryFault) and traversal continues.
50
+ * - Directory traversal uses std::filesystem iterators; filesystem exceptions
51
+ * (e.g. std::filesystem::filesystem_error) may be thrown and are not converted
52
+ * to faults.
53
+ *
54
+ * \par Iterator Semantics
55
+ * - Traverser::Iterator is an input iterator (single-pass).
56
+ * - Dereferencing the end iterator throws std::logic_error.
57
+ *
58
+ * Usage:
59
+ * Traverser traverser({make_single_path("archive.tar.gz")}); // or directory path
60
+ * for (Entry& entry : traverser) {
61
+ * // Process entry
62
+ * }
63
+ *
64
+ * @note Thread Safety
65
+ * Traverser instances are not thread-safe. To use the traverser in a
66
+ * multi-threaded environment, create a separate Traverser instance for each
67
+ * thread. Do not share a single instance across multiple threads.
68
+ */
69
+ class Traverser {
70
+ public:
71
+ /**
72
+ * @brief Construct traverser for archives or directories
73
+ * @param paths Paths to archive files or directories
74
+ *
75
+ * Provide one or more paths to traverse. Single-path traversal can be
76
+ * achieved by passing a container with one element:
77
+ * Traverser traverser({make_single_path("archive.tar.gz")});
78
+ *
79
+ * @throws std::invalid_argument if paths is empty or contains an empty hierarchy
80
+ */
81
+ explicit Traverser(std::vector<PathHierarchy> paths, TraverserOptions options = {});
82
+
83
+ /**
84
+ * @brief Construct traverser for a single hierarchy
85
+ */
86
+ explicit Traverser(PathHierarchy path, TraverserOptions options = {});
87
+
88
+ /**
89
+ * @brief Construct traverser for a single archive or directory path
90
+ */
91
+ explicit Traverser(const std::string &path, TraverserOptions options = {});
92
+
93
+ ~Traverser();
94
+
95
+ // Non-copyable
96
+ Traverser(const Traverser &) = delete;
97
+ Traverser &operator=(const Traverser &) = delete;
98
+
99
+ // ========================================================================
100
+ // Iterator API
101
+ // ========================================================================
102
+
103
+ /**
104
+ * @brief Input (single-pass) iterator for traversing entries
105
+ *
106
+ * Provides an input-iterator-style (single-pass) interface:
107
+ * - Move-only (non-copyable)
108
+ * - Equality comparable
109
+ * - Dereferenceable (returns Entry&)
110
+ * - Incrementable
111
+ */
112
+ class Iterator {
113
+ public:
114
+ using iterator_category = std::input_iterator_tag;
115
+ using value_type = Entry;
116
+ using difference_type = std::ptrdiff_t;
117
+ using pointer = Entry *;
118
+ using reference = Entry &;
119
+
120
+ reference operator*();
121
+ pointer operator->();
122
+ Iterator &operator++();
123
+ bool operator==(const Iterator &other) const;
124
+ bool operator!=(const Iterator &other) const;
125
+
126
+ ~Iterator();
127
+ Iterator(const Iterator &) = delete;
128
+ Iterator &operator=(const Iterator &) = delete;
129
+ Iterator(Iterator &&) noexcept;
130
+ Iterator &operator=(Iterator &&) noexcept;
131
+
132
+ private:
133
+ friend class Traverser;
134
+ class Impl;
135
+ std::unique_ptr<Impl> _impl;
136
+ explicit Iterator(std::unique_ptr<Impl> impl);
137
+ };
138
+
139
+ /**
140
+ * @brief Get iterator to first entry
141
+ * @return Iterator pointing to first entry
142
+ */
143
+ Iterator begin();
144
+
145
+ /**
146
+ * @brief Get end iterator
147
+ * @return End iterator (sentinel)
148
+ */
149
+ Iterator end();
150
+
151
+ private:
152
+ std::vector<PathHierarchy> _initial_paths; ///< Initial paths provided to constructor
153
+ TraverserOptions _options; ///< Options controlling archive handling
154
+ };
155
+
156
+ } // namespace archive_r