archive_r_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +56 -0
  3. data/README.md +103 -0
  4. data/ext/archive_r/archive_r_ext.cc +910 -0
  5. data/ext/archive_r/extconf.rb +90 -0
  6. data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
  7. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
  8. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
  9. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
  10. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
  15. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
  16. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
  17. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
  18. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
  19. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
  20. data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
  21. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
  22. data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
  23. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
  24. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
  25. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
  26. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
  27. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
  28. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
  29. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
  30. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
  31. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
  32. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
  33. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
  34. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
  35. data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
  36. data/lib/archive_r.rb +80 -0
  37. metadata +112 -0
@@ -0,0 +1,90 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2025 archive_r Team
3
+
4
+ require 'mkmf'
5
+
6
# Locate the directory that holds the archive_r core library.
#
# Candidates are probed in priority order:
#   1. the directory named by ARCHIVE_R_CORE_ROOT (when set and non-empty),
#   2. the directory four levels above this file,
#   3. the vendor/archive_r directory next to this file.
#
# A candidate qualifies only when it contains both an `include` and a `src`
# sub-directory.
#
# @return [String, nil] absolute path of the first qualifying candidate,
#   or nil when none qualifies
def archive_r_core_root
  override = ENV['ARCHIVE_R_CORE_ROOT']

  search_path = [
    (File.expand_path(override) if override && !override.empty?),
    File.expand_path('../../../..', __dir__),
    File.expand_path('vendor/archive_r', __dir__)
  ].compact

  search_path.find do |dir|
    %w[include src].all? { |sub| Dir.exist?(File.join(dir, sub)) }
  end
end
27
# Resolve the core library location once; everything below depends on it.
archive_r_root = archive_r_core_root

# Without a usable core tree there is nothing to build, so stop right away.
abort(<<~MSG) unless archive_r_root
  archive_r core library not found.
  Please set ARCHIVE_R_CORE_ROOT to a repository checkout or use the vendored gem package.
MSG

vendor_root = File.expand_path('vendor/archive_r', __dir__)
env_override = ENV['ARCHIVE_R_CORE_ROOT']

# Report which of the candidate locations was selected.
origin_note =
  if archive_r_root == vendor_root
    'Using vendored archive_r core sources'
  elsif env_override && File.expand_path(env_override) == archive_r_root
    "Using archive_r core from #{archive_r_root} (ARCHIVE_R_CORE_ROOT)"
  else
    "Using archive_r core from #{archive_r_root}"
  end
puts origin_note
45
+
46
# Directories and files derived from the selected core root.
archive_r_include = File.join(archive_r_root, 'include')
archive_r_src = File.join(archive_r_root, 'src')
archive_r_lib_dir = File.join(archive_r_root, 'build')
glue_source = File.join(__dir__, 'archive_r_ext.cc')

# Ensure make can locate the core sources via VPATH.
# mkmf keeps $VPATH as an Array of directories and joins the elements with the
# platform path separator itself when the Makefile is written. The directory is
# therefore appended as its own element; pushing a separator string as well
# would add a bogus entry and leave empty components in the generated VPATH.
$VPATH ||= []
$VPATH << archive_r_src unless $VPATH.include?(archive_r_src)

# Add include paths (public headers first, then the private ones under src/)
$INCFLAGS << " -I#{archive_r_include}"
$INCFLAGS << " -I#{archive_r_src}"

# C++17 standard
$CXXFLAGS << " -std=c++17"
64
+
65
# libarchive is a hard requirement of the extension.
abort "libarchive is required but not found" unless have_library('archive')

# Prefer linking an already-built static core library; when none exists,
# compile the core sources together with the Ruby glue source instead.
prebuilt_lib = File.join(archive_r_lib_dir, 'libarchive_r_core.a')
have_prebuilt = File.exist?(prebuilt_lib)

if have_prebuilt
  $LOCAL_LIBS << " #{prebuilt_lib}"
  puts "Using pre-built archive_r core library"
else
  puts "Pre-built library not found, will build from source"
  $srcs = [glue_source, *Dir.glob(File.join(archive_r_src, '*.cc'))]
end

# Whenever an explicit source list is in effect, make sure the Ruby glue
# source is part of it.
$srcs.unshift(glue_source) if defined?($srcs) && $srcs && !$srcs.include?(glue_source)

# Create Makefile
create_makefile('archive_r/archive_r')
@@ -0,0 +1,56 @@
1
+ archive_r License
2
+ Version: 0.1.0 (2025-10-25)
3
+
4
+ ----------------------------------------
5
+ Primary License
6
+ ----------------------------------------
7
+
8
+ MIT License
9
+
10
+ Copyright (c) 2025 archive_r Team
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+
30
+ ----------------------------------------
31
+ Third-Party Notices
32
+ ----------------------------------------
33
+
34
+ This distribution bundles or links against the following third-party
35
+ components. Their respective license terms apply in addition to the MIT
36
+ License shown above.
37
+
38
+ 1. libarchive
39
+ - Purpose: core archive reading and writing functionality for the C++
40
+ library and language bindings.
41
+ - License: New BSD License (https://www.libarchive.org/)
42
+
43
+ 2. pybind11
44
+ - Purpose: header-only binding generator for the Python extension module.
45
+ - License: BSD-style License (https://github.com/pybind/pybind11)
46
+
47
+ 3. rake (development dependency for Ruby bindings)
48
+ - Purpose: build and release tasks for the Ruby gem.
49
+ - License: MIT License (https://github.com/ruby/rake)
50
+
51
+ 4. minitest (development dependency for Ruby bindings)
52
+ - Purpose: unit testing framework for the Ruby gem.
53
+ - License: MIT License (https://github.com/minitest/minitest)
54
+
55
+ Users of archive_r should review the linked third-party licenses to ensure
56
+ compliance with their terms when redistributing this software.
@@ -0,0 +1,41 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/path_hierarchy.h"
7
+
8
+ #include <functional>
9
+ #include <memory>
10
+ #include <sys/types.h>
11
+
12
+ namespace archive_r {
13
+
14
+ /**
15
+ * @brief Abstract stream interface used by the archive traversal stack
16
+ */
17
+ class IDataStream {
18
+ public:
19
+ virtual ~IDataStream() = default; ///< Virtual so implementations can be destroyed through an IDataStream pointer
20
+ virtual ssize_t read(void *buffer, size_t size) = 0; ///< Pure virtual read into buffer; return convention is set by implementations (presumably bytes read, negative on error - confirm)
21
+ virtual void rewind() = 0; ///< Pure virtual; implementations must provide it
22
+ virtual bool at_end() const = 0; ///< Pure virtual end-of-stream query
23
+ virtual int64_t seek(int64_t offset, int whence) { return -1; } ///< Default implementation ignores both arguments and returns -1
24
+ virtual int64_t tell() const { return -1; } ///< Default implementation returns -1
25
+ virtual bool can_seek() const { return false; } ///< Default implementation reports false
26
+ virtual PathHierarchy source_hierarchy() const = 0; ///< Pure virtual; returns a PathHierarchy by value
27
+ };
28
+
29
+ using RootStreamFactory = std::function<std::shared_ptr<IDataStream>(const PathHierarchy &)>;
30
+
31
+ /**
32
+ * @brief Register the default factory used for root PathHierarchy streams
33
+ */
34
+ void set_root_stream_factory(RootStreamFactory factory);
35
+
36
+ /**
37
+ * @brief Retrieve the currently registered root stream factory
38
+ */
39
+ RootStreamFactory get_root_stream_factory();
40
+
41
+ } // namespace archive_r
@@ -0,0 +1,161 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include <cstdint>
7
+ #include <filesystem>
8
+ #include <memory>
9
+ #include <string>
10
+ #include <sys/types.h>
11
+ #include <vector>
12
+
13
+ #include "archive_r/entry_fault.h"
14
+ #include "archive_r/entry_metadata.h"
15
+ #include "archive_r/path_hierarchy.h"
16
+
17
+ namespace archive_r {
18
+
19
+ class ArchiveStackOrchestrator;
20
+
21
+ struct MultiVolumeGroupOptions {
22
+ PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural; ///< Ordering applied to the group's parts; defaults to Natural
23
+ };
24
+
25
+ /**
26
+ * @brief Represents a single entry in an archive traversal
27
+ *
28
+ * Entry objects encapsulate all information about an archive entry including:
29
+ * - Path information (path, path hierarchy)
30
+ * - Metadata (size, type, timestamps)
31
+ * - Content access (read operations)
32
+ * - Multi-volume archive grouping support
33
+ *
34
+ * Entry objects are typically obtained from ArchiveTraverser::Iterator and
35
+ * remain valid until the iterator advances.
36
+ */
37
+ class Entry {
38
+ public:
39
+ /**
40
+ * @brief Get the entry name (last element of the path hierarchy)
41
+ * @return Entry name relative to its containing archive (e.g., "dir/subdir/file.txt" when the
42
+ * hierarchy is {"outer/archive.zip", "dir/subdir/file.txt"})
43
+ */
44
+ std::string name() const;
45
+
46
+ /**
47
+ * @brief Get the entry path as a string
48
+ * @return Joined path including outer archives (e.g., "outer/archive.zip/dir/subdir/file.txt"
49
+ * when the hierarchy is {"outer/archive.zip", "dir/subdir/file.txt"})
50
+ */
51
+ std::string path() const;
52
+
53
+ /**
54
+ * @brief Get the entry path as a hierarchy of components
55
+ * @return Vector describing each descent step (e.g., {"outer/archive.zip",
56
+ * "dir/subdir/file.txt"})
57
+ */
58
+ const PathHierarchy &path_hierarchy() const;
59
+
60
+ /**
61
+ * @brief Check if entry is a directory
62
+ * @return true if entry represents a directory
63
+ */
64
+ bool is_directory() const;
65
+
66
+ /**
67
+ * @brief Check if entry is a regular file
68
+ * @return true if entry represents a regular file
69
+ */
70
+ bool is_file() const;
71
+
72
+ /**
73
+ * @brief Get the uncompressed size of the entry
74
+ * @return Size in bytes, or 0 if unknown
75
+ */
76
+ uint64_t size() const;
77
+
78
+ /**
79
+ * @brief Get the archive nesting depth
80
+ * @return 0 for top-level archive, 1 for nested archive, etc.
81
+ */
82
+ size_t depth() const;
83
+
84
+ /**
85
+ * @brief Read data from the entry
86
+ *
87
+ * Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
88
+ * if the owning iterator advances or other traversal work continues in parallel.
89
+ *
90
+ * @param buffer Buffer to read data into
91
+ * @param length Maximum number of bytes to read
92
+ * @return Number of bytes read, 0 on EOF, -1 on error
93
+ */
94
+ ssize_t read(void *buffer, size_t length);
95
+
96
+ /**
97
+ * @brief Enable or disable automatic descent into this entry
98
+ * @param enabled true to descend (default), false to keep traversal at current level
99
+ */
100
+ void set_descent(bool enabled);
101
+
102
+ /**
103
+ * @brief Check if automatic descent is currently enabled
104
+ */
105
+ bool descent_enabled() const;
106
+
107
+ /**
108
+ * @brief Register this entry as part of a multi-volume (split) archive
109
+ * @param base_name Base name without the volume suffix (e.g., "archive.tar.gz")
110
+ * @param options Optional configuration (e.g., preserve Given ordering)
111
+ *
112
+ * Register each entry that belongs to the same multi-volume group so that
113
+ * once traversal of the parent archive finishes, the parts are combined
114
+ * automatically. The traverser will then descend into the combined archive
115
+ * and continue processing its contents.
116
+ *
117
+ * Example:
118
+ * @code
119
+ * for (Entry& entry : traverser) {
120
+ * if (entry.path().find(".part") != std::string::npos) {
121
+ * std::string base = extract_base_name(entry.path());
122
+ * entry.set_multi_volume_group(base);
123
+ * }
124
+ * }
125
+ * @endcode
126
+ */
127
+ void set_multi_volume_group(const std::string &base_name, const MultiVolumeGroupOptions &options = {});
128
+
129
+ /**
130
+ * @brief Get metadata captured for this entry
131
+ * @return Immutable metadata map keyed by libarchive field names
132
+ */
133
+ const EntryMetadataMap &metadata() const;
134
+
135
+ /**
136
+ * @brief Look up a metadata value by key
137
+ * @param key Metadata key (e.g., "uid", "mtime")
138
+ * @return Pointer to the stored value, or nullptr if not present
139
+ */
140
+ const EntryMetadataValue *find_metadata(const std::string &key) const;
141
+
142
+ static std::unique_ptr<Entry> create(PathHierarchy hierarchy, std::shared_ptr<ArchiveStackOrchestrator> data_source_orchestrator, bool default_descent); ///< Public factory returning an owning pointer; default_descent is presumably the initial descent_enabled() state - confirm in entry.cc
143
+
144
+ // Copy/move operations (declared here and defined out of line; Impl is only forward-declared in this header)
145
+ Entry(const Entry &);
146
+ Entry &operator=(const Entry &);
147
+ Entry(Entry &&) noexcept;
148
+ Entry &operator=(Entry &&) noexcept;
149
+
150
+ ~Entry(); ///< Declared only; the definition lives outside this header, where Impl can be a complete type
151
+
152
+ private:
153
+ class Impl;
154
+ std::unique_ptr<Impl> _impl;
155
+
156
+ // Private constructors - not callable from outside the class (no friend is declared here); use the static create() factory
157
+ explicit Entry(Impl *impl);
158
+ Entry(const PathHierarchy &hierarchy, std::shared_ptr<ArchiveStackOrchestrator> data_source_orchestrator, bool default_descent);
159
+ };
160
+
161
+ } // namespace archive_r
@@ -0,0 +1,34 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/path_hierarchy.h"
7
+
8
+ #include <functional>
9
+ #include <string>
10
+
11
+ namespace archive_r {
12
+
13
+ /** Describes a recoverable failure encountered while visiting an entry. */
14
+ struct EntryFault {
15
+ PathHierarchy hierarchy; ///< Path hierarchy where the fault occurred
16
+ std::string message; ///< Human readable description
17
+ int errno_value = 0; ///< Optional errno captured from the failing API; defaults to 0
18
+ };
19
+
20
+ /** Callback signature used to surface EntryFault notifications. */
21
+ using FaultCallback = std::function<void(const EntryFault &)>;
22
+
23
+ /**
24
+ * @brief Register a global callback to receive EntryFault notifications.
25
+ * Pass an empty std::function to clear the callback.
26
+ */
27
+ void register_fault_callback(FaultCallback callback);
28
+
29
+ /**
30
+ * @brief Dispatch a fault through the globally registered callback, if any.
31
+ */
32
+ void dispatch_registered_fault(const EntryFault &fault);
33
+
34
+ } // namespace archive_r
@@ -0,0 +1,56 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include <cstdint>
7
+ #include <string>
8
+ #include <unordered_map>
9
+ #include <variant>
10
+ #include <vector>
11
+
12
+ namespace archive_r {
13
+
14
+ /** POSIX-style timestamp with sub-second precision. */
15
+ struct EntryMetadataTime {
16
+ int64_t seconds; ///< Whole-second part of the timestamp (presumably relative to the Unix epoch - confirm against the producer)
17
+ int32_t nanoseconds; ///< Sub-second part, in nanoseconds per the field name
18
+ };
19
+
20
+ /** Sparse file chunk (offset + stored length). */
21
+ struct EntryMetadataSparseChunk {
22
+ int64_t offset; ///< Offset of the chunk (presumably in bytes from the start of the file - confirm)
23
+ int64_t length; ///< Stored length of the chunk
24
+ };
25
+
26
+ /** Extended attribute key/value pair. */
27
+ struct EntryMetadataXattr {
28
+ std::string name; ///< Attribute name (the key of the pair)
29
+ std::vector<uint8_t> value; ///< Attribute value held as raw bytes
30
+ };
31
+
32
+ /** Generic digest (algorithm + raw bytes). */
33
+ struct EntryMetadataDigest {
34
+ std::string algorithm; ///< Digest algorithm identifier (naming scheme is not visible in this header)
35
+ std::vector<uint8_t> value; ///< Raw digest bytes
36
+ };
37
+
38
+ /** Device identifiers for special files. */
39
+ struct EntryMetadataDeviceNumbers {
40
+ uint64_t major; ///< Major device number
41
+ uint64_t minor; ///< Minor device number
42
+ };
43
+
44
+ /** BSD-style file flags (bits to set/clear). */
45
+ struct EntryMetadataFileFlags {
46
+ uint64_t set; ///< Flag bits to set
47
+ uint64_t clear; ///< Flag bits to clear
48
+ };
49
+
50
+ using EntryMetadataValue = std::variant<std::monostate, bool, int64_t, uint64_t, std::string, std::vector<uint8_t>, EntryMetadataTime, EntryMetadataDeviceNumbers, EntryMetadataFileFlags,
51
+ std::vector<EntryMetadataXattr>, std::vector<EntryMetadataSparseChunk>, std::vector<EntryMetadataDigest>>;
52
+
53
+ /** Unordered map storing metadata captured during traversal. */
54
+ using EntryMetadataMap = std::unordered_map<std::string, EntryMetadataValue>;
55
+
56
+ } // namespace archive_r
@@ -0,0 +1,46 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/data_stream.h"
7
+ #include "archive_r/path_hierarchy.h"
8
+
9
+ #include <cstddef>
10
+ #include <cstdint>
11
+ #include <memory>
12
+
13
+ namespace archive_r {
14
+
15
+ class MultiVolumeStreamBase : public IDataStream {
16
+ public:
17
+ ~MultiVolumeStreamBase() override;
18
+
19
+ ssize_t read(void *buffer, size_t size) override;
20
+ void rewind() override;
21
+ bool at_end() const override;
22
+ int64_t seek(int64_t offset, int whence) override;
23
+ int64_t tell() const override;
24
+ bool can_seek() const override { return _supports_seek; }
25
+ PathHierarchy source_hierarchy() const override { return _logical_path; }
26
+
27
+ protected:
28
+ MultiVolumeStreamBase(PathHierarchy logical_path, bool supports_seek);
29
+
30
+ virtual void open_single_part(const PathHierarchy &single_part) = 0;
31
+ virtual void close_single_part() = 0;
32
+ virtual ssize_t read_from_single_part(void *buffer, size_t size) = 0;
33
+ virtual int64_t seek_within_single_part(int64_t offset, int whence) = 0;
34
+ virtual int64_t size_of_single_part(const PathHierarchy &single_part) = 0;
35
+
36
+ PathHierarchy _logical_path;
37
+ void deactivate_active_part();
38
+
39
+ private:
40
+ friend struct Impl;
41
+ struct Impl;
42
+ std::unique_ptr<Impl> _impl;
43
+ const bool _supports_seek;
44
+ };
45
+
46
+ } // namespace archive_r
@@ -0,0 +1,109 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include <cstddef>
7
+ #include <stdexcept>
8
+ #include <string>
9
+ #include <utility>
10
+ #include <variant>
11
+ #include <vector>
12
+
13
+ namespace archive_r {
14
+
15
+ /**
16
+ * @brief Represents a single component within a logical path hierarchy.
17
+ *
18
+ * A component can be one of three shapes:
19
+ * - single string value (most common)
20
+ * - multi-volume part list (split archives that share a common base name)
21
+ * - nested list of child entries (used for synthetic grouping)
22
+ */
23
class PathEntry {
public:
  /** Part list of a multi-volume component together with its ordering flag. */
  struct Parts {
    std::vector<std::string> values;
    enum class Ordering { Natural, Given } ordering = Ordering::Natural;
  };

  using NodeList = std::vector<PathEntry>;

  /** Default state: the variant holds its first alternative, an empty string. */
  PathEntry() = default;

  explicit PathEntry(std::string value)
      : _value(std::in_place_type<std::string>, std::move(value)) {}

  explicit PathEntry(Parts parts)
      : _value(std::in_place_type<Parts>, std::move(parts)) {}

  explicit PathEntry(NodeList nodes)
      : _value(std::in_place_type<NodeList>, std::move(nodes)) {}

  /** Build a single-string component. */
  static PathEntry single(std::string entry) {
    PathEntry result(std::move(entry));
    return result;
  }

  /**
   * Build a multi-volume component.
   * @throws std::invalid_argument when the part list is empty
   */
  static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
    if (entries.empty()) {
      throw std::invalid_argument("multi-volume parts cannot be empty");
    }
    return PathEntry(Parts{ std::move(entries), ordering });
  }

  /**
   * Build a nested component from a list of child entries.
   * @throws std::invalid_argument when the list is empty
   */
  static PathEntry nested(NodeList hierarchies) {
    if (!hierarchies.empty()) {
      return PathEntry(std::move(hierarchies));
    }
    throw std::invalid_argument("nested hierarchies cannot be empty");
  }

  // Shape queries: exactly one of these is true for any entry.
  bool is_single() const { return std::get_if<std::string>(&_value) != nullptr; }
  bool is_multi_volume() const { return std::get_if<Parts>(&_value) != nullptr; }
  bool is_nested() const { return std::get_if<NodeList>(&_value) != nullptr; }

  // Accessors: std::get throws std::bad_variant_access when the entry holds
  // a different shape than the one requested.
  const std::string &single_value() const { return std::get<std::string>(_value); }
  const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
  Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
  const NodeList &nested_nodes() const { return std::get<NodeList>(_value); }
  NodeList &nested_nodes_mut() { return std::get<NodeList>(_value); }

private:
  std::variant<std::string, Parts, NodeList> _value;
};
72
+
73
+ using PathHierarchy = std::vector<PathEntry>;
74
+
75
+ /**
76
+ * Compare two entries using the ordering enforced throughout archive_r.
77
+ *
78
+ * Ordering rules:
79
+ * 1. Entry categories are ordered single < multi-volume < nested node-list.
80
+ * 2. Single entries compare by string value.
81
+ * 3. Multi-volume entries first compare their ordering flag (Natural < Given),
82
+ * then compare corresponding part names lexicographically, finally by list length.
83
+ * 4. Nested node-lists compare child entries pairwise using the same rules.
84
+ */
85
+ int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
86
+
87
+ /** Compare complete hierarchies lexicographically using compare_entries on each level. */
88
+ int compare_hierarchies(const PathHierarchy &lhs, const PathHierarchy &rhs);
89
+
90
+ /** Shorthand equality helpers for entries and hierarchies. */
91
+ bool entries_equal(const PathEntry &lhs, const PathEntry &rhs);
92
+ bool hierarchies_equal(const PathHierarchy &lhs, const PathHierarchy &rhs);
93
+
94
+ /** Strict-weak-order functor suitable for associative containers. */
95
+ struct PathHierarchyLess {
96
+ bool operator()(const PathHierarchy &lhs, const PathHierarchy &rhs) const; ///< Declared only; presumably true when compare_hierarchies(lhs, rhs) < 0 - confirm in the implementation file
97
+ };
98
+
99
+ /** Build a hierarchy containing a single leaf component. */
100
+ PathHierarchy make_single_path(const std::string &root);
101
+
102
+ /** Append helpers for single and multi-volume components. */
103
+ void append_single(PathHierarchy &hierarchy, std::string value);
104
+ void append_multi_volume(PathHierarchy &hierarchy, std::vector<std::string> parts, PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural);
105
+
106
+ /** Extract prefix/slice helpers. */
107
+ PathHierarchy pathhierarchy_prefix_until(const PathHierarchy &hierarchy, size_t inclusive_index);
108
+ PathHierarchy parent_hierarchy(const PathHierarchy &hierarchy);
109
+ } // namespace archive_r
@@ -0,0 +1,37 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include "archive_r/path_hierarchy.h"
7
+ #include <cstddef>
8
+ #include <string>
9
+ #include <vector>
10
+
11
+ namespace archive_r {
12
+
13
+ /** Return pointer to the Nth single value of an entry (nullptr if absent). */
14
+ const std::string *path_entry_component_at(const PathEntry &entry, std::size_t index);
15
+
16
+ /** Convenience helpers for multi-volume PathHierarchy nodes. */
17
+ std::size_t pathhierarchy_volume_size(const PathHierarchy &logical);
18
+ std::string pathhierarchy_volume_entry_name(const PathHierarchy &logical, std::size_t index);
19
+ bool pathhierarchy_is_multivolume(const PathHierarchy &hierarchy);
20
+ PathHierarchy pathhierarchy_select_single_part(const PathHierarchy &logical, std::size_t index);
21
+
22
+ /** Combine sibling hierarchies that differ only by their terminal part list. */
23
+ PathHierarchy merge_multi_volume_sources(const std::vector<PathHierarchy> &sources);
24
+
25
+ /** Sort hierarchies using PathHierarchyLess semantics. */
26
+ void sort_hierarchies(std::vector<PathHierarchy> &hierarchies);
27
+
28
+ /** Render helpers converting entries to flattened strings for diagnostics. */
29
+ bool flatten_entry_to_string(const PathEntry &entry, std::string &output);
30
+ bool entry_name_from_component(const PathEntry &entry, std::string &output);
31
+
32
+ /** Human readable pretty-printers used in logging and debug output. */
33
+ std::string path_entry_display(const PathEntry &entry);
34
+ std::string hierarchy_display(const PathHierarchy &hierarchy);
35
+
36
+
37
+ } // namespace archive_r