archive_r_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +56 -0
- data/README.md +103 -0
- data/ext/archive_r/archive_r_ext.cc +910 -0
- data/ext/archive_r/extconf.rb +90 -0
- data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
- data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
- data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
- data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
- data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
- data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
- data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
- data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
- data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
- data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
- data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
- data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
- data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
- data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
- data/lib/archive_r.rb +80 -0
- metadata +112 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
require 'mkmf'
|
|
5
|
+
|
|
6
|
+
# Locate the archive_r core source tree used to build the extension.
#
# Candidate roots are probed in priority order:
#   1. the directory named by ENV['ARCHIVE_R_CORE_ROOT'] (skipped when the
#      variable is unset or empty),
#   2. the directory four levels above this file (presumably the repository
#      root when building from a checkout -- confirm against the repo layout),
#   3. the vendored copy at vendor/archive_r next to this file.
#
# @return [String, nil] the first candidate that contains both an `include`
#   and a `src` subdirectory, or nil when no candidate qualifies
def archive_r_core_root
  candidates = []

  env_root = ENV['ARCHIVE_R_CORE_ROOT']
  candidates << File.expand_path(env_root) if env_root && !env_root.empty?
  candidates << File.expand_path('../../../..', __dir__)
  candidates << File.expand_path('vendor/archive_r', __dir__)

  # Every candidate is a non-nil String here, so no nil guard is required.
  candidates.find do |root|
    Dir.exist?(File.join(root, 'include')) && Dir.exist?(File.join(root, 'src'))
  end
end
|
|
27
|
+
archive_r_root = archive_r_core_root

# Without the core sources there is nothing to compile; stop with guidance.
unless archive_r_root
  abort <<~MSG
    archive_r core library not found.
    Please set ARCHIVE_R_CORE_ROOT to a repository checkout or use the vendored gem package.
  MSG
end

vendor_root = File.expand_path('vendor/archive_r', __dir__)
env_root = ENV['ARCHIVE_R_CORE_ROOT']

# Report which source tree was selected so build logs are self-explanatory.
if archive_r_root == vendor_root
  puts 'Using vendored archive_r core sources'
elsif env_root && File.expand_path(env_root) == archive_r_root
  puts "Using archive_r core from #{archive_r_root} (ARCHIVE_R_CORE_ROOT)"
else
  puts "Using archive_r core from #{archive_r_root}"
end

archive_r_include = File.join(archive_r_root, 'include')
archive_r_src = File.join(archive_r_root, 'src')
archive_r_lib_dir = File.join(archive_r_root, 'build')
glue_source = File.join(__dir__, 'archive_r_ext.cc')

# Ensure make can locate vendored sources via VPATH.
# mkmf holds $VPATH as an Array of directories and joins the elements with the
# path separator itself when it writes the Makefile, so only the directory is
# appended here. Appending File::PATH_SEPARATOR as well would add a bogus
# element and leave empty entries in the generated VPATH line.
$VPATH ||= []
$VPATH << archive_r_src unless $VPATH.include?(archive_r_src)

# Add include paths
$INCFLAGS << " -I#{archive_r_include}"
$INCFLAGS << " -I#{archive_r_src}"

# C++17 standard
$CXXFLAGS << " -std=c++17"

# Check for libarchive
unless have_library('archive')
  abort "libarchive is required but not found"
end

# Try to link with pre-built static library first
prebuilt_lib = File.join(archive_r_lib_dir, 'libarchive_r_core.a')

if File.exist?(prebuilt_lib)
  $LOCAL_LIBS << " #{prebuilt_lib}"
  puts "Using pre-built archive_r core library"
else
  # Build from source as fallback (ensure the Ruby glue source is compiled too)
  puts "Pre-built library not found, will build from source"

  $srcs = [glue_source] + Dir.glob(File.join(archive_r_src, '*.cc'))
end

# Guarantee the Ruby glue source is part of the compilation list when $srcs is set
if defined?($srcs) && $srcs
  $srcs.unshift(glue_source) unless $srcs.include?(glue_source)
end

# Create Makefile
create_makefile('archive_r/archive_r')
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
archive_r License
|
|
2
|
+
Version: 0.1.0 (2025-10-25)
|
|
3
|
+
|
|
4
|
+
----------------------------------------
|
|
5
|
+
Primary License
|
|
6
|
+
----------------------------------------
|
|
7
|
+
|
|
8
|
+
MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2025 archive_r Team
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
|
|
30
|
+
----------------------------------------
|
|
31
|
+
Third-Party Notices
|
|
32
|
+
----------------------------------------
|
|
33
|
+
|
|
34
|
+
This distribution bundles or links against the following third-party
|
|
35
|
+
components. Their respective license terms apply in addition to the MIT
|
|
36
|
+
License shown above.
|
|
37
|
+
|
|
38
|
+
1. libarchive
|
|
39
|
+
- Purpose: core archive reading and writing functionality for the C++
|
|
40
|
+
library and language bindings.
|
|
41
|
+
- License: New BSD License (https://www.libarchive.org/)
|
|
42
|
+
|
|
43
|
+
2. pybind11
|
|
44
|
+
- Purpose: header-only binding generator for the Python extension module.
|
|
45
|
+
- License: BSD-style License (https://github.com/pybind/pybind11)
|
|
46
|
+
|
|
47
|
+
3. rake (development dependency for Ruby bindings)
|
|
48
|
+
- Purpose: build and release tasks for the Ruby gem.
|
|
49
|
+
- License: MIT License (https://github.com/ruby/rake)
|
|
50
|
+
|
|
51
|
+
4. minitest (development dependency for Ruby bindings)
|
|
52
|
+
- Purpose: unit testing framework for the Ruby gem.
|
|
53
|
+
- License: MIT License (https://github.com/minitest/minitest)
|
|
54
|
+
|
|
55
|
+
Users of archive_r should review the linked third-party licenses to ensure
|
|
56
|
+
compliance with their terms when redistributing this software.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "archive_r/path_hierarchy.h"
|
|
7
|
+
|
|
8
|
+
#include <functional>
|
|
9
|
+
#include <memory>
|
|
10
|
+
#include <sys/types.h>
|
|
11
|
+
|
|
12
|
+
namespace archive_r {
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* @brief Abstract stream interface used by the archive traversal stack
|
|
16
|
+
*/
|
|
17
|
+
class IDataStream {
|
|
18
|
+
public:
|
|
19
|
+
virtual ~IDataStream() = default;
|
|
20
|
+
virtual ssize_t read(void *buffer, size_t size) = 0;
|
|
21
|
+
virtual void rewind() = 0;
|
|
22
|
+
virtual bool at_end() const = 0;
|
|
23
|
+
virtual int64_t seek(int64_t offset, int whence) { return -1; }
|
|
24
|
+
virtual int64_t tell() const { return -1; }
|
|
25
|
+
virtual bool can_seek() const { return false; }
|
|
26
|
+
virtual PathHierarchy source_hierarchy() const = 0;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
using RootStreamFactory = std::function<std::shared_ptr<IDataStream>(const PathHierarchy &)>;
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* @brief Register the default factory used for root PathHierarchy streams
|
|
33
|
+
*/
|
|
34
|
+
void set_root_stream_factory(RootStreamFactory factory);
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* @brief Retrieve the currently registered root stream factory
|
|
38
|
+
*/
|
|
39
|
+
RootStreamFactory get_root_stream_factory();
|
|
40
|
+
|
|
41
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include <cstdint>
|
|
7
|
+
#include <filesystem>
|
|
8
|
+
#include <memory>
|
|
9
|
+
#include <string>
|
|
10
|
+
#include <sys/types.h>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
#include "archive_r/entry_fault.h"
|
|
14
|
+
#include "archive_r/entry_metadata.h"
|
|
15
|
+
#include "archive_r/path_hierarchy.h"
|
|
16
|
+
|
|
17
|
+
namespace archive_r {
|
|
18
|
+
|
|
19
|
+
class ArchiveStackOrchestrator;
|
|
20
|
+
|
|
21
|
+
struct MultiVolumeGroupOptions {
|
|
22
|
+
PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural;
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* @brief Represents a single entry in an archive traversal
|
|
27
|
+
*
|
|
28
|
+
* Entry objects encapsulate all information about an archive entry including:
|
|
29
|
+
* - Path information (path, path hierarchy)
|
|
30
|
+
* - Metadata (size, type, timestamps)
|
|
31
|
+
* - Content access (read operations)
|
|
32
|
+
* - Multi-volume archive grouping support
|
|
33
|
+
*
|
|
34
|
+
* Entry objects are typically obtained from ArchiveTraverser::Iterator and
|
|
35
|
+
* remain valid until the iterator advances.
|
|
36
|
+
*/
|
|
37
|
+
class Entry {
|
|
38
|
+
public:
|
|
39
|
+
/**
|
|
40
|
+
* @brief Get the entry name (last element of the path hierarchy)
|
|
41
|
+
* @return Entry name relative to its containing archive (e.g., "dir/subdir/file.txt" when the
|
|
42
|
+
* hierarchy is {"outer/archive.zip", "dir/subdir/file.txt"})
|
|
43
|
+
*/
|
|
44
|
+
std::string name() const;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* @brief Get the entry path as a string
|
|
48
|
+
* @return Joined path including outer archives (e.g., "outer/archive.zip/dir/subdir/file.txt"
|
|
49
|
+
* when the hierarchy is {"outer/archive.zip", "dir/subdir/file.txt"})
|
|
50
|
+
*/
|
|
51
|
+
std::string path() const;
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* @brief Get the entry path as a hierarchy of components
|
|
55
|
+
* @return Vector describing each descent step (e.g., {"outer/archive.zip",
|
|
56
|
+
* "dir/subdir/file.txt"})
|
|
57
|
+
*/
|
|
58
|
+
const PathHierarchy &path_hierarchy() const;
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* @brief Check if entry is a directory
|
|
62
|
+
* @return true if entry represents a directory
|
|
63
|
+
*/
|
|
64
|
+
bool is_directory() const;
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* @brief Check if entry is a regular file
|
|
68
|
+
* @return true if entry represents a regular file
|
|
69
|
+
*/
|
|
70
|
+
bool is_file() const;
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* @brief Get the uncompressed size of the entry
|
|
74
|
+
* @return Size in bytes, or 0 if unknown
|
|
75
|
+
*/
|
|
76
|
+
uint64_t size() const;
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* @brief Get the archive nesting depth
|
|
80
|
+
* @return 0 for top-level archive, 1 for nested archive, etc.
|
|
81
|
+
*/
|
|
82
|
+
size_t depth() const;
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* @brief Read data from the entry
|
|
86
|
+
*
|
|
87
|
+
* Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
|
|
88
|
+
* if the owning iterator advances or other traversal work continues in parallel.
|
|
89
|
+
*
|
|
90
|
+
* @param buffer Buffer to read data into
|
|
91
|
+
* @param length Maximum number of bytes to read
|
|
92
|
+
* @return Number of bytes read, 0 on EOF, -1 on error
|
|
93
|
+
*/
|
|
94
|
+
ssize_t read(void *buffer, size_t length);
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* @brief Enable or disable automatic descent into this entry
|
|
98
|
+
* @param enabled true to descend (default), false to keep traversal at current level
|
|
99
|
+
*/
|
|
100
|
+
void set_descent(bool enabled);
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* @brief Check if automatic descent is currently enabled
|
|
104
|
+
*/
|
|
105
|
+
bool descent_enabled() const;
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* @brief Register this entry as part of a multi-volume (split) archive
|
|
109
|
+
* @param base_name Base name without the volume suffix (e.g., "archive.tar.gz")
|
|
110
|
+
* @param options Optional configuration (e.g., preserve Given ordering)
|
|
111
|
+
*
|
|
112
|
+
* Register each entry that belongs to the same multi-volume group so that
|
|
113
|
+
* once traversal of the parent archive finishes, the parts are combined
|
|
114
|
+
* automatically. The traverser will then descend into the combined archive
|
|
115
|
+
* and continue processing its contents.
|
|
116
|
+
*
|
|
117
|
+
* Example:
|
|
118
|
+
* @code
|
|
119
|
+
* for (Entry& entry : traverser) {
|
|
120
|
+
* if (entry.path().find(".part") != std::string::npos) {
|
|
121
|
+
* std::string base = extract_base_name(entry.path());
|
|
122
|
+
* entry.set_multi_volume_group(base);
|
|
123
|
+
* }
|
|
124
|
+
* }
|
|
125
|
+
* @endcode
|
|
126
|
+
*/
|
|
127
|
+
void set_multi_volume_group(const std::string &base_name, const MultiVolumeGroupOptions &options = {});
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* @brief Get metadata captured for this entry
|
|
131
|
+
* @return Immutable metadata map keyed by libarchive field names
|
|
132
|
+
*/
|
|
133
|
+
const EntryMetadataMap &metadata() const;
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* @brief Look up a metadata value by key
|
|
137
|
+
* @param key Metadata key (e.g., "uid", "mtime")
|
|
138
|
+
* @return Pointer to the stored value, or nullptr if not present
|
|
139
|
+
*/
|
|
140
|
+
const EntryMetadataValue *find_metadata(const std::string &key) const;
|
|
141
|
+
|
|
142
|
+
static std::unique_ptr<Entry> create(PathHierarchy hierarchy, std::shared_ptr<ArchiveStackOrchestrator> data_source_orchestrator, bool default_descent);
|
|
143
|
+
|
|
144
|
+
// Copy/move operations
|
|
145
|
+
Entry(const Entry &);
|
|
146
|
+
Entry &operator=(const Entry &);
|
|
147
|
+
Entry(Entry &&) noexcept;
|
|
148
|
+
Entry &operator=(Entry &&) noexcept;
|
|
149
|
+
|
|
150
|
+
~Entry();
|
|
151
|
+
|
|
152
|
+
private:
|
|
153
|
+
class Impl;
|
|
154
|
+
std::unique_ptr<Impl> _impl;
|
|
155
|
+
|
|
156
|
+
// Private constructor - only friends can create Entry objects
|
|
157
|
+
explicit Entry(Impl *impl);
|
|
158
|
+
Entry(const PathHierarchy &hierarchy, std::shared_ptr<ArchiveStackOrchestrator> data_source_orchestrator, bool default_descent);
|
|
159
|
+
};
|
|
160
|
+
|
|
161
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "archive_r/path_hierarchy.h"
|
|
7
|
+
|
|
8
|
+
#include <functional>
|
|
9
|
+
#include <string>
|
|
10
|
+
|
|
11
|
+
namespace archive_r {
|
|
12
|
+
|
|
13
|
+
/** Describes a recoverable failure encountered while visiting an entry. */
|
|
14
|
+
struct EntryFault {
|
|
15
|
+
PathHierarchy hierarchy; ///< Path hierarchy where the fault occurred
|
|
16
|
+
std::string message; ///< Human readable description
|
|
17
|
+
int errno_value = 0; ///< Optional errno captured from the failing API
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
/** Callback signature used to surface EntryFault notifications. */
|
|
21
|
+
using FaultCallback = std::function<void(const EntryFault &)>;
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @brief Register a global callback to receive EntryFault notifications.
|
|
25
|
+
* Pass an empty std::function to clear the callback.
|
|
26
|
+
*/
|
|
27
|
+
void register_fault_callback(FaultCallback callback);
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* @brief Dispatch a fault through the globally registered callback, if any.
|
|
31
|
+
*/
|
|
32
|
+
void dispatch_registered_fault(const EntryFault &fault);
|
|
33
|
+
|
|
34
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include <cstdint>
|
|
7
|
+
#include <string>
|
|
8
|
+
#include <unordered_map>
|
|
9
|
+
#include <variant>
|
|
10
|
+
#include <vector>
|
|
11
|
+
|
|
12
|
+
namespace archive_r {
|
|
13
|
+
|
|
14
|
+
/** POSIX-style timestamp with sub-second precision. */
struct EntryMetadataTime {
  int64_t seconds = 0;
  int32_t nanoseconds = 0;
};

/** Sparse file chunk (offset + stored length). */
struct EntryMetadataSparseChunk {
  int64_t offset = 0;
  int64_t length = 0;
};

/** Extended attribute key/value pair. */
struct EntryMetadataXattr {
  std::string name;
  std::vector<uint8_t> value;
};

/** Generic digest (algorithm + raw bytes). */
struct EntryMetadataDigest {
  std::string algorithm;
  std::vector<uint8_t> value;
};

/** Device identifiers for special files. */
struct EntryMetadataDeviceNumbers {
  // Initialized with `= 0` so a default-constructed value is well defined.
  uint64_t major = 0;
  uint64_t minor = 0;
};

/** BSD-style file flags (bits to set/clear). */
struct EntryMetadataFileFlags {
  uint64_t set = 0;
  uint64_t clear = 0;
};

/**
 * Tagged union of every value type a metadata entry may hold.
 * A default-constructed value holds std::monostate (the first alternative).
 */
using EntryMetadataValue = std::variant<std::monostate, bool, int64_t, uint64_t, std::string, std::vector<uint8_t>, EntryMetadataTime, EntryMetadataDeviceNumbers, EntryMetadataFileFlags,
                                        std::vector<EntryMetadataXattr>, std::vector<EntryMetadataSparseChunk>, std::vector<EntryMetadataDigest>>;

/** Unordered map storing metadata captured during traversal. */
using EntryMetadataMap = std::unordered_map<std::string, EntryMetadataValue>;
|
|
55
|
+
|
|
56
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "archive_r/data_stream.h"
|
|
7
|
+
#include "archive_r/path_hierarchy.h"
|
|
8
|
+
|
|
9
|
+
#include <cstddef>
|
|
10
|
+
#include <cstdint>
|
|
11
|
+
#include <memory>
|
|
12
|
+
|
|
13
|
+
namespace archive_r {
|
|
14
|
+
|
|
15
|
+
class MultiVolumeStreamBase : public IDataStream {
|
|
16
|
+
public:
|
|
17
|
+
~MultiVolumeStreamBase() override;
|
|
18
|
+
|
|
19
|
+
ssize_t read(void *buffer, size_t size) override;
|
|
20
|
+
void rewind() override;
|
|
21
|
+
bool at_end() const override;
|
|
22
|
+
int64_t seek(int64_t offset, int whence) override;
|
|
23
|
+
int64_t tell() const override;
|
|
24
|
+
bool can_seek() const override { return _supports_seek; }
|
|
25
|
+
PathHierarchy source_hierarchy() const override { return _logical_path; }
|
|
26
|
+
|
|
27
|
+
protected:
|
|
28
|
+
MultiVolumeStreamBase(PathHierarchy logical_path, bool supports_seek);
|
|
29
|
+
|
|
30
|
+
virtual void open_single_part(const PathHierarchy &single_part) = 0;
|
|
31
|
+
virtual void close_single_part() = 0;
|
|
32
|
+
virtual ssize_t read_from_single_part(void *buffer, size_t size) = 0;
|
|
33
|
+
virtual int64_t seek_within_single_part(int64_t offset, int whence) = 0;
|
|
34
|
+
virtual int64_t size_of_single_part(const PathHierarchy &single_part) = 0;
|
|
35
|
+
|
|
36
|
+
PathHierarchy _logical_path;
|
|
37
|
+
void deactivate_active_part();
|
|
38
|
+
|
|
39
|
+
private:
|
|
40
|
+
friend struct Impl;
|
|
41
|
+
struct Impl;
|
|
42
|
+
std::unique_ptr<Impl> _impl;
|
|
43
|
+
const bool _supports_seek;
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include <cstddef>
|
|
7
|
+
#include <stdexcept>
|
|
8
|
+
#include <string>
|
|
9
|
+
#include <utility>
|
|
10
|
+
#include <variant>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
namespace archive_r {
|
|
14
|
+
|
|
15
|
+
/**
 * @brief Represents a single component within a logical path hierarchy.
 *
 * A component can be one of three shapes:
 * - single string value (most common)
 * - multi-volume part list (split archives that share a common base name)
 * - nested list of child entries (used for synthetic grouping)
 *
 * A default-constructed PathEntry is a single entry holding an empty string.
 */
class PathEntry {
public:
  /** Part names of a multi-volume component plus the ordering recorded for them. */
  struct Parts {
    std::vector<std::string> values;
    enum class Ordering { Natural, Given } ordering = Ordering::Natural;
  };

  using NodeList = std::vector<PathEntry>;

  PathEntry() = default;

  explicit PathEntry(std::string value)
      : _value(std::move(value)) {}

  explicit PathEntry(Parts parts)
      : _value(std::move(parts)) {}

  explicit PathEntry(NodeList nodes)
      : _value(std::move(nodes)) {}

  /** Build a single-string component. */
  static PathEntry single(std::string entry) { return PathEntry(std::move(entry)); }

  /**
   * @brief Build a multi-volume component
   * @throws std::invalid_argument when @p entries is empty
   */
  static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
    // Validate first so no Parts object is built for invalid input.
    if (entries.empty()) {
      throw std::invalid_argument("multi-volume parts cannot be empty");
    }
    return PathEntry(Parts{ std::move(entries), ordering });
  }

  /**
   * @brief Build a nested component from child entries
   * @throws std::invalid_argument when @p hierarchies is empty
   */
  static PathEntry nested(NodeList hierarchies) {
    if (hierarchies.empty()) {
      throw std::invalid_argument("nested hierarchies cannot be empty");
    }
    return PathEntry(std::move(hierarchies));
  }

  // Shape queries: exactly one of these is true for any PathEntry.
  bool is_single() const { return std::holds_alternative<std::string>(_value); }
  bool is_multi_volume() const { return std::holds_alternative<Parts>(_value); }
  bool is_nested() const { return std::holds_alternative<NodeList>(_value); }

  // Accessors: each uses std::get, which throws std::bad_variant_access when
  // the entry does not hold the requested shape. Check the matching is_*()
  // query first.
  const std::string &single_value() const { return std::get<std::string>(_value); }
  const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
  Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
  const NodeList &nested_nodes() const { return std::get<NodeList>(_value); }
  NodeList &nested_nodes_mut() { return std::get<NodeList>(_value); }

private:
  std::variant<std::string, Parts, NodeList> _value;
};
|
|
72
|
+
|
|
73
|
+
using PathHierarchy = std::vector<PathEntry>;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Compare two entries using the ordering enforced throughout archive_r.
|
|
77
|
+
*
|
|
78
|
+
* Ordering rules:
|
|
79
|
+
* 1. Entry categories are ordered single < multi-volume < nested node-list.
|
|
80
|
+
* 2. Single entries compare by string value.
|
|
81
|
+
* 3. Multi-volume entries first compare their ordering flag (Natural < Given),
|
|
82
|
+
* then compare corresponding part names lexicographically, finally by list length.
|
|
83
|
+
* 4. Nested node-lists compare child entries pairwise using the same rules.
|
|
84
|
+
*/
|
|
85
|
+
int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
|
|
86
|
+
|
|
87
|
+
/** Compare complete hierarchies lexicographically using compare_entries on each level. */
|
|
88
|
+
int compare_hierarchies(const PathHierarchy &lhs, const PathHierarchy &rhs);
|
|
89
|
+
|
|
90
|
+
/** Shorthand equality helpers for entries and hierarchies. */
|
|
91
|
+
bool entries_equal(const PathEntry &lhs, const PathEntry &rhs);
|
|
92
|
+
bool hierarchies_equal(const PathHierarchy &lhs, const PathHierarchy &rhs);
|
|
93
|
+
|
|
94
|
+
/** Strict-weak-order functor suitable for associative containers. */
|
|
95
|
+
struct PathHierarchyLess {
|
|
96
|
+
bool operator()(const PathHierarchy &lhs, const PathHierarchy &rhs) const;
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
/** Build a hierarchy containing a single leaf component. */
|
|
100
|
+
PathHierarchy make_single_path(const std::string &root);
|
|
101
|
+
|
|
102
|
+
/** Append helpers for single and multi-volume components. */
|
|
103
|
+
void append_single(PathHierarchy &hierarchy, std::string value);
|
|
104
|
+
void append_multi_volume(PathHierarchy &hierarchy, std::vector<std::string> parts, PathEntry::Parts::Ordering ordering = PathEntry::Parts::Ordering::Natural);
|
|
105
|
+
|
|
106
|
+
/** Extract prefix/slice helpers. */
|
|
107
|
+
PathHierarchy pathhierarchy_prefix_until(const PathHierarchy &hierarchy, size_t inclusive_index);
|
|
108
|
+
PathHierarchy parent_hierarchy(const PathHierarchy &hierarchy);
|
|
109
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "archive_r/path_hierarchy.h"
|
|
7
|
+
#include <cstddef>
|
|
8
|
+
#include <string>
|
|
9
|
+
#include <vector>
|
|
10
|
+
|
|
11
|
+
namespace archive_r {
|
|
12
|
+
|
|
13
|
+
/** Return pointer to the Nth single value of an entry (nullptr if absent). */
|
|
14
|
+
const std::string *path_entry_component_at(const PathEntry &entry, std::size_t index);
|
|
15
|
+
|
|
16
|
+
/** Convenience helpers for multi-volume PathHierarchy nodes. */
|
|
17
|
+
std::size_t pathhierarchy_volume_size(const PathHierarchy &logical);
|
|
18
|
+
std::string pathhierarchy_volume_entry_name(const PathHierarchy &logical, std::size_t index);
|
|
19
|
+
bool pathhierarchy_is_multivolume(const PathHierarchy &hierarchy);
|
|
20
|
+
PathHierarchy pathhierarchy_select_single_part(const PathHierarchy &logical, std::size_t index);
|
|
21
|
+
|
|
22
|
+
/** Combine sibling hierarchies that differ only by their terminal part list. */
|
|
23
|
+
PathHierarchy merge_multi_volume_sources(const std::vector<PathHierarchy> &sources);
|
|
24
|
+
|
|
25
|
+
/** Sort hierarchies using PathHierarchyLess semantics. */
|
|
26
|
+
void sort_hierarchies(std::vector<PathHierarchy> &hierarchies);
|
|
27
|
+
|
|
28
|
+
/** Render helpers converting entries to flattened strings for diagnostics. */
|
|
29
|
+
bool flatten_entry_to_string(const PathEntry &entry, std::string &output);
|
|
30
|
+
bool entry_name_from_component(const PathEntry &entry, std::string &output);
|
|
31
|
+
|
|
32
|
+
/** Human readable pretty-printers used in logging and debug output. */
|
|
33
|
+
std::string path_entry_display(const PathEntry &entry);
|
|
34
|
+
std::string hierarchy_display(const PathHierarchy &hierarchy);
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
} // namespace archive_r
|