archive_r_ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +56 -0
- data/README.md +103 -0
- data/ext/archive_r/archive_r_ext.cc +910 -0
- data/ext/archive_r/extconf.rb +90 -0
- data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
- data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
- data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
- data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
- data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
- data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
- data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
- data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
- data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
- data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
- data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
- data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
- data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
- data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
- data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
- data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
- data/lib/archive_r.rb +80 -0
- metadata +112 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "archive_r/entry_fault.h"
|
|
7
|
+
#include "archive_r/path_hierarchy.h"
|
|
8
|
+
#include "entry.h"
|
|
9
|
+
#include <memory>
|
|
10
|
+
#include <string>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
namespace archive_r {
|
|
14
|
+
|
|
15
|
+
/**
 * @brief Settings supplied when constructing a Traverser.
 *
 * A default-constructed instance has empty lists and descend_archives == true.
 */
struct TraverserOptions {
  /// Passphrases for encrypted archives.
  std::vector<std::string> passphrases;

  /// Specific archive formats to enable; an empty list enables all formats.
  std::vector<std::string> formats;

  /// Metadata keys to capture for entries.
  std::vector<std::string> metadata_keys;

  /// Whether to descend into archives by default.
  bool descend_archives = true;
};
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* @brief Iterator-based traversal for archives and directories
|
|
24
|
+
*
|
|
25
|
+
* Traverser provides a unified iterator-based interface for traversing
|
|
26
|
+
* entries within archives and directories, including support for nested
|
|
27
|
+
* archives and automatic descent.
|
|
28
|
+
*
|
|
29
|
+
* Supports multiple archive formats via libarchive (tar, zip, gzip, etc.)
|
|
30
|
+
* and filesystem directories.
|
|
31
|
+
*
|
|
32
|
+
* Uses std::filesystem for directory traversal and ArchiveStackOrchestrator for archives.
|
|
33
|
+
|
|
34
|
+
* @see Entry, ArchiveStackOrchestrator
|
|
35
|
+
*
|
|
36
|
+
* Usage:
|
|
37
|
+
* Traverser traverser({make_single_path("archive.tar.gz")}); // or directory path
|
|
38
|
+
* for (Entry& entry : traverser) {
|
|
39
|
+
* // Process entry
|
|
40
|
+
* }
|
|
41
|
+
*
|
|
42
|
+
* @note Thread Safety
|
|
43
|
+
* Traverser instances are not thread-safe. To use the traverser in a
|
|
44
|
+
* multi-threaded environment, create a separate Traverser instance for each
|
|
45
|
+
* thread. Do not share a single instance across multiple threads.
|
|
46
|
+
*/
|
|
47
|
+
class Traverser {
|
|
48
|
+
public:
|
|
49
|
+
/**
|
|
50
|
+
* @brief Construct traverser for archives or directories
|
|
51
|
+
* @param paths Paths to archive files or directories
|
|
52
|
+
*
|
|
53
|
+
* Provide one or more paths to traverse. Single-path traversal can be
|
|
54
|
+
* achieved by passing a container with one element:
|
|
55
|
+
* Traverser traverser({make_single_path("archive.tar.gz")});
|
|
56
|
+
*/
|
|
57
|
+
explicit Traverser(std::vector<PathHierarchy> paths, TraverserOptions options = {});
|
|
58
|
+
|
|
59
|
+
~Traverser();
|
|
60
|
+
|
|
61
|
+
// Non-copyable
|
|
62
|
+
Traverser(const Traverser &) = delete;
|
|
63
|
+
Traverser &operator=(const Traverser &) = delete;
|
|
64
|
+
|
|
65
|
+
// ========================================================================
|
|
66
|
+
// Iterator API
|
|
67
|
+
// ========================================================================
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* @brief Forward iterator for traversing entries
|
|
71
|
+
*
|
|
72
|
+
* Satisfies InputIterator requirements:
|
|
73
|
+
* - Move-only (non-copyable)
|
|
74
|
+
* - Equality comparable
|
|
75
|
+
* - Dereferenceable (returns Entry&)
|
|
76
|
+
* - Incrementable
|
|
77
|
+
*/
|
|
78
|
+
class Iterator {
|
|
79
|
+
public:
|
|
80
|
+
using iterator_category = std::input_iterator_tag;
|
|
81
|
+
using value_type = Entry;
|
|
82
|
+
using difference_type = std::ptrdiff_t;
|
|
83
|
+
using pointer = Entry *;
|
|
84
|
+
using reference = Entry &;
|
|
85
|
+
|
|
86
|
+
reference operator*();
|
|
87
|
+
pointer operator->();
|
|
88
|
+
Iterator &operator++();
|
|
89
|
+
bool operator==(const Iterator &other) const;
|
|
90
|
+
bool operator!=(const Iterator &other) const;
|
|
91
|
+
|
|
92
|
+
~Iterator();
|
|
93
|
+
Iterator(const Iterator &) = delete;
|
|
94
|
+
Iterator &operator=(const Iterator &) = delete;
|
|
95
|
+
Iterator(Iterator &&) noexcept;
|
|
96
|
+
Iterator &operator=(Iterator &&) noexcept;
|
|
97
|
+
|
|
98
|
+
private:
|
|
99
|
+
friend class Traverser;
|
|
100
|
+
class Impl;
|
|
101
|
+
std::unique_ptr<Impl> _impl;
|
|
102
|
+
explicit Iterator(std::unique_ptr<Impl> impl);
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* @brief Get iterator to first entry
|
|
107
|
+
* @return Iterator pointing to first entry
|
|
108
|
+
*/
|
|
109
|
+
Iterator begin();
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* @brief Get end iterator
|
|
113
|
+
* @return End iterator (sentinel)
|
|
114
|
+
*/
|
|
115
|
+
Iterator end();
|
|
116
|
+
|
|
117
|
+
private:
|
|
118
|
+
std::vector<PathHierarchy> _initial_paths; ///< Initial paths provided to constructor
|
|
119
|
+
TraverserOptions _options; ///< Options controlling archive handling
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#include "archive_stack_cursor.h"
|
|
5
|
+
|
|
6
|
+
#include "archive_r/path_hierarchy_utils.h"
|
|
7
|
+
#include "system_file_stream.h"
|
|
8
|
+
#include <exception>
|
|
9
|
+
#include <memory>
|
|
10
|
+
#include <stdexcept>
|
|
11
|
+
#include <typeinfo>
|
|
12
|
+
#include <utility>
|
|
13
|
+
|
|
14
|
+
namespace archive_r {
|
|
15
|
+
|
|
16
|
+
namespace {
|
|
17
|
+
|
|
18
|
+
/// Throws the error object built by make_entry_fault_error(message, hierarchy); never returns.
[[noreturn]] void throw_entry_fault(const std::string &message, const PathHierarchy &hierarchy) {
  throw make_entry_fault_error(message, hierarchy);
}
|
|
19
|
+
|
|
20
|
+
} // namespace
|
|
21
|
+
|
|
22
|
+
// ============================================================================
|
|
23
|
+
// StreamArchive Implementation
|
|
24
|
+
// ============================================================================
|
|
25
|
+
|
|
26
|
+
/**
 * @brief Build an archive reader on top of a data stream and open it right away.
 * @param stream  Data source for the archive; must not be null.
 * @param options Archive options kept for use by open_archive().
 * @throws std::invalid_argument if @p stream is null.
 */
StreamArchive::StreamArchive(std::shared_ptr<IDataStream> stream, ArchiveOption options)
    : Archive()
    , _stream(std::move(stream))
    , _options(std::move(options)) {
  if (_stream == nullptr) {
    throw std::invalid_argument("StreamArchive requires a valid data stream");
  }

  open_archive();
}

StreamArchive::~StreamArchive() = default;
|
|
38
|
+
|
|
39
|
+
void StreamArchive::open_archive() {
|
|
40
|
+
archive_ptr ar = new_read_archive_common(_options.passphrases, _options.formats, [this](struct archive *ar) -> int {
|
|
41
|
+
archive_read_set_callback_data(ar, this);
|
|
42
|
+
archive_read_set_read_callback(ar, read_callback_bridge);
|
|
43
|
+
if (_stream->can_seek()) {
|
|
44
|
+
archive_read_set_skip_callback(ar, skip_callback_bridge);
|
|
45
|
+
archive_read_set_seek_callback(ar, seek_callback_bridge);
|
|
46
|
+
}
|
|
47
|
+
return archive_read_open1(ar);
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
_ar = ar.release();
|
|
51
|
+
current_entryname.clear();
|
|
52
|
+
_at_eof = false;
|
|
53
|
+
_current_entry_content_ready = false;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
/// Rewinds the backing data stream first, then runs the base-class Archive::rewind().
void StreamArchive::rewind() {
  _stream->rewind();
  Archive::rewind();
}
|
|
60
|
+
|
|
61
|
+
/// Returns the source hierarchy reported by the backing data stream.
PathHierarchy StreamArchive::source_hierarchy() const {
  return _stream->source_hierarchy();
}
|
|
62
|
+
|
|
63
|
+
/**
 * @brief libarchive read callback: fill the internal buffer from the data stream.
 * @param a           Archive handle passed by libarchive (unused).
 * @param client_data The StreamArchive registered via archive_read_set_callback_data().
 * @param buff        Receives a pointer to the internal buffer when the read did not fail.
 * @return Byte count reported by the stream's read(); a negative stream result is passed
 *         through unchanged, and -1 is returned if the stream throws.
 */
la_ssize_t StreamArchive::read_callback_bridge(struct archive *a, void *client_data, const void **buff) {
  (void)a;
  auto *self = static_cast<StreamArchive *>(client_data);

  ssize_t bytes_read = 0;
  try {
    bytes_read = self->_stream->read(self->_buffer.data(), self->_buffer.size());
  } catch (...) {
    // This function is called from libarchive's C code, so no exception of any type
    // may escape it; every failure is reported as a negative return value instead.
    return -1;
  }

  if (bytes_read < 0) {
    // Error reported by the stream: leave *buff untouched.
    return static_cast<la_ssize_t>(bytes_read);
  }

  *buff = self->_buffer.data();
  return static_cast<la_ssize_t>(bytes_read);
}
|
|
80
|
+
|
|
81
|
+
/**
 * @brief libarchive seek callback: forward the request to the data stream.
 * @param a           Archive handle passed by libarchive (unused).
 * @param client_data The StreamArchive registered via archive_read_set_callback_data().
 * @param request     Offset forwarded to the stream's seek().
 * @param whence      Origin forwarded to the stream's seek().
 * @return The value returned by the stream's seek(), or -1 if the stream throws.
 */
la_int64_t StreamArchive::seek_callback_bridge(struct archive *a, void *client_data, la_int64_t request, int whence) {
  (void)a;
  auto *self = static_cast<StreamArchive *>(client_data);
  try {
    return self->_stream->seek(request, whence);
  } catch (...) {
    // Called from libarchive's C code: no exception of any type may escape.
    return -1;
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * @brief libarchive skip callback: advance the data stream by seeking forward.
 * @param a           Archive handle passed by libarchive (unused).
 * @param client_data The StreamArchive registered via archive_read_set_callback_data().
 * @param request     Number of bytes to skip, forwarded as a SEEK_CUR offset.
 * @return Distance between the position before and after the seek; 0 when the current
 *         position cannot be determined, the seek fails, or the stream throws.
 */
la_int64_t StreamArchive::skip_callback_bridge(struct archive *a, void *client_data, la_int64_t request) {
  (void)a;
  auto *self = static_cast<StreamArchive *>(client_data);
  try {
    // Determine the starting position; fall back to a zero-length seek if tell() fails.
    la_int64_t before = self->_stream->tell();
    if (before < 0) {
      before = self->_stream->seek(0, SEEK_CUR);
    }
    if (before < 0) {
      return 0;
    }

    const auto after = self->_stream->seek(request, SEEK_CUR);
    if (after >= 0) {
      return after - before;
    }
  } catch (...) {
    // Called from libarchive's C code: no exception of any type may escape.
    return 0;
  }
  return 0;
}
|
|
110
|
+
|
|
111
|
+
// ============================================================================
|
|
112
|
+
// EntryPayloadStream Implementation
|
|
113
|
+
// ============================================================================
|
|
114
|
+
|
|
115
|
+
/**
 * @brief Create a stream over an entry's payload inside a parent archive.
 * @param parent_archive Archive that contains the entry; must not be null.
 * @param logical_path   Hierarchy identifying the entry, handed to MultiVolumeStreamBase.
 * @throws std::invalid_argument if @p parent_archive is null.
 */
EntryPayloadStream::EntryPayloadStream(std::shared_ptr<StreamArchive> parent_archive, PathHierarchy logical_path)
    : MultiVolumeStreamBase(std::move(logical_path), false)
    , _parent_archive(std::move(parent_archive)) {
  if (_parent_archive == nullptr) {
    throw std::invalid_argument("Invalid parent archive context");
  }
}

EntryPayloadStream::~EntryPayloadStream() = default;
|
|
124
|
+
|
|
125
|
+
/// Returns a shared pointer to the archive this payload stream reads from.
std::shared_ptr<StreamArchive> EntryPayloadStream::parent_archive() const {
  return _parent_archive;
}
|
|
126
|
+
|
|
127
|
+
void EntryPayloadStream::rewind() {
|
|
128
|
+
MultiVolumeStreamBase::rewind();
|
|
129
|
+
const PathHierarchy first_part = pathhierarchy_select_single_part(_logical_path, 0);
|
|
130
|
+
const std::string entry_name = first_part.back().single_value();
|
|
131
|
+
|
|
132
|
+
if (!_parent_archive->skip_to_entry(entry_name)) {
|
|
133
|
+
throw_entry_fault("Parent archive does not contain requested stream part", first_part);
|
|
134
|
+
}
|
|
135
|
+
// leave parent positioned at the beginning of the first part so subsequent reads start cleanly
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
void EntryPayloadStream::open_single_part(const PathHierarchy &single_part) {
|
|
139
|
+
const std::string entry_name = single_part.back().single_value();
|
|
140
|
+
if (!_parent_archive->skip_to_entry(entry_name)) {
|
|
141
|
+
throw_entry_fault("Parent archive does not contain requested stream part", single_part);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/// Skips the data of the parent archive's current entry; does nothing when no entry name is set.
void EntryPayloadStream::close_single_part() {
  if (!_parent_archive->current_entryname.empty()) {
    _parent_archive->skip_data();
  }
}
|
|
152
|
+
|
|
153
|
+
ssize_t EntryPayloadStream::read_from_single_part(void *buffer, size_t size) {
|
|
154
|
+
if (size == 0) {
|
|
155
|
+
return 0;
|
|
156
|
+
}
|
|
157
|
+
return _parent_archive->read_current(buffer, size);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/// Seeking inside an entry payload is not supported: always returns -1 and ignores both arguments.
int64_t EntryPayloadStream::seek_within_single_part(int64_t offset, int whence) {
  (void)offset;
  (void)whence;
  return -1;
}
|
|
161
|
+
|
|
162
|
+
/// No size is reported for an entry payload part: always returns -1 and ignores the argument.
int64_t EntryPayloadStream::size_of_single_part(const PathHierarchy &single_part) {
  static_cast<void>(single_part);
  return -1;
}
|
|
166
|
+
|
|
167
|
+
// ============================================================================
|
|
168
|
+
// ArchiveStackCursor Implementation
|
|
169
|
+
// ============================================================================
|
|
170
|
+
|
|
171
|
+
/// Creates a cursor with value-initialized options and an empty stream stack.
ArchiveStackCursor::ArchiveStackCursor()
    : options_snapshot()
    , stream_stack() {
}
|
|
174
|
+
|
|
175
|
+
/// Stores a copy of @p options; descend() passes this copy to each StreamArchive it creates.
void ArchiveStackCursor::configure(const ArchiveOption &options) {
  options_snapshot = options;
}
|
|
178
|
+
|
|
179
|
+
/// Returns the cursor to its initial state: default options and no streams.
void ArchiveStackCursor::reset() {
  // Replace the stored options with a value-initialized ArchiveOption.
  options_snapshot = ArchiveOption{};
  // Drop every stream held by the stack.
  stream_stack.clear();
}
|
|
183
|
+
|
|
184
|
+
bool ArchiveStackCursor::descend() {
|
|
185
|
+
if (stream_stack.empty()) {
|
|
186
|
+
throw std::logic_error("stream stack is empty");
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
auto stream = stream_stack.back();
|
|
190
|
+
|
|
191
|
+
if (auto *archive = current_archive()) {
|
|
192
|
+
if (stream && !archive->current_entry_content_ready()) {
|
|
193
|
+
stream->rewind();
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
PathHierarchy dummy_hierarchy = stream->source_hierarchy();
|
|
198
|
+
auto archive_ptr = std::make_shared<StreamArchive>(std::move(stream), options_snapshot);
|
|
199
|
+
append_single(dummy_hierarchy, std::string{});
|
|
200
|
+
stream_stack.emplace_back(std::make_shared<EntryPayloadStream>(archive_ptr, std::move(dummy_hierarchy)));
|
|
201
|
+
return true;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/**
 * @brief Pop the top stream off the stack.
 * @return false when the stack was already empty, true after a stream was removed.
 */
bool ArchiveStackCursor::ascend() {
  if (stream_stack.empty()) {
    return false;
  }

  stream_stack.pop_back();
  return true;
}
|
|
213
|
+
|
|
214
|
+
bool ArchiveStackCursor::next() {
|
|
215
|
+
StreamArchive *archive = current_archive();
|
|
216
|
+
if (!archive) {
|
|
217
|
+
return false;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
while (true) {
|
|
221
|
+
if (!archive->skip_to_next_header()) {
|
|
222
|
+
return false;
|
|
223
|
+
}
|
|
224
|
+
if (!archive->current_entryname.empty()) {
|
|
225
|
+
break;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
stream_stack.back() = create_stream(current_entry_hierarchy());
|
|
229
|
+
return true;
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
bool ArchiveStackCursor::synchronize_to_hierarchy(const PathHierarchy &target_hierarchy) {
|
|
233
|
+
if (target_hierarchy.empty()) {
|
|
234
|
+
throw_entry_fault("target hierarchy cannot be empty", {});
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
const size_t last_depth = target_hierarchy.size() - 1;
|
|
238
|
+
if (stream_stack.size() < target_hierarchy.size()) {
|
|
239
|
+
stream_stack.resize(target_hierarchy.size());
|
|
240
|
+
}
|
|
241
|
+
for (size_t depth = 0; depth < target_hierarchy.size(); ++depth) {
|
|
242
|
+
auto prefix = pathhierarchy_prefix_until(target_hierarchy, depth);
|
|
243
|
+
auto stream = stream_stack[depth];
|
|
244
|
+
|
|
245
|
+
// Reuse the existing stream when it already matches this prefix.
|
|
246
|
+
if (stream && hierarchies_equal(stream->source_hierarchy(), prefix)) {
|
|
247
|
+
continue;
|
|
248
|
+
}
|
|
249
|
+
// Shrink the stack to the current depth before creating a fresh stream.
|
|
250
|
+
stream_stack.resize(depth+1);
|
|
251
|
+
stream = create_stream(prefix);
|
|
252
|
+
stream_stack.back() = stream;
|
|
253
|
+
stream->rewind();
|
|
254
|
+
|
|
255
|
+
if (depth == last_depth) {
|
|
256
|
+
return true;
|
|
257
|
+
}
|
|
258
|
+
// Descend into the archive for the next level of the hierarchy.
|
|
259
|
+
descend();
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
return true;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
ssize_t ArchiveStackCursor::read(void *buff, size_t len) {
|
|
266
|
+
if (len == 0) {
|
|
267
|
+
return 0;
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
if (stream_stack.empty()) {
|
|
271
|
+
throw_entry_fault("Stream stack is empty", {});
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
auto stream = stream_stack.back();
|
|
275
|
+
ssize_t bytes = 0;
|
|
276
|
+
bytes = stream->read(buff, len);
|
|
277
|
+
|
|
278
|
+
if (bytes < 0) {
|
|
279
|
+
const std::string message = "Failed to read from active stream";
|
|
280
|
+
throw_entry_fault(message, current_entry_hierarchy());
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
return bytes;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
/**
 * @brief Archive that owns the entry stream on top of the stack.
 * @return The parent archive of the top stream when that stream is an
 *         EntryPayloadStream; nullptr when the stack is empty or the top stream
 *         is of another kind (or missing).
 */
StreamArchive *ArchiveStackCursor::current_archive() {
  if (stream_stack.empty()) {
    return nullptr;
  }

  const auto payload = std::dynamic_pointer_cast<EntryPayloadStream>(stream_stack.back());
  if (payload == nullptr) {
    return nullptr;
  }

  // get() already yields nullptr when no parent archive is set.
  return payload->parent_archive().get();
}
|
|
299
|
+
|
|
300
|
+
/**
 * @brief Hierarchy describing the entry the cursor currently points at.
 * @return An empty hierarchy when the stack is empty or its first slot holds no
 *         stream; the first stream's source hierarchy when there is no current
 *         archive; otherwise the current archive's source hierarchy with the
 *         current entry name appended when that name is non-empty.
 */
PathHierarchy ArchiveStackCursor::current_entry_hierarchy() {
  if (stream_stack.empty() || !stream_stack.front()) {
    return {};
  }

  StreamArchive *archive = current_archive();
  if (archive == nullptr) {
    return stream_stack.front()->source_hierarchy();
  }

  PathHierarchy path = archive->source_hierarchy();
  if (!archive->current_entryname.empty()) {
    append_single(path, archive->current_entryname);
  }
  return path;
}
|
|
315
|
+
|
|
316
|
+
std::shared_ptr<IDataStream> ArchiveStackCursor::create_stream(const PathHierarchy &hierarchy) {
|
|
317
|
+
if (hierarchy.size() == 1) {
|
|
318
|
+
if (auto factory = get_root_stream_factory()) {
|
|
319
|
+
if (auto stream = factory(hierarchy)) {
|
|
320
|
+
return stream;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
return std::make_shared<SystemFileStream>(hierarchy);
|
|
324
|
+
}
|
|
325
|
+
auto stream = std::dynamic_pointer_cast<EntryPayloadStream>(stream_stack.back());
|
|
326
|
+
|
|
327
|
+
return std::make_shared<EntryPayloadStream>(stream->parent_archive(), hierarchy);
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "archive_r/data_stream.h"
|
|
7
|
+
#include "archive_r/path_hierarchy.h"
|
|
8
|
+
#include "archive_type.h"
|
|
9
|
+
#include "entry_fault_error.h"
|
|
10
|
+
#include "archive_r/multi_volume_stream_base.h"
|
|
11
|
+
#include <array>
|
|
12
|
+
#include <cstddef>
|
|
13
|
+
#include <exception>
|
|
14
|
+
#include <memory>
|
|
15
|
+
#include <string>
|
|
16
|
+
#include <sys/types.h>
|
|
17
|
+
#include <vector>
|
|
18
|
+
|
|
19
|
+
namespace archive_r {
|
|
20
|
+
|
|
21
|
+
// ============================================================================
|
|
22
|
+
// StreamArchive Interface
|
|
23
|
+
// ============================================================================
|
|
24
|
+
|
|
25
|
+
class StreamArchive : public Archive {
|
|
26
|
+
public:
|
|
27
|
+
explicit StreamArchive(std::shared_ptr<IDataStream> stream, ArchiveOption options = {});
|
|
28
|
+
|
|
29
|
+
~StreamArchive() override;
|
|
30
|
+
|
|
31
|
+
void open_archive() override;
|
|
32
|
+
void rewind() override;
|
|
33
|
+
|
|
34
|
+
PathHierarchy source_hierarchy() const;
|
|
35
|
+
|
|
36
|
+
private:
|
|
37
|
+
static la_ssize_t read_callback_bridge(struct archive *a, void *client_data, const void **buff);
|
|
38
|
+
static la_int64_t seek_callback_bridge(struct archive *a, void *client_data, la_int64_t request, int whence);
|
|
39
|
+
static la_int64_t skip_callback_bridge(struct archive *a, void *client_data, la_int64_t request);
|
|
40
|
+
|
|
41
|
+
static constexpr size_t BUFFER_SIZE = 65536;
|
|
42
|
+
std::shared_ptr<IDataStream> _stream;
|
|
43
|
+
std::array<char, BUFFER_SIZE> _buffer;
|
|
44
|
+
ArchiveOption _options;
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
// ============================================================================
|
|
48
|
+
// EntryPayloadStream Interface
|
|
49
|
+
// ============================================================================
|
|
50
|
+
|
|
51
|
+
class EntryPayloadStream : public MultiVolumeStreamBase {
|
|
52
|
+
public:
|
|
53
|
+
EntryPayloadStream(std::shared_ptr<StreamArchive> parent_archive, PathHierarchy logical_path);
|
|
54
|
+
~EntryPayloadStream() override;
|
|
55
|
+
|
|
56
|
+
std::shared_ptr<StreamArchive> parent_archive() const;
|
|
57
|
+
void rewind() override;
|
|
58
|
+
|
|
59
|
+
private:
|
|
60
|
+
std::shared_ptr<StreamArchive> _parent_archive;
|
|
61
|
+
|
|
62
|
+
void open_single_part(const PathHierarchy &single_part) override;
|
|
63
|
+
void close_single_part() override;
|
|
64
|
+
ssize_t read_from_single_part(void *buffer, size_t size) override;
|
|
65
|
+
int64_t seek_within_single_part(int64_t offset, int whence) override;
|
|
66
|
+
int64_t size_of_single_part(const PathHierarchy &single_part) override;
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
// ============================================================================
|
|
70
|
+
// ArchiveStackCursor Interface
|
|
71
|
+
// ============================================================================
|
|
72
|
+
|
|
73
|
+
struct ArchiveStackCursor {
|
|
74
|
+
|
|
75
|
+
ArchiveStackCursor();
|
|
76
|
+
|
|
77
|
+
void configure(const ArchiveOption &options);
|
|
78
|
+
void reset();
|
|
79
|
+
bool has_stream() const { return !stream_stack.empty(); }
|
|
80
|
+
|
|
81
|
+
bool descend();
|
|
82
|
+
bool ascend();
|
|
83
|
+
bool next();
|
|
84
|
+
bool synchronize_to_hierarchy(const PathHierarchy &hierarchy);
|
|
85
|
+
ssize_t read(void *buffer, size_t len);
|
|
86
|
+
|
|
87
|
+
size_t depth() const { return stream_stack.size(); }
|
|
88
|
+
StreamArchive *current_archive();
|
|
89
|
+
|
|
90
|
+
PathHierarchy current_entry_hierarchy();
|
|
91
|
+
|
|
92
|
+
std::shared_ptr<IDataStream> create_stream(const PathHierarchy &hierarchy);
|
|
93
|
+
|
|
94
|
+
ArchiveOption options_snapshot;
|
|
95
|
+
std::vector<std::shared_ptr<IDataStream>> stream_stack;
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
} // namespace archive_r
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#include "archive_stack_orchestrator.h"
|
|
5
|
+
#include "archive_r/path_hierarchy_utils.h"
|
|
6
|
+
#include "archive_r/entry_fault.h"
|
|
7
|
+
#include "system_file_stream.h"
|
|
8
|
+
|
|
9
|
+
#include <algorithm>
|
|
10
|
+
#include <cstddef>
|
|
11
|
+
#include <cstdio>
|
|
12
|
+
#include <exception>
|
|
13
|
+
#include <limits>
|
|
14
|
+
#include <memory>
|
|
15
|
+
#include <stdexcept>
|
|
16
|
+
#include <typeinfo>
|
|
17
|
+
#include <utility>
|
|
18
|
+
|
|
19
|
+
namespace archive_r {
|
|
20
|
+
|
|
21
|
+
/**
 * @brief Create an orchestrator configured with @p options.
 *
 * The options are copied, the metadata key collection is filled from
 * options.metadata_keys, and the head cursor is configured with the copy.
 */
ArchiveStackOrchestrator::ArchiveStackOrchestrator(const ArchiveOption &options)
    : _archive_options(options)
    , _metadata_keys(options.metadata_keys.begin(), options.metadata_keys.end()) {
  _head.configure(_archive_options);
}

ArchiveStackOrchestrator::~ArchiveStackOrchestrator() = default;
|
|
28
|
+
|
|
29
|
+
/// Depth of the head cursor (forwarded from _head.depth()).
size_t ArchiveStackOrchestrator::depth() const {
  return _head.depth();
}
|
|
30
|
+
|
|
31
|
+
/// Current archive of the head cursor (forwarded from _head.current_archive()); may be nullptr.
StreamArchive *ArchiveStackOrchestrator::current_archive() {
  return _head.current_archive();
}
|
|
32
|
+
|
|
33
|
+
// Drives the traversal state machine:
|
|
34
|
+
// 1. Optionally descend into the current entry when requested.
|
|
35
|
+
// 2. Attempt to advance within the active archive; report faults but keep looping.
|
|
36
|
+
// 3. Drain any pending multi-volume groups before bubbling up to the parent so multipart
|
|
37
|
+
// archives are consumed contiguously.
|
|
38
|
+
// 4. When leaving a multi-volume context, rewind the parent archive by skipping to EOF to
|
|
39
|
+
// avoid re-reading already processed entries.
|
|
40
|
+
bool ArchiveStackOrchestrator::advance(bool descend_request) {
|
|
41
|
+
bool request_descend = descend_request;
|
|
42
|
+
|
|
43
|
+
while (true) {
|
|
44
|
+
if (depth() == 0) {
|
|
45
|
+
return false;
|
|
46
|
+
}
|
|
47
|
+
try {
|
|
48
|
+
if (request_descend) {
|
|
49
|
+
request_descend = false;
|
|
50
|
+
_head.descend();
|
|
51
|
+
}
|
|
52
|
+
} catch (const EntryFaultError &error) {
|
|
53
|
+
dispatch_fault(error.fault());
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
try {
|
|
58
|
+
if (_head.next()) {
|
|
59
|
+
return true;
|
|
60
|
+
}
|
|
61
|
+
} catch (const EntryFaultError &error) {
|
|
62
|
+
dispatch_fault(error.fault());
|
|
63
|
+
continue;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
// Consume any pending multi-volume siblings so we do not return to the parent mid-series.
|
|
68
|
+
if (descend_pending_multi_volumes()) {
|
|
69
|
+
continue;
|
|
70
|
+
}
|
|
71
|
+
} catch (const EntryFaultError &error) {
|
|
72
|
+
dispatch_fault(error.fault());
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
PathHierarchy prev_ascend_hierarchy = _head.current_entry_hierarchy();
|
|
76
|
+
_head.ascend();
|
|
77
|
+
|
|
78
|
+
if (!pathhierarchy_is_multivolume(prev_ascend_hierarchy)) {
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
try {
|
|
83
|
+
// If same-level multi-volume siblings remain, keep draining them before touching the parent next().
|
|
84
|
+
if (descend_pending_multi_volumes()) {
|
|
85
|
+
continue;
|
|
86
|
+
}
|
|
87
|
+
} catch (const EntryFaultError &error) {
|
|
88
|
+
dispatch_fault(error.fault());
|
|
89
|
+
}
|
|
90
|
+
try {
|
|
91
|
+
StreamArchive *archive = _head.current_archive();
|
|
92
|
+
if (archive) {
|
|
93
|
+
// After all volumes are processed, push the parent back to EOF to avoid duplicate next().
|
|
94
|
+
archive->skip_to_eof();
|
|
95
|
+
}
|
|
96
|
+
} catch (const EntryFaultError &error) {
|
|
97
|
+
dispatch_fault(error.fault());
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
 * @brief Name of the current entry in the current archive.
 * @return Reference to the archive's current entry name, or to an empty string
 *         when there is no current archive.
 */
const std::string &ArchiveStackOrchestrator::current_entryname() {
  if (StreamArchive *archive = current_archive()) {
    return archive->current_entryname;
  }

  // A static is needed because the function returns a reference.
  static const std::string empty;
  return empty;
}
|
|
110
|
+
|
|
111
|
+
/// Hierarchy of the current entry (forwarded from _head.current_entry_hierarchy()).
PathHierarchy ArchiveStackOrchestrator::current_entry_hierarchy() {
  return _head.current_entry_hierarchy();
}
|
|
112
|
+
|
|
113
|
+
bool ArchiveStackOrchestrator::synchronize_to_hierarchy(const PathHierarchy &path_hierarchy) {
|
|
114
|
+
try {
|
|
115
|
+
_head.synchronize_to_hierarchy(path_hierarchy);
|
|
116
|
+
return true;
|
|
117
|
+
} catch (const EntryFaultError &error) {
|
|
118
|
+
dispatch_fault(error.fault());
|
|
119
|
+
return false;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
ssize_t ArchiveStackOrchestrator::read_head(void *buff, size_t len) {
|
|
124
|
+
try {
|
|
125
|
+
return _head.read(buff, len);
|
|
126
|
+
} catch (const EntryFaultError &error) {
|
|
127
|
+
dispatch_fault(error.fault());
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
return -1;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/// Forwards the entry path, base name and ordering unchanged to the multi-volume manager.
void ArchiveStackOrchestrator::mark_entry_as_multi_volume(const PathHierarchy &entry_path,
                                                          const std::string &base_name,
                                                          PathEntry::Parts::Ordering ordering) {
  _multi_volume_manager.mark_entry_as_multi_volume(entry_path, base_name, ordering);
}
|
|
136
|
+
|
|
137
|
+
bool ArchiveStackOrchestrator::descend_pending_multi_volumes() {
|
|
138
|
+
const PathHierarchy current_hierarchy = _head.current_entry_hierarchy();
|
|
139
|
+
PathHierarchy multi_volume_target;
|
|
140
|
+
if (!_multi_volume_manager.pop_multi_volume_group(current_hierarchy, multi_volume_target)) {
|
|
141
|
+
return false;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
_head.synchronize_to_hierarchy(multi_volume_target);
|
|
145
|
+
_head.descend();
|
|
146
|
+
return true;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
/// Synchronizes the head cursor to @p root_hierarchy and then descends into it.
/// Exceptions from either call are not caught here.
void ArchiveStackOrchestrator::open_root_hierarchy(const PathHierarchy &root_hierarchy) {
  _head.synchronize_to_hierarchy(root_hierarchy);
  _head.descend();
}
|
|
153
|
+
|
|
154
|
+
/**
 * @brief Report a fault through dispatch_registered_fault().
 *
 * A fault that arrives without a hierarchy is given the head cursor's current
 * entry hierarchy before it is dispatched.
 */
void ArchiveStackOrchestrator::dispatch_fault(EntryFault fault) {
  const bool lacks_location = fault.hierarchy.empty();
  if (lacks_location) {
    fault.hierarchy = _head.current_entry_hierarchy();
  }

  dispatch_registered_fault(fault);
}
|
|
161
|
+
|
|
162
|
+
} // namespace archive_r
|