archive_r_ruby 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{LICENSE → LICENSE.txt} +77 -77
- data/README.md +103 -103
- data/ext/archive_r/Makefile +48 -45
- data/ext/archive_r/archive_r-x64-mingw-ucrt.def +2 -0
- data/ext/archive_r/archive_r_ext.cc +1106 -1106
- data/ext/archive_r/archive_r_ext.o +0 -0
- data/ext/archive_r/extconf.rb +120 -120
- data/ext/archive_r/mkmf.log +23 -18
- data/ext/archive_r/vendor/archive_r/LICENSE.txt +77 -77
- data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +52 -52
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +166 -166
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -34
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -56
- data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -46
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -109
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -37
- data/ext/archive_r/vendor/archive_r/include/archive_r/platform_compat.h +19 -19
- data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -122
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -330
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +97 -97
- data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -162
- data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -54
- data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -552
- data/ext/archive_r/vendor/archive_r/src/archive_type.h +77 -77
- data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -35
- data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -253
- data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -26
- data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -54
- data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -32
- data/ext/archive_r/vendor/archive_r/src/entry_impl.h +57 -57
- data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -81
- data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -41
- data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -199
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -151
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -304
- data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -120
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +295 -295
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -46
- data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -314
- data/lib/archive_r.rb +105 -105
- metadata +11 -8
- data/ext/archive_r/archive_r.bundle +0 -0
|
@@ -1,199 +1,199 @@
|
|
|
1
|
-
// SPDX-License-Identifier: MIT
|
|
2
|
-
// Copyright (c) 2025 archive_r Team
|
|
3
|
-
|
|
4
|
-
#include "archive_r/multi_volume_stream_base.h"
|
|
5
|
-
|
|
6
|
-
#include "archive_r/path_hierarchy_utils.h"
|
|
7
|
-
|
|
8
|
-
#include <algorithm>
|
|
9
|
-
#include <memory>
|
|
10
|
-
#include <stdexcept>
|
|
11
|
-
#include <vector>
|
|
12
|
-
|
|
13
|
-
namespace archive_r {
|
|
14
|
-
|
|
15
|
-
struct MultiVolumeStreamBase::Impl {
|
|
16
|
-
explicit Impl(MultiVolumeStreamBase &owner)
|
|
17
|
-
: self(owner) {}
|
|
18
|
-
|
|
19
|
-
MultiVolumeStreamBase &self;
|
|
20
|
-
std::vector<int64_t> part_offsets;
|
|
21
|
-
std::size_t total_parts = 0;
|
|
22
|
-
std::size_t active_part_index = 0;
|
|
23
|
-
bool part_open = false;
|
|
24
|
-
int64_t logical_offset = 0;
|
|
25
|
-
int64_t total_size = -1;
|
|
26
|
-
|
|
27
|
-
void ensure_part_active(std::size_t part_index);
|
|
28
|
-
bool advance_to_next_part();
|
|
29
|
-
bool ensure_size_metadata();
|
|
30
|
-
int64_t compute_target_offset(int64_t offset, int whence) const;
|
|
31
|
-
std::size_t locate_part_for_offset(int64_t target, int64_t &offset_within_part) const;
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
MultiVolumeStreamBase::MultiVolumeStreamBase(PathHierarchy logical_path, bool supports_seek)
|
|
35
|
-
: _logical_path(std::move(logical_path))
|
|
36
|
-
, _supports_seek(supports_seek)
|
|
37
|
-
, _impl(std::make_unique<Impl>(*this)) {
|
|
38
|
-
_impl->total_parts = pathhierarchy_volume_size(_logical_path);
|
|
39
|
-
if (_impl->total_parts == 0) {
|
|
40
|
-
throw std::invalid_argument("MultiVolumeStreamBase requires at least one volume component");
|
|
41
|
-
}
|
|
42
|
-
_impl->part_offsets.assign(_impl->total_parts + 1, 0);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
MultiVolumeStreamBase::~MultiVolumeStreamBase() = default;
|
|
46
|
-
|
|
47
|
-
ssize_t MultiVolumeStreamBase::read(void *buffer, size_t size) {
|
|
48
|
-
if (size == 0) {
|
|
49
|
-
return 0;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
std::size_t total_read = 0;
|
|
53
|
-
auto *out = static_cast<char *>(buffer);
|
|
54
|
-
|
|
55
|
-
while (total_read < size) {
|
|
56
|
-
if (_impl->active_part_index >= _impl->total_parts) {
|
|
57
|
-
break;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
_impl->ensure_part_active(_impl->active_part_index);
|
|
61
|
-
const ssize_t bytes = read_from_single_part(out + total_read, size - total_read);
|
|
62
|
-
if (bytes > 0) {
|
|
63
|
-
total_read += static_cast<std::size_t>(bytes);
|
|
64
|
-
_impl->logical_offset += bytes;
|
|
65
|
-
continue;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
if (bytes < 0) {
|
|
69
|
-
return bytes;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
if (!_impl->advance_to_next_part()) {
|
|
73
|
-
break;
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
return static_cast<ssize_t>(total_read);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
void MultiVolumeStreamBase::rewind() {
|
|
81
|
-
deactivate_active_part();
|
|
82
|
-
_impl->active_part_index = 0;
|
|
83
|
-
_impl->logical_offset = 0;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
bool MultiVolumeStreamBase::at_end() const {
|
|
87
|
-
return (_impl->active_part_index >= _impl->total_parts) && !_impl->part_open;
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
int64_t MultiVolumeStreamBase::seek(int64_t offset, int whence) {
|
|
91
|
-
if (!_supports_seek) {
|
|
92
|
-
return -1;
|
|
93
|
-
}
|
|
94
|
-
if (!_impl->ensure_size_metadata()) {
|
|
95
|
-
return -1;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
const int64_t target = _impl->compute_target_offset(offset, whence);
|
|
99
|
-
if (target < 0 || target > _impl->total_size) {
|
|
100
|
-
return -1;
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
if (target == _impl->total_size) {
|
|
104
|
-
deactivate_active_part();
|
|
105
|
-
_impl->active_part_index = _impl->total_parts;
|
|
106
|
-
_impl->logical_offset = target;
|
|
107
|
-
return _impl->logical_offset;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
int64_t offset_within_part = 0;
|
|
111
|
-
const std::size_t part_index = _impl->locate_part_for_offset(target, offset_within_part);
|
|
112
|
-
_impl->ensure_part_active(part_index);
|
|
113
|
-
if (seek_within_single_part(offset_within_part, SEEK_SET) < 0) {
|
|
114
|
-
return -1;
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
_impl->logical_offset = target;
|
|
118
|
-
_impl->active_part_index = part_index;
|
|
119
|
-
return _impl->logical_offset;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
int64_t MultiVolumeStreamBase::tell() const { return _impl->logical_offset; }
|
|
123
|
-
|
|
124
|
-
void MultiVolumeStreamBase::Impl::ensure_part_active(std::size_t part_index) {
|
|
125
|
-
if (part_open && active_part_index == part_index) {
|
|
126
|
-
return;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
self.deactivate_active_part();
|
|
130
|
-
PathHierarchy single_part = pathhierarchy_select_single_part(self._logical_path, part_index);
|
|
131
|
-
self.open_single_part(single_part);
|
|
132
|
-
active_part_index = part_index;
|
|
133
|
-
part_open = true;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
void MultiVolumeStreamBase::deactivate_active_part() {
|
|
137
|
-
if (_impl->part_open) {
|
|
138
|
-
close_single_part();
|
|
139
|
-
_impl->part_open = false;
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
bool MultiVolumeStreamBase::Impl::advance_to_next_part() {
|
|
144
|
-
if (active_part_index >= total_parts) {
|
|
145
|
-
return false;
|
|
146
|
-
}
|
|
147
|
-
self.deactivate_active_part();
|
|
148
|
-
++active_part_index;
|
|
149
|
-
return active_part_index < total_parts;
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
bool MultiVolumeStreamBase::Impl::ensure_size_metadata() {
|
|
153
|
-
if (total_size >= 0) {
|
|
154
|
-
return true;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
int64_t prefix = 0;
|
|
158
|
-
part_offsets[0] = 0;
|
|
159
|
-
for (std::size_t index = 0; index < total_parts; ++index) {
|
|
160
|
-
PathHierarchy single_part = pathhierarchy_select_single_part(self._logical_path, index);
|
|
161
|
-
const int64_t size = self.size_of_single_part(single_part);
|
|
162
|
-
if (size < 0) {
|
|
163
|
-
total_size = -1;
|
|
164
|
-
return false;
|
|
165
|
-
}
|
|
166
|
-
prefix += size;
|
|
167
|
-
part_offsets[index + 1] = prefix;
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
total_size = prefix;
|
|
171
|
-
return true;
|
|
172
|
-
}
|
|
173
|
-
|
|
174
|
-
int64_t MultiVolumeStreamBase::Impl::compute_target_offset(int64_t offset, int whence) const {
|
|
175
|
-
switch (whence) {
|
|
176
|
-
case SEEK_SET:
|
|
177
|
-
return offset;
|
|
178
|
-
case SEEK_CUR:
|
|
179
|
-
return logical_offset + offset;
|
|
180
|
-
case SEEK_END:
|
|
181
|
-
return total_size + offset;
|
|
182
|
-
default:
|
|
183
|
-
return -1;
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
std::size_t MultiVolumeStreamBase::Impl::locate_part_for_offset(int64_t target, int64_t &offset_within_part) const {
|
|
188
|
-
auto it = std::upper_bound(part_offsets.begin(), part_offsets.end(), target);
|
|
189
|
-
if (it == part_offsets.begin()) {
|
|
190
|
-
offset_within_part = target;
|
|
191
|
-
return 0;
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
const std::size_t index = static_cast<std::size_t>(std::distance(part_offsets.begin(), it) - 1);
|
|
195
|
-
offset_within_part = target - part_offsets[index];
|
|
196
|
-
return index;
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
} // namespace archive_r
|
|
1
|
+
// SPDX-License-Identifier: MIT
|
|
2
|
+
// Copyright (c) 2025 archive_r Team
|
|
3
|
+
|
|
4
|
+
#include "archive_r/multi_volume_stream_base.h"
|
|
5
|
+
|
|
6
|
+
#include "archive_r/path_hierarchy_utils.h"
|
|
7
|
+
|
|
8
|
+
#include <algorithm>
|
|
9
|
+
#include <memory>
|
|
10
|
+
#include <stdexcept>
|
|
11
|
+
#include <vector>
|
|
12
|
+
|
|
13
|
+
namespace archive_r {
|
|
14
|
+
|
|
15
|
+
struct MultiVolumeStreamBase::Impl {
|
|
16
|
+
explicit Impl(MultiVolumeStreamBase &owner)
|
|
17
|
+
: self(owner) {}
|
|
18
|
+
|
|
19
|
+
MultiVolumeStreamBase &self;
|
|
20
|
+
std::vector<int64_t> part_offsets;
|
|
21
|
+
std::size_t total_parts = 0;
|
|
22
|
+
std::size_t active_part_index = 0;
|
|
23
|
+
bool part_open = false;
|
|
24
|
+
int64_t logical_offset = 0;
|
|
25
|
+
int64_t total_size = -1;
|
|
26
|
+
|
|
27
|
+
void ensure_part_active(std::size_t part_index);
|
|
28
|
+
bool advance_to_next_part();
|
|
29
|
+
bool ensure_size_metadata();
|
|
30
|
+
int64_t compute_target_offset(int64_t offset, int whence) const;
|
|
31
|
+
std::size_t locate_part_for_offset(int64_t target, int64_t &offset_within_part) const;
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
MultiVolumeStreamBase::MultiVolumeStreamBase(PathHierarchy logical_path, bool supports_seek)
|
|
35
|
+
: _logical_path(std::move(logical_path))
|
|
36
|
+
, _supports_seek(supports_seek)
|
|
37
|
+
, _impl(std::make_unique<Impl>(*this)) {
|
|
38
|
+
_impl->total_parts = pathhierarchy_volume_size(_logical_path);
|
|
39
|
+
if (_impl->total_parts == 0) {
|
|
40
|
+
throw std::invalid_argument("MultiVolumeStreamBase requires at least one volume component");
|
|
41
|
+
}
|
|
42
|
+
_impl->part_offsets.assign(_impl->total_parts + 1, 0);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
MultiVolumeStreamBase::~MultiVolumeStreamBase() = default;
|
|
46
|
+
|
|
47
|
+
ssize_t MultiVolumeStreamBase::read(void *buffer, size_t size) {
|
|
48
|
+
if (size == 0) {
|
|
49
|
+
return 0;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
std::size_t total_read = 0;
|
|
53
|
+
auto *out = static_cast<char *>(buffer);
|
|
54
|
+
|
|
55
|
+
while (total_read < size) {
|
|
56
|
+
if (_impl->active_part_index >= _impl->total_parts) {
|
|
57
|
+
break;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
_impl->ensure_part_active(_impl->active_part_index);
|
|
61
|
+
const ssize_t bytes = read_from_single_part(out + total_read, size - total_read);
|
|
62
|
+
if (bytes > 0) {
|
|
63
|
+
total_read += static_cast<std::size_t>(bytes);
|
|
64
|
+
_impl->logical_offset += bytes;
|
|
65
|
+
continue;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
if (bytes < 0) {
|
|
69
|
+
return bytes;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
if (!_impl->advance_to_next_part()) {
|
|
73
|
+
break;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return static_cast<ssize_t>(total_read);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
void MultiVolumeStreamBase::rewind() {
|
|
81
|
+
deactivate_active_part();
|
|
82
|
+
_impl->active_part_index = 0;
|
|
83
|
+
_impl->logical_offset = 0;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
bool MultiVolumeStreamBase::at_end() const {
|
|
87
|
+
return (_impl->active_part_index >= _impl->total_parts) && !_impl->part_open;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
int64_t MultiVolumeStreamBase::seek(int64_t offset, int whence) {
|
|
91
|
+
if (!_supports_seek) {
|
|
92
|
+
return -1;
|
|
93
|
+
}
|
|
94
|
+
if (!_impl->ensure_size_metadata()) {
|
|
95
|
+
return -1;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
const int64_t target = _impl->compute_target_offset(offset, whence);
|
|
99
|
+
if (target < 0 || target > _impl->total_size) {
|
|
100
|
+
return -1;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
if (target == _impl->total_size) {
|
|
104
|
+
deactivate_active_part();
|
|
105
|
+
_impl->active_part_index = _impl->total_parts;
|
|
106
|
+
_impl->logical_offset = target;
|
|
107
|
+
return _impl->logical_offset;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
int64_t offset_within_part = 0;
|
|
111
|
+
const std::size_t part_index = _impl->locate_part_for_offset(target, offset_within_part);
|
|
112
|
+
_impl->ensure_part_active(part_index);
|
|
113
|
+
if (seek_within_single_part(offset_within_part, SEEK_SET) < 0) {
|
|
114
|
+
return -1;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
_impl->logical_offset = target;
|
|
118
|
+
_impl->active_part_index = part_index;
|
|
119
|
+
return _impl->logical_offset;
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
int64_t MultiVolumeStreamBase::tell() const { return _impl->logical_offset; }
|
|
123
|
+
|
|
124
|
+
void MultiVolumeStreamBase::Impl::ensure_part_active(std::size_t part_index) {
|
|
125
|
+
if (part_open && active_part_index == part_index) {
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
self.deactivate_active_part();
|
|
130
|
+
PathHierarchy single_part = pathhierarchy_select_single_part(self._logical_path, part_index);
|
|
131
|
+
self.open_single_part(single_part);
|
|
132
|
+
active_part_index = part_index;
|
|
133
|
+
part_open = true;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
void MultiVolumeStreamBase::deactivate_active_part() {
|
|
137
|
+
if (_impl->part_open) {
|
|
138
|
+
close_single_part();
|
|
139
|
+
_impl->part_open = false;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
bool MultiVolumeStreamBase::Impl::advance_to_next_part() {
|
|
144
|
+
if (active_part_index >= total_parts) {
|
|
145
|
+
return false;
|
|
146
|
+
}
|
|
147
|
+
self.deactivate_active_part();
|
|
148
|
+
++active_part_index;
|
|
149
|
+
return active_part_index < total_parts;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
bool MultiVolumeStreamBase::Impl::ensure_size_metadata() {
|
|
153
|
+
if (total_size >= 0) {
|
|
154
|
+
return true;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
int64_t prefix = 0;
|
|
158
|
+
part_offsets[0] = 0;
|
|
159
|
+
for (std::size_t index = 0; index < total_parts; ++index) {
|
|
160
|
+
PathHierarchy single_part = pathhierarchy_select_single_part(self._logical_path, index);
|
|
161
|
+
const int64_t size = self.size_of_single_part(single_part);
|
|
162
|
+
if (size < 0) {
|
|
163
|
+
total_size = -1;
|
|
164
|
+
return false;
|
|
165
|
+
}
|
|
166
|
+
prefix += size;
|
|
167
|
+
part_offsets[index + 1] = prefix;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
total_size = prefix;
|
|
171
|
+
return true;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
int64_t MultiVolumeStreamBase::Impl::compute_target_offset(int64_t offset, int whence) const {
|
|
175
|
+
switch (whence) {
|
|
176
|
+
case SEEK_SET:
|
|
177
|
+
return offset;
|
|
178
|
+
case SEEK_CUR:
|
|
179
|
+
return logical_offset + offset;
|
|
180
|
+
case SEEK_END:
|
|
181
|
+
return total_size + offset;
|
|
182
|
+
default:
|
|
183
|
+
return -1;
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
std::size_t MultiVolumeStreamBase::Impl::locate_part_for_offset(int64_t target, int64_t &offset_within_part) const {
|
|
188
|
+
auto it = std::upper_bound(part_offsets.begin(), part_offsets.end(), target);
|
|
189
|
+
if (it == part_offsets.begin()) {
|
|
190
|
+
offset_within_part = target;
|
|
191
|
+
return 0;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
const std::size_t index = static_cast<std::size_t>(std::distance(part_offsets.begin(), it) - 1);
|
|
195
|
+
offset_within_part = target - part_offsets[index];
|
|
196
|
+
return index;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
} // namespace archive_r
|