archive_r_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +56 -0
  3. data/README.md +103 -0
  4. data/ext/archive_r/archive_r_ext.cc +910 -0
  5. data/ext/archive_r/extconf.rb +90 -0
  6. data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
  7. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
  8. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
  9. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
  10. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
  15. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
  16. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
  17. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
  18. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
  19. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
  20. data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
  21. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
  22. data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
  23. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
  24. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
  25. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
  26. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
  27. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
  28. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
  29. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
  30. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
  31. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
  32. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
  33. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
  34. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
  35. data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
  36. data/lib/archive_r.rb +80 -0
  37. metadata +112 -0
@@ -0,0 +1,120 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+ #include <string>
6
+
7
+ #include <chrono>
8
+ #include <iostream>
9
+ #include <map>
10
+ #include <vector>
11
+ #include <algorithm>
12
+ #include <iomanip>
13
+ #include <mutex>
14
+
15
+
16
+ namespace archive_r {
17
+ namespace internal {
18
+
19
+
20
+
21
/**
 * Process-wide accumulator for named wall-clock timings.
 *
 * start() records a time point for a name, stop() adds the nanoseconds elapsed
 * since the most recent unmatched start() of that name to the name's running
 * total, and report() prints the totals to std::cout. Every public member
 * function holds mutex_ while it touches the maps. The destructor prints the
 * report one last time.
 */
class SimpleProfiler {
public:
  /// Returns the shared instance (a function-local static created on first use).
  static SimpleProfiler& instance() {
    static SimpleProfiler inst;
    return inst;
  }

  /// Prints the final report. Never lets an exception escape the destructor.
  ~SimpleProfiler() {
    try {
      report();
    } catch (...) {
      // Best effort only: a failed report must not terminate the process
      // while static objects are being destroyed.
    }
  }

  /**
   * Records "now" as a pending start for @p name.
   *
   * Pending starts are kept on a per-name stack so that nested or recursive
   * timers with the same name do not overwrite each other's start time.
   */
  void start(const std::string& name) {
    std::lock_guard<std::mutex> lock(mutex_);
    const auto now = Clock::now();
    start_times_[name].push_back(now);
  }

  /**
   * Pairs with the most recent pending start() of @p name and accumulates the
   * elapsed time. A stop() without a pending start() is ignored, so a stale
   * start time is never counted twice.
   */
  void stop(const std::string& name) {
    std::lock_guard<std::mutex> lock(mutex_);
    const auto now = Clock::now();
    const auto it = start_times_.find(name);
    if (it == start_times_.end() || it->second.empty()) {
      return;
    }
    const auto started = it->second.back();
    it->second.pop_back();

    const long long elapsed_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now - started).count();
    durations_[name] += elapsed_ns;
    ++counts_[name];
  }

  /**
   * Writes a table (name, total ms, call count, average us) to std::cout,
   * sorted by total time in descending order. Prints nothing when no timing
   * has been accumulated. The formatting state of std::cout (flags and
   * precision) is restored before returning.
   */
  void report() {
    std::lock_guard<std::mutex> lock(mutex_);
    if (durations_.empty()) {
      return;
    }

    std::ostream& out = std::cout;
    // std::fixed / setprecision / std::left / std::right are sticky; remember
    // the caller's settings so the report does not leak its formatting.
    const std::ios_base::fmtflags saved_flags = out.flags();
    const std::streamsize saved_precision = out.precision();

    std::vector<std::pair<std::string, long long>> sorted_durations(durations_.begin(), durations_.end());
    std::sort(sorted_durations.begin(), sorted_durations.end(),
              [](const auto& a, const auto& b) { return a.second > b.second; });

    out << "\n=== Profiling Report (archive_r::internal) ===" << '\n';
    out << std::left << std::setw(40) << "Name"
        << std::right << std::setw(15) << "Total (ms)"
        << std::setw(10) << "Count"
        << std::setw(15) << "Avg (us)" << '\n';
    out << std::string(80, '-') << '\n';

    for (const auto& [name, total_ns] : sorted_durations) {
      // Look the count up without operator[] so reporting never inserts entries.
      const auto count_it = counts_.find(name);
      const long long count = (count_it != counts_.end()) ? count_it->second : 0;
      const double avg_ns = count > 0 ? static_cast<double>(total_ns) / static_cast<double>(count) : 0.0;

      out << std::left << std::setw(40) << name
          << std::right << std::setw(15) << std::fixed << std::setprecision(3) << static_cast<double>(total_ns) / 1000000.0
          << std::setw(10) << count
          << std::setw(15) << std::fixed << std::setprecision(3) << avg_ns / 1000.0
          << '\n';
    }
    out << "==============================================\n" << '\n' << std::flush;

    out.flags(saved_flags);
    out.precision(saved_precision);
  }

  /// Drops all pending starts and all accumulated totals and counts.
  void reset() {
    std::lock_guard<std::mutex> lock(mutex_);
    start_times_.clear();
    durations_.clear();
    counts_.clear();
  }

private:
  using Clock = std::chrono::high_resolution_clock;

  std::map<std::string, std::vector<Clock::time_point>> start_times_; ///< Pending (unmatched) starts per name, newest last.
  std::map<std::string, long long> durations_;                        ///< Accumulated nanoseconds per name.
  std::map<std::string, long long> counts_;                           ///< Completed start/stop pairs per name.
  std::mutex mutex_;                                                  ///< Guards the three maps above.
};
96
+
97
+ class ScopedTimer {
98
+ public:
99
+ ScopedTimer(const std::string& name) : name_(name) {
100
+ SimpleProfiler::instance().start(name_);
101
+ }
102
+ ~ScopedTimer() {
103
+ SimpleProfiler::instance().stop(name_);
104
+ }
105
+ private:
106
+ std::string name_;
107
+ };
108
+
109
+ } // namespace internal
110
+ } // namespace archive_r
111
+
112
// Token-pasting helpers. The extra CONCAT level makes the preprocessor expand
// __COUNTER__ before ## glues the two tokens together.
#define ARCHIVE_R_PROFILE_CONCAT_INNER(a, b) a##b
#define ARCHIVE_R_PROFILE_CONCAT(a, b) ARCHIVE_R_PROFILE_CONCAT_INNER(a, b)

// Produces a fresh identifier on every expansion; the argument is not used.
#define ARCHIVE_R_PROFILE_UNIQUE_NAME(name) ARCHIVE_R_PROFILE_CONCAT(_archive_r_profiler_scope_, __COUNTER__)

// ARCHIVE_R_PROFILE(name) declares a ScopedTimer for the rest of the enclosing
// scope, or expands to a no-op expression when profiling is disabled.
#if defined(ARCHIVE_R_SIMPLE_PROFILER_DISABLED)
#define ARCHIVE_R_PROFILE(name) ((void)0)
#else
#define ARCHIVE_R_PROFILE(name) ::archive_r::internal::ScopedTimer ARCHIVE_R_PROFILE_UNIQUE_NAME(name)(name)
#endif
@@ -0,0 +1,263 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #include "system_file_stream.h"
5
+ #include "archive_r/path_hierarchy_utils.h"
6
+ #include "entry_fault_error.h"
7
+
8
+ #include <algorithm>
9
+ #include <cerrno>
10
+ #include <filesystem>
11
+ #include <grp.h>
12
+ #include <pwd.h>
13
+ #include <stdexcept>
14
+ #include <sys/stat.h>
15
+ #include <system_error>
16
+ #include <string_view>
17
+ #include <unistd.h>
18
+ #include <utility>
19
+ #include <vector>
20
+
21
+ namespace archive_r {
22
+
23
+ namespace {
24
+
25
/**
 * Returns the buffer size suggested by sysconf(name), or a fixed fallback.
 *
 * @param name A sysconf selector such as _SC_GETPW_R_SIZE_MAX.
 * @return The sysconf value when it is positive, otherwise 16384. Non-positive
 *         results (error, "no definite limit", or zero) all take the fallback,
 *         so callers never allocate an empty buffer.
 */
static long determine_buffer_size(int name) {
  constexpr long kFallbackSize = 16384; // Fallback for systems without a specific limit
  const long size = ::sysconf(name);
  return size > 0 ? size : kFallbackSize;
}
32
+
33
+ static bool lookup_username(uid_t uid, std::string &name_out) {
34
+ const long buf_size = determine_buffer_size(_SC_GETPW_R_SIZE_MAX);
35
+ std::vector<char> buffer(static_cast<std::size_t>(buf_size));
36
+ struct passwd pwd;
37
+ struct passwd *result = nullptr;
38
+
39
+ if (::getpwuid_r(uid, &pwd, buffer.data(), buffer.size(), &result) == 0 && result && result->pw_name) {
40
+ name_out.assign(result->pw_name);
41
+ return true;
42
+ }
43
+ return false;
44
+ }
45
+
46
+ static bool lookup_groupname(gid_t gid, std::string &name_out) {
47
+ const long buf_size = determine_buffer_size(_SC_GETGR_R_SIZE_MAX);
48
+ std::vector<char> buffer(static_cast<std::size_t>(buf_size));
49
+ struct group grp;
50
+ struct group *result = nullptr;
51
+
52
+ if (::getgrgid_r(gid, &grp, buffer.data(), buffer.size(), &result) == 0 && result && result->gr_name) {
53
+ name_out.assign(result->gr_name);
54
+ return true;
55
+ }
56
+ return false;
57
+ }
58
+
59
+ } // namespace
60
+
61
+ SystemFileStream::SystemFileStream(PathHierarchy logical_path)
62
+ : MultiVolumeStreamBase(std::move(logical_path), true)
63
+ , _handle(nullptr) {
64
+ if (_logical_path.empty()) {
65
+ throw std::invalid_argument("Root file hierarchy cannot be empty");
66
+ }
67
+
68
+ const PathEntry &root_entry = _logical_path.front();
69
+ if (!root_entry.is_single() && !root_entry.is_multi_volume()) {
70
+ throw std::invalid_argument("Root file hierarchy must be a single file or multi-volume source");
71
+ }
72
+ }
73
+
74
+ SystemFileStream::~SystemFileStream() = default;
75
+
76
+ void SystemFileStream::open_single_part(const PathHierarchy &single_part) {
77
+ const PathEntry &entry = single_part.back();
78
+
79
+ const std::string path = entry.single_value();
80
+ errno = 0;
81
+ FILE *handle = std::fopen(path.c_str(), "rb");
82
+ if (!handle) {
83
+ const int err = errno;
84
+ throw make_entry_fault_error(format_path_errno_error("Failed to open root file", path, err), single_part, err);
85
+ }
86
+
87
+ _handle = handle;
88
+ _active_path = path;
89
+ }
90
+
91
+ void SystemFileStream::close_single_part() {
92
+ std::fclose(_handle);
93
+ _handle = nullptr;
94
+ _active_path.clear();
95
+ }
96
+
97
+ ssize_t SystemFileStream::read_from_single_part(void *buffer, size_t size) {
98
+ errno = 0;
99
+ const std::size_t bytes_read = std::fread(buffer, 1, size, _handle);
100
+ if (bytes_read > 0) {
101
+ return static_cast<ssize_t>(bytes_read);
102
+ }
103
+
104
+ if (std::feof(_handle)) {
105
+ return 0;
106
+ }
107
+
108
+ if (std::ferror(_handle)) {
109
+ report_read_failure(errno);
110
+ }
111
+ return -1;
112
+ }
113
+
114
+ int64_t SystemFileStream::seek_within_single_part(int64_t offset, int whence) {
115
+ if (fseeko(_handle, offset, whence) != 0) {
116
+ return -1;
117
+ }
118
+ const auto position = ftello(_handle);
119
+ return position >= 0 ? position : -1;
120
+ }
121
+
122
+ int64_t SystemFileStream::size_of_single_part(const PathHierarchy &single_part) {
123
+ const PathEntry &entry = single_part.back();
124
+
125
+ struct stat st;
126
+ if (::stat(entry.single_value().c_str(), &st) != 0) {
127
+ return -1;
128
+ }
129
+ return static_cast<int64_t>(st.st_size);
130
+ }
131
+
132
+ void SystemFileStream::report_read_failure(int err) {
133
+ const std::string detailed = format_path_errno_error("Failed to read root file", _active_path, err);
134
+ close_single_part();
135
+ throw make_entry_fault_error(detailed, _logical_path, err);
136
+ }
137
+
138
+ FilesystemMetadataInfo collect_root_path_metadata(const PathHierarchy &hierarchy, const std::unordered_set<std::string> &allowed_keys) {
139
+ FilesystemMetadataInfo info;
140
+
141
+ if (hierarchy.empty()) {
142
+ return info;
143
+ }
144
+
145
+ std::error_code ec;
146
+ const PathEntry &root_entry = hierarchy[0];
147
+ if (!root_entry.is_single()) {
148
+ return info;
149
+ }
150
+
151
+ const std::filesystem::path target(root_entry.single_value());
152
+ std::filesystem::directory_entry entry(target, ec);
153
+ if (ec) {
154
+ return info;
155
+ }
156
+
157
+ mode_t filetype = 0;
158
+ uint64_t size = 0;
159
+
160
+ ec.clear();
161
+ const bool is_regular = entry.is_regular_file(ec);
162
+ if (!ec && is_regular) {
163
+ ec.clear();
164
+ size = entry.file_size(ec);
165
+ if (ec) {
166
+ size = 0;
167
+ }
168
+ filetype = S_IFREG;
169
+ } else {
170
+ ec.clear();
171
+ const bool is_directory = entry.is_directory(ec);
172
+ if (!ec && is_directory) {
173
+ filetype = S_IFDIR;
174
+ } else {
175
+ ec.clear();
176
+ const bool is_symlink = entry.is_symlink(ec);
177
+ if (!ec && is_symlink) {
178
+ filetype = S_IFLNK;
179
+ }
180
+ }
181
+ }
182
+
183
+ info.size = size;
184
+ info.filetype = filetype;
185
+ EntryMetadataMap metadata;
186
+ if (!allowed_keys.empty()) {
187
+ const auto wants = [&allowed_keys](std::string_view key) {
188
+ return allowed_keys.find(std::string(key)) != allowed_keys.end();
189
+ };
190
+
191
+ // Path hierarchy / directory entry derived metadata
192
+ if (wants("pathname")) {
193
+ const PathEntry &tail = hierarchy.back();
194
+ if (tail.is_single()) {
195
+ metadata["pathname"] = tail.single_value();
196
+ } else {
197
+ metadata["pathname"] = path_entry_display(tail);
198
+ }
199
+ }
200
+
201
+ if (wants("filetype")) {
202
+ metadata["filetype"] = static_cast<uint64_t>(filetype);
203
+ }
204
+
205
+ if (wants("mode")) {
206
+ std::error_code status_ec;
207
+ const auto status = entry.status(status_ec);
208
+ if (!status_ec) {
209
+ metadata["mode"] = static_cast<uint64_t>(status.permissions());
210
+ }
211
+ }
212
+
213
+ const bool wants_size = wants("size");
214
+ const bool wants_uid = wants("uid");
215
+ const bool wants_gid = wants("gid");
216
+ const bool wants_uname = wants("uname");
217
+ const bool wants_gname = wants("gname");
218
+ const bool needs_stat = (wants_size && size == 0) || wants_uid || wants_gid || wants_uname || wants_gname;
219
+
220
+ struct stat stat_buffer;
221
+ bool have_stat = false;
222
+ if (needs_stat) {
223
+ const std::string native_path = entry.path().string();
224
+ have_stat = (::stat(native_path.c_str(), &stat_buffer) == 0);
225
+ }
226
+
227
+ if (wants_size) {
228
+ uint64_t resolved = size;
229
+ if (resolved == 0 && have_stat) {
230
+ resolved = static_cast<uint64_t>(stat_buffer.st_size);
231
+ }
232
+ if (resolved > 0 || (size == 0 && have_stat)) {
233
+ metadata["size"] = resolved;
234
+ }
235
+ }
236
+
237
+ if (have_stat) {
238
+ if (wants_uid) {
239
+ metadata["uid"] = static_cast<int64_t>(stat_buffer.st_uid);
240
+ }
241
+ if (wants_gid) {
242
+ metadata["gid"] = static_cast<int64_t>(stat_buffer.st_gid);
243
+ }
244
+ if (wants_uname) {
245
+ std::string uname;
246
+ if (lookup_username(stat_buffer.st_uid, uname)) {
247
+ metadata["uname"] = std::move(uname);
248
+ }
249
+ }
250
+ if (wants_gname) {
251
+ std::string gname;
252
+ if (lookup_groupname(stat_buffer.st_gid, gname)) {
253
+ metadata["gname"] = std::move(gname);
254
+ }
255
+ }
256
+ }
257
+ }
258
+
259
+ info.metadata = std::move(metadata);
260
+ return info;
261
+ }
262
+
263
+ } // namespace archive_r
@@ -0,0 +1,46 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #pragma once
5
+
6
+ #include <cstddef>
7
+ #include <cstdio>
8
+ #include <filesystem>
9
+ #include <string>
10
+ #include <sys/types.h>
11
+ #include <unordered_set>
12
+ #include <vector>
13
+
14
+ #include "archive_r/multi_volume_stream_base.h"
15
+ #include "archive_r/entry_metadata.h"
16
+ #include "archive_r/path_hierarchy.h"
17
+
18
+ namespace archive_r {
19
+
20
+ class SystemFileStream : public MultiVolumeStreamBase { // Stream over root files on the local filesystem, read through a C stdio FILE handle.
21
+ public:
22
+ explicit SystemFileStream(PathHierarchy logical_path); // Throws std::invalid_argument if the hierarchy is empty or its first entry is neither single nor multi-volume.
23
+ ~SystemFileStream() override;
24
+ // Per-part hooks overridden from MultiVolumeStreamBase, plus private state.
25
+ private:
26
+ void open_single_part(const PathHierarchy &single_part) override; // Opens the last entry's path with fopen in "rb" mode; throws an entry fault error on failure.
27
+ void close_single_part() override; // Closes the handle and clears the remembered path.
28
+ ssize_t read_from_single_part(void *buffer, size_t size) override; // Bytes read (> 0), 0 at end-of-file, -1 otherwise; a stream error is routed to report_read_failure.
29
+ int64_t seek_within_single_part(int64_t offset, int whence) override; // fseeko followed by ftello; returns the new offset or -1 on failure.
30
+ int64_t size_of_single_part(const PathHierarchy &single_part) override; // st_size from ::stat on the last entry's path, or -1 on failure.
31
+ // Non-virtual helper used by the read path.
32
+ void report_read_failure(int err); // Closes the current part and throws an entry fault error carrying `err`.
33
+ // State of the currently open part.
34
+ FILE *_handle; // stdio handle of the open part; nullptr when none is open.
35
+ std::string _active_path; // Path that was passed to fopen for the open part; empty when closed.
36
+ };
37
+
38
+ struct FilesystemMetadataInfo { // Result of collect_root_path_metadata.
38
+ uint64_t size = 0; // File size in bytes as reported for a regular file; 0 otherwise or on error.
39
+ mode_t filetype = 0; // S_IFREG, S_IFDIR or S_IFLNK; 0 when the type could not be determined.
40
+ EntryMetadataMap metadata; // Only the requested keys (e.g. "pathname", "mode", "uid"); empty when none were requested.
41
+ };
43
+
44
+ FilesystemMetadataInfo collect_root_path_metadata(const PathHierarchy &hierarchy, const std::unordered_set<std::string> &allowed_keys); // Inspects the first (single-path) entry of `hierarchy`; fills `metadata` only with keys present in `allowed_keys`.
45
+
46
+ } // namespace archive_r