archive_r_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +56 -0
  3. data/README.md +103 -0
  4. data/ext/archive_r/archive_r_ext.cc +910 -0
  5. data/ext/archive_r/extconf.rb +90 -0
  6. data/ext/archive_r/vendor/archive_r/LICENSE.txt +56 -0
  7. data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +41 -0
  8. data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +161 -0
  9. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h +34 -0
  10. data/ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h +56 -0
  11. data/ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h +46 -0
  12. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +109 -0
  13. data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h +37 -0
  14. data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +122 -0
  15. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +330 -0
  16. data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +98 -0
  17. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc +162 -0
  18. data/ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h +54 -0
  19. data/ext/archive_r/vendor/archive_r/src/archive_type.cc +552 -0
  20. data/ext/archive_r/vendor/archive_r/src/archive_type.h +76 -0
  21. data/ext/archive_r/vendor/archive_r/src/data_stream.cc +35 -0
  22. data/ext/archive_r/vendor/archive_r/src/entry.cc +253 -0
  23. data/ext/archive_r/vendor/archive_r/src/entry_fault.cc +26 -0
  24. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.cc +54 -0
  25. data/ext/archive_r/vendor/archive_r/src/entry_fault_error.h +32 -0
  26. data/ext/archive_r/vendor/archive_r/src/entry_impl.h +58 -0
  27. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc +81 -0
  28. data/ext/archive_r/vendor/archive_r/src/multi_volume_manager.h +41 -0
  29. data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +199 -0
  30. data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +151 -0
  31. data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +304 -0
  32. data/ext/archive_r/vendor/archive_r/src/simple_profiler.h +120 -0
  33. data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +263 -0
  34. data/ext/archive_r/vendor/archive_r/src/system_file_stream.h +46 -0
  35. data/ext/archive_r/vendor/archive_r/src/traverser.cc +314 -0
  36. data/lib/archive_r.rb +80 -0
  37. metadata +112 -0
@@ -0,0 +1,314 @@
1
+ // SPDX-License-Identifier: MIT
2
+ // Copyright (c) 2025 archive_r Team
3
+
4
+ #include "archive_r/traverser.h"
5
+ #include "archive_r/entry.h"
6
+ #include "archive_r/path_hierarchy.h"
7
+ #include "archive_r/path_hierarchy_utils.h"
8
+ #include "archive_r/entry_fault.h"
9
+ #include "archive_stack_orchestrator.h"
10
+ #include "archive_type.h"
11
+ #include "entry_fault_error.h"
12
+ #include "system_file_stream.h"
13
+ #include <iostream>
14
+ #include <filesystem>
15
+ #include <memory>
16
+ #include <stdexcept>
17
+ #include <sys/stat.h>
18
+ #include <system_error>
19
+ #include <unordered_set>
20
+ #include <utility>
21
+ #include <vector>
22
+
23
+ namespace archive_r {
24
+
25
+ // ============================================================================
26
+ // Traverser Implementation
27
+ // ============================================================================
28
+
29
+ namespace {
30
+
31
+ archive_r::ArchiveOption to_archive_option(const TraverserOptions &options) {
32
+ archive_r::ArchiveOption converted;
33
+ converted.passphrases = options.passphrases;
34
+ converted.formats = options.formats;
35
+ converted.metadata_keys = options.metadata_keys;
36
+ return converted;
37
+ }
38
+
39
+ } // namespace
40
+
41
+ Traverser::Traverser(std::vector<PathHierarchy> paths, TraverserOptions options)
42
+ : _initial_paths(std::move(paths))
43
+ , _options(std::move(options)) {
44
+ if (_initial_paths.empty()) {
45
+ throw std::invalid_argument("paths cannot be empty");
46
+ }
47
+ for (const auto &hierarchy : _initial_paths) {
48
+ if (hierarchy.empty()) {
49
+ throw std::invalid_argument("path hierarchy cannot be empty");
50
+ }
51
+ }
52
+
53
+ }
54
+
55
+ Traverser::~Traverser() = default; // compiler-generated teardown; nothing custom is released here
56
+
57
+ // ============================================================================
58
+ // Iterator Implementation
59
+ // ============================================================================
60
+
61
+ class Traverser::Iterator::Impl {
62
+ public:
63
+ Impl(std::vector<PathHierarchy> paths, bool at_end, const TraverserOptions &traverser_options)
64
+ : _paths(std::move(paths))
65
+ , _at_end(at_end)
66
+ , _archive_options(to_archive_option(traverser_options))
67
+ , _default_descent(traverser_options.descend_archives) {
68
+ if (_at_end) {
69
+ return;
70
+ }
71
+ if (_paths.empty()) {
72
+ throw std::invalid_argument("paths cannot be empty");
73
+ }
74
+ ensure_shared_orchestrator();
75
+
76
+ if (!advance_to_next_root()) {
77
+ _at_end = true;
78
+ }
79
+ }
80
+
81
+ Entry &get_entry() {
82
+ if (!_current_entry) {
83
+ throw std::logic_error("Cannot dereference end iterator");
84
+ }
85
+ return *_current_entry;
86
+ }
87
+
88
+ void advance() {
89
+ if (_at_end) {
90
+ return;
91
+ }
92
+
93
+ bool request_descend_into_archive = _current_entry && _current_entry->descent_enabled() && !_current_entry->is_directory();
94
+
95
+ if (_current_entry->depth() == 0 && request_descend_into_archive && !_current_entry->is_directory()) {
96
+ request_descend_into_archive = false;
97
+ attempt_descend_into_root(_current_entry->path_hierarchy());
98
+ }
99
+ _current_entry.reset();
100
+
101
+ if (fetch_from_archive(request_descend_into_archive)) {
102
+ return;
103
+ }
104
+
105
+ if (fetch_from_directory()) {
106
+ return;
107
+ }
108
+
109
+ if (advance_to_next_root()) {
110
+ return;
111
+ }
112
+
113
+ descend_pending_multi_volumes();
114
+
115
+ if (fetch_from_archive(false)) {
116
+ return;
117
+ }
118
+
119
+ _at_end = true;
120
+ }
121
+
122
+ bool equals(const Impl *other) const {
123
+ if (this == other) {
124
+ return true;
125
+ }
126
+ if (!other) {
127
+ return false;
128
+ }
129
+ return _at_end && other->_at_end;
130
+ }
131
+
132
+ private:
133
+ std::shared_ptr<ArchiveStackOrchestrator> ensure_shared_orchestrator() {
134
+ if (!_shared_orchestrator) {
135
+ _shared_orchestrator = std::make_shared<ArchiveStackOrchestrator>(_archive_options);
136
+ }
137
+ return _shared_orchestrator;
138
+ }
139
+
140
+ std::string normalize_path_string(const std::string &value) {
141
+ if (value.empty()) {
142
+ return value;
143
+ }
144
+ std::filesystem::path path_value(value);
145
+ return path_value.lexically_normal().string();
146
+ }
147
+
148
+ bool fetch_from_directory() {
149
+ if (_directory_iterator == _directory_end) {
150
+ return false;
151
+ }
152
+ const std::filesystem::directory_entry entry = *_directory_iterator;
153
+ set_current_entry(make_single_path(normalize_path_string(entry.path().string())));
154
+ if (_current_entry->is_directory() && !_current_entry->descent_enabled()) {
155
+ _directory_iterator.disable_recursion_pending();
156
+ }
157
+ ++_directory_iterator;
158
+ return true;
159
+ }
160
+
161
+ bool fetch_from_archive(bool request_descend_into_archive) {
162
+ if (!archive_active()) {
163
+ return false;
164
+ }
165
+ ArchiveStackOrchestrator &orchestrator = *ensure_shared_orchestrator();
166
+
167
+ try {
168
+ if (orchestrator.advance(request_descend_into_archive)) {
169
+ set_current_entry(orchestrator.current_entry_hierarchy());
170
+ return true;
171
+ }
172
+ } catch (const EntryFaultError &error) {
173
+ EntryFault fault = enrich_orchestrator_error(error, orchestrator);
174
+ handle_orchestrator_error(fault);
175
+ }
176
+ return false;
177
+ }
178
+
179
+ bool advance_to_next_root() {
180
+ if (_current_path_index >= _paths.size()) {
181
+ return false;
182
+ }
183
+ const PathHierarchy &hierarchy = _paths[_current_path_index];
184
+ reset_source_state();
185
+ set_current_entry(hierarchy);
186
+ if (hierarchy.size() == 1 && hierarchy.front().is_single()) {
187
+ const std::filesystem::path fs_path(hierarchy.front().single_value());
188
+ std::error_code ec;
189
+ const bool path_is_directory = std::filesystem::is_directory(fs_path, ec) && !ec;
190
+ if (path_is_directory) {
191
+ _directory_iterator = std::filesystem::recursive_directory_iterator(fs_path, std::filesystem::directory_options::skip_permission_denied);
192
+ _directory_end = std::filesystem::recursive_directory_iterator();
193
+ }
194
+ }
195
+ ++_current_path_index;
196
+ return true;
197
+ }
198
+
199
+ bool descend_pending_multi_volumes() {
200
+ auto orchestrator = ensure_shared_orchestrator();
201
+ try {
202
+ if (orchestrator->descend_pending_multi_volumes()) {
203
+ return true;
204
+ }
205
+ } catch (const EntryFaultError &error) {
206
+ EntryFault fault = enrich_orchestrator_error(error, *orchestrator);
207
+ handle_orchestrator_error(fault);
208
+ }
209
+ return false;
210
+ }
211
+
212
+ void attempt_descend_into_root(const PathHierarchy &hierarchy) {
213
+ auto shared_orchestrator = ensure_shared_orchestrator();
214
+ try {
215
+ shared_orchestrator->open_root_hierarchy(hierarchy);
216
+ } catch (const EntryFaultError &error) {
217
+ EntryFault fault = enrich_orchestrator_error(error, *shared_orchestrator);
218
+ handle_orchestrator_error(fault);
219
+ }
220
+ }
221
+
222
+ void set_current_entry(PathHierarchy hierarchy) {
223
+ _current_entry = Entry::create(std::move(hierarchy), ensure_shared_orchestrator(), _default_descent);
224
+ }
225
+
226
+ void handle_orchestrator_error(const EntryFault &fault) {
227
+ dispatch_registered_fault(fault);
228
+ }
229
+
230
+ void reset_source_state() {
231
+ reset_directory_traversal();
232
+ _current_entry.reset();
233
+ }
234
+
235
+ bool archive_active() const { return _shared_orchestrator && _shared_orchestrator->depth() > 0; }
236
+
237
+ bool directory_traversal_active() const { return _directory_iterator != _directory_end; }
238
+
239
+ void reset_directory_traversal() {
240
+ _directory_iterator = std::filesystem::recursive_directory_iterator();
241
+ _directory_end = std::filesystem::recursive_directory_iterator();
242
+ }
243
+
244
+ EntryFault enrich_orchestrator_error(const EntryFaultError &error, ArchiveStackOrchestrator &orchestrator) {
245
+ EntryFault fault = error.fault();
246
+ if (fault.hierarchy.empty()) {
247
+ fault.hierarchy = orchestrator.current_entry_hierarchy();
248
+ }
249
+ if (fault.message.empty() && error.what()) {
250
+ fault.message = error.what();
251
+ }
252
+ return fault;
253
+ }
254
+
255
+ std::vector<PathHierarchy> _paths;
256
+ size_t _current_path_index = 0;
257
+ std::filesystem::recursive_directory_iterator _directory_iterator;
258
+ std::filesystem::recursive_directory_iterator _directory_end;
259
+ bool _at_end = false;
260
+ std::unique_ptr<Entry> _current_entry;
261
+
262
+ ArchiveOption _archive_options;
263
+ std::shared_ptr<ArchiveStackOrchestrator> _shared_orchestrator;
264
+ bool _default_descent = true;
265
+ };
266
+ // ============================================================================
267
+ // Iterator public interface
268
+ // ============================================================================
269
+
270
+ Traverser::Iterator::Iterator(std::unique_ptr<Impl> impl)
271
+ : _impl(std::move(impl)) {}
272
+
273
+ Traverser::Iterator::~Iterator() = default; // defaulted in this file, where Impl (held via _impl) is fully defined
274
+
275
+ Traverser::Iterator::Iterator(Iterator &&other) noexcept
276
+ : _impl(std::move(other._impl)) {}
277
+
278
+ Traverser::Iterator &Traverser::Iterator::operator=(Iterator &&other) noexcept {
279
+ _impl = std::move(other._impl);
280
+ return *this;
281
+ }
282
+
283
+ Traverser::Iterator::reference Traverser::Iterator::operator*() { return _impl->get_entry(); }
284
+
285
+ Traverser::Iterator::pointer Traverser::Iterator::operator->() { return &_impl->get_entry(); }
286
+
287
+ Traverser::Iterator &Traverser::Iterator::operator++() {
288
+ if (_impl) {
289
+ _impl->advance();
290
+ }
291
+ return *this;
292
+ }
293
+
294
+ bool Traverser::Iterator::operator==(const Iterator &other) const {
295
+ if (!_impl && !other._impl) {
296
+ return true;
297
+ }
298
+ if (!_impl || !other._impl) {
299
+ return false;
300
+ }
301
+ return _impl->equals(other._impl.get());
302
+ }
303
+
304
+ bool Traverser::Iterator::operator!=(const Iterator &other) const { return !(*this == other); }
305
+
306
+ // ============================================================================
307
+ // Traverser public interface
308
+ // ============================================================================
309
+
310
+ Traverser::Iterator Traverser::begin() { return Iterator(std::make_unique<Iterator::Impl>(_initial_paths, false, _options)); }
311
+
312
+ Traverser::Iterator Traverser::end() { return Iterator(std::make_unique<Iterator::Impl>(_initial_paths, true, _options)); }
313
+
314
+ } // namespace archive_r
data/lib/archive_r.rb ADDED
@@ -0,0 +1,80 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2025 archive_r Team
3
+
4
+ begin
5
+ # Prefer the packaged gem layout (lib/archive_r/archive_r.so)
6
+ require_relative 'archive_r/archive_r'
7
+ rescue LoadError
8
+ # Fallback to the local development layout (bindings/ruby/archive_r.so)
9
+ require_relative '../archive_r'
10
+ end
11
+
12
module Archive_r
  VERSION = "0.1.0"

  # Formats enabled when the caller does not pass :formats. Covers the common
  # archive formats and leaves out libarchive's mtree/raw pseudo formats.
  STANDARD_FORMATS = %w[
    7zip ar cab cpio empty iso9660 lha rar tar warc xar zip
  ].freeze

  class << self
    # Returns a fresh options hash built from +opts+.
    #
    # +nil+ becomes an empty hash, a Hash is duplicated, and anything else is
    # converted with +to_hash+ and then duplicated. When the result has no
    # :formats key, STANDARD_FORMATS is filled in. The caller's object is
    # never modified.
    def normalize_options(opts = nil)
      options =
        if opts.nil?
          {}
        elsif Hash === opts
          opts.dup
        else
          opts.to_hash.dup
        end

      return options if options.key?(:formats)

      options[:formats] = STANDARD_FORMATS
      options
    end

    # Traverses +paths+ with the given keyword options.
    #
    # With a block, opens a Traverser and passes the block to its +each+.
    # Without a block, returns whatever +Traverser#each+ returns when called
    # without one.
    def traverse(paths, **opts, &block)
      options = normalize_options(opts)

      return Traverser.new(paths, options).each unless block

      Traverser.open(paths, options) { |traverser| traverser.each(&block) }
    end
  end

  class Entry
    # Ruby-side helpers layered on the Entry class.

    # The entry's path doubles as its string form.
    def to_s
      path
    end

    # Short debugging representation showing path, size and depth.
    def inspect
      "#<Archive_r::Entry path=#{path.inspect} size=#{size} depth=#{depth}>"
    end
  end

  class Traverser
    # Ruby-side helpers layered on the Traverser class.

    class << self
      # Keep the previous +open+ reachable under a private-looking name, then
      # wrap it so the options always pass through normalize_options.
      alias_method :__archive_r_c_open, :open

      def open(paths, opts = nil, &block)
        normalized = Archive_r.normalize_options(opts)
        __archive_r_c_open(paths, normalized, &block)
      end
    end

    # Same wrapping for the constructor.
    alias_method :__archive_r_c_initialize, :initialize

    def initialize(paths, opts = nil)
      normalized = Archive_r.normalize_options(opts)
      __archive_r_c_initialize(paths, normalized)
    end

    # Number of entries produced by a full +each+ pass.
    def count
      total = 0
      each { |_entry| total += 1 }
      total
    end
  end
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: archive_r_ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - raiso.tcs
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-11-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '13.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '13.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ description: Fast archive traversal library with support for nested archives and multipart
42
+ files
43
+ email:
44
+ - raiso.tcs@users.noreply.github.com
45
+ executables: []
46
+ extensions:
47
+ - ext/archive_r/extconf.rb
48
+ extra_rdoc_files: []
49
+ files:
50
+ - LICENSE
51
+ - README.md
52
+ - ext/archive_r/archive_r_ext.cc
53
+ - ext/archive_r/extconf.rb
54
+ - ext/archive_r/vendor/archive_r/LICENSE.txt
55
+ - ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h
56
+ - ext/archive_r/vendor/archive_r/include/archive_r/entry.h
57
+ - ext/archive_r/vendor/archive_r/include/archive_r/entry_fault.h
58
+ - ext/archive_r/vendor/archive_r/include/archive_r/entry_metadata.h
59
+ - ext/archive_r/vendor/archive_r/include/archive_r/multi_volume_stream_base.h
60
+ - ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h
61
+ - ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy_utils.h
62
+ - ext/archive_r/vendor/archive_r/include/archive_r/traverser.h
63
+ - ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc
64
+ - ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h
65
+ - ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.cc
66
+ - ext/archive_r/vendor/archive_r/src/archive_stack_orchestrator.h
67
+ - ext/archive_r/vendor/archive_r/src/archive_type.cc
68
+ - ext/archive_r/vendor/archive_r/src/archive_type.h
69
+ - ext/archive_r/vendor/archive_r/src/data_stream.cc
70
+ - ext/archive_r/vendor/archive_r/src/entry.cc
71
+ - ext/archive_r/vendor/archive_r/src/entry_fault.cc
72
+ - ext/archive_r/vendor/archive_r/src/entry_fault_error.cc
73
+ - ext/archive_r/vendor/archive_r/src/entry_fault_error.h
74
+ - ext/archive_r/vendor/archive_r/src/entry_impl.h
75
+ - ext/archive_r/vendor/archive_r/src/multi_volume_manager.cc
76
+ - ext/archive_r/vendor/archive_r/src/multi_volume_manager.h
77
+ - ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc
78
+ - ext/archive_r/vendor/archive_r/src/path_hierarchy.cc
79
+ - ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc
80
+ - ext/archive_r/vendor/archive_r/src/simple_profiler.h
81
+ - ext/archive_r/vendor/archive_r/src/system_file_stream.cc
82
+ - ext/archive_r/vendor/archive_r/src/system_file_stream.h
83
+ - ext/archive_r/vendor/archive_r/src/traverser.cc
84
+ - lib/archive_r.rb
85
+ homepage: https://github.com/raizo-tcs/archive_r
86
+ licenses:
87
+ - MIT
88
+ metadata:
89
+ homepage_uri: https://github.com/raizo-tcs/archive_r
90
+ source_code_uri: https://github.com/raizo-tcs/archive_r
91
+ bug_tracker_uri: https://github.com/raizo-tcs/archive_r/issues
92
+ changelog_uri: https://github.com/raizo-tcs/archive_r/releases
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: 2.7.0
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubygems_version: 3.4.20
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Ruby bindings for archive_r library
112
+ test_files: []