archive_r_ruby 0.1.5 → 0.1.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +27 -7
- data/README.md +5 -2
- data/ext/archive_r/archive_r_ext.cc +7 -16
- data/ext/archive_r/extconf.rb +27 -22
- data/ext/archive_r/vendor/archive_r/LICENSE.txt +27 -7
- data/ext/archive_r/vendor/archive_r/include/archive_r/data_stream.h +1 -11
- data/ext/archive_r/vendor/archive_r/include/archive_r/entry.h +24 -10
- data/ext/archive_r/vendor/archive_r/include/archive_r/path_hierarchy.h +2 -19
- data/ext/archive_r/vendor/archive_r/include/archive_r/platform_compat.h +16 -1
- data/ext/archive_r/vendor/archive_r/include/archive_r/traverser.h +35 -1
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.cc +60 -66
- data/ext/archive_r/vendor/archive_r/src/archive_stack_cursor.h +18 -3
- data/ext/archive_r/vendor/archive_r/src/multi_volume_stream_base.cc +4 -4
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy.cc +1 -22
- data/ext/archive_r/vendor/archive_r/src/path_hierarchy_utils.cc +1 -32
- data/ext/archive_r/vendor/archive_r/src/system_file_stream.cc +7 -3
- data/ext/archive_r/vendor/archive_r/src/traverser.cc +6 -1
- data/lib/archive_r.rb +5 -1
- metadata +9 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 648064959d139565d2e122215cb49ae178b9d5e735a99176b8d785238b753800
|
|
4
|
+
data.tar.gz: d9f05748fbcca7aa76bb615b53480c9133423905c1137005d1c6df895a8b09c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dd1b45fd72e08536d0b95b261b8ca071d68fa0a1840c984c9d5aae4d9449b394a11ad6e7f8a4ca8df53b68ee7aef68791468e98404fd6a4ec098a34d831affd6
|
|
7
|
+
data.tar.gz: ebf2a08b3ff59ba317b48cd175d87bf0624e2d4c67d2c95c90b1c8fb32cd9c1afa6ff48f0d24cb467401d3b9b2364cf368946cf4af01e8bdb744431ee9cd961f
|
data/LICENSE.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
archive_r License
|
|
2
|
-
Version: 0.1.
|
|
2
|
+
Version: 0.1.7 (2025-12-16)
|
|
3
3
|
|
|
4
4
|
----------------------------------------
|
|
5
5
|
Primary License
|
|
@@ -66,12 +66,32 @@ The following components are redistributed only because libarchive (bundled with
|
|
|
66
66
|
- Purpose: libarchive dependency providing Zstandard compression; shipped within archive_r binaries.
|
|
67
67
|
- License: BSD License (https://github.com/facebook/zstd)
|
|
68
68
|
|
|
69
|
-
8.
|
|
70
|
-
- Purpose: libarchive dependency providing cryptographic support
|
|
71
|
-
- License:
|
|
69
|
+
8. Nettle
|
|
70
|
+
- Purpose: libarchive dependency providing cryptographic support (macOS/Linux); bundled with archive_r binaries.
|
|
71
|
+
- License: GNU LGPLv3+ or GNU GPLv2+ (https://www.lysator.liu.se/~nisse/nettle/)
|
|
72
72
|
|
|
73
|
-
9.
|
|
74
|
-
- Purpose:
|
|
75
|
-
- License: GNU
|
|
73
|
+
9. mini-gmp
|
|
74
|
+
- Purpose: Nettle dependency for arithmetic operations (macOS/Linux); bundled with archive_r binaries.
|
|
75
|
+
- License: GNU LGPLv3+ or GNU GPLv2+ (https://gmplib.org/)
|
|
76
|
+
|
|
77
|
+
10. OpenSSL 3
|
|
78
|
+
- Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r Windows wheels.
|
|
79
|
+
- License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
|
|
80
|
+
|
|
81
|
+
11. lz4
|
|
82
|
+
- Purpose: libarchive dependency providing LZ4 compression; shipped with archive_r artifacts when required.
|
|
83
|
+
- License: BSD 2-Clause (https://github.com/lz4/lz4)
|
|
84
|
+
|
|
85
|
+
12. libb2 (BLAKE2)
|
|
86
|
+
- Purpose: libarchive dependency providing BLAKE2 hashing; bundled when archive formats require it.
|
|
87
|
+
- License: CC0 1.0 Universal (https://github.com/BLAKE2/libb2)
|
|
88
|
+
|
|
89
|
+
13. libattr
|
|
90
|
+
- Purpose: libarchive dependency providing extended attribute support on POSIX platforms; included in POSIX builds only.
|
|
91
|
+
- License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/attr)
|
|
92
|
+
|
|
93
|
+
14. libacl
|
|
94
|
+
- Purpose: libarchive dependency providing POSIX ACL support; included in POSIX builds only.
|
|
95
|
+
- License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/acl)
|
|
76
96
|
Users of archive_r should review the linked third-party licenses to ensure
|
|
77
97
|
compliance with their terms when redistributing this software.
|
data/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# archive_r Ruby Binding
|
|
2
2
|
|
|
3
|
+
Ruby bindings for archive_r, a libarchive-based library for processing many archive formats.
|
|
4
|
+
It streams entry data directly from the source to recursively read nested archives without extracting to temporary files or loading large in-memory buffers.
|
|
5
|
+
|
|
3
6
|
Ruby bindings expose the archive_r traverser API with a natural, block-friendly interface. This document consolidates the Ruby-specific instructions that previously lived in the repository root README.
|
|
4
7
|
|
|
5
8
|
## Requirements
|
|
@@ -37,9 +40,9 @@ bundle exec rake build # creates archive_r-<version>.gem locally
|
|
|
37
40
|
|
|
38
41
|
The `rake test` task compiles the extension, installs it into `lib/`, and executes the Minitest suite.
|
|
39
42
|
|
|
40
|
-
## Running the
|
|
43
|
+
## Running the repository test suite
|
|
41
44
|
|
|
42
|
-
`./
|
|
45
|
+
From the repository root run `./bindings/ruby/run_binding_tests.sh`. The script prepares a clean GEM_HOME (`build/ruby_gem_home`), installs the gem produced in `build/bindings/ruby`, runs `bindings/ruby/test/test_traverser.rb`, and saves the install log to `build/logs/ruby_gem_install.log`. CI invokes this script after the core tests.
|
|
43
46
|
|
|
44
47
|
## Usage Example
|
|
45
48
|
|
|
@@ -72,11 +72,7 @@ static VALUE path_entry_to_rb(const PathEntry &entry) {
|
|
|
72
72
|
}
|
|
73
73
|
return array;
|
|
74
74
|
}
|
|
75
|
-
|
|
76
|
-
for (const auto &child : entry.nested_nodes()) {
|
|
77
|
-
rb_ary_push(array, path_entry_to_rb(child));
|
|
78
|
-
}
|
|
79
|
-
return array;
|
|
75
|
+
return Qnil;
|
|
80
76
|
}
|
|
81
77
|
|
|
82
78
|
static VALUE path_hierarchy_to_rb(const PathHierarchy &hierarchy) {
|
|
@@ -118,7 +114,7 @@ public:
|
|
|
118
114
|
}
|
|
119
115
|
|
|
120
116
|
~RubyUserStream() override {
|
|
121
|
-
|
|
117
|
+
release_active_io_resources(false);
|
|
122
118
|
rb_gc_unregister_address(&_ruby_stream);
|
|
123
119
|
}
|
|
124
120
|
|
|
@@ -140,7 +136,7 @@ protected:
|
|
|
140
136
|
|
|
141
137
|
void close_single_part() override {
|
|
142
138
|
if (_has_open_part_io) {
|
|
143
|
-
|
|
139
|
+
release_active_io_resources(true);
|
|
144
140
|
if (_has_close_part_io) {
|
|
145
141
|
rb_funcall(_ruby_stream, rb_id_close_part_io_method, 0);
|
|
146
142
|
}
|
|
@@ -250,7 +246,7 @@ private:
|
|
|
250
246
|
if (!rb_respond_to(io, rb_id_read_method)) {
|
|
251
247
|
rb_raise(rb_eTypeError, "open_part_io must return an object responding to #read");
|
|
252
248
|
}
|
|
253
|
-
|
|
249
|
+
release_active_io_resources(true);
|
|
254
250
|
_active_io = io;
|
|
255
251
|
rb_gc_register_address(&_active_io);
|
|
256
252
|
_active_io_seekable = rb_respond_to(io, rb_id_seek_method);
|
|
@@ -259,11 +255,11 @@ private:
|
|
|
259
255
|
_active_io_has_size = rb_respond_to(io, rb_id_size_method);
|
|
260
256
|
}
|
|
261
257
|
|
|
262
|
-
void
|
|
258
|
+
void release_active_io_resources(bool close_io) {
|
|
263
259
|
if (_active_io == Qnil) {
|
|
264
260
|
return;
|
|
265
261
|
}
|
|
266
|
-
if (_active_io_has_close) {
|
|
262
|
+
if (close_io && _active_io_has_close) {
|
|
267
263
|
rb_funcall(_active_io, rb_id_close_method, 0);
|
|
268
264
|
}
|
|
269
265
|
rb_gc_unregister_address(&_active_io);
|
|
@@ -509,12 +505,7 @@ static PathEntry rb_value_to_path_entry(VALUE value) {
|
|
|
509
505
|
return PathEntry::multi_volume(std::move(parts));
|
|
510
506
|
}
|
|
511
507
|
|
|
512
|
-
PathEntry
|
|
513
|
-
nodes.reserve(static_cast<size_t>(length));
|
|
514
|
-
for (long i = 0; i < length; ++i) {
|
|
515
|
-
nodes.emplace_back(rb_value_to_path_entry(rb_ary_entry(array, i)));
|
|
516
|
-
}
|
|
517
|
-
return PathEntry::nested(std::move(nodes));
|
|
508
|
+
rb_raise(rb_eTypeError, "PathEntry array must contain only Strings");
|
|
518
509
|
}
|
|
519
510
|
|
|
520
511
|
// Helper: Convert Ruby path argument into vector of PathHierarchy
|
data/ext/archive_r/extconf.rb
CHANGED
|
@@ -46,6 +46,7 @@ end
|
|
|
46
46
|
archive_r_include = File.join(archive_r_root, 'include')
|
|
47
47
|
archive_r_src = File.join(archive_r_root, 'src')
|
|
48
48
|
archive_r_lib_dir = File.join(archive_r_root, 'build')
|
|
49
|
+
archive_r_local_libs = File.expand_path('.libs', __dir__)
|
|
49
50
|
glue_source = File.join(__dir__, 'archive_r_ext.cc')
|
|
50
51
|
|
|
51
52
|
# Ensure make can locate vendored sources via VPATH
|
|
@@ -58,6 +59,11 @@ $VPATH << archive_r_src
|
|
|
58
59
|
# Add include paths
|
|
59
60
|
$INCFLAGS << " -I#{archive_r_include}"
|
|
60
61
|
$INCFLAGS << " -I#{archive_r_src}"
|
|
62
|
+
$LIBPATH.unshift(archive_r_local_libs)
|
|
63
|
+
|
|
64
|
+
unless Gem.win_platform?
|
|
65
|
+
$LDFLAGS << ' -Wl,-rpath,$ORIGIN/.libs'
|
|
66
|
+
end
|
|
61
67
|
|
|
62
68
|
# C++17 standard
|
|
63
69
|
$CXXFLAGS << " -std=c++17"
|
|
@@ -82,31 +88,30 @@ if ENV['LIBARCHIVE_LIBRARY_DIRS']
|
|
|
82
88
|
end
|
|
83
89
|
|
|
84
90
|
# Check for libarchive
|
|
85
|
-
unless have_library('archive')
|
|
86
|
-
|
|
87
|
-
unless have_library('archive_static') || have_library('libarchive')
|
|
88
|
-
abort "libarchive is required but not found"
|
|
89
|
-
end
|
|
91
|
+
unless have_library('archive') || have_library('libarchive')
|
|
92
|
+
abort "libarchive is required but not found"
|
|
90
93
|
end
|
|
91
94
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
95
|
+
shared_candidates = [
|
|
96
|
+
File.join(archive_r_lib_dir, 'libarchive_r_core.so'),
|
|
97
|
+
File.join(archive_r_lib_dir, 'libarchive_r_core.dylib'),
|
|
98
|
+
File.join(archive_r_lib_dir, 'archive_r_core.dll'),
|
|
99
|
+
File.join(archive_r_lib_dir, 'archive_r_core.lib'),
|
|
100
|
+
File.join(archive_r_lib_dir, 'Release', 'archive_r_core.dll'),
|
|
101
|
+
File.join(archive_r_lib_dir, 'Release', 'archive_r_core.lib'),
|
|
102
|
+
File.join(archive_r_local_libs, 'libarchive_r_core.so'),
|
|
103
|
+
File.join(archive_r_local_libs, 'libarchive_r_core.dylib'),
|
|
104
|
+
File.join(archive_r_local_libs, 'archive_r_core.dll'),
|
|
105
|
+
]
|
|
106
|
+
|
|
107
|
+
found_shared = shared_candidates.find { |path| File.exist?(path) }
|
|
108
|
+
|
|
109
|
+
if found_shared
|
|
110
|
+
$LIBPATH.unshift(File.dirname(found_shared))
|
|
111
|
+
$libs = "-larchive_r_core #{$libs}"
|
|
112
|
+
puts "Using pre-built shared archive_r core: #{found_shared}"
|
|
106
113
|
else
|
|
107
|
-
|
|
108
|
-
puts "Pre-built library not found, will build from source"
|
|
109
|
-
|
|
114
|
+
puts "Pre-built shared library not found, will build from source"
|
|
110
115
|
srcs = [glue_source] + Dir.glob(File.join(archive_r_src, '*.cc'))
|
|
111
116
|
$srcs = srcs
|
|
112
117
|
end
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
archive_r License
|
|
2
|
-
Version: 0.1.
|
|
2
|
+
Version: 0.1.7 (2025-12-16)
|
|
3
3
|
|
|
4
4
|
----------------------------------------
|
|
5
5
|
Primary License
|
|
@@ -66,12 +66,32 @@ The following components are redistributed only because libarchive (bundled with
|
|
|
66
66
|
- Purpose: libarchive dependency providing Zstandard compression; shipped within archive_r binaries.
|
|
67
67
|
- License: BSD License (https://github.com/facebook/zstd)
|
|
68
68
|
|
|
69
|
-
8.
|
|
70
|
-
- Purpose: libarchive dependency providing cryptographic support
|
|
71
|
-
- License:
|
|
69
|
+
8. Nettle
|
|
70
|
+
- Purpose: libarchive dependency providing cryptographic support (macOS/Linux); bundled with archive_r binaries.
|
|
71
|
+
- License: GNU LGPLv3+ or GNU GPLv2+ (https://www.lysator.liu.se/~nisse/nettle/)
|
|
72
72
|
|
|
73
|
-
9.
|
|
74
|
-
- Purpose:
|
|
75
|
-
- License: GNU
|
|
73
|
+
9. mini-gmp
|
|
74
|
+
- Purpose: Nettle dependency for arithmetic operations (macOS/Linux); bundled with archive_r binaries.
|
|
75
|
+
- License: GNU LGPLv3+ or GNU GPLv2+ (https://gmplib.org/)
|
|
76
|
+
|
|
77
|
+
10. OpenSSL 3
|
|
78
|
+
- Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r Windows wheels.
|
|
79
|
+
- License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
|
|
80
|
+
|
|
81
|
+
11. lz4
|
|
82
|
+
- Purpose: libarchive dependency providing LZ4 compression; shipped with archive_r artifacts when required.
|
|
83
|
+
- License: BSD 2-Clause (https://github.com/lz4/lz4)
|
|
84
|
+
|
|
85
|
+
12. libb2 (BLAKE2)
|
|
86
|
+
- Purpose: libarchive dependency providing BLAKE2 hashing; bundled when archive formats require it.
|
|
87
|
+
- License: CC0 1.0 Universal (https://github.com/BLAKE2/libb2)
|
|
88
|
+
|
|
89
|
+
13. libattr
|
|
90
|
+
- Purpose: libarchive dependency providing extended attribute support on POSIX platforms; included in POSIX builds only.
|
|
91
|
+
- License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/attr)
|
|
92
|
+
|
|
93
|
+
14. libacl
|
|
94
|
+
- Purpose: libarchive dependency providing POSIX ACL support; included in POSIX builds only.
|
|
95
|
+
- License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/acl)
|
|
76
96
|
Users of archive_r should review the linked third-party licenses to ensure
|
|
77
97
|
compliance with their terms when redistributing this software.
|
|
@@ -3,23 +3,13 @@
|
|
|
3
3
|
|
|
4
4
|
#pragma once
|
|
5
5
|
|
|
6
|
+
#include "archive_r/platform_compat.h"
|
|
6
7
|
#include "archive_r/path_hierarchy.h"
|
|
7
8
|
|
|
8
9
|
#include <functional>
|
|
9
10
|
#include <memory>
|
|
10
|
-
#include <sys/types.h>
|
|
11
11
|
#include <cstdint>
|
|
12
12
|
|
|
13
|
-
#ifdef _WIN32
|
|
14
|
-
#include <basetsd.h>
|
|
15
|
-
using ssize_t = SSIZE_T;
|
|
16
|
-
#endif
|
|
17
|
-
|
|
18
|
-
// Avoid conflict with potential 'read' macro on Windows
|
|
19
|
-
#ifdef read
|
|
20
|
-
#undef read
|
|
21
|
-
#endif
|
|
22
|
-
|
|
23
13
|
namespace archive_r {
|
|
24
14
|
|
|
25
15
|
/**
|
|
@@ -7,17 +7,12 @@
|
|
|
7
7
|
#include <filesystem>
|
|
8
8
|
#include <memory>
|
|
9
9
|
#include <string>
|
|
10
|
-
#include <sys/types.h>
|
|
11
10
|
#include <vector>
|
|
12
11
|
|
|
13
|
-
#ifdef _MSC_VER
|
|
14
|
-
#include <BaseTsd.h>
|
|
15
|
-
typedef SSIZE_T ssize_t;
|
|
16
|
-
#endif
|
|
17
|
-
|
|
18
12
|
#include "archive_r/entry_fault.h"
|
|
19
13
|
#include "archive_r/entry_metadata.h"
|
|
20
14
|
#include "archive_r/path_hierarchy.h"
|
|
15
|
+
#include "archive_r/platform_compat.h"
|
|
21
16
|
|
|
22
17
|
namespace archive_r {
|
|
23
18
|
|
|
@@ -36,8 +31,21 @@ struct MultiVolumeGroupOptions {
|
|
|
36
31
|
* - Content access (read operations)
|
|
37
32
|
* - Multi-volume archive grouping support
|
|
38
33
|
*
|
|
39
|
-
*
|
|
40
|
-
*
|
|
34
|
+
* \par Lifetime and Copying
|
|
35
|
+
* - An Entry& obtained while iterating a Traverser is typically valid until the
|
|
36
|
+
* iterator advances.
|
|
37
|
+
* - Entry is copyable. Copies retain metadata (name/path/metadata/etc), but do not
|
|
38
|
+
* retain traverser-managed traversal control state. Calling set_descent() or
|
|
39
|
+
* set_multi_volume_group() on such copies will report a fault and has no effect.
|
|
40
|
+
* Prefer calling these control methods on the Entry& inside the iteration loop,
|
|
41
|
+
* before advancing.
|
|
42
|
+
*
|
|
43
|
+
* \par Reading
|
|
44
|
+
* - read() returns >0 for bytes read, 0 for EOF, -1 for error.
|
|
45
|
+
* - On error, read() dispatches an EntryFault via the registered fault callback
|
|
46
|
+
* (if any).
|
|
47
|
+
* - After any successful read() (including EOF), descent is disabled until
|
|
48
|
+
* explicitly re-enabled via set_descent(true).
|
|
41
49
|
*/
|
|
42
50
|
class Entry {
|
|
43
51
|
public:
|
|
@@ -89,8 +97,8 @@ public:
|
|
|
89
97
|
/**
|
|
90
98
|
* @brief Read data from the entry
|
|
91
99
|
*
|
|
92
|
-
|
|
93
|
-
|
|
100
|
+
* Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
|
|
101
|
+
* if the owning iterator advances.
|
|
94
102
|
*
|
|
95
103
|
* @param buffer Buffer to read data into
|
|
96
104
|
* @param length Maximum number of bytes to read
|
|
@@ -101,6 +109,9 @@ public:
|
|
|
101
109
|
/**
|
|
102
110
|
* @brief Enable or disable automatic descent into this entry
|
|
103
111
|
* @param enabled true to descend (default), false to keep traversal at current level
|
|
112
|
+
*
|
|
113
|
+
* This control is only available for entries that are managed by a Traverser.
|
|
114
|
+
* Calling this on an Entry that is not traverser-managed reports a fault.
|
|
104
115
|
*/
|
|
105
116
|
void set_descent(bool enabled);
|
|
106
117
|
|
|
@@ -128,6 +139,9 @@ public:
|
|
|
128
139
|
* }
|
|
129
140
|
* }
|
|
130
141
|
* @endcode
|
|
142
|
+
*
|
|
143
|
+
* This control is only available for entries that are managed by a Traverser.
|
|
144
|
+
* Calling this on an Entry that is not traverser-managed reports a fault.
|
|
131
145
|
*/
|
|
132
146
|
void set_multi_volume_group(const std::string &base_name, const MultiVolumeGroupOptions &options = {});
|
|
133
147
|
|
|
@@ -18,7 +18,6 @@ namespace archive_r {
|
|
|
18
18
|
* A component can be one of three shapes:
|
|
19
19
|
* - single string value (most common)
|
|
20
20
|
* - multi-volume part list (split archives that share a common base name)
|
|
21
|
-
* - nested list of child entries (used for synthetic grouping)
|
|
22
21
|
*/
|
|
23
22
|
class PathEntry {
|
|
24
23
|
public:
|
|
@@ -27,8 +26,6 @@ public:
|
|
|
27
26
|
enum class Ordering { Natural, Given } ordering = Ordering::Natural;
|
|
28
27
|
};
|
|
29
28
|
|
|
30
|
-
using NodeList = std::vector<PathEntry>;
|
|
31
|
-
|
|
32
29
|
PathEntry() = default;
|
|
33
30
|
|
|
34
31
|
explicit PathEntry(std::string value)
|
|
@@ -37,9 +34,6 @@ public:
|
|
|
37
34
|
explicit PathEntry(Parts parts)
|
|
38
35
|
: _value(std::move(parts)) {}
|
|
39
36
|
|
|
40
|
-
explicit PathEntry(NodeList nodes)
|
|
41
|
-
: _value(std::move(nodes)) {}
|
|
42
|
-
|
|
43
37
|
static PathEntry single(std::string entry) { return PathEntry(std::move(entry)); }
|
|
44
38
|
|
|
45
39
|
static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
|
|
@@ -50,24 +44,14 @@ public:
|
|
|
50
44
|
return PathEntry(std::move(parts));
|
|
51
45
|
}
|
|
52
46
|
|
|
53
|
-
static PathEntry nested(NodeList hierarchies) {
|
|
54
|
-
if (hierarchies.empty()) {
|
|
55
|
-
throw std::invalid_argument("nested hierarchies cannot be empty");
|
|
56
|
-
}
|
|
57
|
-
return PathEntry(std::move(hierarchies));
|
|
58
|
-
}
|
|
59
|
-
|
|
60
47
|
bool is_single() const { return std::holds_alternative<std::string>(_value); }
|
|
61
48
|
bool is_multi_volume() const { return std::holds_alternative<Parts>(_value); }
|
|
62
|
-
bool is_nested() const { return std::holds_alternative<NodeList>(_value); }
|
|
63
49
|
const std::string &single_value() const { return std::get<std::string>(_value); }
|
|
64
50
|
const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
|
|
65
51
|
Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
|
|
66
|
-
const NodeList &nested_nodes() const { return std::get<NodeList>(_value); }
|
|
67
|
-
NodeList &nested_nodes_mut() { return std::get<NodeList>(_value); }
|
|
68
52
|
|
|
69
53
|
private:
|
|
70
|
-
std::variant<std::string, Parts
|
|
54
|
+
std::variant<std::string, Parts> _value;
|
|
71
55
|
};
|
|
72
56
|
|
|
73
57
|
using PathHierarchy = std::vector<PathEntry>;
|
|
@@ -76,11 +60,10 @@ using PathHierarchy = std::vector<PathEntry>;
|
|
|
76
60
|
* Compare two entries using the ordering enforced throughout archive_r.
|
|
77
61
|
*
|
|
78
62
|
* Ordering rules:
|
|
79
|
-
* 1. Entry categories are ordered single < multi-volume
|
|
63
|
+
* 1. Entry categories are ordered single < multi-volume.
|
|
80
64
|
* 2. Single entries compare by string value.
|
|
81
65
|
* 3. Multi-volume entries first compare their ordering flag (Natural < Given),
|
|
82
66
|
* then compare corresponding part names lexicographically, finally by list length.
|
|
83
|
-
* 4. Nested node-lists compare child entries pairwise using the same rules.
|
|
84
67
|
*/
|
|
85
68
|
int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
|
|
86
69
|
|
|
@@ -7,8 +7,8 @@
|
|
|
7
7
|
|
|
8
8
|
#if defined(_WIN32)
|
|
9
9
|
# include <sys/stat.h>
|
|
10
|
+
# include <BaseTsd.h>
|
|
10
11
|
# if !defined(_SSIZE_T_DEFINED)
|
|
11
|
-
# include <BaseTsd.h>
|
|
12
12
|
using ssize_t = SSIZE_T;
|
|
13
13
|
# define _SSIZE_T_DEFINED
|
|
14
14
|
# endif
|
|
@@ -17,3 +17,18 @@ using mode_t = unsigned short; // MSVC does not expose POSIX mode_t by default
|
|
|
17
17
|
# define _MODE_T_DEFINED
|
|
18
18
|
# endif
|
|
19
19
|
#endif
|
|
20
|
+
|
|
21
|
+
namespace archive_r {
|
|
22
|
+
|
|
23
|
+
// Expose POSIX-like types within the archive_r namespace.
|
|
24
|
+
// - On POSIX platforms, ssize_t/mode_t come from <sys/types.h>.
|
|
25
|
+
// - On Windows, platform_compat provides fallback definitions above.
|
|
26
|
+
#if defined(_WIN32)
|
|
27
|
+
using ssize_t = SSIZE_T;
|
|
28
|
+
using mode_t = unsigned short;
|
|
29
|
+
#else
|
|
30
|
+
using ssize_t = ::ssize_t;
|
|
31
|
+
using mode_t = ::mode_t;
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
} // namespace archive_r
|
|
@@ -30,9 +30,31 @@ struct TraverserOptions {
|
|
|
30
30
|
* and filesystem directories.
|
|
31
31
|
*
|
|
32
32
|
* Uses std::filesystem for directory traversal and ArchiveStackOrchestrator for archives.
|
|
33
|
-
|
|
34
33
|
* @see Entry, ArchiveStackOrchestrator
|
|
35
34
|
*
|
|
35
|
+
* \par Inputs
|
|
36
|
+
* - The input list must not be empty, and each PathHierarchy must not be empty.
|
|
37
|
+
* Violations throw std::invalid_argument.
|
|
38
|
+
* - For the common single-root case, prefer make_single_path("...") or
|
|
39
|
+
* Traverser(const std::string&, ...).
|
|
40
|
+
*
|
|
41
|
+
* \par How Roots Are Interpreted
|
|
42
|
+
* - If the root hierarchy is exactly one single path and it refers to a directory,
|
|
43
|
+
* Traverser enumerates it using std::filesystem::recursive_directory_iterator.
|
|
44
|
+
* - Otherwise, Traverser attempts archive traversal using libarchive.
|
|
45
|
+
*
|
|
46
|
+
* \par Error Model (Exceptions vs Faults)
|
|
47
|
+
* - Invalid arguments are reported via exceptions (std::invalid_argument).
|
|
48
|
+
* - Recoverable data / I/O errors during archive traversal are reported via the
|
|
49
|
+
* global fault callback (EntryFault) and traversal continues.
|
|
50
|
+
* - Directory traversal uses std::filesystem iterators; filesystem exceptions
|
|
51
|
+
* (e.g. std::filesystem::filesystem_error) may be thrown and are not converted
|
|
52
|
+
* to faults.
|
|
53
|
+
*
|
|
54
|
+
* \par Iterator Semantics
|
|
55
|
+
* - Traverser::Iterator is an input iterator (single-pass).
|
|
56
|
+
* - Dereferencing the end iterator throws std::logic_error.
|
|
57
|
+
*
|
|
36
58
|
* Usage:
|
|
37
59
|
* Traverser traverser({make_single_path("archive.tar.gz")}); // or directory path
|
|
38
60
|
* for (Entry& entry : traverser) {
|
|
@@ -53,9 +75,21 @@ public:
|
|
|
53
75
|
* Provide one or more paths to traverse. Single-path traversal can be
|
|
54
76
|
* achieved by passing a container with one element:
|
|
55
77
|
* Traverser traverser({make_single_path("archive.tar.gz")});
|
|
78
|
+
*
|
|
79
|
+
* @throws std::invalid_argument if paths is empty or contains an empty hierarchy
|
|
56
80
|
*/
|
|
57
81
|
explicit Traverser(std::vector<PathHierarchy> paths, TraverserOptions options = {});
|
|
58
82
|
|
|
83
|
+
/**
|
|
84
|
+
* @brief Construct traverser for a single hierarchy
|
|
85
|
+
*/
|
|
86
|
+
explicit Traverser(PathHierarchy path, TraverserOptions options = {});
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* @brief Construct traverser for a single archive or directory path
|
|
90
|
+
*/
|
|
91
|
+
explicit Traverser(const std::string &path, TraverserOptions options = {});
|
|
92
|
+
|
|
59
93
|
~Traverser();
|
|
60
94
|
|
|
61
95
|
// Non-copyable
|
|
@@ -60,6 +60,11 @@ void StreamArchive::rewind() {
|
|
|
60
60
|
|
|
61
61
|
PathHierarchy StreamArchive::source_hierarchy() const { return _stream->source_hierarchy(); }
|
|
62
62
|
|
|
63
|
+
std::shared_ptr<StreamArchive> StreamArchive::parent_archive() const {
|
|
64
|
+
auto entry_stream = std::dynamic_pointer_cast<EntryPayloadStream>(_stream);
|
|
65
|
+
return entry_stream ? entry_stream->parent_archive() : nullptr;
|
|
66
|
+
}
|
|
67
|
+
|
|
63
68
|
la_ssize_t StreamArchive::read_callback_bridge(struct archive *a, void *client_data, const void **buff) {
|
|
64
69
|
auto *archive = static_cast<StreamArchive *>(client_data);
|
|
65
70
|
|
|
@@ -120,7 +125,9 @@ EntryPayloadStream::EntryPayloadStream(std::shared_ptr<StreamArchive> parent_arc
|
|
|
120
125
|
}
|
|
121
126
|
}
|
|
122
127
|
|
|
123
|
-
EntryPayloadStream::~EntryPayloadStream()
|
|
128
|
+
EntryPayloadStream::~EntryPayloadStream() {
|
|
129
|
+
deactivate_active_part();
|
|
130
|
+
}
|
|
124
131
|
|
|
125
132
|
std::shared_ptr<StreamArchive> EntryPayloadStream::parent_archive() const { return _parent_archive; }
|
|
126
133
|
|
|
@@ -143,11 +150,8 @@ void EntryPayloadStream::open_single_part(const PathHierarchy &single_part) {
|
|
|
143
150
|
}
|
|
144
151
|
|
|
145
152
|
void EntryPayloadStream::close_single_part() {
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
_parent_archive->skip_data();
|
|
153
|
+
// libarchive automatically skips unread data when reading the next header,
|
|
154
|
+
// so explicit skipping here is unnecessary; omitting it also avoids potential exceptions in the destructor.
|
|
151
155
|
}
|
|
152
156
|
|
|
153
157
|
ssize_t EntryPayloadStream::read_from_single_part(void *buffer, size_t size) {
|
|
@@ -170,7 +174,8 @@ int64_t EntryPayloadStream::size_of_single_part(const PathHierarchy &single_part
|
|
|
170
174
|
|
|
171
175
|
ArchiveStackCursor::ArchiveStackCursor()
|
|
172
176
|
: options_snapshot()
|
|
173
|
-
,
|
|
177
|
+
, _current_stream(nullptr)
|
|
178
|
+
, _current_archive(nullptr) {}
|
|
174
179
|
|
|
175
180
|
void ArchiveStackCursor::configure(const ArchiveOption &options) {
|
|
176
181
|
options_snapshot = options;
|
|
@@ -178,16 +183,16 @@ void ArchiveStackCursor::configure(const ArchiveOption &options) {
|
|
|
178
183
|
|
|
179
184
|
void ArchiveStackCursor::reset() {
|
|
180
185
|
options_snapshot = ArchiveOption{};
|
|
181
|
-
|
|
186
|
+
_current_stream = nullptr;
|
|
187
|
+
_current_archive = nullptr;
|
|
182
188
|
}
|
|
183
189
|
|
|
184
190
|
bool ArchiveStackCursor::descend() {
|
|
185
|
-
if (
|
|
186
|
-
throw std::logic_error("stream
|
|
191
|
+
if (!_current_stream) {
|
|
192
|
+
throw std::logic_error("current stream is empty");
|
|
187
193
|
}
|
|
188
194
|
|
|
189
|
-
auto stream =
|
|
190
|
-
|
|
195
|
+
auto stream = _current_stream;
|
|
191
196
|
if (auto *archive = current_archive()) {
|
|
192
197
|
if (stream && !archive->current_entry_content_ready()) {
|
|
193
198
|
stream->rewind();
|
|
@@ -196,17 +201,22 @@ bool ArchiveStackCursor::descend() {
|
|
|
196
201
|
|
|
197
202
|
PathHierarchy dummy_hierarchy = stream->source_hierarchy();
|
|
198
203
|
auto archive_ptr = std::make_shared<StreamArchive>(std::move(stream), options_snapshot);
|
|
199
|
-
|
|
200
|
-
|
|
204
|
+
_current_archive = archive_ptr;
|
|
205
|
+
_current_stream = nullptr;
|
|
201
206
|
return true;
|
|
202
207
|
}
|
|
203
208
|
|
|
204
209
|
bool ArchiveStackCursor::ascend() {
|
|
205
|
-
if (
|
|
210
|
+
if (depth() <= 0) {
|
|
206
211
|
return false;
|
|
207
212
|
}
|
|
208
213
|
|
|
209
|
-
|
|
214
|
+
if (_current_archive) {
|
|
215
|
+
_current_stream = _current_archive->get_stream();
|
|
216
|
+
_current_archive = _current_archive->parent_archive();
|
|
217
|
+
} else {
|
|
218
|
+
_current_stream = nullptr;
|
|
219
|
+
}
|
|
210
220
|
|
|
211
221
|
return true;
|
|
212
222
|
}
|
|
@@ -217,6 +227,8 @@ bool ArchiveStackCursor::next() {
|
|
|
217
227
|
return false;
|
|
218
228
|
}
|
|
219
229
|
|
|
230
|
+
_current_stream = nullptr;
|
|
231
|
+
|
|
220
232
|
while (true) {
|
|
221
233
|
if (!archive->skip_to_next_header()) {
|
|
222
234
|
return false;
|
|
@@ -225,7 +237,8 @@ bool ArchiveStackCursor::next() {
|
|
|
225
237
|
break;
|
|
226
238
|
}
|
|
227
239
|
}
|
|
228
|
-
|
|
240
|
+
|
|
241
|
+
_current_stream = create_stream(current_entry_hierarchy());
|
|
229
242
|
return true;
|
|
230
243
|
}
|
|
231
244
|
|
|
@@ -233,33 +246,32 @@ bool ArchiveStackCursor::synchronize_to_hierarchy(const PathHierarchy &target_hi
|
|
|
233
246
|
if (target_hierarchy.empty()) {
|
|
234
247
|
throw_entry_fault("target hierarchy cannot be empty", {});
|
|
235
248
|
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
249
|
+
|
|
250
|
+
// 1. Ascend until we find a common ancestor
|
|
251
|
+
while (depth() > 0) {
|
|
252
|
+
auto current_h = _current_archive->source_hierarchy();
|
|
253
|
+
if (current_h.size() <= target_hierarchy.size() &&
|
|
254
|
+
hierarchies_equal(current_h, pathhierarchy_prefix_until(target_hierarchy, current_h.size() - 1))) {
|
|
255
|
+
break;
|
|
256
|
+
}
|
|
257
|
+
ascend();
|
|
240
258
|
}
|
|
241
|
-
for (size_t depth = 0; depth < target_hierarchy.size(); ++depth) {
|
|
242
|
-
auto prefix = pathhierarchy_prefix_until(target_hierarchy, depth);
|
|
243
|
-
auto stream = stream_stack[depth];
|
|
244
259
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
260
|
+
// 2. Descend to target
|
|
261
|
+
for (size_t d = depth(); d < target_hierarchy.size(); ++d) {
|
|
262
|
+
auto prefix = pathhierarchy_prefix_until(target_hierarchy, d);
|
|
263
|
+
|
|
264
|
+
if (!_current_stream || !hierarchies_equal(_current_stream->source_hierarchy(), prefix)) {
|
|
265
|
+
_current_stream = create_stream(prefix);
|
|
266
|
+
_current_stream->rewind();
|
|
248
267
|
}
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
stream_stack.back() = stream;
|
|
253
|
-
stream->rewind();
|
|
254
|
-
|
|
255
|
-
if (depth == last_depth) {
|
|
256
|
-
return true;
|
|
268
|
+
|
|
269
|
+
if (d < target_hierarchy.size() - 1) {
|
|
270
|
+
descend();
|
|
257
271
|
}
|
|
258
|
-
// Descend into the archive for the next level of the hierarchy.
|
|
259
|
-
descend();
|
|
260
272
|
}
|
|
261
|
-
|
|
262
|
-
|
|
273
|
+
|
|
274
|
+
return true;
|
|
263
275
|
}
|
|
264
276
|
|
|
265
277
|
ssize_t ArchiveStackCursor::read(void *buff, size_t len) {
|
|
@@ -267,38 +279,22 @@ ssize_t ArchiveStackCursor::read(void *buff, size_t len) {
|
|
|
267
279
|
return 0;
|
|
268
280
|
}
|
|
269
281
|
|
|
270
|
-
if (
|
|
271
|
-
|
|
282
|
+
if (StreamArchive *archive = current_archive()) {
|
|
283
|
+
return archive->read_current(buff, len);
|
|
272
284
|
}
|
|
273
285
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
bytes = stream->read(buff, len);
|
|
277
|
-
|
|
278
|
-
if (bytes < 0) {
|
|
279
|
-
const std::string message = "Failed to read from active stream";
|
|
280
|
-
throw_entry_fault(message, current_entry_hierarchy());
|
|
286
|
+
if (_current_stream) {
|
|
287
|
+
return _current_stream->read(buff, len);
|
|
281
288
|
}
|
|
282
|
-
|
|
283
|
-
return bytes;
|
|
289
|
+
return 0;
|
|
284
290
|
}
|
|
285
291
|
|
|
286
292
|
StreamArchive *ArchiveStackCursor::current_archive() {
|
|
287
|
-
|
|
288
|
-
return nullptr;
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
const auto stream = std::dynamic_pointer_cast<EntryPayloadStream>(stream_stack.back());
|
|
292
|
-
if (!stream) {
|
|
293
|
-
return nullptr;
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
auto parent_archive = stream->parent_archive();
|
|
297
|
-
return parent_archive ? parent_archive.get() : nullptr;
|
|
293
|
+
return _current_archive.get();
|
|
298
294
|
}
|
|
299
295
|
|
|
300
296
|
PathHierarchy ArchiveStackCursor::current_entry_hierarchy() {
|
|
301
|
-
if (
|
|
297
|
+
if (depth() == 0 || (!_current_stream && !_current_archive)) {
|
|
302
298
|
return {};
|
|
303
299
|
}
|
|
304
300
|
|
|
@@ -310,7 +306,7 @@ PathHierarchy ArchiveStackCursor::current_entry_hierarchy() {
|
|
|
310
306
|
return path;
|
|
311
307
|
}
|
|
312
308
|
|
|
313
|
-
return
|
|
309
|
+
return _current_stream->source_hierarchy();
|
|
314
310
|
}
|
|
315
311
|
|
|
316
312
|
std::shared_ptr<IDataStream> ArchiveStackCursor::create_stream(const PathHierarchy &hierarchy) {
|
|
@@ -322,9 +318,7 @@ std::shared_ptr<IDataStream> ArchiveStackCursor::create_stream(const PathHierarc
|
|
|
322
318
|
}
|
|
323
319
|
return std::make_shared<SystemFileStream>(hierarchy);
|
|
324
320
|
}
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
return std::make_shared<EntryPayloadStream>(stream->parent_archive(), hierarchy);
|
|
321
|
+
return std::make_shared<EntryPayloadStream>(_current_archive, hierarchy);
|
|
328
322
|
}
|
|
329
323
|
|
|
330
324
|
} // namespace archive_r
|
|
@@ -31,6 +31,9 @@ public:
|
|
|
31
31
|
void rewind() override;
|
|
32
32
|
|
|
33
33
|
PathHierarchy source_hierarchy() const;
|
|
34
|
+
std::shared_ptr<StreamArchive> parent_archive() const;
|
|
35
|
+
|
|
36
|
+
std::shared_ptr<IDataStream> get_stream() const { return _stream; }
|
|
34
37
|
|
|
35
38
|
private:
|
|
36
39
|
static la_ssize_t read_callback_bridge(struct archive *a, void *client_data, const void **buff);
|
|
@@ -75,7 +78,7 @@ struct ArchiveStackCursor {
|
|
|
75
78
|
|
|
76
79
|
void configure(const ArchiveOption &options);
|
|
77
80
|
void reset();
|
|
78
|
-
bool has_stream() const { return
|
|
81
|
+
bool has_stream() const { return _current_stream != nullptr; }
|
|
79
82
|
|
|
80
83
|
bool descend();
|
|
81
84
|
bool ascend();
|
|
@@ -83,7 +86,16 @@ struct ArchiveStackCursor {
|
|
|
83
86
|
bool synchronize_to_hierarchy(const PathHierarchy &hierarchy);
|
|
84
87
|
ssize_t read(void *buffer, size_t len);
|
|
85
88
|
|
|
86
|
-
size_t depth() const {
|
|
89
|
+
size_t depth() const {
|
|
90
|
+
size_t d = 0;
|
|
91
|
+
auto a = _current_archive;
|
|
92
|
+
while (a) {
|
|
93
|
+
d++;
|
|
94
|
+
a = a->parent_archive();
|
|
95
|
+
}
|
|
96
|
+
return d;
|
|
97
|
+
}
|
|
98
|
+
|
|
87
99
|
StreamArchive *current_archive();
|
|
88
100
|
|
|
89
101
|
PathHierarchy current_entry_hierarchy();
|
|
@@ -91,7 +103,10 @@ struct ArchiveStackCursor {
|
|
|
91
103
|
std::shared_ptr<IDataStream> create_stream(const PathHierarchy &hierarchy);
|
|
92
104
|
|
|
93
105
|
ArchiveOption options_snapshot;
|
|
94
|
-
|
|
106
|
+
|
|
107
|
+
private:
|
|
108
|
+
std::shared_ptr<IDataStream> _current_stream;
|
|
109
|
+
std::shared_ptr<StreamArchive> _current_archive;
|
|
95
110
|
};
|
|
96
111
|
|
|
97
112
|
} // namespace archive_r
|
|
@@ -20,6 +20,7 @@ struct MultiVolumeStreamBase::Impl {
|
|
|
20
20
|
std::vector<int64_t> part_offsets;
|
|
21
21
|
std::size_t total_parts = 0;
|
|
22
22
|
std::size_t active_part_index = 0;
|
|
23
|
+
std::size_t open_part_index = 0;
|
|
23
24
|
bool part_open = false;
|
|
24
25
|
int64_t logical_offset = 0;
|
|
25
26
|
int64_t total_size = -1;
|
|
@@ -84,7 +85,7 @@ void MultiVolumeStreamBase::rewind() {
|
|
|
84
85
|
}
|
|
85
86
|
|
|
86
87
|
bool MultiVolumeStreamBase::at_end() const {
|
|
87
|
-
return
|
|
88
|
+
return _impl->active_part_index >= _impl->total_parts;
|
|
88
89
|
}
|
|
89
90
|
|
|
90
91
|
int64_t MultiVolumeStreamBase::seek(int64_t offset, int whence) {
|
|
@@ -122,14 +123,14 @@ int64_t MultiVolumeStreamBase::seek(int64_t offset, int whence) {
|
|
|
122
123
|
int64_t MultiVolumeStreamBase::tell() const { return _impl->logical_offset; }
|
|
123
124
|
|
|
124
125
|
void MultiVolumeStreamBase::Impl::ensure_part_active(std::size_t part_index) {
|
|
125
|
-
if (part_open &&
|
|
126
|
+
if (part_open && open_part_index == part_index) {
|
|
126
127
|
return;
|
|
127
128
|
}
|
|
128
129
|
|
|
129
130
|
self.deactivate_active_part();
|
|
130
131
|
PathHierarchy single_part = pathhierarchy_select_single_part(self._logical_path, part_index);
|
|
131
132
|
self.open_single_part(single_part);
|
|
132
|
-
|
|
133
|
+
open_part_index = part_index;
|
|
133
134
|
part_open = true;
|
|
134
135
|
}
|
|
135
136
|
|
|
@@ -144,7 +145,6 @@ bool MultiVolumeStreamBase::Impl::advance_to_next_part() {
|
|
|
144
145
|
if (active_part_index >= total_parts) {
|
|
145
146
|
return false;
|
|
146
147
|
}
|
|
147
|
-
self.deactivate_active_part();
|
|
148
148
|
++active_part_index;
|
|
149
149
|
return active_part_index < total_parts;
|
|
150
150
|
}
|
|
@@ -14,14 +14,9 @@ int entry_type_rank(const PathEntry &entry) {
|
|
|
14
14
|
if (entry.is_single()) {
|
|
15
15
|
return 0;
|
|
16
16
|
}
|
|
17
|
-
|
|
18
|
-
return 1;
|
|
19
|
-
}
|
|
20
|
-
return 2;
|
|
17
|
+
return 1;
|
|
21
18
|
}
|
|
22
19
|
|
|
23
|
-
int compare_node_lists_impl(const PathEntry::NodeList &lhs, const PathEntry::NodeList &rhs);
|
|
24
|
-
|
|
25
20
|
int compare_entries_impl(const PathEntry &lhs, const PathEntry &rhs) {
|
|
26
21
|
const int lhs_rank = entry_type_rank(lhs);
|
|
27
22
|
const int rhs_rank = entry_type_rank(rhs);
|
|
@@ -69,22 +64,6 @@ int compare_entries_impl(const PathEntry &lhs, const PathEntry &rhs) {
|
|
|
69
64
|
return 0;
|
|
70
65
|
}
|
|
71
66
|
|
|
72
|
-
return compare_node_lists_impl(lhs.nested_nodes(), rhs.nested_nodes());
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
int compare_node_lists_impl(const PathEntry::NodeList &lhs, const PathEntry::NodeList &rhs) {
|
|
76
|
-
const std::size_t lsize = lhs.size();
|
|
77
|
-
const std::size_t rsize = rhs.size();
|
|
78
|
-
const std::size_t compare_count = lsize < rsize ? lsize : rsize;
|
|
79
|
-
for (std::size_t i = 0; i < compare_count; ++i) {
|
|
80
|
-
const int cmp = compare_entries_impl(lhs[i], rhs[i]);
|
|
81
|
-
if (cmp != 0) {
|
|
82
|
-
return cmp;
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
if (lsize != rsize) {
|
|
86
|
-
return lsize < rsize ? -1 : 1;
|
|
87
|
-
}
|
|
88
67
|
return 0;
|
|
89
68
|
}
|
|
90
69
|
|
|
@@ -203,27 +203,6 @@ bool flatten_entry_to_string(const PathEntry &entry, std::string &output) {
|
|
|
203
203
|
return true;
|
|
204
204
|
}
|
|
205
205
|
|
|
206
|
-
if (entry.is_nested()) {
|
|
207
|
-
std::string result;
|
|
208
|
-
bool first = true;
|
|
209
|
-
for (const auto &child : entry.nested_nodes()) {
|
|
210
|
-
std::string component;
|
|
211
|
-
if (!flatten_entry_to_string(child, component)) {
|
|
212
|
-
return false;
|
|
213
|
-
}
|
|
214
|
-
if (component.empty()) {
|
|
215
|
-
return false;
|
|
216
|
-
}
|
|
217
|
-
if (!first) {
|
|
218
|
-
result.push_back('/');
|
|
219
|
-
}
|
|
220
|
-
result += component;
|
|
221
|
-
first = false;
|
|
222
|
-
}
|
|
223
|
-
output = result;
|
|
224
|
-
return !result.empty();
|
|
225
|
-
}
|
|
226
|
-
|
|
227
206
|
return false;
|
|
228
207
|
}
|
|
229
208
|
|
|
@@ -275,17 +254,7 @@ std::string path_entry_display(const PathEntry &entry) {
|
|
|
275
254
|
value.push_back(']');
|
|
276
255
|
return value;
|
|
277
256
|
}
|
|
278
|
-
|
|
279
|
-
bool first = true;
|
|
280
|
-
for (const auto &child : entry.nested_nodes()) {
|
|
281
|
-
if (!first) {
|
|
282
|
-
value.push_back('/');
|
|
283
|
-
}
|
|
284
|
-
value += path_entry_display(child);
|
|
285
|
-
first = false;
|
|
286
|
-
}
|
|
287
|
-
value.push_back('}');
|
|
288
|
-
return value;
|
|
257
|
+
return {};
|
|
289
258
|
}
|
|
290
259
|
|
|
291
260
|
std::string hierarchy_display(const PathHierarchy &hierarchy) {
|
|
@@ -78,7 +78,9 @@ SystemFileStream::SystemFileStream(PathHierarchy logical_path)
|
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
-
SystemFileStream::~SystemFileStream()
|
|
81
|
+
SystemFileStream::~SystemFileStream() {
|
|
82
|
+
deactivate_active_part();
|
|
83
|
+
}
|
|
82
84
|
|
|
83
85
|
void SystemFileStream::open_single_part(const PathHierarchy &single_part) {
|
|
84
86
|
const PathEntry &entry = single_part.back();
|
|
@@ -104,8 +106,10 @@ void SystemFileStream::open_single_part(const PathHierarchy &single_part) {
|
|
|
104
106
|
}
|
|
105
107
|
|
|
106
108
|
void SystemFileStream::close_single_part() {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
+
if (_handle) {
|
|
110
|
+
std::fclose(_handle);
|
|
111
|
+
_handle = nullptr;
|
|
112
|
+
}
|
|
109
113
|
_active_path.clear();
|
|
110
114
|
}
|
|
111
115
|
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
#include "archive_type.h"
|
|
11
11
|
#include "entry_fault_error.h"
|
|
12
12
|
#include "system_file_stream.h"
|
|
13
|
-
#include <iostream>
|
|
14
13
|
#include <filesystem>
|
|
15
14
|
#include <memory>
|
|
16
15
|
#include <stdexcept>
|
|
@@ -52,6 +51,12 @@ Traverser::Traverser(std::vector<PathHierarchy> paths, TraverserOptions options)
|
|
|
52
51
|
|
|
53
52
|
}
|
|
54
53
|
|
|
54
|
+
Traverser::Traverser(PathHierarchy path, TraverserOptions options)
|
|
55
|
+
: Traverser(std::vector<PathHierarchy>{std::move(path)}, std::move(options)) {}
|
|
56
|
+
|
|
57
|
+
Traverser::Traverser(const std::string &path, TraverserOptions options)
|
|
58
|
+
: Traverser(std::vector<PathHierarchy>{make_single_path(path)}, std::move(options)) {}
|
|
59
|
+
|
|
55
60
|
Traverser::~Traverser() = default;
|
|
56
61
|
|
|
57
62
|
// ============================================================================
|
data/lib/archive_r.rb
CHANGED
|
@@ -35,7 +35,7 @@ rescue LoadError
|
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
module Archive_r
|
|
38
|
-
VERSION = "0.1.
|
|
38
|
+
VERSION = "0.1.7"
|
|
39
39
|
# Common archive formats excluding libarchive's mtree/raw pseudo formats
|
|
40
40
|
STANDARD_FORMATS = %w[
|
|
41
41
|
7zip ar cab cpio empty iso9660 lha rar tar warc xar zip
|
|
@@ -87,6 +87,10 @@ module Archive_r
|
|
|
87
87
|
def open(paths, opts = nil, &block)
|
|
88
88
|
__archive_r_c_open(paths, Archive_r.normalize_options(opts), &block)
|
|
89
89
|
end
|
|
90
|
+
|
|
91
|
+
def open_hierarchy(hierarchy, opts = nil, &block)
|
|
92
|
+
open([hierarchy], opts, &block)
|
|
93
|
+
end
|
|
90
94
|
end
|
|
91
95
|
|
|
92
96
|
alias_method :__archive_r_c_initialize, :initialize
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: archive_r_ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- raizo.tcs
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rake
|
|
@@ -38,8 +37,10 @@ dependencies:
|
|
|
38
37
|
- - "~>"
|
|
39
38
|
- !ruby/object:Gem::Version
|
|
40
39
|
version: '5.0'
|
|
41
|
-
description: Ruby bindings for archive_r
|
|
42
|
-
|
|
40
|
+
description: Ruby bindings for archive_r, a libarchive-based library for processing
|
|
41
|
+
many archive formats. It streams entry data directly from the source to recursively
|
|
42
|
+
read nested archives without extracting to temporary files or loading large in-memory
|
|
43
|
+
buffers.
|
|
43
44
|
email:
|
|
44
45
|
- raizo.tcs@users.noreply.github.com
|
|
45
46
|
executables: []
|
|
@@ -91,7 +92,6 @@ metadata:
|
|
|
91
92
|
source_code_uri: https://github.com/raizo-tcs/archive_r
|
|
92
93
|
bug_tracker_uri: https://github.com/raizo-tcs/archive_r/issues
|
|
93
94
|
changelog_uri: https://github.com/raizo-tcs/archive_r/releases
|
|
94
|
-
post_install_message:
|
|
95
95
|
rdoc_options: []
|
|
96
96
|
require_paths:
|
|
97
97
|
- lib
|
|
@@ -106,8 +106,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
106
106
|
- !ruby/object:Gem::Version
|
|
107
107
|
version: '0'
|
|
108
108
|
requirements: []
|
|
109
|
-
rubygems_version:
|
|
110
|
-
signing_key:
|
|
109
|
+
rubygems_version: 4.0.1
|
|
111
110
|
specification_version: 4
|
|
112
|
-
summary: Ruby bindings for archive_r
|
|
111
|
+
summary: 'Ruby bindings for archive_r: libarchive-based streaming traversal for recursive
|
|
112
|
+
nested archives (no temp files, no large in-memory buffers)'
|
|
113
113
|
test_files: []
|