archive_r_ruby 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 073f63cf2d5a0b176c74824c73c10e2aa95e3038a01212812c8d10655c71727c
4
- data.tar.gz: 84d8e7c7c4f2b83696024bec5df22d5e6943f42732f17ff7ad1be6fb20831bd1
3
+ metadata.gz: 648064959d139565d2e122215cb49ae178b9d5e735a99176b8d785238b753800
4
+ data.tar.gz: d9f05748fbcca7aa76bb615b53480c9133423905c1137005d1c6df895a8b09c3
5
5
  SHA512:
6
- metadata.gz: acd57fa4d9d0009832296a1a48b53c2b04959a639f291ffe564f62f23d40feddc66ec42289f788668e89d11728f09d4ebb3d7f0abc185c20f62e05622140c0a0
7
- data.tar.gz: 802effdd52cbc83796e27d81a87ec75745d4993c3f2bde1619a3489ab18730e4f6ed2b39f70f8987b5bbb7765f013a18ad360c54e52c59defe53ae0ccdf88cc5
6
+ metadata.gz: dd1b45fd72e08536d0b95b261b8ca071d68fa0a1840c984c9d5aae4d9449b394a11ad6e7f8a4ca8df53b68ee7aef68791468e98404fd6a4ec098a34d831affd6
7
+ data.tar.gz: ebf2a08b3ff59ba317b48cd175d87bf0624e2d4c67d2c95c90b1c8fb32cd9c1afa6ff48f0d24cb467401d3b9b2364cf368946cf4af01e8bdb744431ee9cd961f
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  archive_r License
2
- Version: 0.1.3 (2025-12-02)
2
+ Version: 0.1.7 (2025-12-16)
3
3
 
4
4
  ----------------------------------------
5
5
  Primary License
@@ -66,12 +66,32 @@ The following components are redistributed only because libarchive (bundled with
66
66
  - Purpose: libarchive dependency providing Zstandard compression; shipped within archive_r binaries.
67
67
  - License: BSD License (https://github.com/facebook/zstd)
68
68
 
69
- 8. OpenSSL 3
70
- - Purpose: libarchive dependency providing cryptographic support for encrypted archives; included with archive_r packages.
71
- - License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
69
+ 8. Nettle
70
+ - Purpose: libarchive dependency providing cryptographic support (macOS/Linux); bundled with archive_r binaries.
71
+ - License: GNU LGPLv3+ or GNU GPLv2+ (https://www.lysator.liu.se/~nisse/nettle/)
72
72
 
73
- 9. libiconv / libcharset
74
- - Purpose: libxml2/libarchive dependency for character set conversion; redistributed with archive_r artifacts.
75
- - License: GNU LGPLv2.1+ (https://www.gnu.org/software/libiconv/)
73
+ 9. mini-gmp
74
+ - Purpose: Nettle dependency for arithmetic operations (macOS/Linux); bundled with archive_r binaries.
75
+ - License: GNU LGPLv3+ or GNU GPLv2+ (https://gmplib.org/)
76
+
77
+ 10. OpenSSL 3
78
+ - Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r Windows wheels.
79
+ - License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
80
+
81
+ 11. lz4
82
+ - Purpose: libarchive dependency providing LZ4 compression; shipped with archive_r artifacts when required.
83
+ - License: BSD 2-Clause (https://github.com/lz4/lz4)
84
+
85
+ 12. libb2 (BLAKE2)
86
+ - Purpose: libarchive dependency providing BLAKE2 hashing; bundled when archive formats require it.
87
+ - License: CC0 1.0 Universal (https://github.com/BLAKE2/libb2)
88
+
89
+ 13. libattr
90
+ - Purpose: libarchive dependency providing extended attribute support on POSIX platforms; included in POSIX builds only.
91
+ - License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/attr)
92
+
93
+ 14. libacl
94
+ - Purpose: libarchive dependency providing POSIX ACL support; included in POSIX builds only.
95
+ - License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/acl)
76
96
  Users of archive_r should review the linked third-party licenses to ensure
77
97
  compliance with their terms when redistributing this software.
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # archive_r Ruby Binding
2
2
 
3
+ Ruby bindings for archive_r, a libarchive-based library for processing many archive formats.
4
+ It streams entry data directly from the source to recursively read nested archives without extracting to temporary files or loading large in-memory buffers.
5
+
3
6
  Ruby bindings expose the archive_r traverser API with a natural, block-friendly interface. This document consolidates the Ruby-specific instructions that previously lived in the repository root README.
4
7
 
5
8
  ## Requirements
@@ -37,9 +40,9 @@ bundle exec rake build # creates archive_r-<version>.gem locally
37
40
 
38
41
  The `rake test` task compiles the extension, installs it into `lib/`, and executes the Minitest suite.
39
42
 
40
- ## Running the full repository test suite
43
+ ## Running the repository test suite
41
44
 
42
- `./run_tests.sh` prepares a clean GEM_HOME (`build/ruby_gem_home`), installs the gem produced in `build/bindings/ruby`, and runs `bindings/ruby/test/test_traverser.rb`. The script now streams the `gem install` log to the console and preserves it in `build/logs/ruby_gem_install.log` for later inspection.
45
+ From the repository root run `./bindings/ruby/run_binding_tests.sh`. The script prepares a clean GEM_HOME (`build/ruby_gem_home`), installs the gem produced in `build/bindings/ruby`, runs `bindings/ruby/test/test_traverser.rb`, and saves the install log to `build/logs/ruby_gem_install.log`. CI invokes this script after the core tests.
43
46
 
44
47
  ## Usage Example
45
48
 
@@ -72,11 +72,7 @@ static VALUE path_entry_to_rb(const PathEntry &entry) {
72
72
  }
73
73
  return array;
74
74
  }
75
- VALUE array = rb_ary_new_capa(entry.nested_nodes().size());
76
- for (const auto &child : entry.nested_nodes()) {
77
- rb_ary_push(array, path_entry_to_rb(child));
78
- }
79
- return array;
75
+ return Qnil;
80
76
  }
81
77
 
82
78
  static VALUE path_hierarchy_to_rb(const PathHierarchy &hierarchy) {
@@ -118,7 +114,7 @@ public:
118
114
  }
119
115
 
120
116
  ~RubyUserStream() override {
121
- release_active_io();
117
+ release_active_io_resources(false);
122
118
  rb_gc_unregister_address(&_ruby_stream);
123
119
  }
124
120
 
@@ -140,7 +136,7 @@ protected:
140
136
 
141
137
  void close_single_part() override {
142
138
  if (_has_open_part_io) {
143
- release_active_io();
139
+ release_active_io_resources(true);
144
140
  if (_has_close_part_io) {
145
141
  rb_funcall(_ruby_stream, rb_id_close_part_io_method, 0);
146
142
  }
@@ -250,7 +246,7 @@ private:
250
246
  if (!rb_respond_to(io, rb_id_read_method)) {
251
247
  rb_raise(rb_eTypeError, "open_part_io must return an object responding to #read");
252
248
  }
253
- release_active_io();
249
+ release_active_io_resources(true);
254
250
  _active_io = io;
255
251
  rb_gc_register_address(&_active_io);
256
252
  _active_io_seekable = rb_respond_to(io, rb_id_seek_method);
@@ -259,11 +255,11 @@ private:
259
255
  _active_io_has_size = rb_respond_to(io, rb_id_size_method);
260
256
  }
261
257
 
262
- void release_active_io() {
258
+ void release_active_io_resources(bool close_io) {
263
259
  if (_active_io == Qnil) {
264
260
  return;
265
261
  }
266
- if (_active_io_has_close) {
262
+ if (close_io && _active_io_has_close) {
267
263
  rb_funcall(_active_io, rb_id_close_method, 0);
268
264
  }
269
265
  rb_gc_unregister_address(&_active_io);
@@ -509,12 +505,7 @@ static PathEntry rb_value_to_path_entry(VALUE value) {
509
505
  return PathEntry::multi_volume(std::move(parts));
510
506
  }
511
507
 
512
- PathEntry::NodeList nodes;
513
- nodes.reserve(static_cast<size_t>(length));
514
- for (long i = 0; i < length; ++i) {
515
- nodes.emplace_back(rb_value_to_path_entry(rb_ary_entry(array, i)));
516
- }
517
- return PathEntry::nested(std::move(nodes));
508
+ rb_raise(rb_eTypeError, "PathEntry array must contain only Strings");
518
509
  }
519
510
 
520
511
  // Helper: Convert Ruby path argument into vector of PathHierarchy
@@ -46,6 +46,7 @@ end
46
46
  archive_r_include = File.join(archive_r_root, 'include')
47
47
  archive_r_src = File.join(archive_r_root, 'src')
48
48
  archive_r_lib_dir = File.join(archive_r_root, 'build')
49
+ archive_r_local_libs = File.expand_path('.libs', __dir__)
49
50
  glue_source = File.join(__dir__, 'archive_r_ext.cc')
50
51
 
51
52
  # Ensure make can locate vendored sources via VPATH
@@ -58,6 +59,11 @@ $VPATH << archive_r_src
58
59
  # Add include paths
59
60
  $INCFLAGS << " -I#{archive_r_include}"
60
61
  $INCFLAGS << " -I#{archive_r_src}"
62
+ $LIBPATH.unshift(archive_r_local_libs)
63
+
64
+ unless Gem.win_platform?
65
+ $LDFLAGS << ' -Wl,-rpath,$ORIGIN/.libs'
66
+ end
61
67
 
62
68
  # C++17 standard
63
69
  $CXXFLAGS << " -std=c++17"
@@ -82,31 +88,30 @@ if ENV['LIBARCHIVE_LIBRARY_DIRS']
82
88
  end
83
89
 
84
90
  # Check for libarchive
85
- unless have_library('archive')
86
- # Try alternative names for Windows/Static builds
87
- unless have_library('archive_static') || have_library('libarchive')
88
- abort "libarchive is required but not found"
89
- end
91
+ unless have_library('archive') || have_library('libarchive')
92
+ abort "libarchive is required but not found"
90
93
  end
91
94
 
92
- # Try to link with pre-built static library first
93
- prebuilt_lib = File.join(archive_r_lib_dir, 'libarchive_r_core.a')
94
- prebuilt_lib_win = File.join(archive_r_lib_dir, 'archive_r_core.lib')
95
- prebuilt_lib_win_release = File.join(archive_r_lib_dir, 'Release', 'archive_r_core.lib')
96
-
97
- if File.exist?(prebuilt_lib)
98
- $LOCAL_LIBS << " #{prebuilt_lib}"
99
- puts "Using pre-built archive_r core library (Unix style)"
100
- elsif File.exist?(prebuilt_lib_win)
101
- $LOCAL_LIBS << " \"#{prebuilt_lib_win}\""
102
- puts "Using pre-built archive_r core library (Windows style)"
103
- elsif File.exist?(prebuilt_lib_win_release)
104
- $LOCAL_LIBS << " \"#{prebuilt_lib_win_release}\""
105
- puts "Using pre-built archive_r core library (Windows Release style)"
95
+ shared_candidates = [
96
+ File.join(archive_r_lib_dir, 'libarchive_r_core.so'),
97
+ File.join(archive_r_lib_dir, 'libarchive_r_core.dylib'),
98
+ File.join(archive_r_lib_dir, 'archive_r_core.dll'),
99
+ File.join(archive_r_lib_dir, 'archive_r_core.lib'),
100
+ File.join(archive_r_lib_dir, 'Release', 'archive_r_core.dll'),
101
+ File.join(archive_r_lib_dir, 'Release', 'archive_r_core.lib'),
102
+ File.join(archive_r_local_libs, 'libarchive_r_core.so'),
103
+ File.join(archive_r_local_libs, 'libarchive_r_core.dylib'),
104
+ File.join(archive_r_local_libs, 'archive_r_core.dll'),
105
+ ]
106
+
107
+ found_shared = shared_candidates.find { |path| File.exist?(path) }
108
+
109
+ if found_shared
110
+ $LIBPATH.unshift(File.dirname(found_shared))
111
+ $libs = "-larchive_r_core #{$libs}"
112
+ puts "Using pre-built shared archive_r core: #{found_shared}"
106
113
  else
107
- # Build from source as fallback (ensure the Ruby glue source is compiled too)
108
- puts "Pre-built library not found, will build from source"
109
-
114
+ puts "Pre-built shared library not found, will build from source"
110
115
  srcs = [glue_source] + Dir.glob(File.join(archive_r_src, '*.cc'))
111
116
  $srcs = srcs
112
117
  end
@@ -1,5 +1,5 @@
1
1
  archive_r License
2
- Version: 0.1.3 (2025-12-02)
2
+ Version: 0.1.7 (2025-12-16)
3
3
 
4
4
  ----------------------------------------
5
5
  Primary License
@@ -66,12 +66,32 @@ The following components are redistributed only because libarchive (bundled with
66
66
  - Purpose: libarchive dependency providing Zstandard compression; shipped within archive_r binaries.
67
67
  - License: BSD License (https://github.com/facebook/zstd)
68
68
 
69
- 8. OpenSSL 3
70
- - Purpose: libarchive dependency providing cryptographic support for encrypted archives; included with archive_r packages.
71
- - License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
69
+ 8. Nettle
70
+ - Purpose: libarchive dependency providing cryptographic support (macOS/Linux); bundled with archive_r binaries.
71
+ - License: GNU LGPLv3+ or GNU GPLv2+ (https://www.lysator.liu.se/~nisse/nettle/)
72
72
 
73
- 9. libiconv / libcharset
74
- - Purpose: libxml2/libarchive dependency for character set conversion; redistributed with archive_r artifacts.
75
- - License: GNU LGPLv2.1+ (https://www.gnu.org/software/libiconv/)
73
+ 9. mini-gmp
74
+ - Purpose: Nettle dependency for arithmetic operations (macOS/Linux); bundled with archive_r binaries.
75
+ - License: GNU LGPLv3+ or GNU GPLv2+ (https://gmplib.org/)
76
+
77
+ 10. OpenSSL 3
78
+ - Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r Windows wheels.
79
+ - License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
80
+
81
+ 11. lz4
82
+ - Purpose: libarchive dependency providing LZ4 compression; shipped with archive_r artifacts when required.
83
+ - License: BSD 2-Clause (https://github.com/lz4/lz4)
84
+
85
+ 12. libb2 (BLAKE2)
86
+ - Purpose: libarchive dependency providing BLAKE2 hashing; bundled when archive formats require it.
87
+ - License: CC0 1.0 Universal (https://github.com/BLAKE2/libb2)
88
+
89
+ 13. libattr
90
+ - Purpose: libarchive dependency providing extended attribute support on POSIX platforms; included in POSIX builds only.
91
+ - License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/attr)
92
+
93
+ 14. libacl
94
+ - Purpose: libarchive dependency providing POSIX ACL support; included in POSIX builds only.
95
+ - License: LGPL-2.1-or-later for the library (https://savannah.nongnu.org/projects/acl)
76
96
  Users of archive_r should review the linked third-party licenses to ensure
77
97
  compliance with their terms when redistributing this software.
@@ -3,23 +3,13 @@
3
3
 
4
4
  #pragma once
5
5
 
6
+ #include "archive_r/platform_compat.h"
6
7
  #include "archive_r/path_hierarchy.h"
7
8
 
8
9
  #include <functional>
9
10
  #include <memory>
10
- #include <sys/types.h>
11
11
  #include <cstdint>
12
12
 
13
- #ifdef _WIN32
14
- #include <basetsd.h>
15
- using ssize_t = SSIZE_T;
16
- #endif
17
-
18
- // Avoid conflict with potential 'read' macro on Windows
19
- #ifdef read
20
- #undef read
21
- #endif
22
-
23
13
  namespace archive_r {
24
14
 
25
15
  /**
@@ -7,17 +7,12 @@
7
7
  #include <filesystem>
8
8
  #include <memory>
9
9
  #include <string>
10
- #include <sys/types.h>
11
10
  #include <vector>
12
11
 
13
- #ifdef _MSC_VER
14
- #include <BaseTsd.h>
15
- typedef SSIZE_T ssize_t;
16
- #endif
17
-
18
12
  #include "archive_r/entry_fault.h"
19
13
  #include "archive_r/entry_metadata.h"
20
14
  #include "archive_r/path_hierarchy.h"
15
+ #include "archive_r/platform_compat.h"
21
16
 
22
17
  namespace archive_r {
23
18
 
@@ -36,8 +31,21 @@ struct MultiVolumeGroupOptions {
36
31
  * - Content access (read operations)
37
32
  * - Multi-volume archive grouping support
38
33
  *
39
- * Entry objects are typically obtained from ArchiveTraverser::Iterator and
40
- * remain valid until the iterator advances.
34
+ * \par Lifetime and Copying
35
+ * - An Entry& obtained while iterating a Traverser is typically valid until the
36
+ * iterator advances.
37
+ * - Entry is copyable. Copies retain metadata (name/path/metadata/etc), but do not
38
+ * retain traverser-managed traversal control state. Calling set_descent() or
39
+ * set_multi_volume_group() on such copies will report a fault and has no effect.
40
+ * Prefer calling these control methods on the Entry& inside the iteration loop,
41
+ * before advancing.
42
+ *
43
+ * \par Reading
44
+ * - read() returns >0 for bytes read, 0 for EOF, -1 for error.
45
+ * - On error, read() dispatches an EntryFault via the registered fault callback
46
+ * (if any).
47
+ * - After any successful read() (including EOF), descent is disabled until
48
+ * explicitly re-enabled via set_descent(true).
41
49
  */
42
50
  class Entry {
43
51
  public:
@@ -89,8 +97,8 @@ public:
89
97
  /**
90
98
  * @brief Read data from the entry
91
99
  *
92
- * Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
93
- * if the owning iterator advances or other traversal work continues in parallel.
100
+ * Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
101
+ * if the owning iterator advances.
94
102
  *
95
103
  * @param buffer Buffer to read data into
96
104
  * @param length Maximum number of bytes to read
@@ -101,6 +109,9 @@ public:
101
109
  /**
102
110
  * @brief Enable or disable automatic descent into this entry
103
111
  * @param enabled true to descend (default), false to keep traversal at current level
112
+ *
113
+ * This control is only available for entries that are managed by a Traverser.
114
+ * Calling this on an Entry that is not traverser-managed reports a fault.
104
115
  */
105
116
  void set_descent(bool enabled);
106
117
 
@@ -128,6 +139,9 @@ public:
128
139
  * }
129
140
  * }
130
141
  * @endcode
142
+ *
143
+ * This control is only available for entries that are managed by a Traverser.
144
+ * Calling this on an Entry that is not traverser-managed reports a fault.
131
145
  */
132
146
  void set_multi_volume_group(const std::string &base_name, const MultiVolumeGroupOptions &options = {});
133
147
 
@@ -18,7 +18,6 @@ namespace archive_r {
18
18
  * A component can be one of three shapes:
19
19
  * - single string value (most common)
20
20
  * - multi-volume part list (split archives that share a common base name)
21
- * - nested list of child entries (used for synthetic grouping)
22
21
  */
23
22
  class PathEntry {
24
23
  public:
@@ -27,8 +26,6 @@ public:
27
26
  enum class Ordering { Natural, Given } ordering = Ordering::Natural;
28
27
  };
29
28
 
30
- using NodeList = std::vector<PathEntry>;
31
-
32
29
  PathEntry() = default;
33
30
 
34
31
  explicit PathEntry(std::string value)
@@ -37,9 +34,6 @@ public:
37
34
  explicit PathEntry(Parts parts)
38
35
  : _value(std::move(parts)) {}
39
36
 
40
- explicit PathEntry(NodeList nodes)
41
- : _value(std::move(nodes)) {}
42
-
43
37
  static PathEntry single(std::string entry) { return PathEntry(std::move(entry)); }
44
38
 
45
39
  static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
@@ -50,24 +44,14 @@ public:
50
44
  return PathEntry(std::move(parts));
51
45
  }
52
46
 
53
- static PathEntry nested(NodeList hierarchies) {
54
- if (hierarchies.empty()) {
55
- throw std::invalid_argument("nested hierarchies cannot be empty");
56
- }
57
- return PathEntry(std::move(hierarchies));
58
- }
59
-
60
47
  bool is_single() const { return std::holds_alternative<std::string>(_value); }
61
48
  bool is_multi_volume() const { return std::holds_alternative<Parts>(_value); }
62
- bool is_nested() const { return std::holds_alternative<NodeList>(_value); }
63
49
  const std::string &single_value() const { return std::get<std::string>(_value); }
64
50
  const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
65
51
  Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
66
- const NodeList &nested_nodes() const { return std::get<NodeList>(_value); }
67
- NodeList &nested_nodes_mut() { return std::get<NodeList>(_value); }
68
52
 
69
53
  private:
70
- std::variant<std::string, Parts, NodeList> _value;
54
+ std::variant<std::string, Parts> _value;
71
55
  };
72
56
 
73
57
  using PathHierarchy = std::vector<PathEntry>;
@@ -76,11 +60,10 @@ using PathHierarchy = std::vector<PathEntry>;
76
60
  * Compare two entries using the ordering enforced throughout archive_r.
77
61
  *
78
62
  * Ordering rules:
79
- * 1. Entry categories are ordered single < multi-volume < nested node-list.
63
+ * 1. Entry categories are ordered single < multi-volume.
80
64
  * 2. Single entries compare by string value.
81
65
  * 3. Multi-volume entries first compare their ordering flag (Natural < Given),
82
66
  * then compare corresponding part names lexicographically, finally by list length.
83
- * 4. Nested node-lists compare child entries pairwise using the same rules.
84
67
  */
85
68
  int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
86
69
 
@@ -7,8 +7,8 @@
7
7
 
8
8
  #if defined(_WIN32)
9
9
  # include <sys/stat.h>
10
+ # include <BaseTsd.h>
10
11
  # if !defined(_SSIZE_T_DEFINED)
11
- # include <BaseTsd.h>
12
12
  using ssize_t = SSIZE_T;
13
13
  # define _SSIZE_T_DEFINED
14
14
  # endif
@@ -17,3 +17,18 @@ using mode_t = unsigned short; // MSVC does not expose POSIX mode_t by default
17
17
  # define _MODE_T_DEFINED
18
18
  # endif
19
19
  #endif
20
+
21
+ namespace archive_r {
22
+
23
+ // Expose POSIX-like types within the archive_r namespace.
24
+ // - On POSIX platforms, ssize_t/mode_t come from <sys/types.h>.
25
+ // - On Windows, platform_compat provides fallback definitions above.
26
+ #if defined(_WIN32)
27
+ using ssize_t = SSIZE_T;
28
+ using mode_t = unsigned short;
29
+ #else
30
+ using ssize_t = ::ssize_t;
31
+ using mode_t = ::mode_t;
32
+ #endif
33
+
34
+ } // namespace archive_r
@@ -30,9 +30,31 @@ struct TraverserOptions {
30
30
  * and filesystem directories.
31
31
  *
32
32
  * Uses std::filesystem for directory traversal and ArchiveStackOrchestrator for archives.
33
-
34
33
  * @see Entry, ArchiveStackOrchestrator
35
34
  *
35
+ * \par Inputs
36
+ * - The input list must not be empty, and each PathHierarchy must not be empty.
37
+ * Violations throw std::invalid_argument.
38
+ * - For the common single-root case, prefer make_single_path("...") or
39
+ * Traverser(const std::string&, ...).
40
+ *
41
+ * \par How Roots Are Interpreted
42
+ * - If the root hierarchy is exactly one single path and it refers to a directory,
43
+ * Traverser enumerates it using std::filesystem::recursive_directory_iterator.
44
+ * - Otherwise, Traverser attempts archive traversal using libarchive.
45
+ *
46
+ * \par Error Model (Exceptions vs Faults)
47
+ * - Invalid arguments are reported via exceptions (std::invalid_argument).
48
+ * - Recoverable data / I/O errors during archive traversal are reported via the
49
+ * global fault callback (EntryFault) and traversal continues.
50
+ * - Directory traversal uses std::filesystem iterators; filesystem exceptions
51
+ * (e.g. std::filesystem::filesystem_error) may be thrown and are not converted
52
+ * to faults.
53
+ *
54
+ * \par Iterator Semantics
55
+ * - Traverser::Iterator is an input iterator (single-pass).
56
+ * - Dereferencing the end iterator throws std::logic_error.
57
+ *
36
58
  * Usage:
37
59
  * Traverser traverser({make_single_path("archive.tar.gz")}); // or directory path
38
60
  * for (Entry& entry : traverser) {
@@ -53,9 +75,21 @@ public:
53
75
  * Provide one or more paths to traverse. Single-path traversal can be
54
76
  * achieved by passing a container with one element:
55
77
  * Traverser traverser({make_single_path("archive.tar.gz")});
78
+ *
79
+ * @throws std::invalid_argument if paths is empty or contains an empty hierarchy
56
80
  */
57
81
  explicit Traverser(std::vector<PathHierarchy> paths, TraverserOptions options = {});
58
82
 
83
+ /**
84
+ * @brief Construct traverser for a single hierarchy
85
+ */
86
+ explicit Traverser(PathHierarchy path, TraverserOptions options = {});
87
+
88
+ /**
89
+ * @brief Construct traverser for a single archive or directory path
90
+ */
91
+ explicit Traverser(const std::string &path, TraverserOptions options = {});
92
+
59
93
  ~Traverser();
60
94
 
61
95
  // Non-copyable
@@ -60,6 +60,11 @@ void StreamArchive::rewind() {
60
60
 
61
61
  PathHierarchy StreamArchive::source_hierarchy() const { return _stream->source_hierarchy(); }
62
62
 
63
+ std::shared_ptr<StreamArchive> StreamArchive::parent_archive() const {
64
+ auto entry_stream = std::dynamic_pointer_cast<EntryPayloadStream>(_stream);
65
+ return entry_stream ? entry_stream->parent_archive() : nullptr;
66
+ }
67
+
63
68
  la_ssize_t StreamArchive::read_callback_bridge(struct archive *a, void *client_data, const void **buff) {
64
69
  auto *archive = static_cast<StreamArchive *>(client_data);
65
70
 
@@ -120,7 +125,9 @@ EntryPayloadStream::EntryPayloadStream(std::shared_ptr<StreamArchive> parent_arc
120
125
  }
121
126
  }
122
127
 
123
- EntryPayloadStream::~EntryPayloadStream() = default;
128
+ EntryPayloadStream::~EntryPayloadStream() {
129
+ deactivate_active_part();
130
+ }
124
131
 
125
132
  std::shared_ptr<StreamArchive> EntryPayloadStream::parent_archive() const { return _parent_archive; }
126
133
 
@@ -143,11 +150,8 @@ void EntryPayloadStream::open_single_part(const PathHierarchy &single_part) {
143
150
  }
144
151
 
145
152
  void EntryPayloadStream::close_single_part() {
146
- if (_parent_archive->current_entryname.empty()) {
147
- return;
148
- }
149
-
150
- _parent_archive->skip_data();
153
+ // libarchive automatically skips unread data when reading the next header,
154
+ // so explicit skipping here is unnecessary and avoids potential exceptions in destructor.
151
155
  }
152
156
 
153
157
  ssize_t EntryPayloadStream::read_from_single_part(void *buffer, size_t size) {
@@ -170,7 +174,8 @@ int64_t EntryPayloadStream::size_of_single_part(const PathHierarchy &single_part
170
174
 
171
175
  ArchiveStackCursor::ArchiveStackCursor()
172
176
  : options_snapshot()
173
- , stream_stack() {}
177
+ , _current_stream(nullptr)
178
+ , _current_archive(nullptr) {}
174
179
 
175
180
  void ArchiveStackCursor::configure(const ArchiveOption &options) {
176
181
  options_snapshot = options;
@@ -178,16 +183,16 @@ void ArchiveStackCursor::configure(const ArchiveOption &options) {
178
183
 
179
184
  void ArchiveStackCursor::reset() {
180
185
  options_snapshot = ArchiveOption{};
181
- stream_stack.clear();
186
+ _current_stream = nullptr;
187
+ _current_archive = nullptr;
182
188
  }
183
189
 
184
190
  bool ArchiveStackCursor::descend() {
185
- if (stream_stack.empty()) {
186
- throw std::logic_error("stream stack is empty");
191
+ if (!_current_stream) {
192
+ throw std::logic_error("current stream is empty");
187
193
  }
188
194
 
189
- auto stream = stream_stack.back();
190
-
195
+ auto stream = _current_stream;
191
196
  if (auto *archive = current_archive()) {
192
197
  if (stream && !archive->current_entry_content_ready()) {
193
198
  stream->rewind();
@@ -196,17 +201,22 @@ bool ArchiveStackCursor::descend() {
196
201
 
197
202
  PathHierarchy dummy_hierarchy = stream->source_hierarchy();
198
203
  auto archive_ptr = std::make_shared<StreamArchive>(std::move(stream), options_snapshot);
199
- append_single(dummy_hierarchy, std::string{});
200
- stream_stack.emplace_back(std::make_shared<EntryPayloadStream>(archive_ptr, std::move(dummy_hierarchy)));
204
+ _current_archive = archive_ptr;
205
+ _current_stream = nullptr;
201
206
  return true;
202
207
  }
203
208
 
204
209
  bool ArchiveStackCursor::ascend() {
205
- if (stream_stack.size() <= 0) {
210
+ if (depth() <= 0) {
206
211
  return false;
207
212
  }
208
213
 
209
- stream_stack.pop_back();
214
+ if (_current_archive) {
215
+ _current_stream = _current_archive->get_stream();
216
+ _current_archive = _current_archive->parent_archive();
217
+ } else {
218
+ _current_stream = nullptr;
219
+ }
210
220
 
211
221
  return true;
212
222
  }
@@ -217,6 +227,8 @@ bool ArchiveStackCursor::next() {
217
227
  return false;
218
228
  }
219
229
 
230
+ _current_stream = nullptr;
231
+
220
232
  while (true) {
221
233
  if (!archive->skip_to_next_header()) {
222
234
  return false;
@@ -225,7 +237,8 @@ bool ArchiveStackCursor::next() {
225
237
  break;
226
238
  }
227
239
  }
228
- stream_stack.back() = create_stream(current_entry_hierarchy());
240
+
241
+ _current_stream = create_stream(current_entry_hierarchy());
229
242
  return true;
230
243
  }
231
244
 
@@ -233,33 +246,32 @@ bool ArchiveStackCursor::synchronize_to_hierarchy(const PathHierarchy &target_hi
233
246
  if (target_hierarchy.empty()) {
234
247
  throw_entry_fault("target hierarchy cannot be empty", {});
235
248
  }
236
-
237
- const size_t last_depth = target_hierarchy.size() - 1;
238
- if (stream_stack.size() < target_hierarchy.size()) {
239
- stream_stack.resize(target_hierarchy.size());
249
+
250
+ // 1. Ascend until we find a common ancestor
251
+ while (depth() > 0) {
252
+ auto current_h = _current_archive->source_hierarchy();
253
+ if (current_h.size() <= target_hierarchy.size() &&
254
+ hierarchies_equal(current_h, pathhierarchy_prefix_until(target_hierarchy, current_h.size() - 1))) {
255
+ break;
256
+ }
257
+ ascend();
240
258
  }
241
- for (size_t depth = 0; depth < target_hierarchy.size(); ++depth) {
242
- auto prefix = pathhierarchy_prefix_until(target_hierarchy, depth);
243
- auto stream = stream_stack[depth];
244
259
 
245
- // Reuse the existing stream when it already matches this prefix.
246
- if (stream && hierarchies_equal(stream->source_hierarchy(), prefix)) {
247
- continue;
260
+ // 2. Descend to target
261
+ for (size_t d = depth(); d < target_hierarchy.size(); ++d) {
262
+ auto prefix = pathhierarchy_prefix_until(target_hierarchy, d);
263
+
264
+ if (!_current_stream || !hierarchies_equal(_current_stream->source_hierarchy(), prefix)) {
265
+ _current_stream = create_stream(prefix);
266
+ _current_stream->rewind();
248
267
  }
249
- // Shrink the stack to the current depth before creating a fresh stream.
250
- stream_stack.resize(depth+1);
251
- stream = create_stream(prefix);
252
- stream_stack.back() = stream;
253
- stream->rewind();
254
-
255
- if (depth == last_depth) {
256
- return true;
268
+
269
+ if (d < target_hierarchy.size() - 1) {
270
+ descend();
257
271
  }
258
- // Descend into the archive for the next level of the hierarchy.
259
- descend();
260
272
  }
261
-
262
- return true;
273
+
274
+ return true;
263
275
  }
264
276
 
265
277
  ssize_t ArchiveStackCursor::read(void *buff, size_t len) {
@@ -267,38 +279,22 @@ ssize_t ArchiveStackCursor::read(void *buff, size_t len) {
267
279
  return 0;
268
280
  }
269
281
 
270
- if (stream_stack.empty()) {
271
- throw_entry_fault("Stream stack is empty", {});
282
+ if (StreamArchive *archive = current_archive()) {
283
+ return archive->read_current(buff, len);
272
284
  }
273
285
 
274
- auto stream = stream_stack.back();
275
- ssize_t bytes = 0;
276
- bytes = stream->read(buff, len);
277
-
278
- if (bytes < 0) {
279
- const std::string message = "Failed to read from active stream";
280
- throw_entry_fault(message, current_entry_hierarchy());
286
+ if (_current_stream) {
287
+ return _current_stream->read(buff, len);
281
288
  }
282
-
283
- return bytes;
289
+ return 0;
284
290
  }
285
291
 
286
292
  StreamArchive *ArchiveStackCursor::current_archive() {
287
- if (stream_stack.size() <= 0) {
288
- return nullptr;
289
- }
290
-
291
- const auto stream = std::dynamic_pointer_cast<EntryPayloadStream>(stream_stack.back());
292
- if (!stream) {
293
- return nullptr;
294
- }
295
-
296
- auto parent_archive = stream->parent_archive();
297
- return parent_archive ? parent_archive.get() : nullptr;
293
+ return _current_archive.get();
298
294
  }
299
295
 
300
296
  PathHierarchy ArchiveStackCursor::current_entry_hierarchy() {
301
- if (stream_stack.empty() || !stream_stack.front()) {
297
+ if (depth() == 0 || (!_current_stream && !_current_archive)) {
302
298
  return {};
303
299
  }
304
300
 
@@ -310,7 +306,7 @@ PathHierarchy ArchiveStackCursor::current_entry_hierarchy() {
310
306
  return path;
311
307
  }
312
308
 
313
- return stream_stack.front()->source_hierarchy();
309
+ return _current_stream->source_hierarchy();
314
310
  }
315
311
 
316
312
  std::shared_ptr<IDataStream> ArchiveStackCursor::create_stream(const PathHierarchy &hierarchy) {
@@ -322,9 +318,7 @@ std::shared_ptr<IDataStream> ArchiveStackCursor::create_stream(const PathHierarc
322
318
  }
323
319
  return std::make_shared<SystemFileStream>(hierarchy);
324
320
  }
325
- auto stream = std::dynamic_pointer_cast<EntryPayloadStream>(stream_stack.back());
326
-
327
- return std::make_shared<EntryPayloadStream>(stream->parent_archive(), hierarchy);
321
+ return std::make_shared<EntryPayloadStream>(_current_archive, hierarchy);
328
322
  }
329
323
 
330
324
  } // namespace archive_r
@@ -31,6 +31,9 @@ public:
31
31
  void rewind() override;
32
32
 
33
33
  PathHierarchy source_hierarchy() const;
34
+ std::shared_ptr<StreamArchive> parent_archive() const;
35
+
36
+ std::shared_ptr<IDataStream> get_stream() const { return _stream; }
34
37
 
35
38
  private:
36
39
  static la_ssize_t read_callback_bridge(struct archive *a, void *client_data, const void **buff);
@@ -75,7 +78,7 @@ struct ArchiveStackCursor {
75
78
 
76
79
  void configure(const ArchiveOption &options);
77
80
  void reset();
78
- bool has_stream() const { return !stream_stack.empty(); }
81
+ bool has_stream() const { return _current_stream != nullptr; }
79
82
 
80
83
  bool descend();
81
84
  bool ascend();
@@ -83,7 +86,16 @@ struct ArchiveStackCursor {
83
86
  bool synchronize_to_hierarchy(const PathHierarchy &hierarchy);
84
87
  ssize_t read(void *buffer, size_t len);
85
88
 
86
- size_t depth() const { return stream_stack.size(); }
89
+ size_t depth() const {
90
+ size_t d = 0;
91
+ auto a = _current_archive;
92
+ while (a) {
93
+ d++;
94
+ a = a->parent_archive();
95
+ }
96
+ return d;
97
+ }
98
+
87
99
  StreamArchive *current_archive();
88
100
 
89
101
  PathHierarchy current_entry_hierarchy();
@@ -91,7 +103,10 @@ struct ArchiveStackCursor {
91
103
  std::shared_ptr<IDataStream> create_stream(const PathHierarchy &hierarchy);
92
104
 
93
105
  ArchiveOption options_snapshot;
94
- std::vector<std::shared_ptr<IDataStream>> stream_stack;
106
+
107
+ private:
108
+ std::shared_ptr<IDataStream> _current_stream;
109
+ std::shared_ptr<StreamArchive> _current_archive;
95
110
  };
96
111
 
97
112
  } // namespace archive_r
@@ -20,6 +20,7 @@ struct MultiVolumeStreamBase::Impl {
20
20
  std::vector<int64_t> part_offsets;
21
21
  std::size_t total_parts = 0;
22
22
  std::size_t active_part_index = 0;
23
+ std::size_t open_part_index = 0;
23
24
  bool part_open = false;
24
25
  int64_t logical_offset = 0;
25
26
  int64_t total_size = -1;
@@ -84,7 +85,7 @@ void MultiVolumeStreamBase::rewind() {
84
85
  }
85
86
 
86
87
  bool MultiVolumeStreamBase::at_end() const {
87
- return (_impl->active_part_index >= _impl->total_parts) && !_impl->part_open;
88
+ return _impl->active_part_index >= _impl->total_parts;
88
89
  }
89
90
 
90
91
  int64_t MultiVolumeStreamBase::seek(int64_t offset, int whence) {
@@ -122,14 +123,14 @@ int64_t MultiVolumeStreamBase::seek(int64_t offset, int whence) {
122
123
  int64_t MultiVolumeStreamBase::tell() const { return _impl->logical_offset; }
123
124
 
124
125
  void MultiVolumeStreamBase::Impl::ensure_part_active(std::size_t part_index) {
125
- if (part_open && active_part_index == part_index) {
126
+ if (part_open && open_part_index == part_index) {
126
127
  return;
127
128
  }
128
129
 
129
130
  self.deactivate_active_part();
130
131
  PathHierarchy single_part = pathhierarchy_select_single_part(self._logical_path, part_index);
131
132
  self.open_single_part(single_part);
132
- active_part_index = part_index;
133
+ open_part_index = part_index;
133
134
  part_open = true;
134
135
  }
135
136
 
@@ -144,7 +145,6 @@ bool MultiVolumeStreamBase::Impl::advance_to_next_part() {
144
145
  if (active_part_index >= total_parts) {
145
146
  return false;
146
147
  }
147
- self.deactivate_active_part();
148
148
  ++active_part_index;
149
149
  return active_part_index < total_parts;
150
150
  }
@@ -14,14 +14,9 @@ int entry_type_rank(const PathEntry &entry) {
14
14
  if (entry.is_single()) {
15
15
  return 0;
16
16
  }
17
- if (entry.is_multi_volume()) {
18
- return 1;
19
- }
20
- return 2;
17
+ return 1;
21
18
  }
22
19
 
23
- int compare_node_lists_impl(const PathEntry::NodeList &lhs, const PathEntry::NodeList &rhs);
24
-
25
20
  int compare_entries_impl(const PathEntry &lhs, const PathEntry &rhs) {
26
21
  const int lhs_rank = entry_type_rank(lhs);
27
22
  const int rhs_rank = entry_type_rank(rhs);
@@ -69,22 +64,6 @@ int compare_entries_impl(const PathEntry &lhs, const PathEntry &rhs) {
69
64
  return 0;
70
65
  }
71
66
 
72
- return compare_node_lists_impl(lhs.nested_nodes(), rhs.nested_nodes());
73
- }
74
-
75
- int compare_node_lists_impl(const PathEntry::NodeList &lhs, const PathEntry::NodeList &rhs) {
76
- const std::size_t lsize = lhs.size();
77
- const std::size_t rsize = rhs.size();
78
- const std::size_t compare_count = lsize < rsize ? lsize : rsize;
79
- for (std::size_t i = 0; i < compare_count; ++i) {
80
- const int cmp = compare_entries_impl(lhs[i], rhs[i]);
81
- if (cmp != 0) {
82
- return cmp;
83
- }
84
- }
85
- if (lsize != rsize) {
86
- return lsize < rsize ? -1 : 1;
87
- }
88
67
  return 0;
89
68
  }
90
69
 
@@ -203,27 +203,6 @@ bool flatten_entry_to_string(const PathEntry &entry, std::string &output) {
203
203
  return true;
204
204
  }
205
205
 
206
- if (entry.is_nested()) {
207
- std::string result;
208
- bool first = true;
209
- for (const auto &child : entry.nested_nodes()) {
210
- std::string component;
211
- if (!flatten_entry_to_string(child, component)) {
212
- return false;
213
- }
214
- if (component.empty()) {
215
- return false;
216
- }
217
- if (!first) {
218
- result.push_back('/');
219
- }
220
- result += component;
221
- first = false;
222
- }
223
- output = result;
224
- return !result.empty();
225
- }
226
-
227
206
  return false;
228
207
  }
229
208
 
@@ -275,17 +254,7 @@ std::string path_entry_display(const PathEntry &entry) {
275
254
  value.push_back(']');
276
255
  return value;
277
256
  }
278
- std::string value = "{";
279
- bool first = true;
280
- for (const auto &child : entry.nested_nodes()) {
281
- if (!first) {
282
- value.push_back('/');
283
- }
284
- value += path_entry_display(child);
285
- first = false;
286
- }
287
- value.push_back('}');
288
- return value;
257
+ return {};
289
258
  }
290
259
 
291
260
  std::string hierarchy_display(const PathHierarchy &hierarchy) {
@@ -78,7 +78,9 @@ SystemFileStream::SystemFileStream(PathHierarchy logical_path)
78
78
  }
79
79
  }
80
80
 
81
- SystemFileStream::~SystemFileStream() = default;
81
+ SystemFileStream::~SystemFileStream() {
82
+ deactivate_active_part();
83
+ }
82
84
 
83
85
  void SystemFileStream::open_single_part(const PathHierarchy &single_part) {
84
86
  const PathEntry &entry = single_part.back();
@@ -104,8 +106,10 @@ void SystemFileStream::open_single_part(const PathHierarchy &single_part) {
104
106
  }
105
107
 
106
108
  void SystemFileStream::close_single_part() {
107
- std::fclose(_handle);
108
- _handle = nullptr;
109
+ if (_handle) {
110
+ std::fclose(_handle);
111
+ _handle = nullptr;
112
+ }
109
113
  _active_path.clear();
110
114
  }
111
115
 
@@ -10,7 +10,6 @@
10
10
  #include "archive_type.h"
11
11
  #include "entry_fault_error.h"
12
12
  #include "system_file_stream.h"
13
- #include <iostream>
14
13
  #include <filesystem>
15
14
  #include <memory>
16
15
  #include <stdexcept>
@@ -52,6 +51,12 @@ Traverser::Traverser(std::vector<PathHierarchy> paths, TraverserOptions options)
52
51
 
53
52
  }
54
53
 
54
+ Traverser::Traverser(PathHierarchy path, TraverserOptions options)
55
+ : Traverser(std::vector<PathHierarchy>{std::move(path)}, std::move(options)) {}
56
+
57
+ Traverser::Traverser(const std::string &path, TraverserOptions options)
58
+ : Traverser(std::vector<PathHierarchy>{make_single_path(path)}, std::move(options)) {}
59
+
55
60
  Traverser::~Traverser() = default;
56
61
 
57
62
  // ============================================================================
data/lib/archive_r.rb CHANGED
@@ -35,7 +35,7 @@ rescue LoadError
35
35
  end
36
36
 
37
37
  module Archive_r
38
- VERSION = "0.1.3"
38
+ VERSION = "0.1.7"
39
39
  # Common archive formats excluding libarchive's mtree/raw pseudo formats
40
40
  STANDARD_FORMATS = %w[
41
41
  7zip ar cab cpio empty iso9660 lha rar tar warc xar zip
@@ -87,6 +87,10 @@ module Archive_r
87
87
  def open(paths, opts = nil, &block)
88
88
  __archive_r_c_open(paths, Archive_r.normalize_options(opts), &block)
89
89
  end
90
+
91
+ def open_hierarchy(hierarchy, opts = nil, &block)
92
+ open([hierarchy], opts, &block)
93
+ end
90
94
  end
91
95
 
92
96
  alias_method :__archive_r_c_initialize, :initialize
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: archive_r_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - raizo.tcs
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2025-12-04 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rake
@@ -38,8 +37,10 @@ dependencies:
38
37
  - - "~>"
39
38
  - !ruby/object:Gem::Version
40
39
  version: '5.0'
41
- description: Ruby bindings for archive_r that recursively walk nested and multipart
42
- archives directly from the source stream without creating temporary files
40
+ description: Ruby bindings for archive_r, a libarchive-based library for processing
41
+ many archive formats. It streams entry data directly from the source to recursively
42
+ read nested archives without extracting to temporary files or loading large in-memory
43
+ buffers.
43
44
  email:
44
45
  - raizo.tcs@users.noreply.github.com
45
46
  executables: []
@@ -91,7 +92,6 @@ metadata:
91
92
  source_code_uri: https://github.com/raizo-tcs/archive_r
92
93
  bug_tracker_uri: https://github.com/raizo-tcs/archive_r/issues
93
94
  changelog_uri: https://github.com/raizo-tcs/archive_r/releases
94
- post_install_message:
95
95
  rdoc_options: []
96
96
  require_paths:
97
97
  - lib
@@ -106,8 +106,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
106
106
  - !ruby/object:Gem::Version
107
107
  version: '0'
108
108
  requirements: []
109
- rubygems_version: 3.4.20
110
- signing_key:
109
+ rubygems_version: 4.0.1
111
110
  specification_version: 4
112
- summary: Ruby bindings for archive_r that traverse nested archives without temp extraction
111
+ summary: 'Ruby bindings for archive_r: libarchive-based streaming traversal for recursive
112
+ nested archives (no temp files, no large in-memory buffers)'
113
113
  test_files: []