archive_r_ruby 0.1.6 → 0.1.7

This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registry they were published to. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c5c817b1729dcccb9268c75be4c2ce7fe8ca6a41e38421190d0028faa6223eaa
4
- data.tar.gz: f6257d3a247d2abd5f88d149e09cd0d16a78623403b430c78f1e613db99f9b4d
3
+ metadata.gz: 648064959d139565d2e122215cb49ae178b9d5e735a99176b8d785238b753800
4
+ data.tar.gz: d9f05748fbcca7aa76bb615b53480c9133423905c1137005d1c6df895a8b09c3
5
5
  SHA512:
6
- metadata.gz: b9076f2e8c632e23374174921c5862814348f8d64ca20405e3851101f798c15eaf56afef46339227b50b9a5da70ea40833ace51d3894435dc488a20cbe86cdb4
7
- data.tar.gz: 9236f13270d0a46db84e592b0603817fc1b255c50d3ec80e7659d6c3eba02d7f6bcc457c303800a3c03d374e6209f4cf33a6c3e4857d0257989d9768e8bbf9b5
6
+ metadata.gz: dd1b45fd72e08536d0b95b261b8ca071d68fa0a1840c984c9d5aae4d9449b394a11ad6e7f8a4ca8df53b68ee7aef68791468e98404fd6a4ec098a34d831affd6
7
+ data.tar.gz: ebf2a08b3ff59ba317b48cd175d87bf0624e2d4c67d2c95c90b1c8fb32cd9c1afa6ff48f0d24cb467401d3b9b2364cf368946cf4af01e8bdb744431ee9cd961f
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  archive_r License
2
- Version: 0.1.6 (2025-12-10)
2
+ Version: 0.1.7 (2025-12-16)
3
3
 
4
4
  ----------------------------------------
5
5
  Primary License
@@ -75,7 +75,7 @@ The following components are redistributed only because libarchive (bundled with
75
75
  - License: GNU LGPLv3+ or GNU GPLv2+ (https://gmplib.org/)
76
76
 
77
77
  10. OpenSSL 3
78
- - Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r binaries.
78
+ - Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r Windows wheels.
79
79
  - License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
80
80
 
81
81
  11. lz4
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # archive_r Ruby Binding
2
2
 
3
+ Ruby bindings for archive_r, a libarchive-based library for processing many archive formats.
4
+ It streams entry data directly from the source to recursively read nested archives without extracting to temporary files or loading large in-memory buffers.
5
+
3
6
  Ruby bindings expose the archive_r traverser API with a natural, block-friendly interface. This document consolidates the Ruby-specific instructions that previously lived in the repository root README.
4
7
 
5
8
  ## Requirements
@@ -72,11 +72,7 @@ static VALUE path_entry_to_rb(const PathEntry &entry) {
72
72
  }
73
73
  return array;
74
74
  }
75
- VALUE array = rb_ary_new_capa(entry.nested_nodes().size());
76
- for (const auto &child : entry.nested_nodes()) {
77
- rb_ary_push(array, path_entry_to_rb(child));
78
- }
79
- return array;
75
+ return Qnil;
80
76
  }
81
77
 
82
78
  static VALUE path_hierarchy_to_rb(const PathHierarchy &hierarchy) {
@@ -509,12 +505,7 @@ static PathEntry rb_value_to_path_entry(VALUE value) {
509
505
  return PathEntry::multi_volume(std::move(parts));
510
506
  }
511
507
 
512
- PathEntry::NodeList nodes;
513
- nodes.reserve(static_cast<size_t>(length));
514
- for (long i = 0; i < length; ++i) {
515
- nodes.emplace_back(rb_value_to_path_entry(rb_ary_entry(array, i)));
516
- }
517
- return PathEntry::nested(std::move(nodes));
508
+ rb_raise(rb_eTypeError, "PathEntry array must contain only Strings");
518
509
  }
519
510
 
520
511
  // Helper: Convert Ruby path argument into vector of PathHierarchy
@@ -1,5 +1,5 @@
1
1
  archive_r License
2
- Version: 0.1.6 (2025-12-10)
2
+ Version: 0.1.7 (2025-12-16)
3
3
 
4
4
  ----------------------------------------
5
5
  Primary License
@@ -75,7 +75,7 @@ The following components are redistributed only because libarchive (bundled with
75
75
  - License: GNU LGPLv3+ or GNU GPLv2+ (https://gmplib.org/)
76
76
 
77
77
  10. OpenSSL 3
78
- - Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r binaries.
78
+ - Purpose: libarchive dependency providing cryptographic support (Windows); bundled with archive_r Windows wheels.
79
79
  - License: Apache License 2.0 with OpenSSL exception (https://www.openssl.org/source/license.html)
80
80
 
81
81
  11. lz4
@@ -3,23 +3,13 @@
3
3
 
4
4
  #pragma once
5
5
 
6
+ #include "archive_r/platform_compat.h"
6
7
  #include "archive_r/path_hierarchy.h"
7
8
 
8
9
  #include <functional>
9
10
  #include <memory>
10
- #include <sys/types.h>
11
11
  #include <cstdint>
12
12
 
13
- #ifdef _WIN32
14
- #include <basetsd.h>
15
- using ssize_t = SSIZE_T;
16
- #endif
17
-
18
- // Avoid conflict with potential 'read' macro on Windows
19
- #ifdef read
20
- #undef read
21
- #endif
22
-
23
13
  namespace archive_r {
24
14
 
25
15
  /**
@@ -7,17 +7,12 @@
7
7
  #include <filesystem>
8
8
  #include <memory>
9
9
  #include <string>
10
- #include <sys/types.h>
11
10
  #include <vector>
12
11
 
13
- #ifdef _MSC_VER
14
- #include <BaseTsd.h>
15
- typedef SSIZE_T ssize_t;
16
- #endif
17
-
18
12
  #include "archive_r/entry_fault.h"
19
13
  #include "archive_r/entry_metadata.h"
20
14
  #include "archive_r/path_hierarchy.h"
15
+ #include "archive_r/platform_compat.h"
21
16
 
22
17
  namespace archive_r {
23
18
 
@@ -36,8 +31,21 @@ struct MultiVolumeGroupOptions {
36
31
  * - Content access (read operations)
37
32
  * - Multi-volume archive grouping support
38
33
  *
39
- * Entry objects are typically obtained from ArchiveTraverser::Iterator and
40
- * remain valid until the iterator advances.
34
+ * \par Lifetime and Copying
35
+ * - An Entry& obtained while iterating a Traverser is typically valid until the
36
+ * iterator advances.
37
+ * - Entry is copyable. Copies retain metadata (name/path/metadata/etc), but do not
38
+ * retain traverser-managed traversal control state. Calling set_descent() or
39
+ * set_multi_volume_group() on such copies will report a fault and has no effect.
40
+ * Prefer calling these control methods on the Entry& inside the iteration loop,
41
+ * before advancing.
42
+ *
43
+ * \par Reading
44
+ * - read() returns >0 for bytes read, 0 for EOF, -1 for error.
45
+ * - On error, read() dispatches an EntryFault via the registered fault callback
46
+ * (if any).
47
+ * - After any successful read() (including EOF), descent is disabled until
48
+ * explicitly re-enabled via set_descent(true).
41
49
  */
42
50
  class Entry {
43
51
  public:
@@ -89,8 +97,8 @@ public:
89
97
  /**
90
98
  * @brief Read data from the entry
91
99
  *
92
- * Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
93
- * if the owning iterator advances or other traversal work continues in parallel.
100
+ * Each call uses an internal ArchiveStackOrchestrator so reads remain valid even
101
+ * if the owning iterator advances.
94
102
  *
95
103
  * @param buffer Buffer to read data into
96
104
  * @param length Maximum number of bytes to read
@@ -101,6 +109,9 @@ public:
101
109
  /**
102
110
  * @brief Enable or disable automatic descent into this entry
103
111
  * @param enabled true to descend (default), false to keep traversal at current level
112
+ *
113
+ * This control is only available for entries that are managed by a Traverser.
114
+ * Calling this on an Entry that is not traverser-managed reports a fault.
104
115
  */
105
116
  void set_descent(bool enabled);
106
117
 
@@ -128,6 +139,9 @@ public:
128
139
  * }
129
140
  * }
130
141
  * @endcode
142
+ *
143
+ * This control is only available for entries that are managed by a Traverser.
144
+ * Calling this on an Entry that is not traverser-managed reports a fault.
131
145
  */
132
146
  void set_multi_volume_group(const std::string &base_name, const MultiVolumeGroupOptions &options = {});
133
147
 
@@ -18,7 +18,6 @@ namespace archive_r {
18
18
  * A component can be one of three shapes:
19
19
  * - single string value (most common)
20
20
  * - multi-volume part list (split archives that share a common base name)
21
- * - nested list of child entries (used for synthetic grouping)
22
21
  */
23
22
  class PathEntry {
24
23
  public:
@@ -27,8 +26,6 @@ public:
27
26
  enum class Ordering { Natural, Given } ordering = Ordering::Natural;
28
27
  };
29
28
 
30
- using NodeList = std::vector<PathEntry>;
31
-
32
29
  PathEntry() = default;
33
30
 
34
31
  explicit PathEntry(std::string value)
@@ -37,9 +34,6 @@ public:
37
34
  explicit PathEntry(Parts parts)
38
35
  : _value(std::move(parts)) {}
39
36
 
40
- explicit PathEntry(NodeList nodes)
41
- : _value(std::move(nodes)) {}
42
-
43
37
  static PathEntry single(std::string entry) { return PathEntry(std::move(entry)); }
44
38
 
45
39
  static PathEntry multi_volume(std::vector<std::string> entries, Parts::Ordering ordering = Parts::Ordering::Natural) {
@@ -50,24 +44,14 @@ public:
50
44
  return PathEntry(std::move(parts));
51
45
  }
52
46
 
53
- static PathEntry nested(NodeList hierarchies) {
54
- if (hierarchies.empty()) {
55
- throw std::invalid_argument("nested hierarchies cannot be empty");
56
- }
57
- return PathEntry(std::move(hierarchies));
58
- }
59
-
60
47
  bool is_single() const { return std::holds_alternative<std::string>(_value); }
61
48
  bool is_multi_volume() const { return std::holds_alternative<Parts>(_value); }
62
- bool is_nested() const { return std::holds_alternative<NodeList>(_value); }
63
49
  const std::string &single_value() const { return std::get<std::string>(_value); }
64
50
  const Parts &multi_volume_parts() const { return std::get<Parts>(_value); }
65
51
  Parts &multi_volume_parts_mut() { return std::get<Parts>(_value); }
66
- const NodeList &nested_nodes() const { return std::get<NodeList>(_value); }
67
- NodeList &nested_nodes_mut() { return std::get<NodeList>(_value); }
68
52
 
69
53
  private:
70
- std::variant<std::string, Parts, NodeList> _value;
54
+ std::variant<std::string, Parts> _value;
71
55
  };
72
56
 
73
57
  using PathHierarchy = std::vector<PathEntry>;
@@ -76,11 +60,10 @@ using PathHierarchy = std::vector<PathEntry>;
76
60
  * Compare two entries using the ordering enforced throughout archive_r.
77
61
  *
78
62
  * Ordering rules:
79
- * 1. Entry categories are ordered single < multi-volume < nested node-list.
63
+ * 1. Entry categories are ordered single < multi-volume.
80
64
  * 2. Single entries compare by string value.
81
65
  * 3. Multi-volume entries first compare their ordering flag (Natural < Given),
82
66
  * then compare corresponding part names lexicographically, finally by list length.
83
- * 4. Nested node-lists compare child entries pairwise using the same rules.
84
67
  */
85
68
  int compare_entries(const PathEntry &lhs, const PathEntry &rhs);
86
69
 
@@ -7,8 +7,8 @@
7
7
 
8
8
  #if defined(_WIN32)
9
9
  # include <sys/stat.h>
10
+ # include <BaseTsd.h>
10
11
  # if !defined(_SSIZE_T_DEFINED)
11
- # include <BaseTsd.h>
12
12
  using ssize_t = SSIZE_T;
13
13
  # define _SSIZE_T_DEFINED
14
14
  # endif
@@ -17,3 +17,18 @@ using mode_t = unsigned short; // MSVC does not expose POSIX mode_t by default
17
17
  # define _MODE_T_DEFINED
18
18
  # endif
19
19
  #endif
20
+
21
+ namespace archive_r {
22
+
23
+ // Expose POSIX-like types within the archive_r namespace.
24
+ // - On POSIX platforms, ssize_t/mode_t come from <sys/types.h>.
25
+ // - On Windows, platform_compat provides fallback definitions above.
26
+ #if defined(_WIN32)
27
+ using ssize_t = SSIZE_T;
28
+ using mode_t = unsigned short;
29
+ #else
30
+ using ssize_t = ::ssize_t;
31
+ using mode_t = ::mode_t;
32
+ #endif
33
+
34
+ } // namespace archive_r
@@ -30,9 +30,31 @@ struct TraverserOptions {
30
30
  * and filesystem directories.
31
31
  *
32
32
  * Uses std::filesystem for directory traversal and ArchiveStackOrchestrator for archives.
33
-
34
33
  * @see Entry, ArchiveStackOrchestrator
35
34
  *
35
+ * \par Inputs
36
+ * - The input list must not be empty, and each PathHierarchy must not be empty.
37
+ * Violations throw std::invalid_argument.
38
+ * - For the common single-root case, prefer make_single_path("...") or
39
+ * Traverser(const std::string&, ...).
40
+ *
41
+ * \par How Roots Are Interpreted
42
+ * - If the root hierarchy is exactly one single path and it refers to a directory,
43
+ * Traverser enumerates it using std::filesystem::recursive_directory_iterator.
44
+ * - Otherwise, Traverser attempts archive traversal using libarchive.
45
+ *
46
+ * \par Error Model (Exceptions vs Faults)
47
+ * - Invalid arguments are reported via exceptions (std::invalid_argument).
48
+ * - Recoverable data / I/O errors during archive traversal are reported via the
49
+ * global fault callback (EntryFault) and traversal continues.
50
+ * - Directory traversal uses std::filesystem iterators; filesystem exceptions
51
+ * (e.g. std::filesystem::filesystem_error) may be thrown and are not converted
52
+ * to faults.
53
+ *
54
+ * \par Iterator Semantics
55
+ * - Traverser::Iterator is an input iterator (single-pass).
56
+ * - Dereferencing the end iterator throws std::logic_error.
57
+ *
36
58
  * Usage:
37
59
  * Traverser traverser({make_single_path("archive.tar.gz")}); // or directory path
38
60
  * for (Entry& entry : traverser) {
@@ -53,9 +75,21 @@ public:
53
75
  * Provide one or more paths to traverse. Single-path traversal can be
54
76
  * achieved by passing a container with one element:
55
77
  * Traverser traverser({make_single_path("archive.tar.gz")});
78
+ *
79
+ * @throws std::invalid_argument if paths is empty or contains an empty hierarchy
56
80
  */
57
81
  explicit Traverser(std::vector<PathHierarchy> paths, TraverserOptions options = {});
58
82
 
83
+ /**
84
+ * @brief Construct traverser for a single hierarchy
85
+ */
86
+ explicit Traverser(PathHierarchy path, TraverserOptions options = {});
87
+
88
+ /**
89
+ * @brief Construct traverser for a single archive or directory path
90
+ */
91
+ explicit Traverser(const std::string &path, TraverserOptions options = {});
92
+
59
93
  ~Traverser();
60
94
 
61
95
  // Non-copyable
@@ -14,14 +14,9 @@ int entry_type_rank(const PathEntry &entry) {
14
14
  if (entry.is_single()) {
15
15
  return 0;
16
16
  }
17
- if (entry.is_multi_volume()) {
18
- return 1;
19
- }
20
- return 2;
17
+ return 1;
21
18
  }
22
19
 
23
- int compare_node_lists_impl(const PathEntry::NodeList &lhs, const PathEntry::NodeList &rhs);
24
-
25
20
  int compare_entries_impl(const PathEntry &lhs, const PathEntry &rhs) {
26
21
  const int lhs_rank = entry_type_rank(lhs);
27
22
  const int rhs_rank = entry_type_rank(rhs);
@@ -69,22 +64,6 @@ int compare_entries_impl(const PathEntry &lhs, const PathEntry &rhs) {
69
64
  return 0;
70
65
  }
71
66
 
72
- return compare_node_lists_impl(lhs.nested_nodes(), rhs.nested_nodes());
73
- }
74
-
75
- int compare_node_lists_impl(const PathEntry::NodeList &lhs, const PathEntry::NodeList &rhs) {
76
- const std::size_t lsize = lhs.size();
77
- const std::size_t rsize = rhs.size();
78
- const std::size_t compare_count = lsize < rsize ? lsize : rsize;
79
- for (std::size_t i = 0; i < compare_count; ++i) {
80
- const int cmp = compare_entries_impl(lhs[i], rhs[i]);
81
- if (cmp != 0) {
82
- return cmp;
83
- }
84
- }
85
- if (lsize != rsize) {
86
- return lsize < rsize ? -1 : 1;
87
- }
88
67
  return 0;
89
68
  }
90
69
 
@@ -203,27 +203,6 @@ bool flatten_entry_to_string(const PathEntry &entry, std::string &output) {
203
203
  return true;
204
204
  }
205
205
 
206
- if (entry.is_nested()) {
207
- std::string result;
208
- bool first = true;
209
- for (const auto &child : entry.nested_nodes()) {
210
- std::string component;
211
- if (!flatten_entry_to_string(child, component)) {
212
- return false;
213
- }
214
- if (component.empty()) {
215
- return false;
216
- }
217
- if (!first) {
218
- result.push_back('/');
219
- }
220
- result += component;
221
- first = false;
222
- }
223
- output = result;
224
- return !result.empty();
225
- }
226
-
227
206
  return false;
228
207
  }
229
208
 
@@ -275,17 +254,7 @@ std::string path_entry_display(const PathEntry &entry) {
275
254
  value.push_back(']');
276
255
  return value;
277
256
  }
278
- std::string value = "{";
279
- bool first = true;
280
- for (const auto &child : entry.nested_nodes()) {
281
- if (!first) {
282
- value.push_back('/');
283
- }
284
- value += path_entry_display(child);
285
- first = false;
286
- }
287
- value.push_back('}');
288
- return value;
257
+ return {};
289
258
  }
290
259
 
291
260
  std::string hierarchy_display(const PathHierarchy &hierarchy) {
@@ -51,6 +51,12 @@ Traverser::Traverser(std::vector<PathHierarchy> paths, TraverserOptions options)
51
51
 
52
52
  }
53
53
 
54
+ Traverser::Traverser(PathHierarchy path, TraverserOptions options)
55
+ : Traverser(std::vector<PathHierarchy>{std::move(path)}, std::move(options)) {}
56
+
57
+ Traverser::Traverser(const std::string &path, TraverserOptions options)
58
+ : Traverser(std::vector<PathHierarchy>{make_single_path(path)}, std::move(options)) {}
59
+
54
60
  Traverser::~Traverser() = default;
55
61
 
56
62
  // ============================================================================
data/lib/archive_r.rb CHANGED
@@ -35,7 +35,7 @@ rescue LoadError
35
35
  end
36
36
 
37
37
  module Archive_r
38
- VERSION = "0.1.6"
38
+ VERSION = "0.1.7"
39
39
  # Common archive formats excluding libarchive's mtree/raw pseudo formats
40
40
  STANDARD_FORMATS = %w[
41
41
  7zip ar cab cpio empty iso9660 lha rar tar warc xar zip
@@ -87,6 +87,10 @@ module Archive_r
87
87
  def open(paths, opts = nil, &block)
88
88
  __archive_r_c_open(paths, Archive_r.normalize_options(opts), &block)
89
89
  end
90
+
91
+ def open_hierarchy(hierarchy, opts = nil, &block)
92
+ open([hierarchy], opts, &block)
93
+ end
90
94
  end
91
95
 
92
96
  alias_method :__archive_r_c_initialize, :initialize
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: archive_r_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - raizo.tcs
@@ -37,8 +37,10 @@ dependencies:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
39
  version: '5.0'
40
- description: Ruby bindings for archive_r that recursively walk nested and multipart
41
- archives directly from the source stream without creating temporary files
40
+ description: Ruby bindings for archive_r, a libarchive-based library for processing
41
+ many archive formats. It streams entry data directly from the source to recursively
42
+ read nested archives without extracting to temporary files or loading large in-memory
43
+ buffers.
42
44
  email:
43
45
  - raizo.tcs@users.noreply.github.com
44
46
  executables: []
@@ -104,7 +106,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
106
  - !ruby/object:Gem::Version
105
107
  version: '0'
106
108
  requirements: []
107
- rubygems_version: 4.0.0
109
+ rubygems_version: 4.0.1
108
110
  specification_version: 4
109
- summary: Ruby bindings for archive_r that traverse nested archives without temp extraction
111
+ summary: 'Ruby bindings for archive_r: libarchive-based streaming traversal for recursive
112
+ nested archives (no temp files, no large in-memory buffers)'
110
113
  test_files: []