omnizip 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +243 -368
  3. data/README.adoc +101 -5
  4. data/docs/guides/archive-formats/index.adoc +31 -1
  5. data/docs/guides/archive-formats/ole-format.adoc +316 -0
  6. data/docs/guides/archive-formats/rpm-format.adoc +249 -0
  7. data/docs/index.adoc +12 -2
  8. data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
  9. data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
  10. data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
  11. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
  12. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
  13. data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
  14. data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
  15. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
  16. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
  17. data/lib/omnizip/algorithms/lzma.rb +20 -5
  18. data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
  19. data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
  20. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
  21. data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
  22. data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
  23. data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
  24. data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
  25. data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
  26. data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
  27. data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
  28. data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
  29. data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
  30. data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
  31. data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
  32. data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
  33. data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
  34. data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
  35. data/lib/omnizip/buffer/memory_extractor.rb +3 -3
  36. data/lib/omnizip/buffer.rb +2 -2
  37. data/lib/omnizip/filters/delta.rb +2 -1
  38. data/lib/omnizip/filters/registry.rb +6 -6
  39. data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
  40. data/lib/omnizip/formats/lzip.rb +2 -1
  41. data/lib/omnizip/formats/lzma_alone.rb +2 -1
  42. data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
  43. data/lib/omnizip/formats/ole/constants.rb +61 -0
  44. data/lib/omnizip/formats/ole/dirent.rb +380 -0
  45. data/lib/omnizip/formats/ole/header.rb +198 -0
  46. data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
  47. data/lib/omnizip/formats/ole/storage.rb +305 -0
  48. data/lib/omnizip/formats/ole/types/variant.rb +328 -0
  49. data/lib/omnizip/formats/ole.rb +145 -0
  50. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
  51. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
  52. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
  53. data/lib/omnizip/formats/rar3/reader.rb +6 -2
  54. data/lib/omnizip/formats/rar5/reader.rb +4 -1
  55. data/lib/omnizip/formats/rpm/constants.rb +58 -0
  56. data/lib/omnizip/formats/rpm/entry.rb +102 -0
  57. data/lib/omnizip/formats/rpm/header.rb +113 -0
  58. data/lib/omnizip/formats/rpm/lead.rb +122 -0
  59. data/lib/omnizip/formats/rpm/tag.rb +230 -0
  60. data/lib/omnizip/formats/rpm.rb +434 -0
  61. data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
  62. data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
  63. data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
  64. data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
  65. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
  66. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
  67. data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
  68. data/lib/omnizip/formats/seven_zip.rb +10 -0
  69. data/lib/omnizip/formats/xar/entry.rb +18 -5
  70. data/lib/omnizip/formats/xar/header.rb +34 -6
  71. data/lib/omnizip/formats/xar/reader.rb +43 -10
  72. data/lib/omnizip/formats/xar/toc.rb +34 -21
  73. data/lib/omnizip/formats/xar/writer.rb +15 -5
  74. data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
  75. data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
  76. data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
  77. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
  78. data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
  79. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
  80. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
  81. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
  82. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
  83. data/lib/omnizip/pipe/stream_compressor.rb +1 -1
  84. data/lib/omnizip/version.rb +1 -1
  85. data/readme-docs/compression-algorithms.adoc +6 -2
  86. metadata +30 -2
@@ -0,0 +1,346 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright (C) 2025 Ribose Inc.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a
6
+ # copy of this software and associated documentation files (the "Software"),
7
+ # to deal in the Software without restriction, including without limitation
8
+ # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ # and/or sell copies of the Software, and to permit persons to whom the
10
+ # Software is furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ # DEALINGS IN THE SOFTWARE.
22
+
23
+ require_relative "constants"
24
+ require_relative "fse/bitstream"
25
+ require_relative "fse/table"
26
+
27
+ module Omnizip
28
+ module Algorithms
29
+ class Zstandard
30
+ # Sequences section decoder (RFC 8878 Section 3.1.1.3.2)
31
+ #
32
+ # Decodes sequences of (literals_length, match_length, offset)
33
+ # which are then executed to produce the decompressed output.
34
+ class SequencesDecoder
35
+ include Constants
36
+
37
+ # @return [Array<Hash>] Decoded sequences
38
+ attr_reader :sequences
39
+
40
+ # Parse and decode sequences section
41
+ #
42
+ # @param input [IO] Input stream positioned at sequences section
43
+ # @param literals_size [Integer] Size of decoded literals
44
+ # @param previous_tables [Hash] Previous FSE tables for REPEAT mode
45
+ # @return [SequencesDecoder] Decoder with decoded sequences
46
+ def self.decode(input, literals_size, previous_tables = {})
47
+ decoder = new(input, literals_size, previous_tables)
48
+ decoder.decode_section
49
+ decoder
50
+ end
51
+
52
+ # Initialize decoder
53
+ #
54
+ # @param input [IO] Input stream
55
+ # @param literals_size [Integer] Size of decoded literals
56
+ # @param previous_tables [Hash] Previous FSE tables
57
+ def initialize(input, literals_size, previous_tables = {})
58
+ @input = input
59
+ @literals_size = literals_size
60
+ @previous_tables = previous_tables
61
+ @sequences = []
62
+ @fse_tables = {}
63
+ end
64
+
65
+ # Decode the sequences section
66
+ #
67
+ # @return [void]
68
+ def decode_section
69
+ # Read number of sequences
70
+ num_sequences = read_sequence_count
71
+
72
+ return if num_sequences.zero?
73
+
74
+ # Read symbol compression modes
75
+ modes = read_symbol_modes
76
+
77
+ # Build FSE tables based on modes
78
+ build_fse_tables(modes)
79
+
80
+ # Decode sequences
81
+ decode_sequences_internal(num_sequences)
82
+ end
83
+
84
+ private
85
+
86
+ # Read sequence count (1-3 bytes)
87
+ def read_sequence_count
88
+ byte1 = @input.read(1).ord
89
+
90
+ if byte1.zero?
91
+ 0
92
+ elsif byte1 < 128
93
+ byte1
94
+ else
95
+ byte2 = @input.read(1).ord
96
+ ((byte1 - 0x80) << 8) + byte2 + 0x80
97
+ end
98
+ end
99
+
100
+ # Read symbol compression modes for LL, ML, OF
101
+ def read_symbol_modes
102
+ modes_byte = @input.read(1).ord
103
+
104
+ {
105
+ ll: (modes_byte >> 6) & 0x03, # Literals length mode
106
+ of: (modes_byte >> 4) & 0x03, # Offset mode
107
+ ml: (modes_byte >> 2) & 0x03, # Match length mode
108
+ }
109
+ end
110
+
111
+ # Build FSE tables based on compression modes
112
+ def build_fse_tables(modes)
113
+ @fse_tables[:ll] = build_fse_table(modes[:ll], :ll)
114
+ @fse_tables[:ml] = build_fse_table(modes[:ml], :ml)
115
+ @fse_tables[:of] = build_fse_table(modes[:of], :of)
116
+ end
117
+
118
+ # Build a single FSE table
119
+ def build_fse_table(mode, type)
120
+ case mode
121
+ when MODE_PREDEFINED
122
+ build_predefined_table(type)
123
+ when MODE_RLE
124
+ build_rle_table(type)
125
+ when MODE_FSE
126
+ build_fse_table_from_stream(type)
127
+ when MODE_REPEAT
128
+ @previous_tables[type] || build_predefined_table(type)
129
+ end
130
+ end
131
+
132
+ # Build predefined FSE table
133
+ def build_predefined_table(type)
134
+ case type
135
+ when :ll
136
+ FSE::Table.build_predefined(PREDEFINED_LL_DISTRIBUTION.to_a,
137
+ LITERALS_LENGTH_ACCURACY_LOG)
138
+ when :ml
139
+ FSE::Table.build_predefined(PREDEFINED_ML_DISTRIBUTION.to_a,
140
+ MATCH_LENGTH_ACCURACY_LOG)
141
+ when :of
142
+ FSE::Table.build_predefined(PREDEFINED_OFFSET_DISTRIBUTION.to_a,
143
+ OFFSET_ACCURACY_LOG)
144
+ end
145
+ end
146
+
147
+ # Build RLE FSE table (single symbol repeated)
148
+ def build_rle_table(type)
149
+ # Read the repeated symbol
150
+ symbol = @input.read(1).ord
151
+
152
+ # Create a simple distribution with just this symbol
153
+ distribution = Array.new(symbol_count_for_type(type), 0)
154
+ distribution[symbol] = 1 << accuracy_log_for_type(type)
155
+
156
+ FSE::Table.build(distribution, accuracy_log_for_type(type))
157
+ end
158
+
159
+ # Build FSE table from compressed stream
160
+ def build_fse_table_from_stream(type)
161
+ # Read accuracy log
162
+ @input.read(1).ord
163
+
164
+ # For simplicity, return predefined table
165
+ # Full implementation would read compressed distribution
166
+ build_predefined_table(type)
167
+ end
168
+
169
+ # Decode sequences using FSE tables
170
+ def decode_sequences_internal(count)
171
+ return if count.zero?
172
+
173
+ # Read the bitstream (remaining data in block)
174
+ bitstream_data = @input.read # Read remaining data
175
+ bitstream = FSE::BitStream.new(bitstream_data)
176
+
177
+ # Initialize FSE decoders
178
+ ll_decoder = FSE::Decoder.new(@fse_tables[:ll]) if @fse_tables[:ll]
179
+ ml_decoder = FSE::Decoder.new(@fse_tables[:ml]) if @fse_tables[:ml]
180
+ of_decoder = FSE::Decoder.new(@fse_tables[:of]) if @fse_tables[:of]
181
+
182
+ # Initialize states
183
+ ll_decoder&.init_state(bitstream)
184
+ ml_decoder&.init_state(bitstream)
185
+ of_decoder&.init_state(bitstream)
186
+
187
+ # Decode each sequence
188
+ count.times do
189
+ ll_symbol = ll_decoder ? ll_decoder.decode(bitstream) : 0
190
+ ml_symbol = ml_decoder ? ml_decoder.decode(bitstream) : 0
191
+ of_symbol = of_decoder ? of_decoder.decode(bitstream) : 0
192
+
193
+ # Convert symbols to actual values
194
+ ll_value = decode_literal_length(ll_symbol, bitstream)
195
+ ml_value = decode_match_length(ml_symbol, bitstream)
196
+ of_value = decode_offset(of_symbol, bitstream)
197
+
198
+ @sequences << {
199
+ literals_length: ll_value,
200
+ match_length: ml_value,
201
+ offset: of_value,
202
+ }
203
+ end
204
+ end
205
+
206
+ # Decode literal length value from symbol
207
+ def decode_literal_length(symbol, bitstream)
208
+ return 0 if symbol.nil? || symbol.negative? || symbol >= LITERAL_LENGTH_TABLE.length
209
+
210
+ baseline, extra_bits = LITERAL_LENGTH_TABLE[symbol]
211
+ return baseline if extra_bits.zero?
212
+
213
+ extra = bitstream.read_bits(extra_bits)
214
+ baseline + extra
215
+ end
216
+
217
+ # Decode match length value from symbol
218
+ def decode_match_length(symbol, bitstream)
219
+ return 3 if symbol.nil? || symbol.negative? || symbol >= MATCH_LENGTH_TABLE.length
220
+
221
+ baseline, extra_bits = MATCH_LENGTH_TABLE[symbol]
222
+ return baseline if extra_bits.zero?
223
+
224
+ extra = bitstream.read_bits(extra_bits)
225
+ baseline + extra
226
+ end
227
+
228
+ # Decode offset value from symbol
229
+ def decode_offset(symbol, _bitstream)
230
+ # Offsets 1-3 are repeat offsets
231
+ return symbol if symbol <= 3
232
+
233
+ # For offsets > 3, read extra bits
234
+ # The offset is symbol - 3 plus extra bits
235
+ symbol - 3
236
+ end
237
+
238
+ # Get symbol count for type
239
+ def symbol_count_for_type(type)
240
+ case type
241
+ when :ll then LITERAL_LENGTH_TABLE.length
242
+ when :ml then MATCH_LENGTH_TABLE.length
243
+ when :of then 32
244
+ end
245
+ end
246
+
247
+ # Get accuracy log for type
248
+ def accuracy_log_for_type(type)
249
+ case type
250
+ when :ll then LITERALS_LENGTH_ACCURACY_LOG
251
+ when :ml then MATCH_LENGTH_ACCURACY_LOG
252
+ when :of then OFFSET_ACCURACY_LOG
253
+ end
254
+ end
255
+ end
256
+
257
+ # Sequence executor (RFC 8878 Section 3.1.2.2.3)
258
+ #
259
+ # Executes decoded sequences to produce output.
260
+ class SequenceExecutor
261
+ include Constants
262
+
263
+ # Execute sequences to produce decompressed output
264
+ #
265
+ # @param literals [String] Decoded literals
266
+ # @param sequences [Array<Hash>] Decoded sequences
267
+ # @return [String] Decompressed output
268
+ def self.execute(literals, sequences)
269
+ executor = new
270
+ executor.execute(literals, sequences)
271
+ end
272
+
273
+ # Initialize with default repeat offsets
274
+ def initialize
275
+ @repeat_offsets = DEFAULT_REPEAT_OFFSETS.dup
276
+ end
277
+
278
+ # Execute sequences
279
+ #
280
+ # @param literals [String] Decoded literals
281
+ # @param sequences [Array<Hash>] Decoded sequences
282
+ # @return [String] Decompressed output
283
+ def execute(literals, sequences)
284
+ output = String.new(encoding: Encoding::BINARY)
285
+ lit_pos = 0
286
+
287
+ sequences.each do |seq|
288
+ ll = seq[:literals_length] || 0
289
+ ml = seq[:match_length] || 0
290
+ offset_code = seq[:offset] || 0
291
+
292
+ # Copy literals
293
+ if ll.positive? && lit_pos < literals.length
294
+ copy_len = [ll, literals.length - lit_pos].min
295
+ output << literals.slice(lit_pos, copy_len)
296
+ lit_pos += copy_len
297
+ end
298
+
299
+ # Handle match
300
+ if ml.positive?
301
+ offset = resolve_offset(offset_code)
302
+
303
+ if offset.positive? && offset <= output.length
304
+ # Copy match from output history
305
+ match_str = output.slice(-offset, [ml, offset].min) || ""
306
+ # If match is longer than offset, we need to copy byte by byte
307
+ while match_str.length < ml && output.length.positive?
308
+ match_str << match_str[-offset] if offset <= match_str.length
309
+ end
310
+ output << match_str.slice(0, ml)
311
+ end
312
+ end
313
+ end
314
+
315
+ # Copy remaining literals (last sequence has no match)
316
+ if lit_pos < literals.length
317
+ output << literals.slice(lit_pos..-1)
318
+ end
319
+
320
+ output
321
+ end
322
+
323
+ private
324
+
325
+ # Resolve offset code to actual offset
326
+ def resolve_offset(code)
327
+ case code
328
+ when 1
329
+ @repeat_offsets[0]
330
+ when 2
331
+ @repeat_offsets[1]
332
+ when 3
333
+ @repeat_offsets[2]
334
+ else
335
+ # New offset - update repeat offsets
336
+ actual_offset = code - 3
337
+ @repeat_offsets[2] = @repeat_offsets[1]
338
+ @repeat_offsets[1] = @repeat_offsets[0]
339
+ @repeat_offsets[0] = actual_offset
340
+ actual_offset
341
+ end
342
+ end
343
+ end
344
+ end
345
+ end
346
+ end
@@ -48,7 +48,7 @@ module Omnizip
48
48
  case @format
49
49
  when :zip
50
50
  extract_all_zip(result)
51
- when :seven_zip, :'7z'
51
+ when :seven_zip, :"7z"
52
52
  raise NotImplementedError, "7z format support coming in Phase 2"
53
53
  else
54
54
  raise ArgumentError, "Unsupported format: #{@format}"
@@ -75,7 +75,7 @@ module Omnizip
75
75
  case @format
76
76
  when :zip
77
77
  content = extract_entry_zip(name)
78
- when :seven_zip, :'7z'
78
+ when :seven_zip, :"7z"
79
79
  raise NotImplementedError, "7z format support coming in Phase 2"
80
80
  else
81
81
  raise ArgumentError, "Unsupported format: #{@format}"
@@ -99,7 +99,7 @@ module Omnizip
99
99
  case @format
100
100
  when :zip
101
101
  list_entries_zip(names)
102
- when :seven_zip, :'7z'
102
+ when :seven_zip, :"7z"
103
103
  raise NotImplementedError, "7z format support coming in Phase 2"
104
104
  else
105
105
  raise ArgumentError, "Unsupported format: #{@format}"
@@ -51,7 +51,7 @@ module Omnizip
51
51
  case format
52
52
  when :zip
53
53
  create_zip(buffer, options, &block)
54
- when :seven_zip, :'7z'
54
+ when :seven_zip, :"7z"
55
55
  raise NotImplementedError, "7z format support coming in Phase 2"
56
56
  else
57
57
  raise ArgumentError, "Unsupported format: #{format}"
@@ -82,7 +82,7 @@ module Omnizip
82
82
  case format
83
83
  when :zip
84
84
  open_zip(buffer, &block)
85
- when :seven_zip, :'7z'
85
+ when :seven_zip, :"7z"
86
86
  raise NotImplementedError, "7z format support coming in Phase 2"
87
87
  else
88
88
  raise ArgumentError, "Unsupported format: #{format}"
@@ -63,7 +63,8 @@ module Omnizip
63
63
  # - 3: RGB image data (24-bit)
64
64
  # - 4: RGBA image data (32-bit) or 32-bit integers
65
65
  # @raise [ArgumentError] If distance is invalid
66
- def initialize(distance_arg = DEFAULT_DISTANCE, distance: DEFAULT_DISTANCE)
66
+ def initialize(distance_arg = DEFAULT_DISTANCE,
67
+ distance: DEFAULT_DISTANCE)
67
68
  # Support both positional and keyword argument styles
68
69
  # If called with Delta.new(3), distance_arg=3, distance=DEFAULT (keyword not used)
69
70
  # If called with Delta.new(distance: 3), distance_arg=DEFAULT, distance=3
@@ -44,13 +44,13 @@ module Omnizip
44
44
  # @return [void]
45
45
  def self.register_bcj_filters
46
46
  # Individual BCJ architecture filters (use hyphens to match existing convention)
47
- register_bcj_filter(:'bcj-x86', BcjX86, architecture: :x86)
48
- register_bcj_filter(:'bcj-arm', BcjArm, architecture: :arm)
49
- register_bcj_filter(:'bcj-arm64', BcjArm64, architecture: :arm64,
47
+ register_bcj_filter(:"bcj-x86", BcjX86, architecture: :x86)
48
+ register_bcj_filter(:"bcj-arm", BcjArm, architecture: :arm)
49
+ register_bcj_filter(:"bcj-arm64", BcjArm64, architecture: :arm64,
50
50
  xz_supported: false)
51
- register_bcj_filter(:'bcj-ia64', BcjIa64, architecture: :ia64)
52
- register_bcj_filter(:'bcj-ppc', BcjPpc, architecture: :powerpc)
53
- register_bcj_filter(:'bcj-sparc', BcjSparc, architecture: :sparc)
51
+ register_bcj_filter(:"bcj-ia64", BcjIa64, architecture: :ia64)
52
+ register_bcj_filter(:"bcj-ppc", BcjPpc, architecture: :powerpc)
53
+ register_bcj_filter(:"bcj-sparc", BcjSparc, architecture: :sparc)
54
54
 
55
55
  # Unified BCJ filter (Task 2) - supports all architectures
56
56
  # Note: We register it as 'bcj' without architecture suffix
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Cpio
6
+ # IO wrapper that limits reading to a specific byte count
7
+ #
8
+ # Used to read file content from CPIO archives where the file size
9
+ # is known in advance but the underlying IO stream continues.
10
class BoundedIO
  # @return [Integer] Total size of the bounded region in bytes
  # @return [Integer] Bytes of the region not yet read
  attr_reader :length, :remaining

  # Initialize bounded IO.
  #
  # @param io [IO] Underlying IO stream
  # @param length [Integer] Maximum bytes to read
  # @yield Block called once when the end of the region is reached
  #   (for reading padding)
  def initialize(io, length, &eof_callback)
    @io = io
    @length = length
    @remaining = length
    @eof_callback = eof_callback
    @eof = false
  end

  # Read bytes from the IO without ever crossing the end of the region.
  #
  # @param size [Integer, nil] Number of bytes to read (nil = remaining);
  #   larger requests are cut down to what is left in the region
  # @return [String, nil] Data read, or nil at the end of the region or
  #   when the underlying IO has no more data
  def read(size = nil)
    return nil if eof?

    # Clamp so a large request cannot consume bytes that belong to
    # whatever follows this region in the underlying stream.
    size = size.nil? ? @remaining : [size, @remaining].min
    data = @io.read(size)
    return nil if data.nil?

    @remaining -= data.bytesize
    # Fire the EOF callback as soon as the region is fully consumed.
    eof?
    data
  end

  # System read (raises on EOF).
  #
  # @param size [Integer] Number of bytes to read
  # @return [String] Data read
  # @raise [EOFError] If at end of bounded region, or if the underlying
  #   IO ends before the region does
  def sysread(size)
    raise EOFError, "end of file reached" if eof?

    data = read(size)
    raise EOFError, "end of file reached" if data.nil?

    data
  end

  # Check if at end of bounded region. The first call that observes the
  # end invokes the EOF callback; later calls only report the state.
  #
  # @return [Boolean] True if no more bytes to read
  def eof?
    return false if @remaining.positive?
    return true if @eof

    @eof_callback&.call
    @eof = true
  end
end
64
+ end
65
+ end
66
+ end
@@ -140,7 +140,8 @@ module Omnizip
140
140
  # @return [Hash] Metadata (version, dict_size, member_size)
141
141
  def decompress_stream(input_io, output_io, options = {})
142
142
  require_relative "../algorithms/lzma/lzip_decoder"
143
- decoder = Omnizip::Algorithms::LZMA::LzipDecoder.new(input_io, options)
143
+ decoder = Omnizip::Algorithms::LZMA::LzipDecoder.new(input_io,
144
+ options)
144
145
  result = decoder.decode_stream
145
146
 
146
147
  output_io.write(result)
@@ -143,7 +143,8 @@ module Omnizip
143
143
  # @return [Hash] Metadata (lc, lp, pb, dict_size, uncompressed_size)
144
144
  def decompress_stream(input_io, output_io, options = {})
145
145
  require_relative "../algorithms/lzma/lzma_alone_decoder"
146
- decoder = Omnizip::Algorithms::LZMA::LzmaAloneDecoder.new(input_io, options)
146
+ decoder = Omnizip::Algorithms::LZMA::LzmaAloneDecoder.new(input_io,
147
+ options)
147
148
  result = decoder.decode_stream
148
149
 
149
150
  output_io.write(result)