omnizip 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +243 -368
  3. data/README.adoc +101 -5
  4. data/docs/guides/archive-formats/index.adoc +31 -1
  5. data/docs/guides/archive-formats/ole-format.adoc +316 -0
  6. data/docs/guides/archive-formats/rpm-format.adoc +249 -0
  7. data/docs/index.adoc +12 -2
  8. data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
  9. data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
  10. data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
  11. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
  12. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
  13. data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
  14. data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
  15. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
  16. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
  17. data/lib/omnizip/algorithms/lzma.rb +20 -5
  18. data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
  19. data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
  20. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
  21. data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
  22. data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
  23. data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
  24. data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
  25. data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
  26. data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
  27. data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
  28. data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
  29. data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
  30. data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
  31. data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
  32. data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
  33. data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
  34. data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
  35. data/lib/omnizip/buffer/memory_extractor.rb +3 -3
  36. data/lib/omnizip/buffer.rb +2 -2
  37. data/lib/omnizip/filters/delta.rb +2 -1
  38. data/lib/omnizip/filters/registry.rb +6 -6
  39. data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
  40. data/lib/omnizip/formats/lzip.rb +2 -1
  41. data/lib/omnizip/formats/lzma_alone.rb +2 -1
  42. data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
  43. data/lib/omnizip/formats/ole/constants.rb +61 -0
  44. data/lib/omnizip/formats/ole/dirent.rb +380 -0
  45. data/lib/omnizip/formats/ole/header.rb +198 -0
  46. data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
  47. data/lib/omnizip/formats/ole/storage.rb +305 -0
  48. data/lib/omnizip/formats/ole/types/variant.rb +328 -0
  49. data/lib/omnizip/formats/ole.rb +145 -0
  50. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
  51. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
  52. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
  53. data/lib/omnizip/formats/rar3/reader.rb +6 -2
  54. data/lib/omnizip/formats/rar5/reader.rb +4 -1
  55. data/lib/omnizip/formats/rpm/constants.rb +58 -0
  56. data/lib/omnizip/formats/rpm/entry.rb +102 -0
  57. data/lib/omnizip/formats/rpm/header.rb +113 -0
  58. data/lib/omnizip/formats/rpm/lead.rb +122 -0
  59. data/lib/omnizip/formats/rpm/tag.rb +230 -0
  60. data/lib/omnizip/formats/rpm.rb +434 -0
  61. data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
  62. data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
  63. data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
  64. data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
  65. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
  66. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
  67. data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
  68. data/lib/omnizip/formats/seven_zip.rb +10 -0
  69. data/lib/omnizip/formats/xar/entry.rb +18 -5
  70. data/lib/omnizip/formats/xar/header.rb +34 -6
  71. data/lib/omnizip/formats/xar/reader.rb +43 -10
  72. data/lib/omnizip/formats/xar/toc.rb +34 -21
  73. data/lib/omnizip/formats/xar/writer.rb +15 -5
  74. data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
  75. data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
  76. data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
  77. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
  78. data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
  79. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
  80. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
  81. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
  82. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
  83. data/lib/omnizip/pipe/stream_compressor.rb +1 -1
  84. data/lib/omnizip/version.rb +1 -1
  85. data/readme-docs/compression-algorithms.adoc +6 -2
  86. metadata +30 -2
@@ -0,0 +1,198 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+
5
+ module Omnizip
6
+ module Formats
7
+ module Ole
8
# OLE compound-document header
#
# Reads and writes the fixed-layout header fields described by PACK
# (76 bytes when packed). In the file format this structure opens the
# header block and is followed by up to 109 BAT block indices, which
# are not handled by this class.
class Header
  include Constants

  # String#unpack / Array#pack template covering the 17 header fields
  PACK = "a8 a16 v2 a2 v2 a6 V3 a4 V5"

  # @return [String] 8-byte OLE magic signature
  attr_accessor :magic

  # @return [String] 16-byte CLSID (usually zeros)
  attr_accessor :clsid

  # @return [Integer] Minor version (usually 59)
  attr_accessor :minor_ver

  # @return [Integer] Major version (3 or 4)
  attr_accessor :major_ver

  # @return [String] 2-byte byte order marker
  attr_accessor :byte_order

  # @return [Integer] Big block shift (block size is 1 << b_shift)
  attr_accessor :b_shift

  # @return [Integer] Small block shift (block size is 1 << s_shift)
  attr_accessor :s_shift

  # @return [String] 6-byte reserved field
  attr_accessor :reserved

  # @return [Integer] Number of SECTs in directory (0 for v3)
  attr_accessor :csectdir

  # @return [Integer] Number of BAT blocks
  attr_accessor :num_bat

  # @return [Integer] First block of directory entries
  attr_accessor :dirent_start

  # @return [String] 4-byte transaction signature
  attr_accessor :transacting_signature

  # @return [Integer] Small block threshold (DEFAULT_THRESHOLD by default)
  attr_accessor :threshold

  # @return [Integer] First block of SBAT
  attr_accessor :sbat_start

  # @return [Integer] Number of SBAT blocks
  attr_accessor :num_sbat

  # @return [Integer] First block of Meta BAT
  attr_accessor :mbat_start

  # @return [Integer] Number of Meta BAT blocks
  attr_accessor :num_mbat

  # Build a validated header from raw bytes
  #
  # @param data [String] Header block (at least HEADER_SIZE bytes)
  # @return [Header] Parsed and validated header
  # @raise [ArgumentError] If data is nil, too short, or fails validation
  def self.parse(data)
    too_short = data.nil? || data.bytesize < HEADER_SIZE
    raise ArgumentError, "Header data too short" if too_short

    new.tap do |header|
      header.unpack(data)
      header.validate!
    end
  end

  # Build a header suitable for a brand-new document
  #
  # @return [Header] Header populated with default values
  def self.create
    new.tap(&:apply_defaults)
  end

  # Start out with default field values
  def initialize
    apply_defaults
  end

  # Reset every field to its default value
  def apply_defaults
    @magic = MAGIC.dup
    @clsid = "\x00".b * 16
    @minor_ver = 59
    @major_ver = 3
    @byte_order = BYTE_ORDER_LE.dup
    @b_shift = DEFAULT_BIG_BLOCK_SHIFT
    @s_shift = DEFAULT_SMALL_BLOCK_SHIFT
    @reserved = "\x00".b * 6
    @csectdir = 0
    @num_bat = 1
    @dirent_start = EOC
    @transacting_signature = "\x00".b * 4
    @threshold = DEFAULT_THRESHOLD
    @sbat_start = EOC
    @num_sbat = 0
    @mbat_start = EOC
    @num_mbat = 0
  end

  # Size of a big block
  #
  # @return [Integer] Block size in bytes
  def big_block_size
    1 << @b_shift
  end

  # Size of a small block
  #
  # @return [Integer] Block size in bytes
  def small_block_size
    1 << @s_shift
  end

  # Load all fields from binary data
  #
  # Only the first HEADER_SIZE characters of +data+ are considered.
  #
  # @param data [String] Binary data
  # @return [Integer] The decoded num_mbat (value of the last field)
  def unpack(data)
    @magic, @clsid, @minor_ver, @major_ver, @byte_order,
      @b_shift, @s_shift, @reserved, @csectdir, @num_bat,
      @dirent_start, @transacting_signature, @threshold,
      @sbat_start, @num_sbat, @mbat_start, @num_mbat =
      data[0, HEADER_SIZE].unpack(PACK)

    # Keep returning the last field, as the field-by-field version did.
    @num_mbat
  end

  # Serialize the fields in PACK order
  #
  # @return [String] 76-byte header binary data
  def pack
    fields = [
      @magic, @clsid, @minor_ver, @major_ver, @byte_order,
      @b_shift, @s_shift, @reserved, @csectdir, @num_bat,
      @dirent_start, @transacting_signature, @threshold,
      @sbat_start, @num_sbat, @mbat_start, @num_mbat
    ]
    fields.pack(PACK)
  end

  # Check the header for structural problems
  #
  # @return [true] When every check passes
  # @raise [ArgumentError] If header is invalid
  def validate!
    raise ArgumentError, "Invalid OLE magic signature" if @magic != MAGIC
    raise ArgumentError, "Invalid OLE: no BAT blocks" if @num_bat.zero?

    # The big block shift must lie in 7..30 and be at least the small one.
    shifts_ok = @s_shift <= @b_shift && @b_shift > 6 && @b_shift < 31
    raise ArgumentError, "Invalid block shift values" unless shifts_ok

    if @byte_order != BYTE_ORDER_LE
      raise ArgumentError, "Only little-endian OLE files are supported"
    end

    # A threshold other than DEFAULT_THRESHOLD is tolerated: it is
    # deliberately not treated as an error.
    true
  end
end
196
+ end
197
+ end
198
+ end
@@ -0,0 +1,264 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Omnizip
4
+ module Formats
5
+ module Ole
6
# Virtual IO for non-contiguous byte ranges
#
# Presents a list of [offset, length] spans of an underlying IO object
# as one contiguous, seekable stream. Every read or write is translated
# into seek + read/write calls on the underlying IO, one span at a time.
class RangesIO
  # @return [IO] Underlying IO object
  attr_reader :io

  # @return [Array<Array<Integer, Integer>>] Byte ranges [[offset, length], ...]
  attr_reader :ranges

  # @return [Integer] Total size in bytes (sum of all range lengths)
  attr_reader :size

  # @return [Integer] Current position within the virtual stream
  attr_reader :pos

  # Initialize RangesIO
  #
  # @param io [IO] Underlying IO object
  # @param ranges [Array<Range, Array<Integer, Integer>>] Byte ranges
  def initialize(io, ranges = [])
    @io = io
    @pos = 0
    @active = 0
    self.ranges = ranges
  end

  # Open with block support
  #
  # Without a block the new instance is returned. With a block the
  # instance is yielded, closed afterwards, and the block's value is
  # returned.
  #
  # @param io [IO] Underlying IO object
  # @param ranges [Array<Range, Array<Integer, Integer>>] Byte ranges
  # @yield [RangesIO]
  def self.open(io, ranges = [])
    ranges_io = new(io, ranges)
    return ranges_io unless block_given?

    begin
      yield ranges_io
    ensure
      ranges_io.close
    end
  end

  # Set ranges
  #
  # Replaces the span list, recomputes the total size and rewinds the
  # stream to position 0. Range objects are interpreted as half-open:
  # the length is always `end - begin`, whether or not the Range
  # excludes its end.
  #
  # @param ranges [Array<Range, Array>] Byte ranges
  def ranges=(ranges)
    @ranges = ranges.map do |r|
      r.is_a?(Range) ? [r.begin, r.end - r.begin] : r
    end

    # @offsets[i] is the virtual position at which span i starts.
    @size = 0
    @offsets = []
    @ranges.each do |span|
      @offsets << @size
      @size += span.last
    end

    # Reset position
    @active = 0
    @pos = 0
  end

  # Set position
  #
  # Also available as #seek, which accepts the optional whence argument.
  #
  # @param new_pos [Integer]
  # @param whence [Integer] IO::SEEK_SET, IO::SEEK_CUR, or IO::SEEK_END
  # @raise [Errno::EINVAL] On an unknown whence or a target outside 0..size
  def pos=(new_pos, whence = ::IO::SEEK_SET)
    target =
      case whence
      when ::IO::SEEK_SET then new_pos
      when ::IO::SEEK_CUR then @pos + new_pos
      when ::IO::SEEK_END then @size + new_pos
      else raise Errno::EINVAL
      end

    raise Errno::EINVAL unless (0..@size).cover?(target)

    @pos = target

    # The active span is the last one starting at or before @pos, i.e.
    # the one just before the first span that starts after @pos.
    # With no spans at all this yields -1.
    first_after = @offsets.bsearch_index { |start| start > @pos }
    @active = (first_after || @offsets.length) - 1
  end

  alias seek :pos=
  alias tell :pos

  # Rewind to beginning
  def rewind
    seek(0)
  end

  # Check if at end
  #
  # @return [Boolean]
  def eof?
    @pos == @size
  end

  # Read data
  #
  # Returns an empty binary string (never nil) at end of stream or when
  # limit is zero or negative. Stops early if the underlying IO returns
  # fewer bytes than requested.
  #
  # @param limit [Integer, nil] Maximum bytes to read (nil reads to the end)
  # @return [String]
  def read(limit = nil)
    data = "".b
    return data if eof?

    limit ||= @size
    return data if limit <= 0

    range_pos, range_len = active_span_remainder

    loop do
      @io.seek(range_pos)

      last_chunk = limit < range_len
      chunk = @io.read(last_chunk ? limit : range_len).to_s
      @pos += chunk.bytesize
      data << chunk

      # Done when the request ends inside this span or the IO ran short.
      break if last_chunk || chunk.bytesize != range_len

      limit -= range_len
      break if @active >= @ranges.length - 1

      @active += 1
      range_pos, range_len = @ranges[@active]
    end

    data
  end

  # Write data
  #
  # When the data does not fit into the remaining space, #truncate is
  # called to grow the stream first; the base class does not support
  # that and raises.
  #
  # @param data [String] Data to write
  # @return [Integer] Bytes written
  # @raise [NotImplementedError] If growth is needed but unsupported
  def write(data)
    data = data.dup.force_encoding(Encoding::ASCII_8BIT) if data.respond_to?(:encoding)
    return 0 if data.empty?

    # Grow if needed
    if data.length > @size - @pos
      resume_at = @pos
      truncate(resume_at + data.length)
      # A truncate override that reassigns the spans through #ranges=
      # leaves the stream rewound to 0; seek back so the data lands at
      # the position the caller was writing to, and so the active span
      # is recomputed against the new span list.
      self.pos = resume_at
    end

    range_pos, range_len = active_span_remainder
    written = 0

    loop do
      @io.seek(range_pos)

      # The rest of the data ends inside this span.
      if written + range_len > data.length
        chunk = data[written..]
        @io.write(chunk)
        @pos += chunk.length
        break
      end

      @io.write(data[written, range_len])
      @pos += range_len
      written += range_len

      break if @active >= @ranges.length - 1

      @active += 1
      range_pos, range_len = @ranges[@active]
    end

    data.length
  end

  alias << :write

  # Truncate (not supported by default)
  #
  # @param _size [Integer] New size
  # @raise [NotImplementedError] Always, in this base class
  def truncate(_size)
    raise NotImplementedError, "truncate not supported"
  end

  # Close (no-op by default)
  def close
    # No-op
  end

  # Inspect
  #
  # @return [String]
  def inspect
    "#<#{self.class} io=#{@io.inspect}, size=#{@size}, pos=#{@pos}>"
  end

  private

  # Part of the active span that lies at or after the current position.
  #
  # @return [Array<Integer, Integer>] [offset in underlying IO, bytes left]
  def active_span_remainder
    start, length = @ranges[@active]
    consumed = @pos - @offsets[@active]
    [start + consumed, length - consumed]
  end
end
226
+
227
# Resizeable RangesIO backed by AllocationTable
#
# The byte ranges are derived from a chain of blocks in the allocation
# table, so the stream can be resized by resizing that chain.
class RangesIOResizeable < RangesIO
  # @return [AllocationTable] Backing allocation table
  attr_reader :bat

  # @return [Integer] First block index (Constants::EOC when the chain is empty)
  attr_accessor :first_block

  # Initialize resizeable RangesIO
  #
  # The underlying IO is taken from bat.io and the ranges from
  # bat.ranges applied to the block chain starting at first_block.
  #
  # @param bat [AllocationTable] Allocation table
  # @param first_block [Integer] First block index, or Constants::EOC for none
  # @param size [Integer, nil] Optional size, passed through to bat.ranges
  def initialize(bat, first_block:, size: nil)
    @bat = bat
    @first_block = first_block
    # EOC means "no blocks yet"; otherwise ask the table for the chain.
    @blocks = first_block == Constants::EOC ? [] : bat.chain(first_block)

    super(bat.io, bat.ranges(@blocks, size))
  end

  # Truncate to new size
  #
  # Resizes the block chain, rebuilds the ranges and grows the
  # underlying IO when the new ranges reach past its end (it is never
  # shrunk). The stream position is kept, clamped to the new size.
  #
  # @param new_size [Integer] New size in bytes
  def truncate(new_size)
    @bat.resize_chain(@blocks, new_size)

    # Assigning ranges rewinds the stream to 0, so remember where we
    # were and seek back afterwards; this also recomputes the active range.
    resume_at = [@pos, new_size].min
    self.ranges = @bat.ranges(@blocks, new_size)
    self.pos = [resume_at, @size].min

    @first_block = @blocks.empty? ? Constants::EOC : @blocks.first

    # Grow underlying IO if needed
    max_pos = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate(max_pos) if max_pos > @io.size
  end
end
262
+ end
263
+ end
264
+ end