omnizip 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +243 -368
- data/README.adoc +101 -5
- data/docs/guides/archive-formats/index.adoc +31 -1
- data/docs/guides/archive-formats/ole-format.adoc +316 -0
- data/docs/guides/archive-formats/rpm-format.adoc +249 -0
- data/docs/index.adoc +12 -2
- data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
- data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
- data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
- data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
- data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
- data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
- data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
- data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
- data/lib/omnizip/algorithms/lzma.rb +20 -5
- data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
- data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
- data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
- data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
- data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
- data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
- data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
- data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
- data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
- data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
- data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
- data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
- data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
- data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
- data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
- data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
- data/lib/omnizip/buffer/memory_extractor.rb +3 -3
- data/lib/omnizip/buffer.rb +2 -2
- data/lib/omnizip/filters/delta.rb +2 -1
- data/lib/omnizip/filters/registry.rb +6 -6
- data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
- data/lib/omnizip/formats/lzip.rb +2 -1
- data/lib/omnizip/formats/lzma_alone.rb +2 -1
- data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
- data/lib/omnizip/formats/ole/constants.rb +61 -0
- data/lib/omnizip/formats/ole/dirent.rb +380 -0
- data/lib/omnizip/formats/ole/header.rb +198 -0
- data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
- data/lib/omnizip/formats/ole/storage.rb +305 -0
- data/lib/omnizip/formats/ole/types/variant.rb +328 -0
- data/lib/omnizip/formats/ole.rb +145 -0
- data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
- data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
- data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
- data/lib/omnizip/formats/rar3/reader.rb +6 -2
- data/lib/omnizip/formats/rar5/reader.rb +4 -1
- data/lib/omnizip/formats/rpm/constants.rb +58 -0
- data/lib/omnizip/formats/rpm/entry.rb +102 -0
- data/lib/omnizip/formats/rpm/header.rb +113 -0
- data/lib/omnizip/formats/rpm/lead.rb +122 -0
- data/lib/omnizip/formats/rpm/tag.rb +230 -0
- data/lib/omnizip/formats/rpm.rb +434 -0
- data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
- data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
- data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
- data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
- data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
- data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
- data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
- data/lib/omnizip/formats/seven_zip.rb +10 -0
- data/lib/omnizip/formats/xar/entry.rb +18 -5
- data/lib/omnizip/formats/xar/header.rb +34 -6
- data/lib/omnizip/formats/xar/reader.rb +43 -10
- data/lib/omnizip/formats/xar/toc.rb +34 -21
- data/lib/omnizip/formats/xar/writer.rb +15 -5
- data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
- data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
- data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
- data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
- data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
- data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
- data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
- data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
- data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
- data/lib/omnizip/pipe/stream_compressor.rb +1 -1
- data/lib/omnizip/version.rb +1 -1
- data/readme-docs/compression-algorithms.adoc +6 -2
- metadata +30 -2
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../constants"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class Zstandard
|
|
28
|
+
module Frame
|
|
29
|
+
# Zstandard block header parser (RFC 8878 Section 3.1.1.2)
|
|
30
|
+
#
|
|
31
|
+
# Block_Header structure (3 bytes, little-endian):
|
|
32
|
+
# - Last_Block: bit 0
|
|
33
|
+
# - Block_Type: bits 1-2
|
|
34
|
+
# - Block_Size: bits 3-23
|
|
35
|
+
class Block
  include Constants

  # @return [Boolean] True if this is the last block
  attr_reader :last_block

  # @return [Integer] Block type (0=Raw, 1=RLE, 2=Compressed, 3=Reserved)
  attr_reader :block_type

  # @return [Integer] Block size in bytes
  attr_reader :block_size

  # @return [Integer] The 24-bit header value decoded from the three header bytes
  attr_reader :raw_header

  # Parse a block header from input.
  #
  # Reads three bytes and decodes them as a little-endian 24-bit value:
  # bit 0 is Last_Block, bits 1-2 are Block_Type, bits 3-23 are Block_Size.
  #
  # @param input [#read] Input stream positioned at a block header
  # @return [Block] Parsed block header
  # @raise [EOFError] if the stream yields some, but fewer than three, bytes
  def self.parse(input)
    bytes = input.read(3)
    raw = 0

    # A nil read (end of stream) keeps decoding as an all-zero header, as before.
    # NOTE(review): an all-zero header is a non-last, zero-length raw block;
    # confirm callers cannot loop on it at end of input.
    if bytes
      if bytes.bytesize < 3
        raise EOFError,
              "Truncated Zstandard block header (#{bytes.bytesize} of 3 bytes)"
      end

      raw = bytes.getbyte(0) | (bytes.getbyte(1) << 8) | (bytes.getbyte(2) << 16)
    end

    new(raw.allbits?(0x01), (raw >> 1) & 0x03, (raw >> 3) & 0x1FFFFF, raw)
  end

  # Initialize with already-decoded values
  #
  # @param last_block [Boolean]
  # @param block_type [Integer]
  # @param block_size [Integer]
  # @param raw_header [Integer]
  def initialize(last_block, block_type, block_size, raw_header)
    @last_block = last_block
    @block_type = block_type
    @block_size = block_size
    @raw_header = raw_header
  end

  # Check if this is a raw (uncompressed) block
  #
  # @return [Boolean]
  def raw?
    @block_type == BLOCK_TYPE_RAW
  end

  # Check if this is an RLE block
  #
  # @return [Boolean]
  def rle?
    @block_type == BLOCK_TYPE_RLE
  end

  # Check if this is a compressed block
  #
  # @return [Boolean]
  def compressed?
    @block_type == BLOCK_TYPE_COMPRESSED
  end

  # Check if block type is reserved
  #
  # @return [Boolean]
  def reserved?
    @block_type == BLOCK_TYPE_RESERVED
  end

  # Validate block type
  #
  # @return [Boolean] True unless the block type is the reserved one
  def valid?
    !reserved?
  end

  # Get block type name
  #
  # @return [Symbol, nil] :raw, :rle, :compressed or :reserved; nil when
  #   block_type matches none of the known BLOCK_TYPE_* constants
  def type_name
    case @block_type
    when BLOCK_TYPE_RAW then :raw
    when BLOCK_TYPE_RLE then :rle
    when BLOCK_TYPE_COMPRESSED then :compressed
    when BLOCK_TYPE_RESERVED then :reserved
    end
  end
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
require_relative "../constants"
|
|
24
|
+
|
|
25
|
+
module Omnizip
|
|
26
|
+
module Algorithms
|
|
27
|
+
class Zstandard
|
|
28
|
+
module Frame
|
|
29
|
+
# Zstandard frame header parser (RFC 8878 Section 3.1.1.1)
|
|
30
|
+
#
|
|
31
|
+
# Frame_Header structure:
|
|
32
|
+
# - Frame_Header_Descriptor: 1 byte
|
|
33
|
+
# - Window_Descriptor: 0-1 byte (optional)
|
|
34
|
+
# - Dictionary_ID: 0-4 bytes (optional)
|
|
35
|
+
# - Frame_Content_Size: 0-8 bytes (optional)
|
|
36
|
+
class Header
  include Constants

  # @return [Integer] Frame content size flag (bits 6-7)
  attr_reader :content_size_flag

  # @return [Boolean] Single segment flag (bit 5)
  attr_reader :single_segment

  # @return [Integer] Content checksum flag (bit 2)
  attr_reader :checksum_flag

  # @return [Integer] Dictionary ID flag (bits 0-1)
  attr_reader :dictionary_id_flag

  # @return [Integer, nil] Window log value (10 + Exponent), nil until a
  #   Window_Descriptor has been parsed
  attr_reader :window_log

  # @return [Integer, nil] Mantissa (low 3 bits of the Window_Descriptor),
  #   nil until a Window_Descriptor has been parsed
  attr_reader :window_mantissa

  # @return [Integer, nil] Dictionary ID
  attr_reader :dictionary_id

  # @return [Integer, nil] Frame content size
  attr_reader :content_size

  # @return [Integer] Total header size in bytes
  attr_reader :header_size

  # Parse frame header from input
  #
  # Reads the descriptor byte, then each optional field the descriptor
  # announces, in order: Window_Descriptor, Dictionary_ID, Frame_Content_Size.
  #
  # @param input [#read] Input stream positioned at frame header
  # @return [Header] Parsed header
  # @raise [EOFError] if the input ends before the header is complete
  def self.parse(input)
    first = input.read(1)
    if first.nil? || first.empty?
      raise EOFError,
            "Truncated Zstandard frame header while reading Frame_Header_Descriptor"
    end

    header = new(first.getbyte(0))

    header.send(:parse_window_descriptor, input) if header.window_descriptor?
    header.send(:parse_dictionary_id, input) if header.dictionary_id?
    header.send(:parse_content_size, input) if header.content_size?

    header
  end

  # Initialize with descriptor byte
  #
  # @param descriptor [Integer] Frame header descriptor byte
  def initialize(descriptor)
    @descriptor = descriptor

    # Extract flags from descriptor byte
    @content_size_flag = (descriptor >> 6) & 0x03
    @single_segment = (descriptor >> 5).allbits?(0x01)
    @checksum_flag = (descriptor >> 2) & 0x01
    @dictionary_id_flag = descriptor & 0x03

    @window_log = nil
    @window_mantissa = nil
    @dictionary_id = nil
    @content_size = nil
    @header_size = 1 # the descriptor byte itself
  end

  # Check if window descriptor is present
  #
  # @return [Boolean] True unless the single segment flag is set
  def window_descriptor?
    !@single_segment
  end

  # Check if dictionary ID is present
  #
  # @return [Boolean]
  def dictionary_id?
    @dictionary_id_flag != 0
  end

  # Check if content size is present
  #
  # @return [Boolean] True when the content size flag is non-zero or the
  #   single segment flag is set
  def content_size?
    @content_size_flag != 0 || @single_segment
  end

  # Check if content checksum is present
  #
  # @return [Boolean]
  def content_checksum?
    @checksum_flag == 1
  end

  # Get the size of dictionary ID field
  #
  # @return [Integer] 0, 1, 2 or 4 bytes
  def dictionary_id_size
    case @dictionary_id_flag
    when 0 then 0
    when 1 then 1
    when 2 then 2
    when 3 then 4
    end
  end

  # Get the size of content size field
  #
  # @return [Integer] 0, 1, 2, 4 or 8 bytes
  def content_size_size
    case @content_size_flag
    # Flag 0 means a 1-byte field for single-segment frames, no field otherwise.
    when 0 then @single_segment ? 1 : 0
    when 1 then 2
    when 2 then 4
    when 3 then 8
    end
  end

  # Get window size
  #
  # Computed as in RFC 8878 Section 3.1.1.1.2:
  #   windowBase = 1 << windowLog
  #   windowAdd  = (windowBase / 8) * Mantissa
  #   Window_Size = windowBase + windowAdd
  #
  # @return [Integer, nil] Window size or nil if no Window_Descriptor was parsed
  def window_size
    return nil unless @window_log

    base = 1 << @window_log
    base + ((base >> 3) * @window_mantissa.to_i)
  end

  private

  # Read exactly +size+ bytes of the named header field.
  #
  # @param input [#read]
  # @param size [Integer]
  # @param field [String] Field name used in the error message
  # @return [String]
  # @raise [EOFError] if fewer than +size+ bytes are available
  def read_field(input, size, field)
    bytes = input.read(size)
    return bytes if bytes && bytes.bytesize == size

    raise EOFError, "Truncated Zstandard frame header while reading #{field}"
  end

  # Parse window descriptor byte: Exponent in bits 3-7, Mantissa in bits 0-2
  def parse_window_descriptor(input)
    byte = read_field(input, 1, "Window_Descriptor").getbyte(0)
    @window_log = 10 + ((byte >> 3) & 0x1F)
    @window_mantissa = byte & 0x07
    @header_size += 1
  end

  # Parse dictionary ID (1, 2 or 4 bytes, little-endian)
  def parse_dictionary_id(input)
    size = dictionary_id_size
    bytes = read_field(input, size, "Dictionary_ID")

    @dictionary_id = case size
                     when 1 then bytes.getbyte(0)
                     when 2 then bytes.unpack1("v")
                     when 4 then bytes.unpack1("V")
                     end

    @header_size += size
  end

  # Parse content size (1, 2, 4 or 8 bytes, little-endian)
  def parse_content_size(input)
    size = content_size_size
    bytes = read_field(input, size, "Frame_Content_Size")

    @content_size = case size
                    when 1 then bytes.getbyte(0)
                    # RFC 8878 Section 3.1.1.1.4: the 2-byte form stores value - 256.
                    when 2 then bytes.unpack1("v") + 256
                    when 4 then bytes.unpack1("V")
                    when 8 then bytes.unpack1("Q<")
                    end

    @header_size += size
  end
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
end
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Copyright (C) 2025 Ribose Inc.
|
|
4
|
+
#
|
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a
|
|
6
|
+
# copy of this software and associated documentation files (the "Software"),
|
|
7
|
+
# to deal in the Software without restriction, including without limitation
|
|
8
|
+
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
9
|
+
# and/or sell copies of the Software, and to permit persons to whom the
|
|
10
|
+
# Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
|
13
|
+
# all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
20
|
+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
21
|
+
# DEALINGS IN THE SOFTWARE.
|
|
22
|
+
|
|
23
|
+
module Omnizip
|
|
24
|
+
module Algorithms
|
|
25
|
+
class Zstandard
|
|
26
|
+
module FSE
|
|
27
|
+
# FSE bitstream reader (RFC 8878 Section 4.1)
|
|
28
|
+
#
|
|
29
|
+
# Reads FSE-encoded bitstreams which are read in reverse order
|
|
30
|
+
# (from end to beginning) according to RFC 8878.
|
|
31
|
+
#
|
|
32
|
+
# The bitstream is consumed from the end toward the beginning,
|
|
33
|
+
# with bits read from LSB to MSB within each byte.
|
|
34
|
+
class BitStream
  # @return [String] Binary-encoded copy of the compressed data
  attr_reader :data

  # @return [Integer] Number of unread bits; the next bit consumed is the
  #   one at stream index bit_position - 1
  attr_reader :bit_position

  # Initialize bitstream with data
  #
  # The input is duplicated, so the caller's string is never re-encoded.
  #
  # @param data [String] The compressed bitstream data
  def initialize(data)
    @data = data.dup.force_encoding(Encoding::BINARY)
    @bit_position = @data.bytesize * 8
  end

  # Read bits from the stream (in reverse order)
  #
  # Bits are consumed from the end of the data toward the beginning. The
  # first bit consumed becomes bit 0 of the result, the next bit 1, and so
  # on. Reading past the start of the data yields zero bits.
  #
  # @param count [Integer] Number of bits to read
  # @return [Integer] The read bits
  def read_bits(count)
    return 0 if count.zero?

    result = 0
    count.times { |shift| result |= (read_single_bit << shift) }
    result
  end

  # Peek at bits without consuming them
  #
  # @param count [Integer] Number of bits to peek
  # @return [Integer] The peeked bits
  def peek_bits(count)
    saved_position = @bit_position
    read_bits(count).tap { @bit_position = saved_position }
  end

  # Check if bitstream is exhausted
  #
  # @return [Boolean]
  def exhausted?
    @bit_position <= 0
  end

  # Get remaining bits
  #
  # @return [Integer]
  def remaining_bits
    @bit_position
  end

  # Align to byte boundary (skip remaining bits in current byte)
  #
  # The position counts down as bits are consumed, so skipping the unread
  # bits of the current byte means rounding the position DOWN to a multiple
  # of eight. Rounding up would rewind onto bits that were already consumed.
  #
  # @return [Integer] The new bit position
  def align_to_byte
    @bit_position -= @bit_position % 8
  end

  private

  # Read a single bit from the stream
  #
  # @return [Integer] 0 or 1 (0 once the stream is exhausted)
  def read_single_bit
    return 0 if @bit_position <= 0

    @bit_position -= 1
    byte_index, bit_index = @bit_position.divmod(8)

    byte = @data.getbyte(byte_index)
    return 0 unless byte # index outside the data

    (byte >> bit_index) & 1
  end
end
|
|
114
|
+
|
|
115
|
+
# Forward bitstream reader (for Huffman decoding)
|
|
116
|
+
#
|
|
117
|
+
# Reads bits in normal forward order from a starting position.
|
|
118
|
+
class ForwardBitStream
|
|
119
|
+
# @return [String] The compressed data
|
|
120
|
+
attr_reader :data
|
|
121
|
+
|
|
122
|
+
# @return [Integer] Current bit position
|
|
123
|
+
attr_reader :bit_position
|
|
124
|
+
|
|
125
|
+
# Initialize bitstream with data
|
|
126
|
+
#
|
|
127
|
+
# @param data [String] The compressed bitstream data
|
|
128
|
+
# @param start_byte [Integer] Starting byte position
|
|
129
|
+
def initialize(data, start_byte = 0)
|
|
130
|
+
@data = data.dup.force_encoding(Encoding::BINARY)
|
|
131
|
+
@bit_position = start_byte * 8
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
# Read bits from the stream (in forward order)
|
|
135
|
+
#
|
|
136
|
+
# Bits are read from MSB to LSB within each byte.
|
|
137
|
+
#
|
|
138
|
+
# @param count [Integer] Number of bits to read
|
|
139
|
+
# @return [Integer] The read bits
|
|
140
|
+
def read_bits(count)
|
|
141
|
+
return 0 if count.zero?
|
|
142
|
+
|
|
143
|
+
result = 0
|
|
144
|
+
count.times do
|
|
145
|
+
result = (result << 1) | read_single_bit
|
|
146
|
+
end
|
|
147
|
+
result
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Check if bitstream is exhausted
|
|
151
|
+
#
|
|
152
|
+
# @return [Boolean]
|
|
153
|
+
def exhausted?
|
|
154
|
+
@bit_position >= @data.bytesize * 8
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# Get current byte position
|
|
158
|
+
#
|
|
159
|
+
# @return [Integer]
|
|
160
|
+
def byte_position
|
|
161
|
+
@bit_position / 8
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
private
|
|
165
|
+
|
|
166
|
+
# Read a single bit from the stream
|
|
167
|
+
#
|
|
168
|
+
# @return [Integer] 0 or 1
|
|
169
|
+
def read_single_bit
|
|
170
|
+
return 0 if exhausted?
|
|
171
|
+
|
|
172
|
+
byte_index = @bit_position / 8
|
|
173
|
+
bit_index = 7 - (@bit_position % 8) # MSB first
|
|
174
|
+
|
|
175
|
+
@bit_position += 1
|
|
176
|
+
|
|
177
|
+
return 0 if byte_index >= @data.bytesize
|
|
178
|
+
|
|
179
|
+
byte = @data.getbyte(byte_index)
|
|
180
|
+
(byte >> bit_index) & 1
|
|
181
|
+
end
|
|
182
|
+
end
|
|
183
|
+
end
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
end
|