omnizip 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +243 -368
  3. data/README.adoc +101 -5
  4. data/docs/guides/archive-formats/index.adoc +31 -1
  5. data/docs/guides/archive-formats/ole-format.adoc +316 -0
  6. data/docs/guides/archive-formats/rpm-format.adoc +249 -0
  7. data/docs/index.adoc +12 -2
  8. data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
  9. data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
  10. data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
  11. data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
  12. data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
  13. data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
  14. data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
  15. data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
  16. data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
  17. data/lib/omnizip/algorithms/lzma.rb +20 -5
  18. data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
  19. data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
  20. data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
  21. data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
  22. data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
  23. data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
  24. data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
  25. data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
  26. data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
  27. data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
  28. data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
  29. data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
  30. data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
  31. data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
  32. data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
  33. data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
  34. data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
  35. data/lib/omnizip/buffer/memory_extractor.rb +3 -3
  36. data/lib/omnizip/buffer.rb +2 -2
  37. data/lib/omnizip/filters/delta.rb +2 -1
  38. data/lib/omnizip/filters/registry.rb +6 -6
  39. data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
  40. data/lib/omnizip/formats/lzip.rb +2 -1
  41. data/lib/omnizip/formats/lzma_alone.rb +2 -1
  42. data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
  43. data/lib/omnizip/formats/ole/constants.rb +61 -0
  44. data/lib/omnizip/formats/ole/dirent.rb +380 -0
  45. data/lib/omnizip/formats/ole/header.rb +198 -0
  46. data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
  47. data/lib/omnizip/formats/ole/storage.rb +305 -0
  48. data/lib/omnizip/formats/ole/types/variant.rb +328 -0
  49. data/lib/omnizip/formats/ole.rb +145 -0
  50. data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
  51. data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
  52. data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
  53. data/lib/omnizip/formats/rar3/reader.rb +6 -2
  54. data/lib/omnizip/formats/rar5/reader.rb +4 -1
  55. data/lib/omnizip/formats/rpm/constants.rb +58 -0
  56. data/lib/omnizip/formats/rpm/entry.rb +102 -0
  57. data/lib/omnizip/formats/rpm/header.rb +113 -0
  58. data/lib/omnizip/formats/rpm/lead.rb +122 -0
  59. data/lib/omnizip/formats/rpm/tag.rb +230 -0
  60. data/lib/omnizip/formats/rpm.rb +434 -0
  61. data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
  62. data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
  63. data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
  64. data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
  65. data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
  66. data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
  67. data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
  68. data/lib/omnizip/formats/seven_zip.rb +10 -0
  69. data/lib/omnizip/formats/xar/entry.rb +18 -5
  70. data/lib/omnizip/formats/xar/header.rb +34 -6
  71. data/lib/omnizip/formats/xar/reader.rb +43 -10
  72. data/lib/omnizip/formats/xar/toc.rb +34 -21
  73. data/lib/omnizip/formats/xar/writer.rb +15 -5
  74. data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
  75. data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
  76. data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
  77. data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
  78. data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
  79. data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
  80. data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
  81. data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
  82. data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
  83. data/lib/omnizip/pipe/stream_compressor.rb +1 -1
  84. data/lib/omnizip/version.rb +1 -1
  85. data/readme-docs/compression-algorithms.adoc +6 -2
  86. metadata +30 -2
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+ require_relative "tag"
5
+
6
module Omnizip
  module Formats
    module Rpm
      # RPM header section parser
      #
      # Parses one RPM header section: a fixed-size preamble (8-byte magic,
      # 32-bit big-endian entry count, 32-bit big-endian data length), the
      # index of tag entries, and the data blob the entries point into.
      # Used for both signature and main headers.
      class Header
        include Constants

        # @return [String, nil] 8-byte header magic (nil until parsed)
        attr_reader :magic

        # @return [Integer, nil] Number of tag entries (nil until parsed)
        attr_reader :entry_count

        # @return [Integer, nil] Data blob length in bytes (nil until parsed)
        attr_reader :data_length

        # @return [Array<Tag>] Parsed tags (empty until parsed)
        attr_reader :tags

        # @return [Integer, nil] Total number of bytes consumed from the IO
        #   (preamble + tag index + data blob); nil until parsed
        attr_reader :length

        # Parse header from IO
        #
        # @param io [IO] Input stream positioned at header
        # @return [Header] Parsed header object
        # @raise [ArgumentError] If magic is invalid or the input is truncated
        def self.parse(io)
          new.tap { |header| header.send(:parse!, io) }
        end

        # Create an empty, not-yet-parsed header.
        #
        # Starting with an empty tag list keeps #[], #find_tag and #to_h
        # usable (returning nil / {}) on an instance that was never parsed.
        def initialize
          @tags = []
        end

        # Get tag value by name
        #
        # @param name [Symbol] Tag name
        # @return [Object, nil] Tag value or nil if not found
        def [](name)
          find_tag(name)&.value
        end

        # Find tag by name
        #
        # @param name [Symbol] Tag name
        # @return [Tag, nil] First tag with that name, or nil
        def find_tag(name)
          @tags.find { |tag| tag.name == name }
        end

        # Get all tags as hash
        #
        # If several tags share a name, the last one wins.
        #
        # @return [Hash] Tag names to values
        def to_h
          @tags.each_with_object({}) do |tag, hash|
            hash[tag.name] = tag.value
          end
        end

        # Validate header
        #
        # @raise [ArgumentError] If the magic does not equal HEADER_MAGIC
        def validate!
          return if @magic == HEADER_MAGIC

          raise ArgumentError, "Invalid header magic: #{@magic.inspect}"
        end

        private

        # Read and decode one complete header section from +io+.
        #
        # @param io [IO] Input stream positioned at header
        # @raise [ArgumentError] If the magic is invalid or any of the three
        #   parts (preamble, tag index, data blob) is shorter than announced
        def parse!(io)
          # Preamble: magic (8) + entry count (4) + data length (4)
          preamble = io.read(HEADER_HEADER_SIZE)
          raise ArgumentError, "Failed to read RPM header" unless preamble
          if preamble.bytesize < HEADER_HEADER_SIZE
            raise ArgumentError, "Truncated RPM header"
          end

          @magic = preamble[0, 8]
          @entry_count, @data_length = preamble[8, 8].unpack("NN")

          # Reject bad magic before trusting the announced sizes.
          validate!

          tag_data_size = @entry_count * TAG_ENTRY_SIZE
          tag_entries_data = read_exactly(io, tag_data_size, "tag entries")
          data_blob = read_exactly(io, @data_length, "data blob")

          # Each index entry is four big-endian 32-bit integers:
          # tag id, type id, offset into the data blob, item count.
          @tags = Array.new(@entry_count) do |i|
            entry = tag_entries_data[i * TAG_ENTRY_SIZE, TAG_ENTRY_SIZE]
            Tag.new(*entry.unpack("NNNN"), data_blob)
          end

          @length = HEADER_HEADER_SIZE + tag_data_size + @data_length
        end

        # Read exactly +size+ bytes from +io+.
        #
        # @param io [IO] Input stream
        # @param size [Integer] Number of bytes required
        # @param what [String] Description used in the error message
        # @return [String] The bytes read (empty string when size is 0)
        # @raise [ArgumentError] If fewer than +size+ bytes are available
        def read_exactly(io, size, what)
          # IO#read returns nil at EOF for a positive size; normalise to an
          # empty binary string so the length check covers both cases.
          data = io.read(size) || "".b
          return data if data.bytesize >= size

          raise ArgumentError,
                "Truncated RPM header: expected #{size} bytes of #{what}, " \
                "got #{data.bytesize}"
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+
5
module Omnizip
  module Formats
    module Rpm
      # RPM lead structure parser
      #
      # The lead is a fixed-size (LEAD_SIZE bytes) deprecated header at the
      # start of RPM files. It contains basic package identification but
      # most information is now stored in the main header.
      class Lead
        include Constants

        # @return [String] 4-byte magic (raw bytes, unmodified)
        attr_reader :magic

        # @return [Integer] Major version
        attr_reader :major_version

        # @return [Integer] Minor version
        attr_reader :minor_version

        # @return [Integer] Package type (PACKAGE_BINARY or PACKAGE_SOURCE)
        attr_reader :type

        # @return [Integer] Architecture number
        attr_reader :architecture

        # @return [String] Package name (from a 66-byte NUL-terminated field)
        attr_reader :name

        # @return [Integer] OS number
        attr_reader :os

        # @return [Integer] Signature type
        attr_reader :signature_type

        # @return [Integer] Total length (always LEAD_SIZE)
        attr_reader :length

        # Parse lead from IO
        #
        # @param io [IO] Input stream positioned at lead
        # @return [Lead] Parsed lead object
        # @raise [ArgumentError] If the input is missing or truncated, or if
        #   the magic or package type is invalid
        def self.parse(io)
          data = io.read(LEAD_SIZE)
          raise ArgumentError, "Failed to read RPM lead" unless data
          raise ArgumentError, "Truncated RPM lead" if data.bytesize < LEAD_SIZE

          new.tap do |lead|
            lead.send(:load_fields, data)
            lead.validate!
          end
        end

        # Validate lead structure
        #
        # @raise [ArgumentError] If validation fails
        def validate!
          # Guard first: the hex formatting below needs four bytes to unpack.
          if @magic.nil? || @magic.bytesize < 4
            raise ArgumentError, "Invalid RPM magic: missing or truncated"
          end

          unless @magic == LEAD_MAGIC
            raise ArgumentError,
                  format("Invalid RPM magic: 0x%08x (expected 0x%08x)",
                         @magic.unpack1("N"), LEAD_MAGIC.unpack1("N"))
          end

          return if [PACKAGE_BINARY, PACKAGE_SOURCE].include?(@type)

          raise ArgumentError, "Invalid RPM type: #{@type}"
        end

        # Check if package is binary
        #
        # @return [Boolean]
        def binary?
          @type == PACKAGE_BINARY
        end

        # Check if package is source
        #
        # @return [Boolean]
        def source?
          @type == PACKAGE_SOURCE
        end

        # Get type name
        #
        # Any type other than PACKAGE_BINARY is reported as :source; on a
        # validated lead the type can only be one of the two.
        #
        # @return [Symbol] :binary or :source
        def type_name
          binary? ? :binary : :source
        end

        private

        # Decode the raw lead bytes into instance state.
        #
        # Layout (all multi-byte integers big-endian):
        #   a4  = 4 raw bytes (magic) - "a" keeps trailing NUL/space bytes,
        #         so an invalid magic is reported with its real value
        #   CC  = 2 unsigned chars (major, minor)
        #   n   = short (type)
        #   n   = short (architecture)
        #   Z66 = 66-byte field, string ends at the first NUL (name)
        #   n   = short (os)
        #   n   = short (signature_type)
        # The remaining reserved bytes are not decoded.
        #
        # @param data [String] At least LEAD_SIZE bytes of lead data
        # @return [void]
        def load_fields(data)
          @length = LEAD_SIZE

          @magic, @major_version, @minor_version, @type, @architecture,
            raw_name, @os, @signature_type = data.unpack("a4 CC n n Z66 n n")

          @name = raw_name.strip
        end
      end
    end
  end
end
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "constants"
4
+
5
module Omnizip
  module Formats
    module Rpm
      # RPM tag definitions and value extraction
      #
      # Maps tag IDs to symbolic names and handles typed value extraction
      # from the header data blob. Extraction is best-effort: an offset or
      # count that reaches past the end of the blob yields an empty or
      # shortened result rather than an exception.
      #
      # The ID-to-name mapping is applied without regard to which header
      # section (signature or main) the tag came from.
      class Tag
        include Constants

        # Tag ID to name mapping (from rpm/rpmtag.h)
        TAG_IDS = {
          # Signature tags
          257 => :sigsize,
          261 => :sigmd5,
          262 => :siggpg,
          263 => :sigpgp5,
          267 => :dsaheader,
          268 => :rsaheader,
          269 => :sha1header,
          270 => :longsigsize,
          271 => :longarchivesize,

          # Header tags
          1000 => :name,
          1001 => :version,
          1002 => :release,
          1003 => :epoch,
          1004 => :summary,
          1005 => :description,
          1006 => :buildtime,
          1007 => :buildhost,
          1009 => :size,
          1010 => :distribution,
          1011 => :vendor,
          1014 => :license,
          1015 => :packager,
          1016 => :group,
          1020 => :url,
          1021 => :os,
          1022 => :arch,
          1023 => :prein,
          1024 => :postin,
          1025 => :preun,
          1026 => :postun,
          1027 => :oldfilenames,
          1028 => :filesizes,
          1029 => :filestates,
          1030 => :filemodes,
          1031 => :fileuids,
          1032 => :filegids,
          1033 => :filerdevs,
          1034 => :filemtimes,
          1035 => :filedigests,
          1036 => :filelinktos,
          1037 => :fileflags,
          1039 => :fileusername,
          1040 => :filegroupname,
          1044 => :sourcerpm,
          1046 => :archivesize,
          1047 => :providename,
          1048 => :requireflags,
          1049 => :requirename,
          1050 => :requireversion,
          1053 => :conflictflags,
          1054 => :conflictname,
          1055 => :conflictversion,
          1064 => :rpmversion,
          1090 => :obsoletename,
          1112 => :provideflags,
          1113 => :provideversion,
          1114 => :obsoleteflags,
          1115 => :obsoleteversion,
          1116 => :dirindexes,
          1117 => :basenames,
          1118 => :dirnames,
          1124 => :payloadformat,
          1125 => :payloadcompressor,
          1126 => :payloadflags,

          # Extended tags
          5000 => :filenames,
          5008 => :longfilesizes,
          5009 => :longsize,
          5013 => :evr,
          5014 => :nvr,
          5016 => :nevra,
          5019 => :epochnum,
        }.freeze

        # Type ID to name mapping
        TYPE_NAMES = {
          TYPE_NULL => :null,
          TYPE_CHAR => :char,
          TYPE_INT8 => :int8,
          TYPE_INT16 => :int16,
          TYPE_INT32 => :int32,
          TYPE_INT64 => :int64,
          TYPE_STRING => :string,
          TYPE_BINARY => :binary,
          TYPE_STRING_ARRAY => :string_array,
          TYPE_I18NSTRING => :i18nstring,
        }.freeze

        # @return [Integer] Tag ID
        attr_reader :tag_id

        # @return [Integer] Tag type
        attr_reader :type_id

        # @return [Integer] Byte offset into data blob
        attr_reader :offset

        # @return [Integer] Count of items
        attr_reader :count

        # @return [String] Data blob reference (shared, not copied)
        attr_reader :data

        # Initialize tag
        #
        # @param tag_id [Integer] Tag identifier
        # @param type_id [Integer] Type identifier
        # @param offset [Integer] Byte offset into data blob
        # @param count [Integer] Item count
        # @param data [String] Reference to data blob
        def initialize(tag_id, type_id, offset, count, data)
          @tag_id = tag_id
          @type_id = type_id
          @offset = offset
          @count = count
          @data = data
          @value = nil
        end

        # Get tag name
        #
        # @return [Symbol, Integer] Tag name or ID if unknown
        def name
          TAG_IDS.fetch(@tag_id, @tag_id)
        end

        # Get type name
        #
        # @return [Symbol, Integer] Type name or ID if unknown
        def type
          TYPE_NAMES.fetch(@type_id, @type_id)
        end

        # Get tag value (extracted on first access, then cached)
        #
        # @return [String, Array] String for :string, :i18nstring, :binary,
        #   :null and unknown types; Array for :string_array, :char and the
        #   integer types
        def value
          @value ||= extract_value
        end

        private

        # Extract value based on type
        #
        # @return [Object] Extracted value
        def extract_value
          case type
          when :string, :i18nstring then extract_string
          when :string_array then extract_string_array
          when :int8 then extract_int8
          when :int16 then extract_int16
          when :int32 then extract_int32
          when :int64 then extract_int64
          when :char then extract_char
          else
            # :binary, :null and unrecognised type IDs are returned raw.
            extract_binary
          end
        end

        # Bytes of the data blob starting at this tag's offset.
        #
        # Uses byte-based slicing because offset and lengths are byte
        # quantities. Never returns nil: an out-of-range offset (or a
        # missing blob) yields an empty string.
        #
        # @param length [Integer, nil] Maximum number of bytes, or nil for
        #   everything up to the end of the blob
        # @return [String]
        def payload(length = nil)
          blob = @data.to_s
          blob.byteslice(@offset, length || blob.bytesize) || ""
        end

        # @return [String] Bytes up to (not including) the first NUL
        def extract_string
          payload.split("\0", 2).first || ""
        end

        # @return [Array<String>] Up to +count+ NUL-terminated strings
        def extract_string_array
          # A positive limit makes split keep empty fields, so entries that
          # are empty strings (including trailing ones) are preserved; the
          # extra field absorbs whatever follows the last wanted string.
          payload.split("\0", @count + 1).first(@count)
        end

        # @return [String] +count+ raw bytes
        def extract_binary
          payload(@count)
        end

        # The "*" repeat decodes only complete items that are actually
        # present, so short data gives fewer values rather than nil entries.

        # @return [Array<Integer>] Unsigned 8-bit values
        def extract_int8
          payload(@count).unpack("C*")
        end

        # @return [Array<Integer>] Unsigned big-endian 16-bit values
        def extract_int16
          payload(2 * @count).unpack("n*")
        end

        # @return [Array<Integer>] Unsigned big-endian 32-bit values
        def extract_int32
          payload(4 * @count).unpack("N*")
        end

        # @return [Array<Integer>] Unsigned big-endian 64-bit values
        def extract_int64
          payload(8 * @count).unpack("Q>*")
        end

        # @return [Array<String>] One single-byte string per item
        def extract_char
          chars = payload(@count)
          chars.unpack("a" * chars.bytesize)
        end
      end
    end
  end
end