cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,382 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module CAB
5
+ # Compressor creates CAB files from source files
6
+ # rubocop:disable Metrics/ClassLength
7
+ class Compressor
8
+ attr_reader :io_system, :files, :compression, :set_id, :cabinet_index
9
+
10
# Build a compressor with an empty file list and default settings.
#
# Defaults set here: MSZIP compression, cabinet index 0 and a set ID
# drawn from rand(0xFFFF).
#
# @param io_system [System::IOSystem, nil] I/O system used for writing;
#   a fresh System::IOSystem is created when none is supplied
def initialize(io_system = nil)
  @files = []
  @compression = :mszip
  @cabinet_index = 0
  @set_id = rand(0xFFFF)
  @io_system = io_system || System::IOSystem.new
end
20
+
21
# Queue a file for inclusion in the cabinet.
#
# The source must exist and be a regular file at the time of the call;
# its contents are not read here.
#
# @param source_path [String] Path to the source file on disk
# @param cab_path [String, nil] Name to store inside the cabinet;
#   defaults to the basename of source_path
# @return [Array<Hash>] the updated list of queued file entries
# @raise [ArgumentError] if source_path does not exist or is not a file
def add_file(source_path, cab_path = nil)
  raise ArgumentError, "File does not exist: #{source_path}" unless ::File.exist?(source_path)
  raise ArgumentError, "Not a file: #{source_path}" unless ::File.file?(source_path)

  stored_name = cab_path || ::File.basename(source_path)
  @files.push({ source: source_path, cab_path: stored_name })
end
41
+
42
# Build the cabinet from the queued files and write it to disk.
#
# Options that are given (and truthy) replace the corresponding instance
# settings and stay in effect for later calls.
#
# @param output_file [String] Path to output CAB file
# @param options [Hash] Options
# @option options [Symbol] :compression Compression type (:none, :mszip, :lzx, :quantum)
# @option options [Integer] :set_id Cabinet set ID
# @option options [Integer] :cabinet_index Cabinet index in set
# @return [Integer] Bytes written
# @raise [ArgumentError] if no files have been added
def generate(output_file, **options)
  raise ArgumentError, "No files to compress" if @files.empty?

  @compression = options[:compression] if options[:compression]
  @set_id = options[:set_id] if options[:set_id]
  @cabinet_index = options[:cabinet_index] if options[:cabinet_index]

  # Gather metadata, lay out the header area, compress, then write.
  infos = collect_file_infos
  layout = calculate_offsets(infos)
  payload = compress_files(infos)

  write_cabinet(output_file, infos, layout, payload)
end
69
+
70
+ private
71
+
72
# Stat every queued file and build the metadata records used by the
# later layout, compression and writing steps.
#
# @return [Array<Hash>] one hash per queued file with the keys
#   :source_path, :cab_path, :size, :mtime and :attribs
def collect_file_infos
  @files.each_with_object([]) do |entry, infos|
    path = entry[:source]
    stat = ::File.stat(path)

    infos << {
      source_path: path,
      cab_path: entry[:cab_path],
      size: stat.size,
      mtime: stat.mtime,
      attribs: calculate_attributes(stat),
    }
  end
end
87
+
88
# Derive the CAB attribute bits for a file from its stat record.
#
# The archive bit is always set; the read-only bit is added when the
# stat reports the file as not writable, and the exec bit when it
# reports the file as executable.
#
# @param stat [File::Stat] stat record of the source file
# @return [Integer] OR-ed Constants::ATTRIB_* flags
def calculate_attributes(stat)
  flags = [Constants::ATTRIB_ARCH]
  flags << Constants::ATTRIB_READONLY unless stat.writable?
  flags << Constants::ATTRIB_EXEC if stat.executable?

  flags.reduce(:|)
end
100
+
101
# Compute where each section of the cabinet starts.
#
# Layout: CFHEADER, then the CFFOLDER entries (a single folder for now),
# then one CFFILE entry per file followed by its null-terminated name,
# then the data blocks.
#
# @param file_infos [Array<Hash>] file records; only :cab_path is read
# @return [Hash{Symbol => Integer}] byte offsets under :folders_offset,
#   :files_offset and :data_offset
def calculate_offsets(file_infos)
  num_folders = 1 # Single folder for now

  folders_offset = Constants::CFHEADER_SIZE
  files_offset = folders_offset + (Constants::CFFOLDER_SIZE * num_folders)

  # Each entry is a fixed-size CFFILE record plus the name and its NUL.
  entries_size = file_infos.sum do |info|
    Constants::CFFILE_SIZE + info[:cab_path].bytesize + 1
  end

  {
    folders_offset: folders_offset,
    files_offset: files_offset,
    data_offset: files_offset + entries_size,
  }
end
127
+
128
# Read every source file, cut it into blocks of at most
# Constants::BLOCK_MAX bytes and compress each block on its own.
#
# Files are chunked one at a time, so a block never spans two files and
# the last block of each file may be shorter than BLOCK_MAX.
# NOTE(review): confirm the target extractors accept short blocks that
# are not the final block of the folder.
#
# @param file_infos [Array<Hash>] file records; only :source_path is read
# @return [Hash] :blocks (Array of hashes with :uncompressed_size,
#   :compressed_size and :data) and :total_uncompressed (Integer)
def compress_files(file_infos)
  blocks = []
  total_uncompressed = 0

  file_infos.each do |info|
    contents = ::File.binread(info[:source_path])
    total_uncompressed += contents.bytesize

    position = 0
    until position >= contents.bytesize
      # Slicing past the end just returns the shorter remainder.
      raw = contents[position, Constants::BLOCK_MAX]
      packed = compress_chunk(raw)

      blocks.push(
        uncompressed_size: raw.bytesize,
        compressed_size: packed.bytesize,
        data: packed,
      )

      position += raw.bytesize
    end
  end

  { blocks: blocks, total_uncompressed: total_uncompressed }
end
161
+
162
# Compress one chunk with the currently selected compression type.
#
# @param data [String] uncompressed chunk
# @return [String] compressed bytes; data itself when compression is :none
# @raise [ArgumentError] if @compression is not a supported type
def compress_chunk(data)
  return data if @compression == :none

  handler = {
    mszip: :compress_mszip,
    lzx: :compress_lzx,
    quantum: :compress_quantum,
  }[@compression]
  raise ArgumentError, "Unsupported compression type: #{@compression}" unless handler

  send(handler, data)
end
177
+
178
# Compress data using MSZIP
#
# @param data [String] uncompressed chunk
# @return [String] contents of the output handle after compression
def compress_mszip(data)
  compress_in_memory(data) do |input, output|
    Compressors::MSZIP.new(@io_system, input, output,
                           Cabriolet.default_buffer_size)
  end
end

# Compress data using LZX (the compressor is created with window_bits: 15)
#
# @param data [String] uncompressed chunk
# @return [String] contents of the output handle after compression
def compress_lzx(data)
  compress_in_memory(data) do |input, output|
    Compressors::LZX.new(@io_system, input, output,
                         Cabriolet.default_buffer_size, window_bits: 15)
  end
end

# Compress data using Quantum (the compressor is created with window_bits: 10)
#
# @param data [String] uncompressed chunk
# @return [String] contents of the output handle after compression
def compress_quantum(data)
  compress_in_memory(data) do |input, output|
    Compressors::Quantum.new(@io_system, input, output,
                             Cabriolet.default_buffer_size, window_bits: 10)
  end
end

# Shared driver for the three codecs: wraps data in a read handle, lets
# the block build a compressor around the input/output handles, runs it
# and returns whatever ended up in the output handle.
#
# @param data [String] uncompressed chunk
# @yieldparam input [System::MemoryHandle] read handle over data
# @yieldparam output [System::MemoryHandle] initially empty write handle
# @yieldreturn [#compress] compressor bound to the two handles
# @return [String] contents of the output handle after compression
def compress_in_memory(data)
  input = System::MemoryHandle.new(data, Constants::MODE_READ)
  output = System::MemoryHandle.new("", Constants::MODE_WRITE)

  yield(input, output).compress

  output.data
end
213
+
214
# Write the whole cabinet: header, folder record, file entries and data
# blocks, in that order. The output handle is closed even when a write
# raises.
#
# @param output_file [String] path of the cabinet to create
# @param file_infos [Array<Hash>] file records (as built by collect_file_infos)
# @param offsets [Hash] section offsets; :files_offset and :data_offset are read
# @param compressed_data [Hash] compression result; only :blocks is read
# @return [Integer] total cabinet size in bytes
def write_cabinet(output_file, file_infos, offsets, compressed_data)
  handle = @io_system.open(output_file, Constants::MODE_WRITE)

  begin
    blocks = compressed_data[:blocks]

    # Everything before the data area plus, per block, a CFDATA header
    # and the compressed payload.
    cabinet_size = blocks.sum(offsets[:data_offset]) do |block|
      Constants::CFDATA_SIZE + block[:compressed_size]
    end

    write_header(handle, file_infos.size, blocks.size,
                 offsets[:files_offset], cabinet_size)
    write_folder(handle, blocks.size, offsets[:data_offset])

    # Each file starts where the previous one ended in the
    # uncompressed folder stream.
    file_infos.inject(0) do |folder_offset, info|
      write_file_entry(handle, info, folder_offset)
      folder_offset + info[:size]
    end

    blocks.each { |block| write_data_block(handle, block) }

    cabinet_size
  ensure
    @io_system.close(handle)
  end
end
250
+
251
# Fill in a CFHEADER record and write it to the handle.
#
# The header always declares one folder, flags 0 (no reserved space, no
# prev/next cabinet) and version 1.3; set ID and cabinet index come from
# the instance settings.
#
# @param handle [Object] open output handle from the I/O system
# @param num_files [Integer] number of CFFILE entries that follow
# @param _num_blocks [Integer] unused
# @param files_offset [Integer] offset of the first CFFILE entry
# @param cabinet_size [Integer] total size of the cabinet in bytes
# @return [Object] whatever the I/O system's write returns
def write_header(handle, num_files, _num_blocks, files_offset,
                 cabinet_size)
  fields = {
    signature: "MSCF",
    reserved1: 0,
    cabinet_size: cabinet_size,
    reserved2: 0,
    files_offset: files_offset,
    reserved3: 0,
    minor_version: 3,
    major_version: 1,
    num_folders: 1, # Single folder for now
    num_files: num_files,
    flags: 0, # No reserved space, no prev/next cabinet
    set_id: @set_id,
    cabinet_index: @cabinet_index,
  }

  header = Binary::CFHeader.new
  fields.each { |name, value| header.public_send("#{name}=", value) }

  @io_system.write(handle, header.to_binary_s)
end
271
+
272
# Fill in the single CFFOLDER record and write it to the handle.
#
# @param handle [Object] open output handle from the I/O system
# @param num_blocks [Integer] number of data blocks in the folder
# @param data_offset [Integer] offset of the folder's first data block
# @return [Object] whatever the I/O system's write returns
def write_folder(handle, num_blocks, data_offset)
  record = Binary::CFFolder.new.tap do |folder|
    folder.data_offset = data_offset
    folder.num_blocks = num_blocks
    folder.comp_type = compression_type_value
  end

  @io_system.write(handle, record.to_binary_s)
end
281
+
282
# Map the selected compression symbol to its Constants::COMP_TYPE_* value.
#
# Anything other than :none, :lzx or :quantum, including an unrecognised
# value, maps to the MSZIP type.
#
# @return [Integer] compression type value for the folder record
def compression_type_value
  case @compression
  when :none then Constants::COMP_TYPE_NONE
  when :lzx then Constants::COMP_TYPE_LZX
  when :quantum then Constants::COMP_TYPE_QUANTUM
  else Constants::COMP_TYPE_MSZIP
  end
end
291
+
292
# Write one CFFILE record followed by the file's cabinet name and a
# terminating NUL byte.
#
# @param handle [Object] open output handle from the I/O system
# @param info [Hash] file record; :size, :mtime, :attribs and :cab_path are read
# @param folder_offset [Integer] start of the file within the folder's
#   uncompressed data
# @return [Object] whatever the I/O system's last write returns
def write_file_entry(handle, info, folder_offset)
  dos_date, dos_time = encode_datetime(info[:mtime])

  entry = Binary::CFFile.new
  entry.uncompressed_size = info[:size]
  entry.folder_offset = folder_offset
  entry.folder_index = 0 # Single folder
  entry.date = dos_date
  entry.time = dos_time
  entry.attribs = info[:attribs]

  @io_system.write(handle, entry.to_binary_s)
  @io_system.write(handle, info[:cab_path])
  @io_system.write(handle, "\x00") # null terminator
end
305
+
306
# Encode a Time as the packed 16-bit date and time words stored in a
# CFFILE entry.
#
# Date word: day in bits 0-4, month in bits 5-8, (year - 1980) in bits
# 9-15. Time word: seconds/2 in bits 0-4, minutes in bits 5-10, hours in
# bits 11-15. The fields of the Time are used as given (no zone
# conversion is applied).
#
# The year field only holds 1980..2107. Timestamps outside that range
# are clamped to the nearest representable instant instead of silently
# wrapping (a 1970 mtime used to come out as 2098).
#
# @param time [Time] timestamp to encode
# @return [Array(Integer, Integer)] [date_bits, time_bits]
def encode_datetime(time)
  year, month, day, hour, min, sec =
    if time.year < 1980
      [1980, 1, 1, 0, 0, 0]
    elsif time.year > 2107
      [2107, 12, 31, 23, 59, 58]
    else
      [time.year, time.month, time.day, time.hour, time.min, time.sec]
    end

  date_bits = (day & 0x1F) |
    ((month & 0x0F) << 5) |
    (((year - 1980) & 0x7F) << 9)

  # Seconds are stored with two-second resolution.
  time_bits = ((sec / 2) & 0x1F) |
    ((min & 0x3F) << 5) |
    ((hour & 0x1F) << 11)

  [date_bits, time_bits]
end
318
+
319
# Write one CFDATA record (header plus payload) to the handle.
#
# The stored checksum is computed in two steps: first over the payload,
# then that value is used as the seed for a checksum over bytes 4..7 of
# the serialized header (the bytes that follow the checksum field).
#
# @param handle [Object] open output handle from the I/O system
# @param block [Hash] block record; :data, :compressed_size and
#   :uncompressed_size are read
# @return [Object] whatever the I/O system's last write returns
def write_data_block(handle, block)
  payload = block[:data]
  payload_sum = calculate_checksum(payload)

  record = Binary::CFData.new.tap do |cfdata|
    cfdata.checksum = payload_sum
    cfdata.compressed_size = block[:compressed_size]
    cfdata.uncompressed_size = block[:uncompressed_size]
  end

  # Fold the size fields into the checksum, seeded with the payload sum.
  size_fields = record.to_binary_s[4, 4]
  record.checksum = calculate_checksum(size_fields, payload_sum)

  @io_system.write(handle, record.to_binary_s)
  @io_system.write(handle, payload)
end
337
+
338
# Compute the CAB data-block checksum.
#
# The data is XOR-folded as little-endian 32-bit words. The 1-3 bytes
# left over at the end are combined with the FIRST leftover byte in the
# highest position (b0 << 16 | b1 << 8 | b2 for three bytes,
# b0 << 8 | b1 for two, b0 for one), as in the reference CSUMCompute
# routine of the cabinet format. The previous implementation combined
# them in the opposite order, which produced wrong checksums for any
# block whose length is not a multiple of four.
# NOTE(review): confirm the Extractor's verification uses the same
# tail-byte order.
#
# @param data [String] bytes to checksum
# @param initial [Integer] seed value, used to chain checksums
# @return [Integer] checksum truncated to 32 bits
def calculate_checksum(data, initial = 0)
  # "V*" decodes every complete little-endian 32-bit word and ignores
  # the incomplete tail.
  cksum = data.unpack("V*").reduce(initial) { |acc, word| acc ^ word }

  tail_size = data.bytesize % 4
  if tail_size.positive?
    tail = data.byteslice(data.bytesize - tail_size, tail_size)
    cksum ^= tail.each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }
  end

  cksum & 0xFFFFFFFF
end
379
+ end
380
+ # rubocop:enable Metrics/ClassLength
381
+ end
382
+ end