cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,513 @@
1
+ # frozen_string_literal: true
2
+
3
require "fileutils"

require_relative "../binary/chm_structures"
require_relative "../compressors/lzx"
require_relative "../system/io_system"
require_relative "../system/memory_handle"
require_relative "../errors"
8
+
9
+ module Cabriolet
10
+ module CHM
11
+ # Compressor for CHM (Compiled HTML Help) files
12
+ class Compressor
13
# GUIDs used in CHM headers (same as parser).
# Array#pack returns a fresh, mutable String even when the file uses
# `frozen_string_literal`, so the results are frozen explicitly to keep
# these shared constants immutable.
GUID1 = [0x10, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
         0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC].pack("C*").freeze
GUID2 = [0x11, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
         0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC].pack("C*").freeze

# Names of the system files that describe the compressed section
CONTENT_NAME = "::DataSpace/Storage/MSCompressed/Content"
CONTROL_NAME = "::DataSpace/Storage/MSCompressed/ControlData"
SPANINFO_NAME = "::DataSpace/Storage/MSCompressed/SpanInfo"
RTABLE_NAME = "::DataSpace/Storage/MSCompressed/Transform/" \
              "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable"

# LZX frame size in bytes; control-data values are expressed in this unit
LZX_FRAME_SIZE = 32_768

# Size in bytes of every directory (PMGL) chunk
DEFAULT_CHUNK_SIZE = 4096
31
+
32
+ attr_reader :io_system, :files
33
+
34
# Create a compressor with an empty file list and default settings
# (current time as timestamp, language 0x0409, 16 window bits).
#
# @param io_system [System::IOSystem, nil] I/O system for file operations;
#   a new System::IOSystem is created when none is given
def initialize(io_system = nil)
  @files = []
  @io_system = io_system || System::IOSystem.new

  # LZX window: stored both as a bit count and as the derived byte size
  @window_bits = 16
  @window_size = 1 << @window_bits

  @language_id = 0x0409 # English (US)
  @timestamp = Time.now.to_i
end
45
+
46
# Register a source file for inclusion in the CHM.
#
# Any +section+ value other than :uncompressed is later treated as
# compressed when the sections are organized.
#
# @param source_path [String] Path to source file (must exist)
# @param chm_path [String] Path within CHM (must start with /)
# @param section [Symbol] :uncompressed or :compressed
# @raise [ArgumentError] if chm_path lacks the leading slash or the
#   source file does not exist
# @return [void]
def add_file(source_path, chm_path, section: :compressed)
  raise ArgumentError, "CHM path must start with /" unless chm_path.start_with?("/")
  raise ArgumentError, "Source file not found: #{source_path}" unless File.exist?(source_path)

  entry = { source: source_path, chm_path: chm_path, section: section }
  @files << entry
end
68
+
69
# Generate the CHM file
#
# All options are validated before any instance state is changed or the
# output file is opened, so a rejected call leaves the compressor usable.
#
# @param output_file [String] Path to output CHM file
# @param options [Hash] Options
# @option options [Integer] :timestamp Custom timestamp
# @option options [Integer] :language_id Language ID
# @option options [Integer] :window_bits LZX window size (15-21);
#   defaults to 16 when omitted
# @raise [ArgumentError] if no files were added or window_bits is not an
#   Integer in 15..21
# @return [Integer] Bytes written
def generate(output_file, **options)
  raise ArgumentError, "No files to compress" if @files.empty?

  # Validate on a local first: shifting by a non-Integer would raise
  # TypeError, and assigning early would leave invalid state behind.
  window_bits = options[:window_bits] || 16
  unless window_bits.is_a?(Integer) && (15..21).cover?(window_bits)
    raise ArgumentError,
          "window_bits must be 15-21, got #{window_bits}"
  end

  @timestamp = options[:timestamp] || @timestamp
  @language_id = options[:language_id] || @language_id
  @window_bits = window_bits
  @window_size = 1 << @window_bits

  output_handle = @io_system.open(output_file, Constants::MODE_WRITE)

  begin
    organize_sections
    compress_section1
    build_directory
    calculate_offsets
    write_chm(output_handle)

    bytes_written = output_handle.tell
    output_handle.close
    bytes_written
  rescue StandardError
    # Do not leave a partially written file behind. FileUtils is required
    # at the top of this file.
    output_handle&.close
    FileUtils.rm_f(output_file)
    raise # re-raise the original exception unchanged
  end
end
119
+
120
+ private
121
+
122
# Split the registered files into section 0 (:uncompressed) and section 1
# (everything else), each ordered by CHM path so that directory ordering
# is consistent.
def organize_sections
  by_path = ->(info) { info[:chm_path] }
  stored, packed = @files.partition { |info| info[:section] == :uncompressed }

  @section0_files = stored
  @section1_files = packed

  @section0_files.sort_by!(&by_path)
  @section1_files.sort_by!(&by_path)
end
139
+
140
# Concatenate all section 1 files into one stream and compress it with LZX.
#
# Records each file's offset and length within the uncompressed stream on
# its file-info hash, and stores the uncompressed length, the compressed
# bytes and their length, and the reset interval in instance variables.
# Does nothing when there are no section 1 files.
def compress_section1
  return if @section1_files.empty?

  # Whole section is held in memory as a single stream
  stream = +""
  @section1_files.each do |info|
    info[:offset] = stream.bytesize
    contents = File.binread(info[:source])
    info[:length] = contents.bytesize
    stream << contents
  end
  @uncompressed_length = stream.bytesize

  # Run the LZX compressor between two in-memory handles
  source = System::MemoryHandle.new(stream, Constants::MODE_READ)
  sink = System::MemoryHandle.new("", Constants::MODE_WRITE)
  lzx = Compressors::LZX.new(@io_system, source, sink, 4096,
                             window_bits: @window_bits)
  lzx.compress

  @compressed_data = sink.buffer
  @compressed_length = @compressed_data.bytesize

  # Reset interval is fixed at two LZX frames
  @reset_interval = LZX_FRAME_SIZE * 2
end
174
+
175
# Build the directory listing and pack it into PMGL chunks.
#
# Section 0 files are laid out back to back (their sizes are read from
# disk here); section 1 files reuse the offset/length already stored on
# their file-info hashes. System-file entries are added only when
# section 1 is non-empty, then everything is sorted by name.
def build_directory
  @directory_entries = []

  # Lay out section 0 files consecutively, tracking the running offset
  @section0_length = @section0_files.reduce(0) do |cursor, info|
    info[:offset] = cursor
    info[:length] = File.size(info[:source])
    @directory_entries << {
      name: info[:chm_path],
      section: 0,
      offset: info[:offset],
      length: info[:length],
    }
    cursor + info[:length]
  end

  @section1_files.each do |info|
    @directory_entries << {
      name: info[:chm_path],
      section: 1,
      offset: info[:offset],
      length: info[:length],
    }
  end

  add_system_files if @section1_files.any?

  @directory_entries.sort_by! { |entry| entry[:name] }
  build_pmgl_chunks
end
216
+
217
# Append directory entries for the four system files that describe the
# compressed section: Content, ControlData, ResetTable and SpanInfo.
#
# All four live in section 0, placed consecutively right after the
# user-supplied section 0 files. The generated ControlData, ResetTable
# and SpanInfo payloads are kept in instance variables.
def add_system_files
  cursor = @section0_length

  # Adds one section 0 entry at the current cursor and advances it
  append = lambda do |name, length|
    entry = { name: name, section: 0, offset: cursor, length: length }
    cursor += length
    @directory_entries << entry
  end

  append.call(CONTENT_NAME, @compressed_length)

  @control_data = build_control_data
  append.call(CONTROL_NAME, @control_data.bytesize)

  @reset_table = build_reset_table
  append.call(RTABLE_NAME, @reset_table.bytesize)

  @span_info = build_span_info
  append.call(SPANINFO_NAME, @span_info.bytesize)
end
255
+
256
# Serialize the LZX control data record.
#
# The reset interval and window size are stored in units of
# LZX_FRAME_SIZE rather than bytes.
#
# @return [String] binary control data
def build_control_data
  frames_per_reset = @reset_interval / LZX_FRAME_SIZE
  frames_per_window = @window_size / LZX_FRAME_SIZE

  record = Binary::LZXControlData.new.tap do |ctl|
    ctl.signature = "LZXC"
    ctl.len = 28
    ctl.version = 2
    ctl.reset_interval = frames_per_reset
    ctl.window_size = frames_per_window
    ctl.cache_size = 0
    ctl.unknown1 = 0
  end

  record.to_binary_s
end
268
+
269
# Serialize the LZX reset table: a header followed by a single 8-byte
# little-endian entry whose value is 0.
#
# @return [String] binary reset table (header + one entry)
def build_reset_table
  header = Binary::LZXResetTableHeader.new.tap do |rt|
    rt.unknown1 = 0
    rt.num_entries = 1
    rt.entry_size = 8
    rt.table_offset = 40
    rt.uncomp_len = @uncompressed_length
    rt.comp_len = @compressed_length
    rt.frame_len = LZX_FRAME_SIZE
  end

  # Only one entry is emitted, pointing at offset 0
  single_entry = [0].pack("Q<")

  header.to_binary_s + single_entry
end
285
+
286
# Serialize the span info: the uncompressed length of section 1 as a
# single 64-bit little-endian integer.
#
# @return [String] 8-byte binary string
def build_span_info
  total_uncompressed = @uncompressed_length
  [total_uncompressed].pack("Q<")
end
290
+
291
+ # Build PMGL chunks from directory entries
292
+ def build_pmgl_chunks
293
+ @chunks = []
294
+ chunk_data = +""
295
+ entries_in_chunk = 0
296
+
297
+ @directory_entries.each do |entry|
298
+ # Encode entry
299
+ entry_data = encode_directory_entry(entry)
300
+
301
+ # Check if this entry fits in current chunk
302
+ # PMGL header (20 bytes) + entry data + quickref (2 bytes per entry) + count (2 bytes)
303
+ chunk_overhead = 20 + ((entries_in_chunk + 1) * 2) + 2
304
+ if chunk_data.bytesize + entry_data.bytesize + chunk_overhead > DEFAULT_CHUNK_SIZE && entries_in_chunk.positive?
305
+ # Finalize current chunk
306
+ @chunks << finalize_pmgl_chunk(chunk_data, entries_in_chunk)
307
+ chunk_data = +""
308
+ entries_in_chunk = 0
309
+ end
310
+
311
+ chunk_data << entry_data
312
+ entries_in_chunk += 1
313
+ end
314
+
315
+ # Finalize last chunk
316
+ if entries_in_chunk.positive?
317
+ @chunks << finalize_pmgl_chunk(chunk_data,
318
+ entries_in_chunk)
319
+ end
320
+ end
321
+
322
# Encode one directory entry as: ENCINT name length, the UTF-8 name
# bytes, then ENCINT section, offset and length.
#
# @param entry [Hash] with :name, :section, :offset and :length
# @return [String] encoded entry
def encode_directory_entry(entry)
  encint = Cabriolet::Binary::ENCINTWriter
  raw_name = entry[:name].encode("UTF-8").b

  fields = [
    encint.encode(raw_name.bytesize), # length counts bytes, not characters
    raw_name,
    encint.encode(entry[:section]),
    encint.encode(entry[:offset]),
    encint.encode(entry[:length]),
  ]

  fields.each_with_object(+"") { |field, buffer| buffer << field }
end
335
+
336
# Finalize a PMGL chunk: header, entry data, zero padding, and the entry
# count in the last two bytes. The result is always exactly
# DEFAULT_CHUNK_SIZE bytes long.
#
# No quickref index entries are written. The header's quickref_size field
# is set to the number of bytes between the end of the entry data and the
# end of the chunk (padding plus the 2-byte entry count) instead of 0, so
# that readers which stop at "chunk end - free space" do not interpret
# the padding as entries.
#
# NOTE(review): prev_chunk/next_chunk are always written as -1, so when
# more than one chunk is produced the chunks are not linked to each
# other - confirm whether consumers rely on that chain.
#
# @param data [String] concatenated encoded directory entries
# @param num_entries [Integer] number of entries contained in +data+
# @raise [ArgumentError] if the entries do not fit into one chunk
# @return [String] the complete chunk
def finalize_pmgl_chunk(data, num_entries)
  header = Binary::PMGLChunkHeader.new
  header.signature = "PMGL"
  header.quickref_size = 0
  header.unknown1 = 0
  header.prev_chunk = -1
  header.next_chunk = -1

  # Measure the serialized header rather than hard-coding its size
  header_size = header.to_binary_s.bytesize
  free_space = DEFAULT_CHUNK_SIZE - header_size - data.bytesize

  # At least the 2-byte entry count must fit; otherwise the chunk would
  # overflow its fixed size and shift everything written after it.
  if free_space < 2
    raise ArgumentError,
          "directory entries (#{data.bytesize} bytes) do not fit in a " \
          "#{DEFAULT_CHUNK_SIZE}-byte PMGL chunk"
  end

  header.quickref_size = free_space

  chunk = +""
  chunk << header.to_binary_s
  chunk << data

  # Zero-fill up to the trailing entry count
  chunk << ("\0" * (free_space - 2))
  chunk << [num_entries].pack("v")

  chunk
end
368
+
369
# Compute the position and size of every region of the CHM file.
#
# Layout, in order: ITSF header, header section table, header section 0,
# header section 1 (ITSP), directory chunks, content section 0.
def calculate_offsets
  # Fixed-size regions
  @itsf_size = 56           # ITSF header (BinData structure size)
  @section_table_size = 40  # header section table (version 3+)
  @hs0_size = 24            # header section 0
  @hs1_size = 84            # header section 1 (ITSP)

  # Each region starts where the previous one ends
  @itsf_offset = 0
  @section_table_offset = @itsf_offset + @itsf_size
  @hs0_offset = @section_table_offset + @section_table_size
  @hs1_offset = @hs0_offset + @hs0_size

  @dir_offset = @hs1_offset + @hs1_size
  @dir_size = @chunks.length * DEFAULT_CHUNK_SIZE

  @cs0_offset = @dir_offset + @dir_size

  # Section 0 holds the uncompressed files plus, when section 1 exists,
  # the compressed stream and its three system files
  system_bytes =
    if @section1_files.any?
      @compressed_length + @control_data.bytesize +
        @reset_table.bytesize + @span_info.bytesize
    else
      0
    end
  @cs0_size = @section0_length + system_bytes

  @total_size = @cs0_offset + @cs0_size
end
404
+
405
# Write every region of the CHM file to +output+ in on-disk order.
#
# @param output [Object] handle that is passed to each write step
def write_chm(output)
  steps = %i[
    write_itsf_header
    write_section_table
    write_header_section0
    write_header_section1
    write_directory
    write_content_section0
  ]

  # Run the steps in order; the last step's result is the return value
  steps.map { |step| send(step, output) }.last
end
414
+
415
# Serialize and write the ITSF header.
#
# @param output [Object] handle responding to #write
def write_itsf_header(output)
  itsf = Binary::CHMITSFHeader.new.tap do |h|
    h.signature = "ITSF"
    h.version = 3
    h.header_len = 96
    h.unknown1 = 1
    h.guid1 = GUID1
    h.guid2 = GUID2
    h.language_id = @language_id
    h.timestamp = @timestamp
  end

  output.write(itsf.to_binary_s)
end
429
+
430
# Write the header section table: offset and size of header section 0,
# offset and size of header section 1, then the content section 0
# offset, each as a 64-bit little-endian integer.
#
# Packed by hand instead of using BinData (BinData doesn't preserve
# assigned values).
#
# @param output [Object] handle responding to #write
def write_section_table(output)
  table_fields = [@hs0_offset, @hs0_size, @hs1_offset, @hs1_size, @cs0_offset]
  packed = table_fields.pack("Q<Q<Q<Q<Q<")

  output.write(packed)
end
443
+
444
# Serialize and write header section 0, which carries the total file
# length; its remaining fields are written as zero.
#
# @param output [Object] handle responding to #write
def write_header_section0(output)
  section = Binary::CHMHeaderSection0.new.tap do |s|
    s.file_len = @total_size
    s.unknown1 = 0
    s.unknown2 = 0
    s.unknown3 = 0
    s.unknown4 = 0
  end

  output.write(section.to_binary_s)
end
455
+
456
# Serialize and write header section 1 (the ITSP directory header).
#
# Describes a directory made only of listing chunks: no index root,
# first chunk 0, last chunk = number of chunks - 1.
#
# @param output [Object] handle responding to #write
def write_header_section1(output)
  chunk_count = @chunks.length
  itsp = Binary::CHMHeaderSection1.new

  # Identification
  itsp.signature = "ITSP"
  itsp.version = 1
  itsp.header_len = 84
  itsp.language_id = @language_id
  itsp.guid = GUID1

  # Directory geometry
  itsp.chunk_size = DEFAULT_CHUNK_SIZE
  itsp.density = 2
  itsp.depth = 1
  itsp.index_root = -1
  itsp.first_pmgl = 0
  itsp.last_pmgl = chunk_count - 1
  itsp.num_chunks = chunk_count

  # Remaining fields use fixed values
  itsp.unknown1 = 10
  itsp.unknown2 = -1
  itsp.unknown3 = 0
  itsp.unknown4 = 0
  itsp.unknown5 = 0
  itsp.unknown6 = 0

  output.write(itsp.to_binary_s)
end
480
+
481
# Write the prepared directory chunks to +output+ in order.
#
# @param output [Object] handle responding to #write
def write_directory(output)
  @chunks.each(&output.method(:write))
end
487
+
488
# Write content section 0: the uncompressed files (re-read from disk),
# followed - only when section 1 has files - by the compressed stream,
# control data, reset table and span info, in that order.
#
# @param output [Object] handle responding to #write
def write_content_section0(output)
  @section0_files.each do |info|
    output.write(File.binread(info[:source]))
  end

  # System files exist only alongside compressed content
  if @section1_files.any?
    [@compressed_data, @control_data, @reset_table].each do |payload|
      output.write(payload)
    end
    output.write(@span_info)
  end
end
511
+ end
512
+ end
513
+ end