cabriolet 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +3 -0
  3. data/lib/cabriolet/binary/bitstream.rb +32 -21
  4. data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
  5. data/lib/cabriolet/cab/compressor.rb +85 -53
  6. data/lib/cabriolet/cab/decompressor.rb +2 -1
  7. data/lib/cabriolet/cab/extractor.rb +170 -121
  8. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  9. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  10. data/lib/cabriolet/checksum.rb +49 -0
  11. data/lib/cabriolet/collections/file_collection.rb +175 -0
  12. data/lib/cabriolet/compressors/quantum.rb +3 -51
  13. data/lib/cabriolet/decompressors/lzx.rb +59 -1
  14. data/lib/cabriolet/decompressors/quantum.rb +81 -52
  15. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  16. data/lib/cabriolet/extraction/extractor.rb +171 -0
  17. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  18. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  19. data/lib/cabriolet/format_base.rb +79 -0
  20. data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
  21. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  22. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  23. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  24. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  25. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  26. data/lib/cabriolet/huffman/encoder.rb +15 -12
  27. data/lib/cabriolet/lit/compressor.rb +45 -689
  28. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  29. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  30. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  31. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  32. data/lib/cabriolet/lit/header_writer.rb +124 -0
  33. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  34. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  35. data/lib/cabriolet/quantum_shared.rb +105 -0
  36. data/lib/cabriolet/version.rb +1 -1
  37. data/lib/cabriolet.rb +114 -3
  38. metadata +38 -4
  39. data/lib/cabriolet/auto.rb +0 -173
  40. data/lib/cabriolet/parallel.rb +0 -333
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "fileutils"
4
+ require_relative "../checksum"
4
5
 
5
6
  module Cabriolet
6
7
  module CAB
@@ -33,25 +34,7 @@ module Cabriolet
33
34
  def extract_file(file, output_path, **options)
34
35
  salvage = options[:salvage] || @decompressor.salvage
35
36
  folder = file.folder
36
-
37
- # Validate file
38
- raise Cabriolet::ArgumentError, "File has no folder" unless folder
39
-
40
- if file.offset > Constants::LENGTH_MAX
41
- raise DecompressionError,
42
- "File offset beyond 2GB limit"
43
- end
44
-
45
- # Check file length
46
- filelen = file.length
47
- if filelen > (Constants::LENGTH_MAX - file.offset)
48
- unless salvage
49
- raise DecompressionError,
50
- "File length exceeds 2GB limit"
51
- end
52
-
53
- filelen = Constants::LENGTH_MAX - file.offset
54
- end
37
+ filelen = validate_file_for_extraction(file, folder, salvage)
55
38
 
56
39
  # Check for merge requirements
57
40
  if folder.needs_prev_merge?
@@ -59,81 +42,22 @@ module Cabriolet
59
42
  "File requires previous cabinet, cabinet set is incomplete"
60
43
  end
61
44
 
62
- # Check file fits within folder
63
- unless salvage
64
- max_len = folder.num_blocks * Constants::BLOCK_MAX
65
- if file.offset > max_len || filelen > (max_len - file.offset)
66
- raise DecompressionError, "File extends beyond folder data"
67
- end
68
- end
45
+ validate_file_in_folder(folder, file.offset, filelen, salvage)
69
46
 
70
47
  # Create output directory if needed
71
48
  output_dir = ::File.dirname(output_path)
72
49
  FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)
73
50
 
74
- # Check if we need to change folder or reset (libmspack lines 1076-1078)
75
- if ENV["DEBUG_BLOCK"]
76
- warn "DEBUG extract_file: Checking reset condition for file #{file.filename} (offset=#{file.offset}, length=#{file.length})"
77
- warn " @current_folder == folder: #{@current_folder == folder} (current=#{@current_folder.object_id}, new=#{folder.object_id})"
78
- warn " @current_offset (#{@current_offset}) > file.offset (#{file.offset}): #{@current_offset > file.offset}"
79
- warn " @current_decomp.nil?: #{@current_decomp.nil?}"
80
- warn " Reset needed?: #{@current_folder != folder || @current_offset > file.offset || !@current_decomp}"
81
- end
82
-
83
- if @current_folder != folder || @current_offset > file.offset || !@current_decomp
84
- if ENV["DEBUG_BLOCK"]
85
- warn "DEBUG extract_file: RESETTING state (creating new BlockReader)"
86
- end
87
-
88
- # Reset state
89
- @current_input&.close
90
- @current_input = nil
91
- @current_decomp = nil
92
-
93
- # Create new input (libmspack lines 1092-1095)
94
- # This BlockReader will be REUSED across all files in this folder
95
- @current_input = BlockReader.new(@io_system, folder.data,
96
- folder.num_blocks, salvage)
97
- @current_folder = folder
98
- @current_offset = 0
99
-
100
- # Create decompressor ONCE and reuse it (this is the key fix!)
101
- # The decompressor maintains bitstream state across files
102
- @current_decomp = @decompressor.create_decompressor(folder,
103
- @current_input, nil)
104
- elsif ENV["DEBUG_BLOCK"]
105
- warn "DEBUG extract_file: NOT resetting (reusing existing BlockReader and decompressor)"
106
- end
107
-
108
- # Skip ahead if needed (libmspack lines 1130-1134)
109
- if file.offset > @current_offset
110
- skip_bytes = file.offset - @current_offset
111
-
112
- # Decompress with NULL output to skip (libmspack line 1130: self->d->outfh = NULL)
113
- null_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
114
-
115
- # Reuse existing decompressor, change output to NULL
116
- @current_decomp.instance_variable_set(:@output, null_output)
117
-
118
- # Set output length for LZX frame limiting
119
- @current_decomp.set_output_length(skip_bytes) if @current_decomp.respond_to?(:set_output_length)
120
-
121
- @current_decomp.decompress(skip_bytes)
122
- @current_offset += skip_bytes
123
- end
51
+ setup_decompressor_for_folder(folder, salvage, file.offset)
52
+ skip_to_file_offset(file.offset, salvage, file.filename)
124
53
 
125
54
  # Extract actual file (libmspack lines 1137-1141)
126
55
  output_fh = @io_system.open(output_path, Constants::MODE_WRITE)
127
56
 
128
57
  begin
129
- # Reuse existing decompressor, change output to real file
130
- @current_decomp.instance_variable_set(:@output, output_fh)
131
-
132
- # Set output length for LZX frame limiting
133
- @current_decomp.set_output_length(filelen) if @current_decomp.respond_to?(:set_output_length)
134
-
135
- @current_decomp.decompress(filelen)
136
- @current_offset += filelen
58
+ write_file_data(output_fh, filelen)
59
+ rescue DecompressionError
60
+ handle_extraction_error(output_fh, output_path, file.filename, salvage, filelen)
137
61
  ensure
138
62
  output_fh.close
139
63
  end
@@ -141,6 +65,15 @@ module Cabriolet
141
65
  filelen
142
66
  end
143
67
 
68
+ # Reset extraction state (used in salvage mode to recover from errors)
69
def reset_state
  # Close the cached block reader (if any) before dropping the reference.
  input = @current_input
  input.close unless input.nil?

  # Forget the cached reader, decompressor and folder, and rewind the
  # folder-relative offset so the next extraction starts from scratch.
  @current_input = @current_decomp = @current_folder = nil
  @current_offset = 0
end
76
+
144
77
  # Extract all files from a cabinet
145
78
  #
146
79
  # @param cabinet [Models::Cabinet] Cabinet to extract from
@@ -149,16 +82,19 @@ module Cabriolet
149
82
  # @option options [Boolean] :preserve_paths Preserve directory structure (default: true)
150
83
  # @option options [Boolean] :set_timestamps Set file modification times (default: true)
151
84
  # @option options [Proc] :progress Progress callback
85
+ # @option options [Boolean] :salvage Skip files that fail to extract (default: false)
152
86
  # @return [Integer] Number of files extracted
153
87
  def extract_all(cabinet, output_dir, **options)
154
88
  preserve_paths = options.fetch(:preserve_paths, true)
155
89
  set_timestamps = options.fetch(:set_timestamps, true)
156
90
  progress = options[:progress]
91
+ salvage = options[:salvage] || false
157
92
 
158
93
  # Create output directory
159
94
  FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)
160
95
 
161
96
  count = 0
97
+ failed_count = 0
162
98
  cabinet.files.each do |file|
163
99
  # Determine output path
164
100
  output_path = if preserve_paths
@@ -168,8 +104,18 @@ module Cabriolet
168
104
  ::File.basename(file.filename))
169
105
  end
170
106
 
171
- # Extract file
172
- extract_file(file, output_path, **options)
107
+ # Extract file (skip if salvage mode and extraction fails)
108
+ begin
109
+ extract_file(file, output_path, **options)
110
+ rescue DecompressionError => e
111
+ if salvage
112
+ warn "Salvage: skipping #{file.filename}: #{e.message}"
113
+ failed_count += 1
114
+ next
115
+ else
116
+ raise
117
+ end
118
+ end
173
119
 
174
120
  # Set timestamp if requested
175
121
  if set_timestamps && file.modification_time
@@ -184,11 +130,148 @@ module Cabriolet
184
130
  progress&.call(file, count, cabinet.files.size)
185
131
  end
186
132
 
133
+ warn "Salvage: #{failed_count} file(s) skipped due to extraction errors" if failed_count.positive?
134
+
187
135
  count
188
136
  end
189
137
 
190
138
  private
191
139
 
140
+ # Validate file for extraction
141
+ #
142
+ # @param file [Models::File] File to validate
143
+ # @param folder [Models::Folder] Folder containing the file
144
+ # @param salvage [Boolean] Salvage mode flag
145
+ # @return [Integer] Validated file length
146
def validate_file_for_extraction(file, folder, salvage)
  raise Cabriolet::ArgumentError, "File has no folder" unless folder

  limit = Constants::LENGTH_MAX
  offset = file.offset
  raise DecompressionError, "File offset beyond 2GB limit" if offset > limit

  # Number of bytes that may still follow the file's offset before the limit.
  room = limit - offset
  length = file.length
  return length unless length > room

  # Over-long file: fatal normally, clamped to the limit in salvage mode.
  raise DecompressionError, "File length exceeds 2GB limit" unless salvage

  room
end
166
+
167
+ # Validate file fits within folder
168
+ #
169
+ # @param folder [Models::Folder] Folder to check
170
+ # @param file_offset [Integer] File offset
171
+ # @param filelen [Integer] File length
172
+ # @param salvage [Boolean] Salvage mode flag
173
def validate_file_in_folder(folder, file_offset, filelen, salvage)
  # Salvage mode skips the bounds check entirely.
  unless salvage
    # Upper bound on the folder's uncompressed size: block count times BLOCK_MAX.
    capacity = folder.num_blocks * Constants::BLOCK_MAX
    fits = file_offset <= capacity && filelen <= (capacity - file_offset)
    raise DecompressionError, "File extends beyond folder data" unless fits
  end
end
181
+
182
+ # Setup decompressor for folder
183
+ #
184
+ # @param folder [Models::Folder] Folder to setup for
185
+ # @param salvage [Boolean] Salvage mode flag
186
+ # @param file_offset [Integer] File offset for reset condition check
187
def setup_decompressor_for_folder(folder, salvage, file_offset)
  debug = ENV["DEBUG_BLOCK"]

  if debug
    warn "DEBUG extract_file: Checking reset condition"
    warn " @current_folder == folder: #{@current_folder == folder}"
    warn " @current_offset (#{@current_offset}) > file_offset (#{file_offset})"
    warn " @current_decomp.nil?: #{@current_decomp.nil?}"
  end

  # A fresh reader/decompressor is required when the folder changed, when the
  # requested offset lies behind the current position, or when nothing is cached.
  needs_reset = @current_folder != folder ||
                @current_offset > file_offset ||
                !@current_decomp

  unless needs_reset
    warn "DEBUG extract_file: NOT resetting (reusing existing BlockReader)" if debug
    return
  end

  warn "DEBUG extract_file: RESETTING state (creating new BlockReader)" if debug

  # Drop the previous reader and decompressor before building replacements.
  @current_input&.close
  @current_input = nil
  @current_decomp = nil

  # New block reader positioned at the start of the folder.
  @current_input = BlockReader.new(@io_system, folder.data, folder.num_blocks, salvage)
  @current_folder = folder
  @current_offset = 0

  # One decompressor per folder; it is kept and reused by later calls.
  @current_decomp = @decompressor.create_decompressor(folder, @current_input, nil)
end
218
+
219
+ # Skip to file offset
220
+ #
221
+ # @param file_offset [Integer] Target offset
222
+ # @param salvage [Boolean] Salvage mode flag
223
+ # @param filename [String] Filename for error messages
224
def skip_to_file_offset(file_offset, salvage, filename)
  # Nothing to skip when the stream is already at (or past) the target.
  return unless file_offset > @current_offset

  skip_bytes = file_offset - @current_offset

  # Decompress the gap into a throw-away in-memory handle.
  null_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
  @current_decomp.instance_variable_set(:@output, null_output)
  @current_decomp.set_output_length(skip_bytes) if @current_decomp.respond_to?(:set_output_length)

  begin
    @current_decomp.decompress(skip_bytes)
  rescue DecompressionError
    if salvage
      warn "Salvage: unable to skip to file #{filename}, resetting state"
      reset_state
    end
    # Always propagate: after a failed skip there is no usable decompressor
    # for this file. Returning normally would let the caller continue with a
    # nil decompressor and an offset that no longer matches the reset state.
    raise
  end

  # Only a successful skip moves the folder-relative position forward.
  @current_offset += skip_bytes
end
245
+
246
+ # Write file data using decompressor
247
+ #
248
+ # @param output_fh [System::FileHandle] Output file handle
249
+ # @param filelen [Integer] Number of bytes to write
250
def write_file_data(output_fh, filelen)
  decomp = @current_decomp

  # Redirect the cached decompressor's output to the destination handle.
  decomp.instance_variable_set(:@output, output_fh)

  # Some decompressors expose an output-length hint; others do not.
  decomp.set_output_length(filelen) if decomp.respond_to?(:set_output_length)

  decomp.decompress(filelen)
  @current_offset += filelen
end
256
+
257
+ # Handle extraction error
258
+ #
259
+ # @param output_fh [System::FileHandle] Output file handle
260
+ # @param output_path [String] Output file path
261
+ # @param filename [String] Filename for error messages
262
+ # @param salvage [Boolean] Salvage mode flag
263
+ # @raise [DecompressionError] If not in salvage mode
264
def handle_extraction_error(output_fh, output_path, filename, salvage, _filelen)
  output_fh.close

  # Outside salvage mode the in-flight exception is propagated unchanged
  # (bare raise re-raises the error currently being handled).
  raise unless salvage

  # Salvage mode: replace the output with an empty file, report it, and
  # discard the cached extraction state.
  ::File.write(output_path, "", mode: "wb")
  warn "Salvage: created empty file for #{filename} due to decompression error"
  reset_state
end
274
+
192
275
  # Set file attributes based on CAB attributes
193
276
  #
194
277
  # @param path [String] File path
@@ -424,41 +507,7 @@ module Cabriolet
424
507
  end
425
508
 
426
509
  def calculate_checksum(data, initial = 0)
427
- cksum = initial
428
- bytes = data.bytes
429
-
430
- # Process 4-byte chunks
431
- (bytes.size / 4).times do |i|
432
- offset = i * 4
433
- value = bytes[offset] |
434
- (bytes[offset + 1] << 8) |
435
- (bytes[offset + 2] << 16) |
436
- (bytes[offset + 3] << 24)
437
- cksum ^= value
438
- end
439
-
440
- # Process remaining bytes
441
- remainder = bytes.size % 4
442
- if remainder.positive?
443
- ul = 0
444
- offset = bytes.size - remainder
445
-
446
- case remainder
447
- when 3
448
- ul |= bytes[offset + 2] << 16
449
- ul |= bytes[offset + 1] << 8
450
- ul |= bytes[offset]
451
- when 2
452
- ul |= bytes[offset + 1] << 8
453
- ul |= bytes[offset]
454
- when 1
455
- ul |= bytes[offset]
456
- end
457
-
458
- cksum ^= ul
459
- end
460
-
461
- cksum & 0xFFFFFFFF
510
+ Checksum.calculate(data, initial)
462
511
  end
463
512
  end
464
513
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fractor"
4
+
5
+ module Cabriolet
6
+ module CAB
7
+ # Work item for compressing a single file in a CAB archive
8
class FileCompressionWork < Fractor::Work
  # Names of the values carried in the work payload; each gets a reader below.
  FIELDS = %i[
    source_path
    compression_method
    block_size
    io_system
    algorithm_factory
  ].freeze

  # Build a work item describing one file to compress.
  #
  # @param source_path [String] Path to source file
  # @param compression_method [Symbol] Compression method to use
  # @param block_size [Integer] Maximum block size
  # @param io_system [System::IOSystem] I/O system
  # @param algorithm_factory [AlgorithmFactory] Algorithm factory
  def initialize(source_path:, compression_method:, block_size:,
                 io_system:, algorithm_factory:)
    payload = {
      source_path: source_path,
      compression_method: compression_method,
      block_size: block_size,
      io_system: io_system,
      algorithm_factory: algorithm_factory,
    }
    super(payload)
  end

  # One reader per payload field, each returning input[<field>].
  FIELDS.each do |field|
    define_method(field) { input[field] }
  end

  # Identifier of this work item: the source path.
  #
  # @return [String]
  def id
    source_path
  end
end
51
+ end
52
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module CAB
5
+ # Worker for compressing files in a CAB archive
6
class FileCompressionWorker < Fractor::Worker
  # Process a file compression work item.
  #
  # Reads the whole source file, splits it into chunks of at most
  # work.block_size bytes and compresses each chunk independently.
  # Any StandardError (including an invalid block size) is reported as an
  # error result instead of being raised.
  #
  # @param work [FileCompressionWork] Work item to process
  # @return [Fractor::WorkResult] Result with compressed blocks, or an error result
  def process(work)
    block_size = work.block_size
    # A zero block size would never advance through the file (endless loop),
    # so reject non-positive sizes up front; the rescue below reports it.
    unless block_size.positive?
      raise ::ArgumentError, "block_size must be positive (got #{block_size.inspect})"
    end

    file_data = ::File.binread(work.source_path)
    file_size = file_data.bytesize

    # One entry per chunk, in file order; an empty file yields no blocks.
    blocks = (0...file_size).step(block_size).map do |offset|
      chunk = file_data.byteslice(offset, block_size)
      compressed_chunk = compress_chunk(chunk, work)

      {
        uncompressed_size: chunk.bytesize,
        compressed_size: compressed_chunk.bytesize,
        data: compressed_chunk,
      }
    end

    Fractor::WorkResult.new(
      result: {
        source_path: work.source_path,
        blocks: blocks,
        total_uncompressed: file_size,
        total_compressed: blocks.sum { |b| b[:compressed_size] },
      },
      work: work,
    )
  rescue StandardError => e
    Fractor::WorkResult.new(
      error: {
        message: e.message,
        class: e.class.name,
        source_path: work.source_path,
      },
      work: work,
    )
  end

  private

  # Compress a single chunk of data.
  #
  # @param chunk [String] Data chunk to compress
  # @param work [FileCompressionWork] Work item with compression settings
  # @return [String] Compressed data
  def compress_chunk(chunk, work)
    # Both ends are in-memory handles.
    input_handle = System::MemoryHandle.new(chunk)
    output_handle = System::MemoryHandle.new("", Constants::MODE_WRITE)

    compressor = work.algorithm_factory.create(
      work.compression_method,
      :compressor,
      work.io_system,
      input_handle,
      output_handle,
      chunk.bytesize,
    )
    compressor.compress

    output_handle.data
  end
end
88
+ end
89
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ # Utility module for checksum calculations
5
module Checksum
  # Calculate CAB-style checksum (XOR-based).
  #
  # Complete 4-byte groups are XORed in as little-endian 32-bit words.
  # The 1-3 trailing bytes, if any, are folded in with the FIRST trailing
  # byte in the most significant position (b0 << 16 | b1 << 8 | b2 for three
  # bytes), as in the reference CSUMCompute routine of the cabinet format.
  #
  # @param data [String] Data to calculate checksum for
  # @param initial [Integer] Initial checksum value (default: 0)
  # @return [Integer] Checksum value (32-bit)
  def self.calculate(data, initial = 0)
    # "V*" decodes every complete little-endian 32-bit word and ignores a
    # trailing partial word.
    cksum = data.unpack("V*").reduce(initial) { |acc, word| acc ^ word }

    # Fold the remaining 0-3 bytes, first byte highest.
    tail_start = data.bytesize - (data.bytesize % 4)
    tail = data.byteslice(tail_start, 3)
    cksum ^= tail.each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }

    cksum & 0xFFFFFFFF
  end
end
49
+ end