cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/README.adoc +703 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +167 -16
  6. data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +108 -84
  13. data/lib/cabriolet/cab/decompressor.rb +16 -20
  14. data/lib/cabriolet/cab/extractor.rb +142 -66
  15. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  16. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  17. data/lib/cabriolet/checksum.rb +49 -0
  18. data/lib/cabriolet/chm/command_handler.rb +227 -0
  19. data/lib/cabriolet/chm/compressor.rb +7 -3
  20. data/lib/cabriolet/chm/decompressor.rb +39 -21
  21. data/lib/cabriolet/chm/parser.rb +5 -2
  22. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  23. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  24. data/lib/cabriolet/cli/command_registry.rb +83 -0
  25. data/lib/cabriolet/cli.rb +356 -607
  26. data/lib/cabriolet/collections/file_collection.rb +175 -0
  27. data/lib/cabriolet/compressors/base.rb +1 -1
  28. data/lib/cabriolet/compressors/lzx.rb +241 -54
  29. data/lib/cabriolet/compressors/mszip.rb +35 -3
  30. data/lib/cabriolet/compressors/quantum.rb +36 -95
  31. data/lib/cabriolet/decompressors/base.rb +1 -1
  32. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  33. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  34. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  35. data/lib/cabriolet/decompressors/quantum.rb +83 -53
  36. data/lib/cabriolet/errors.rb +3 -0
  37. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  38. data/lib/cabriolet/extraction/extractor.rb +171 -0
  39. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  40. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  41. data/lib/cabriolet/file_entry.rb +156 -0
  42. data/lib/cabriolet/file_manager.rb +144 -0
  43. data/lib/cabriolet/format_base.rb +79 -0
  44. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  45. data/lib/cabriolet/hlp/compressor.rb +28 -238
  46. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  47. data/lib/cabriolet/hlp/parser.rb +52 -101
  48. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  49. data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
  50. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  51. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  52. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  53. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  54. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  55. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  56. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  57. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  58. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  59. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  60. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  61. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  62. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  63. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  64. data/lib/cabriolet/huffman/encoder.rb +15 -12
  65. data/lib/cabriolet/huffman/tree.rb +85 -1
  66. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  67. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  68. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  69. data/lib/cabriolet/lit/command_handler.rb +221 -0
  70. data/lib/cabriolet/lit/compressor.rb +119 -168
  71. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  72. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  73. data/lib/cabriolet/lit/decompressor.rb +518 -152
  74. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  75. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  76. data/lib/cabriolet/lit/header_writer.rb +124 -0
  77. data/lib/cabriolet/lit/parser.rb +670 -0
  78. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  79. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  80. data/lib/cabriolet/models/hlp_file.rb +130 -29
  81. data/lib/cabriolet/models/hlp_header.rb +105 -17
  82. data/lib/cabriolet/models/lit_header.rb +212 -25
  83. data/lib/cabriolet/models/szdd_header.rb +10 -2
  84. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  85. data/lib/cabriolet/oab/command_handler.rb +257 -0
  86. data/lib/cabriolet/oab/compressor.rb +17 -8
  87. data/lib/cabriolet/oab/decompressor.rb +41 -10
  88. data/lib/cabriolet/offset_calculator.rb +81 -0
  89. data/lib/cabriolet/plugin.rb +233 -0
  90. data/lib/cabriolet/plugin_manager.rb +453 -0
  91. data/lib/cabriolet/plugin_validator.rb +422 -0
  92. data/lib/cabriolet/quantum_shared.rb +105 -0
  93. data/lib/cabriolet/system/io_system.rb +3 -0
  94. data/lib/cabriolet/system/memory_handle.rb +17 -4
  95. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  96. data/lib/cabriolet/szdd/compressor.rb +15 -11
  97. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  98. data/lib/cabriolet/version.rb +1 -1
  99. data/lib/cabriolet.rb +181 -20
  100. metadata +69 -4
  101. data/lib/cabriolet/auto.rb +0 -173
  102. data/lib/cabriolet/parallel.rb +0 -333
@@ -1,21 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../checksum"
4
+ require_relative "../errors"
5
+
3
6
  module Cabriolet
4
7
  module CAB
5
8
  # Compressor creates CAB files from source files
6
9
  # rubocop:disable Metrics/ClassLength
7
10
  class Compressor
8
- attr_reader :io_system, :files, :compression, :set_id, :cabinet_index
11
+ attr_reader :io_system, :files, :compression, :set_id, :cabinet_index,
12
+ :workers
9
13
 
10
14
  # Initialize a new compressor
11
15
  #
12
16
  # @param io_system [System::IOSystem] I/O system for writing
13
- def initialize(io_system = nil)
17
+ # @param algorithm_factory [AlgorithmFactory, nil] Custom algorithm factory or nil for default
18
+ # @param workers [Integer] Number of parallel worker threads (default: 1 for sequential)
19
+ def initialize(io_system = nil, algorithm_factory = nil, workers: 1)
14
20
  @io_system = io_system || System::IOSystem.new
21
+ @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
15
22
  @files = []
16
23
  @compression = :mszip
17
24
  @set_id = rand(0xFFFF)
18
25
  @cabinet_index = 0
26
+ @workers = workers
19
27
  end
20
28
 
21
29
  # Add a file to the cabinet
@@ -54,6 +62,9 @@ module Cabriolet
54
62
  @set_id = options[:set_id] || @set_id
55
63
  @cabinet_index = options[:cabinet_index] || @cabinet_index
56
64
 
65
+ # Validate and cache compression method value to avoid repeated hash lookups
66
+ @compression_method = compression_type_value
67
+
57
68
  # Collect file information
58
69
  file_infos = collect_file_infos
59
70
 
@@ -127,17 +138,80 @@ module Cabriolet
127
138
 
128
139
  # Compress all files and return block data
129
140
  def compress_files(file_infos)
141
+ return compress_files_sequential(file_infos) if @workers <= 1
142
+
143
+ compress_files_parallel(file_infos)
144
+ end
145
+
146
+ # Compress files using parallel workers via Fractor
147
+ def compress_files_parallel(file_infos)
148
+ require_relative "file_compression_work"
149
+ require_relative "file_compression_worker"
150
+
151
+ compression_method = @compression_method || compression_type_value
152
+
153
+ # Create work items for each file
154
+ work_items = file_infos.map do |info|
155
+ FileCompressionWork.new(
156
+ source_path: info[:source_path],
157
+ compression_method: compression_method,
158
+ block_size: Constants::BLOCK_MAX,
159
+ io_system: @io_system,
160
+ algorithm_factory: @algorithm_factory,
161
+ )
162
+ end
163
+
164
+ # Create worker pool
165
+ worker_pool = Fractor::WorkerPool.new(
166
+ FileCompressionWorker,
167
+ num_workers: @workers,
168
+ )
169
+
170
+ # Submit all work items and wait for completion
171
+ results = worker_pool.process_work(work_items)
172
+
173
+ # Aggregate results in original order
174
+ file_result_map = {}
175
+ total_uncompressed = 0
176
+ all_blocks = []
177
+
178
+ results.each do |result|
179
+ if result.error
180
+ raise DecompressionError,
181
+ "Failed to compress #{result.error[:source_path]}: #{result.error[:message]}"
182
+ end
183
+
184
+ file_result_map[result.result[:source_path]] = result.result
185
+ total_uncompressed += result.result[:total_uncompressed]
186
+ end
187
+
188
+ # Reorder blocks to match original file order
189
+ file_infos.each do |info|
190
+ file_result = file_result_map[info[:source_path]]
191
+ all_blocks.concat(file_result[:blocks])
192
+ end
193
+
194
+ {
195
+ blocks: all_blocks,
196
+ total_uncompressed: total_uncompressed,
197
+ }
198
+ end
199
+
200
+ # Compress files sequentially (original implementation)
201
+ def compress_files_sequential(file_infos)
130
202
  blocks = []
131
203
  total_uncompressed = 0
132
204
 
133
205
  file_infos.each do |info|
134
206
  file_data = ::File.binread(info[:source_path])
135
- total_uncompressed += file_data.bytesize
207
+ file_size = file_data.bytesize
208
+ total_uncompressed += file_size
136
209
 
137
210
  # Split into blocks of max 32KB
138
211
  offset = 0
139
- while offset < file_data.bytesize
140
- chunk_size = [Constants::BLOCK_MAX, file_data.bytesize - offset].min
212
+ while offset < file_size
213
+ remaining = file_size - offset
214
+ chunk_size = [Constants::BLOCK_MAX, remaining].min
141
215
  chunk = file_data[offset, chunk_size]
142
216
 
143
217
  # Compress chunk
@@ -161,54 +235,35 @@ module Cabriolet
161
235
 
162
236
  # Compress a single chunk of data
163
237
  def compress_chunk(data)
164
- case @compression
165
- when :none
166
- data
167
- when :mszip
168
- compress_mszip(data)
169
- when :lzx
170
- compress_lzx(data)
171
- when :quantum
172
- compress_quantum(data)
173
- else
174
- raise ArgumentError, "Unsupported compression type: #{@compression}"
175
- end
176
- end
238
+ return data if @compression == :none
177
239
 
178
- # Compress data using MSZIP
179
- def compress_mszip(data)
180
- input = System::MemoryHandle.new(data, Constants::MODE_READ)
240
+ # Create temporary handles for compression
241
+ input = System::MemoryHandle.new(data)
181
242
  output = System::MemoryHandle.new("", Constants::MODE_WRITE)
182
243
 
183
- compressor = Compressors::MSZIP.new(@io_system, input, output,
184
- Cabriolet.default_buffer_size)
185
- compressor.compress
186
-
187
- output.data
188
- end
189
-
190
- # Compress data using LZX
191
- def compress_lzx(data)
192
- input = System::MemoryHandle.new(data, Constants::MODE_READ)
193
- output = System::MemoryHandle.new("", Constants::MODE_WRITE)
194
-
195
- compressor = Compressors::LZX.new(@io_system, input, output,
196
- Cabriolet.default_buffer_size, window_bits: 15)
197
- compressor.compress
198
-
199
- output.data
200
- end
201
-
202
- # Compress data using Quantum
203
- def compress_quantum(data)
204
- input = System::MemoryHandle.new(data, Constants::MODE_READ)
205
- output = System::MemoryHandle.new("", Constants::MODE_WRITE)
244
+ # Use cached compression method value (calculated in generate)
245
+ # Fallback to calculation if not yet cached
246
+ compression_method = @compression_method || compression_type_value
247
+
248
+ # Determine window bits based on compression type
249
+ window_bits = case @compression
250
+ when :lzx then 15
251
+ when :quantum then 10
252
+ end
253
+
254
+ compressor = @algorithm_factory.create(
255
+ compression_method,
256
+ :compressor,
257
+ @io_system,
258
+ input,
259
+ output,
260
+ data.bytesize,
261
+ window_bits: window_bits,
262
+ )
206
263
 
207
- compressor = Compressors::Quantum.new(@io_system, input, output,
208
- Cabriolet.default_buffer_size, window_bits: 10)
209
264
  compressor.compress
210
-
211
- output.data
265
+ output.rewind
266
+ output.read
212
267
  end
213
268
 
214
269
  # Write the complete cabinet file
@@ -286,7 +341,10 @@ cabinet_size)
286
341
  mszip: Constants::COMP_TYPE_MSZIP,
287
342
  lzx: Constants::COMP_TYPE_LZX,
288
343
  quantum: Constants::COMP_TYPE_QUANTUM,
289
- }.fetch(@compression, Constants::COMP_TYPE_MSZIP)
344
+ }.fetch(@compression) do
345
+ raise ArgumentError,
346
+ "Unsupported compression type: #{@compression}"
347
+ end
290
348
  end
291
349
 
292
350
  # Write CFFILE entry
@@ -339,41 +397,7 @@ cabinet_size)
339
397
  # Same algorithm as used in Extractor
340
398
  # rubocop:disable Metrics/MethodLength
341
399
  def calculate_checksum(data, initial = 0)
342
- cksum = initial
343
- bytes = data.bytes
344
-
345
- # Process 4-byte chunks
346
- (bytes.size / 4).times do |i|
347
- offset = i * 4
348
- value = bytes[offset] |
349
- (bytes[offset + 1] << 8) |
350
- (bytes[offset + 2] << 16) |
351
- (bytes[offset + 3] << 24)
352
- cksum ^= value
353
- end
354
-
355
- # Process remaining bytes
356
- remainder = bytes.size % 4
357
- if remainder.positive?
358
- ul = 0
359
- offset = bytes.size - remainder
360
-
361
- case remainder
362
- when 3
363
- ul |= bytes[offset + 2] << 16
364
- ul |= bytes[offset + 1] << 8
365
- ul |= bytes[offset]
366
- when 2
367
- ul |= bytes[offset + 1] << 8
368
- ul |= bytes[offset]
369
- when 1
370
- ul |= bytes[offset]
371
- end
372
-
373
- cksum ^= ul
374
- end
375
-
376
- cksum & 0xFFFFFFFF
400
+ Checksum.calculate(data, initial)
377
401
  end
378
402
  # rubocop:enable Metrics/MethodLength
379
403
  end
@@ -10,8 +10,10 @@ module Cabriolet
10
10
  # Initialize a new CAB decompressor
11
11
  #
12
12
  # @param io_system [System::IOSystem, nil] Custom I/O system or nil for default
13
- def initialize(io_system = nil)
13
+ # @param algorithm_factory [AlgorithmFactory, nil] Custom algorithm factory or nil for default
14
+ def initialize(io_system = nil, algorithm_factory = nil)
14
15
  @io_system = io_system || System::IOSystem.new
16
+ @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
15
17
  @parser = Parser.new(@io_system)
16
18
  @buffer_size = Cabriolet.default_buffer_size
17
19
  @fix_mszip = false
@@ -57,24 +59,17 @@ module Cabriolet
57
59
  # @param output [System::FileHandle, System::MemoryHandle] Output handle
58
60
  # @return [Decompressors::Base] Appropriate decompressor instance
59
61
  def create_decompressor(folder, input, output)
60
- case folder.compression_method
61
- when Constants::COMP_TYPE_NONE
62
- Decompressors::None.new(@io_system, input, output, @buffer_size)
63
- when Constants::COMP_TYPE_MSZIP
64
- Decompressors::MSZIP.new(@io_system, input, output, @buffer_size,
65
- fix_mszip: @fix_mszip)
66
- when Constants::COMP_TYPE_LZX
67
- window_bits = folder.compression_level
68
- Decompressors::LZX.new(@io_system, input, output, @buffer_size,
69
- window_bits: window_bits)
70
- when Constants::COMP_TYPE_QUANTUM
71
- window_bits = folder.compression_level
72
- Decompressors::Quantum.new(@io_system, input, output, @buffer_size,
73
- window_bits: window_bits)
74
- else
75
- raise UnsupportedFormatError,
76
- "Unsupported compression type: #{folder.compression_method}"
77
- end
62
+ @algorithm_factory.create(
63
+ folder.compression_method,
64
+ :decompressor,
65
+ @io_system,
66
+ input,
67
+ output,
68
+ @buffer_size,
69
+ fix_mszip: @fix_mszip,
70
+ salvage: @salvage,
71
+ window_bits: folder.compression_level,
72
+ )
78
73
  end
79
74
 
80
75
  # Append a cabinet to another, merging their folders and files
@@ -102,7 +97,8 @@ module Cabriolet
102
97
  # @param filename [String] Path to file to search
103
98
  # @return [Models::Cabinet, nil] First cabinet found, or nil if none found
104
99
  def search(filename)
105
- search_buf = Array.new(@search_buffer_size)
100
+ # Reuse search buffer across searches for better performance
101
+ search_buf = @search_buffer ||= Array.new(@search_buffer_size)
106
102
  first_cabinet = nil
107
103
  link_cabinet = nil
108
104
  first_len = 0
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "fileutils"
4
+ require_relative "../checksum"
4
5
 
5
6
  module Cabriolet
6
7
  module CAB
@@ -15,6 +16,12 @@ module Cabriolet
15
16
  def initialize(io_system, decompressor)
16
17
  @io_system = io_system
17
18
  @decompressor = decompressor
19
+
20
+ # State reuse for multi-file extraction (like libmspack self->d)
21
+ @current_folder = nil
22
+ @current_decomp = nil
23
+ @current_input = nil
24
+ @current_offset = 0
18
25
  end
19
26
 
20
27
  # Extract a single file from the cabinet
@@ -45,7 +52,6 @@ module Cabriolet
45
52
  end
46
53
 
47
54
  filelen = Constants::LENGTH_MAX - file.offset
48
-
49
55
  end
50
56
 
51
57
  # Check for merge requirements
@@ -66,38 +72,74 @@ module Cabriolet
66
72
  output_dir = ::File.dirname(output_path)
67
73
  FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)
68
74
 
69
- # Create input wrapper that reads CFDATA blocks across cabinets
70
- input_handle = BlockReader.new(@io_system, folder.data,
71
- folder.num_blocks, salvage)
75
+ # Check if we need to change folder or reset (libmspack lines 1076-1078)
76
+ if ENV["DEBUG_BLOCK"]
77
+ warn "DEBUG extract_file: Checking reset condition for file #{file.filename} (offset=#{file.offset}, length=#{file.length})"
78
+ warn " @current_folder == folder: #{@current_folder == folder} (current=#{@current_folder.object_id}, new=#{folder.object_id})"
79
+ warn " @current_offset (#{@current_offset}) > file.offset (#{file.offset}): #{@current_offset > file.offset}"
80
+ warn " @current_decomp.nil?: #{@current_decomp.nil?}"
81
+ warn " Reset needed?: #{@current_folder != folder || @current_offset > file.offset || !@current_decomp}"
82
+ end
83
+
84
+ if @current_folder != folder || @current_offset > file.offset || !@current_decomp
85
+ if ENV["DEBUG_BLOCK"]
86
+ warn "DEBUG extract_file: RESETTING state (creating new BlockReader)"
87
+ end
88
+
89
+ # Reset state
90
+ @current_input&.close
91
+ @current_input = nil
92
+ @current_decomp = nil
93
+
94
+ # Create new input (libmspack lines 1092-1095)
95
+ # This BlockReader will be REUSED across all files in this folder
96
+ @current_input = BlockReader.new(@io_system, folder.data,
97
+ folder.num_blocks, salvage)
98
+ @current_folder = folder
99
+ @current_offset = 0
100
+
101
+ # Create decompressor ONCE and reuse it (this is the key fix!)
102
+ # The decompressor maintains bitstream state across files
103
+ @current_decomp = @decompressor.create_decompressor(folder,
104
+ @current_input, nil)
105
+ elsif ENV["DEBUG_BLOCK"]
106
+ warn "DEBUG extract_file: NOT resetting (reusing existing BlockReader and decompressor)"
107
+ end
108
+
109
+ # Skip ahead if needed (libmspack lines 1130-1134)
110
+ if file.offset > @current_offset
111
+ skip_bytes = file.offset - @current_offset
112
+
113
+ # Decompress with NULL output to skip (libmspack line 1130: self->d->outfh = NULL)
114
+ null_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
115
+
116
+ # Reuse existing decompressor, change output to NULL
117
+ @current_decomp.instance_variable_set(:@output, null_output)
118
+
119
+ # Set output length for LZX frame limiting
120
+ @current_decomp.set_output_length(skip_bytes) if @current_decomp.respond_to?(:set_output_length)
121
+
122
+ @current_decomp.decompress(skip_bytes)
123
+ @current_offset += skip_bytes
124
+ end
125
+
126
+ # Extract actual file (libmspack lines 1137-1141)
127
+ output_fh = @io_system.open(output_path, Constants::MODE_WRITE)
72
128
 
73
129
  begin
74
- # Create output file
75
- output_fh = @io_system.open(output_path, Constants::MODE_WRITE)
76
-
77
- begin
78
- # Create decompressor
79
- decomp = @decompressor.create_decompressor(folder, input_handle,
80
- output_fh)
81
-
82
- # Skip to file offset if needed
83
- if file.offset.positive?
84
- # Decompress and discard bytes before file start
85
- temp_output = System::MemoryHandle.new("", Constants::MODE_WRITE)
86
- temp_decomp = @decompressor.create_decompressor(folder,
87
- input_handle, temp_output)
88
- temp_decomp.decompress(file.offset)
89
- end
130
+ # Reuse existing decompressor, change output to real file
131
+ @current_decomp.instance_variable_set(:@output, output_fh)
90
132
 
91
- # Decompress the file
92
- decomp.decompress(filelen)
133
+ # Set output length for LZX frame limiting
134
+ @current_decomp.set_output_length(filelen) if @current_decomp.respond_to?(:set_output_length)
93
135
 
94
- filelen
95
- ensure
96
- output_fh.close
97
- end
136
+ @current_decomp.decompress(filelen)
137
+ @current_offset += filelen
98
138
  ensure
99
- input_handle.close
139
+ output_fh.close
100
140
  end
141
+
142
+ filelen
101
143
  end
102
144
 
103
145
  # Extract all files from a cabinet
@@ -192,11 +234,28 @@ module Cabriolet
192
234
  end
193
235
 
194
236
  def read(bytes)
237
+ # Early return if we've already exhausted all blocks and buffer
238
+ if @current_block >= @num_blocks && @buffer_pos >= @buffer.bytesize
239
+ if ENV["DEBUG_BLOCK"]
240
+ warn "DEBUG BlockReader.read(#{bytes}): Already exhausted, returning empty"
241
+ end
242
+ return +""
243
+ end
244
+
195
245
  result = +""
196
246
 
247
+ if ENV["DEBUG_BLOCK"]
248
+ warn "DEBUG BlockReader.read(#{bytes}): buffer_size=#{@buffer.bytesize} buffer_pos=#{@buffer_pos} block=#{@current_block}/#{@num_blocks}"
249
+ end
250
+
197
251
  while result.bytesize < bytes
198
252
  # Read more data if buffer is empty
199
- break if (@buffer_pos >= @buffer.bytesize) && !read_next_block
253
+ if (@buffer_pos >= @buffer.bytesize) && !read_next_block
254
+ if ENV["DEBUG_BLOCK"]
255
+ warn "DEBUG BlockReader.read: EXHAUSTED at result.bytesize=#{result.bytesize} (wanted #{bytes})"
256
+ end
257
+ break
258
+ end
200
259
 
201
260
  # Copy from buffer
202
261
  available = @buffer.bytesize - @buffer_pos
@@ -206,6 +265,10 @@ module Cabriolet
206
265
  @buffer_pos += to_copy
207
266
  end
208
267
 
268
+ if ENV["DEBUG_BLOCK"]
269
+ warn "DEBUG BlockReader.read: returning #{result.bytesize} bytes"
270
+ end
271
+
209
272
  result
210
273
  end
211
274
 
@@ -226,15 +289,39 @@ module Cabriolet
226
289
  private
227
290
 
228
291
  def read_next_block
229
- return false if @current_block >= @num_blocks
292
+ if ENV["DEBUG_BLOCK"]
293
+ warn "DEBUG read_next_block: current_block=#{@current_block} num_blocks=#{@num_blocks}"
294
+ end
295
+
296
+ if @current_block >= @num_blocks
297
+ if ENV["DEBUG_BLOCK"]
298
+ warn "DEBUG read_next_block: EXHAUSTED (current_block >= num_blocks)"
299
+ end
300
+ return false
301
+ end
230
302
 
231
303
  # Read blocks, potentially spanning multiple cabinets
232
304
  accumulated_data = +""
233
305
 
234
306
  loop do
235
307
  # Read CFDATA header
308
+ if ENV["DEBUG_BLOCK"]
309
+ handle_pos = @cab_handle.tell
310
+ warn "DEBUG read_next_block: About to read CFDATA header at position #{handle_pos}"
311
+ end
312
+
236
313
  header_data = @cab_handle.read(Constants::CFDATA_SIZE)
237
- return false if header_data.bytesize != Constants::CFDATA_SIZE
314
+
315
+ if ENV["DEBUG_BLOCK"]
316
+ warn "DEBUG read_next_block: Read #{header_data.bytesize} bytes (expected #{Constants::CFDATA_SIZE})"
317
+ end
318
+
319
+ if header_data.bytesize != Constants::CFDATA_SIZE
320
+ if ENV["DEBUG_BLOCK"]
321
+ warn "DEBUG read_next_block: FAILED - header read returned #{header_data.bytesize} bytes"
322
+ end
323
+ return false
324
+ end
238
325
 
239
326
  cfdata = Binary::CFData.read(header_data)
240
327
 
@@ -258,8 +345,22 @@ module Cabriolet
258
345
  end
259
346
 
260
347
  # Read compressed data
348
+ if ENV["DEBUG_BLOCK"]
349
+ warn "DEBUG read_next_block: About to read #{cfdata.compressed_size} bytes of compressed data"
350
+ end
351
+
261
352
  compressed_data = @cab_handle.read(cfdata.compressed_size)
262
- return false if compressed_data.bytesize != cfdata.compressed_size
353
+
354
+ if ENV["DEBUG_BLOCK"]
355
+ warn "DEBUG read_next_block: Read #{compressed_data.bytesize} bytes of compressed data (expected #{cfdata.compressed_size})"
356
+ end
357
+
358
+ if compressed_data.bytesize != cfdata.compressed_size
359
+ if ENV["DEBUG_BLOCK"]
360
+ warn "DEBUG read_next_block: FAILED - compressed data read returned #{compressed_data.bytesize} bytes"
361
+ end
362
+ return false
363
+ end
263
364
 
264
365
  # Verify checksum if present and not in salvage mode
265
366
  if cfdata.checksum.positive? && !@salvage
@@ -299,9 +400,18 @@ module Cabriolet
299
400
  end
300
401
 
301
402
  def open_current_cabinet
403
+ if ENV["DEBUG_BLOCK"]
404
+ warn "DEBUG open_current_cabinet: filename=#{@current_data.cabinet.filename} offset=#{@current_data.offset}"
405
+ end
406
+
302
407
  @cab_handle&.close
303
408
  @cab_handle = @io_system.open(@current_data.cabinet.filename, Constants::MODE_READ)
304
409
  @cab_handle.seek(@current_data.offset, Constants::SEEK_START)
410
+
411
+ if ENV["DEBUG_BLOCK"]
412
+ actual_pos = @cab_handle.tell
413
+ warn "DEBUG open_current_cabinet: seeked to position #{actual_pos} (expected #{@current_data.offset})"
414
+ end
305
415
  end
306
416
 
307
417
  def advance_to_next_cabinet
@@ -315,41 +425,7 @@ module Cabriolet
315
425
  end
316
426
 
317
427
  def calculate_checksum(data, initial = 0)
318
- cksum = initial
319
- bytes = data.bytes
320
-
321
- # Process 4-byte chunks
322
- (bytes.size / 4).times do |i|
323
- offset = i * 4
324
- value = bytes[offset] |
325
- (bytes[offset + 1] << 8) |
326
- (bytes[offset + 2] << 16) |
327
- (bytes[offset + 3] << 24)
328
- cksum ^= value
329
- end
330
-
331
- # Process remaining bytes
332
- remainder = bytes.size % 4
333
- if remainder.positive?
334
- ul = 0
335
- offset = bytes.size - remainder
336
-
337
- case remainder
338
- when 3
339
- ul |= bytes[offset + 2] << 16
340
- ul |= bytes[offset + 1] << 8
341
- ul |= bytes[offset]
342
- when 2
343
- ul |= bytes[offset + 1] << 8
344
- ul |= bytes[offset]
345
- when 1
346
- ul |= bytes[offset]
347
- end
348
-
349
- cksum ^= ul
350
- end
351
-
352
- cksum & 0xFFFFFFFF
428
+ Checksum.calculate(data, initial)
353
429
  end
354
430
  end
355
431
  end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fractor"
4
+
5
+ module Cabriolet
6
+ module CAB
7
+ # Work item for compressing a single file in a CAB archive
8
+ class FileCompressionWork < Fractor::Work
9
+ # Initialize work item for file compression
10
+ #
11
+ # @param source_path [String] Path to source file
12
+ # @param compression_method [Symbol] Compression method to use
13
+ # @param block_size [Integer] Maximum block size
14
+ # @param io_system [System::IOSystem] I/O system
15
+ # @param algorithm_factory [AlgorithmFactory] Algorithm factory
16
+ def initialize(source_path:, compression_method:, block_size:,
17
+ io_system:, algorithm_factory:)
18
+ super({
19
+ source_path: source_path,
20
+ compression_method: compression_method,
21
+ block_size: block_size,
22
+ io_system: io_system,
23
+ algorithm_factory: algorithm_factory,
24
+ })
25
+ end
26
+
27
+ def source_path
28
+ input[:source_path]
29
+ end
30
+
31
+ def compression_method
32
+ input[:compression_method]
33
+ end
34
+
35
+ def block_size
36
+ input[:block_size]
37
+ end
38
+
39
+ def io_system
40
+ input[:io_system]
41
+ end
42
+
43
+ def algorithm_factory
44
+ input[:algorithm_factory]
45
+ end
46
+
47
+ def id
48
+ source_path
49
+ end
50
+ end
51
+ end
52
+ end