cabriolet 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/README.adoc +3 -0
  3. data/lib/cabriolet/binary/bitstream.rb +32 -21
  4. data/lib/cabriolet/binary/bitstream_writer.rb +21 -4
  5. data/lib/cabriolet/cab/compressor.rb +85 -53
  6. data/lib/cabriolet/cab/decompressor.rb +2 -1
  7. data/lib/cabriolet/cab/extractor.rb +170 -121
  8. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  9. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  10. data/lib/cabriolet/checksum.rb +49 -0
  11. data/lib/cabriolet/collections/file_collection.rb +175 -0
  12. data/lib/cabriolet/compressors/quantum.rb +3 -51
  13. data/lib/cabriolet/decompressors/lzx.rb +59 -1
  14. data/lib/cabriolet/decompressors/quantum.rb +81 -52
  15. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  16. data/lib/cabriolet/extraction/extractor.rb +171 -0
  17. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  18. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  19. data/lib/cabriolet/format_base.rb +79 -0
  20. data/lib/cabriolet/hlp/quickhelp/compressor.rb +28 -503
  21. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  22. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  23. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  24. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  25. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  26. data/lib/cabriolet/huffman/encoder.rb +15 -12
  27. data/lib/cabriolet/lit/compressor.rb +45 -689
  28. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  29. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  30. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  31. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  32. data/lib/cabriolet/lit/header_writer.rb +124 -0
  33. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  34. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  35. data/lib/cabriolet/quantum_shared.rb +105 -0
  36. data/lib/cabriolet/version.rb +1 -1
  37. data/lib/cabriolet.rb +114 -3
  38. metadata +38 -4
  39. data/lib/cabriolet/auto.rb +0 -173
  40. data/lib/cabriolet/parallel.rb +0 -333
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b9c71e143078c57e021b1475f9b576e97f63ab7d483451dcb5e48607909cb6ef
4
- data.tar.gz: 155a9b0daeffa6ad54a081cd00f21d941d060cb2fd7e5076fd9bafa447833c5c
3
+ metadata.gz: 9d2875b9afed58332c6e9823f3f383a60d802bac8514cb2015fdbd6a7f1559dc
4
+ data.tar.gz: 6979ce57ad3d47867bed19330d70b5db483bc33ac8920ee2986faa35828d6cbc
5
5
  SHA512:
6
- metadata.gz: 7e64844488da8e4ff630682c2269cab50ba31918d9490d1a6d20a63568a1dd98e05801a21a4877715397f85c2e37d484e571ce07e60a7ad587a88d66ff5d501f
7
- data.tar.gz: e48eaabf49eade967b714a8ac471ee5801394674c44355c390363540c88692b991f3c046f3cc0f3af7489c4f05face6b5ab270db73b326a864293132aa6325ef
6
+ metadata.gz: edd7b1345bee36e75fb5796a3840f087a9dfa51e41c599fa767938e8d64ab0abc985e563b9fb246fc86fc4798eb3f98539f774023eaf613f50a1c9e4a2a6d518
7
+ data.tar.gz: c5cfc76ae8dc5239efa9e90d0956d2518eab56c669d4b7e4087debc4955f509cc97440296c53084152cb30059b25044b6231cd9a84ee94e85f446a20f3306e08
data/README.adoc CHANGED
@@ -3,6 +3,9 @@
3
3
  image:https://img.shields.io/gem/v/cabriolet.svg[RubyGems Version, link=https://rubygems.org/gems/cabriolet]
4
4
  image:https://img.shields.io/github/license/omnizip/cabriolet.svg[License]
5
5
 
6
+ image:https://img.shields.io/badge/Website-Cabriolet_documentation-blue.svg["Documentation site", link="https://omnizip.github.io/cabriolet"]
7
+
8
+
6
9
  Pure Ruby implementation for extracting and creating Microsoft compression
7
10
  format files.
8
11
 
data/lib/cabriolet/binary/bitstream.rb CHANGED
@@ -51,6 +51,24 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
51
51
 
52
52
  private
53
53
 
54
+ # Read 2 bytes as a little-endian 16-bit word for MSB mode
55
+ # This is a shared helper for read_bits_msb and peek_bits
56
+ #
57
+ # @return [Integer] 16-bit word, or nil if at EOF and not in salvage mode
58
+ def read_msb_word
59
+ byte0 = read_byte
60
+ if byte0.nil? && (@salvage || @input_end)
61
+ byte0 = 0
62
+ end
63
+
64
+ byte1 = read_byte
65
+ if byte1.nil?
66
+ byte1 = 0
67
+ end
68
+
69
+ byte0 | (byte1 << 8)
70
+ end
71
+
54
72
  # Read bits in LSB-first order
55
73
  #
56
74
  # Per libmspack: EOF handling allows padding to avoid bitstream overrun.
@@ -95,34 +113,17 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
95
113
  def read_bits_msb(num_bits)
96
114
  # Ensure we have enough bits in the buffer
97
115
  while @bits_left < num_bits
98
- # Read 2 bytes at a time (little-endian), like libmspack
99
- byte0 = read_byte
100
- if byte0.nil? && (@salvage || @input_end)
101
- # First EOF: pad with zeros
102
- # Second EOF: read_byte will raise DecompressionError
103
- byte0 = 0
104
- end
105
-
106
- byte1 = read_byte
107
- if byte1.nil?
108
- # Pad with 0 if only 1 byte left (or EOF)
109
- byte1 = 0
110
- end
111
-
112
- # Combine as little-endian 16-bit value
113
- word = byte0 | (byte1 << 8)
116
+ word = read_msb_word
114
117
 
115
118
  # DEBUG
116
- warn "DEBUG MSB read_bytes: byte0=0x#{byte0.to_s(16)} byte1=0x#{byte1.to_s(16)} word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
119
+ warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
117
120
 
118
121
  # INJECT_BITS (MSB): inject at the left side
119
- # bit_buffer |= word << (BITBUF_WIDTH -16 - bits_left)
120
122
  @bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
121
123
  @bits_left += 16
122
124
  end
123
125
 
124
126
  # PEEK_BITS (MSB): extract from the left
125
- # result = bit_buffer >> (BITBUF_WIDTH - num_bits)
126
127
  result = @bit_buffer >> (@bitbuf_width - num_bits)
127
128
 
128
129
  # REMOVE_BITS (MSB): shift left
@@ -251,9 +252,19 @@ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
251
252
  # @return [Integer] Bits as an integer
252
253
  def read_bits_be(num_bits)
253
254
  result = 0
254
- num_bits.times do
255
- result = (result << 1) | read_bits(1)
255
+ full_bytes = num_bits / 8
256
+ remaining_bits = num_bits % 8
257
+
258
+ # Read full bytes first (more efficient than bit-by-bit)
259
+ full_bytes.times do
260
+ result = (result << 8) | read_bits(8)
256
261
  end
262
+
263
+ # Read remaining bits
264
+ if remaining_bits.positive?
265
+ result = (result << remaining_bits) | read_bits(remaining_bits)
266
+ end
267
+
257
268
  result
258
269
  end
259
270
 
data/lib/cabriolet/binary/bitstream_writer.rb CHANGED
@@ -4,6 +4,10 @@ module Cabriolet
4
4
  module Binary
5
5
  # BitstreamWriter provides bit-level I/O operations for writing compressed data
6
6
  class BitstreamWriter
7
+ # Pre-computed byte constants for fast single-byte writes
8
+ # Avoids repeated array packing for each byte written
9
+ BYTE_CONSTANTS = Array.new(256) { |i| [i].pack("C") }.freeze
10
+
7
11
  attr_reader :io_system, :handle, :buffer_size
8
12
 
9
13
  # Initialize a new bitstream writer
@@ -129,7 +133,8 @@ module Cabriolet
129
133
  # @param byte [Integer] Byte value to write
130
134
  # @return [void]
131
135
  def write_byte(byte)
132
- data = [byte].pack("C")
136
+ # Use pre-encoded byte constant for better performance
137
+ data = BYTE_CONSTANTS[byte]
133
138
  # DEBUG
134
139
  if ENV["DEBUG_BITSTREAM"]
135
140
  warn "DEBUG write_byte: pos=#{@bits_in_buffer} byte=#{byte} (#{byte.to_s(2).rjust(
@@ -217,9 +222,21 @@ module Cabriolet
217
222
  # @param num_bits [Integer] Number of bits to write
218
223
  # @return [void]
219
224
  def write_bits_be(value, num_bits)
220
- num_bits.times do |i|
221
- bit = (value >> (num_bits - 1 - i)) & 1
222
- write_bits(bit, 1)
225
+ # Write full bytes first for better performance
226
+ full_bytes = num_bits / 8
227
+ remaining_bits = num_bits % 8
228
+
229
+ # Write complete bytes MSB first
230
+ full_bytes.times do |i|
231
+ byte_shift = num_bits - 8 - (i * 8)
232
+ byte = (value >> byte_shift) & 0xFF
233
+ write_bits(byte, 8)
234
+ end
235
+
236
+ # Write remaining bits
237
+ if remaining_bits.positive?
238
+ remaining_value = value & ((1 << remaining_bits) - 1)
239
+ write_bits(remaining_value, remaining_bits)
223
240
  end
224
241
  end
225
242
 
data/lib/cabriolet/cab/compressor.rb CHANGED
@@ -1,23 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "../checksum"
4
+ require_relative "../errors"
5
+
3
6
  module Cabriolet
4
7
  module CAB
5
8
  # Compressor creates CAB files from source files
6
9
  # rubocop:disable Metrics/ClassLength
7
10
  class Compressor
8
- attr_reader :io_system, :files, :compression, :set_id, :cabinet_index
11
+ attr_reader :io_system, :files, :compression, :set_id, :cabinet_index,
12
+ :workers
9
13
 
10
14
  # Initialize a new compressor
11
15
  #
12
16
  # @param io_system [System::IOSystem] I/O system for writing
13
17
  # @param algorithm_factory [AlgorithmFactory, nil] Custom algorithm factory or nil for default
14
- def initialize(io_system = nil, algorithm_factory = nil)
18
+ # @param workers [Integer] Number of parallel worker threads (default: 1 for sequential)
19
+ def initialize(io_system = nil, algorithm_factory = nil, workers: 1)
15
20
  @io_system = io_system || System::IOSystem.new
16
21
  @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
17
22
  @files = []
18
23
  @compression = :mszip
19
24
  @set_id = rand(0xFFFF)
20
25
  @cabinet_index = 0
26
+ @workers = workers
21
27
  end
22
28
 
23
29
  # Add a file to the cabinet
@@ -56,6 +62,9 @@ module Cabriolet
56
62
  @set_id = options[:set_id] || @set_id
57
63
  @cabinet_index = options[:cabinet_index] || @cabinet_index
58
64
 
65
+ # Validate and cache compression method value to avoid repeated hash lookups
66
+ @compression_method = compression_type_value
67
+
59
68
  # Collect file information
60
69
  file_infos = collect_file_infos
61
70
 
@@ -129,17 +138,80 @@ module Cabriolet
129
138
 
130
139
  # Compress all files and return block data
131
140
  def compress_files(file_infos)
141
+ return compress_files_sequential(file_infos) if @workers <= 1
142
+
143
+ compress_files_parallel(file_infos)
144
+ end
145
+
146
+ # Compress files using parallel workers via Fractor
147
+ def compress_files_parallel(file_infos)
148
+ require_relative "file_compression_work"
149
+ require_relative "file_compression_worker"
150
+
151
+ compression_method = @compression_method || compression_type_value
152
+
153
+ # Create work items for each file
154
+ work_items = file_infos.map do |info|
155
+ FileCompressionWork.new(
156
+ source_path: info[:source_path],
157
+ compression_method: compression_method,
158
+ block_size: Constants::BLOCK_MAX,
159
+ io_system: @io_system,
160
+ algorithm_factory: @algorithm_factory,
161
+ )
162
+ end
163
+
164
+ # Create worker pool
165
+ worker_pool = Fractor::WorkerPool.new(
166
+ FileCompressionWorker,
167
+ num_workers: @workers,
168
+ )
169
+
170
+ # Submit all work items and wait for completion
171
+ results = worker_pool.process_work(work_items)
172
+
173
+ # Aggregate results in original order
174
+ file_result_map = {}
175
+ total_uncompressed = 0
176
+ all_blocks = []
177
+
178
+ results.each do |result|
179
+ if result.error
180
+ raise DecompressionError,
181
+ "Failed to compress #{result.error[:source_path]}: #{result.error[:message]}"
182
+ end
183
+
184
+ file_result_map[result.result[:source_path]] = result.result
185
+ total_uncompressed += result.result[:total_uncompressed]
186
+ end
187
+
188
+ # Reorder blocks to match original file order
189
+ file_infos.each do |info|
190
+ file_result = file_result_map[info[:source_path]]
191
+ all_blocks.concat(file_result[:blocks])
192
+ end
193
+
194
+ {
195
+ blocks: all_blocks,
196
+ total_uncompressed: total_uncompressed,
197
+ }
198
+ end
199
+
200
+ # Compress files sequentially (original implementation)
201
+ def compress_files_sequential(file_infos)
132
202
  blocks = []
133
203
  total_uncompressed = 0
134
204
 
135
205
  file_infos.each do |info|
136
206
  file_data = ::File.binread(info[:source_path])
137
- total_uncompressed += file_data.bytesize
207
+ file_size = file_data.bytesize
208
+ total_uncompressed += file_size
138
209
 
139
210
  # Split into blocks of max 32KB
140
211
  offset = 0
141
- while offset < file_data.bytesize
142
- chunk_size = [Constants::BLOCK_MAX, file_data.bytesize - offset].min
212
+ while offset < file_size
213
+ remaining = file_size - offset
214
+ chunk_size = [Constants::BLOCK_MAX, remaining].min
143
215
  chunk = file_data[offset, chunk_size]
144
216
 
145
217
  # Compress chunk
@@ -169,18 +241,9 @@ module Cabriolet
169
241
  input = System::MemoryHandle.new(data)
170
242
  output = System::MemoryHandle.new("", Constants::MODE_WRITE)
171
243
 
172
- # Get compression method value
173
- compression_method = begin
174
- {
175
- none: Constants::COMP_TYPE_NONE,
176
- mszip: Constants::COMP_TYPE_MSZIP,
177
- lzx: Constants::COMP_TYPE_LZX,
178
- quantum: Constants::COMP_TYPE_QUANTUM,
179
- }.fetch(@compression)
180
- rescue KeyError
181
- raise ArgumentError,
182
- "Unsupported compression type: #{@compression}"
183
- end
244
+ # Use cached compression method value (calculated in generate)
245
+ # Fallback to calculation if not yet cached
246
+ compression_method = @compression_method || compression_type_value
184
247
 
185
248
  # Determine window bits based on compression type
186
249
  window_bits = case @compression
@@ -278,7 +341,10 @@ cabinet_size)
278
341
  mszip: Constants::COMP_TYPE_MSZIP,
279
342
  lzx: Constants::COMP_TYPE_LZX,
280
343
  quantum: Constants::COMP_TYPE_QUANTUM,
281
- }.fetch(@compression, Constants::COMP_TYPE_MSZIP)
344
+ }.fetch(@compression) do
345
+ raise ArgumentError,
346
+ "Unsupported compression type: #{@compression}"
347
+ end
282
348
  end
283
349
 
284
350
  # Write CFFILE entry
@@ -331,41 +397,7 @@ cabinet_size)
331
397
  # Same algorithm as used in Extractor
332
398
  # rubocop:disable Metrics/MethodLength
333
399
  def calculate_checksum(data, initial = 0)
334
- cksum = initial
335
- bytes = data.bytes
336
-
337
- # Process 4-byte chunks
338
- (bytes.size / 4).times do |i|
339
- offset = i * 4
340
- value = bytes[offset] |
341
- (bytes[offset + 1] << 8) |
342
- (bytes[offset + 2] << 16) |
343
- (bytes[offset + 3] << 24)
344
- cksum ^= value
345
- end
346
-
347
- # Process remaining bytes
348
- remainder = bytes.size % 4
349
- if remainder.positive?
350
- ul = 0
351
- offset = bytes.size - remainder
352
-
353
- case remainder
354
- when 3
355
- ul |= bytes[offset + 2] << 16
356
- ul |= bytes[offset + 1] << 8
357
- ul |= bytes[offset]
358
- when 2
359
- ul |= bytes[offset + 1] << 8
360
- ul |= bytes[offset]
361
- when 1
362
- ul |= bytes[offset]
363
- end
364
-
365
- cksum ^= ul
366
- end
367
-
368
- cksum & 0xFFFFFFFF
400
+ Checksum.calculate(data, initial)
369
401
  end
370
402
  # rubocop:enable Metrics/MethodLength
371
403
  end
data/lib/cabriolet/cab/decompressor.rb CHANGED
@@ -97,7 +97,8 @@ module Cabriolet
97
97
  # @param filename [String] Path to file to search
98
98
  # @return [Models::Cabinet, nil] First cabinet found, or nil if none found
99
99
  def search(filename)
100
- search_buf = Array.new(@search_buffer_size)
100
+ # Reuse search buffer across searches for better performance
101
+ search_buf = @search_buffer ||= Array.new(@search_buffer_size)
101
102
  first_cabinet = nil
102
103
  link_cabinet = nil
103
104
  first_len = 0