cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,357 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+
5
+ module Cabriolet
6
+ module CAB
7
+ # Extractor handles the extraction of files from cabinets
8
+ class Extractor
9
+ attr_reader :io_system, :decompressor
10
+
11
# Build an extractor bound to an I/O layer and its owning decompressor.
#
# @param io_system [System::IOSystem] I/O system
# @param decompressor [CAB::Decompressor] Parent decompressor
def initialize(io_system, decompressor)
  @io_system, @decompressor = io_system, decompressor
end
19
+
20
# Extract one cabinet member to +output_path+.
#
# The file's offset and length are validated against the 2GB limit and
# (outside salvage mode) against the folder's block count; the parent
# directory of +output_path+ is created when missing.
#
# @param file [Models::File] File to extract
# @param output_path [String] Where to write the file
# @param options [Hash] Extraction options
# @option options [Boolean] :salvage Enable salvage mode; when not truthy the
#   parent decompressor's +salvage+ setting is used
# @return [Integer] Number of bytes requested from the decompressor
# @raise [Cabriolet::ArgumentError] if the file has no folder
# @raise [DecompressionError] if a size limit is exceeded or the folder
#   needs a previous cabinet
def extract_file(file, output_path, **options)
  salvage = options[:salvage] || @decompressor.salvage
  folder = file.folder
  raise Cabriolet::ArgumentError, "File has no folder" unless folder

  start = file.offset
  raise DecompressionError, "File offset beyond 2GB limit" if start > Constants::LENGTH_MAX

  # Over-long files are clamped in salvage mode and rejected otherwise.
  room = Constants::LENGTH_MAX - start
  filelen = file.length
  if filelen > room
    raise DecompressionError, "File length exceeds 2GB limit" unless salvage

    filelen = room
  end

  if folder.needs_prev_merge?
    raise DecompressionError,
          "File requires previous cabinet, cabinet set is incomplete"
  end

  # Outside salvage mode the file must lie within the folder's blocks.
  unless salvage
    capacity = folder.num_blocks * Constants::BLOCK_MAX
    fits = start <= capacity && filelen <= (capacity - start)
    raise DecompressionError, "File extends beyond folder data" unless fits
  end

  parent_dir = ::File.dirname(output_path)
  FileUtils.mkdir_p(parent_dir) unless ::File.directory?(parent_dir)

  # Reader that serves CFDATA blocks, following the folder's data chain.
  reader = BlockReader.new(@io_system, folder.data, folder.num_blocks, salvage)
  begin
    sink = @io_system.open(output_path, Constants::MODE_WRITE)
    begin
      decomp = @decompressor.create_decompressor(folder, reader, sink)

      if start.positive?
        # Decompress the bytes that precede this file into a throwaway buffer.
        # NOTE(review): the skip pass uses a second decompressor on the same
        # reader; confirm the decompressors tolerate starting mid-stream.
        scratch = System::MemoryHandle.new("", Constants::MODE_WRITE)
        @decompressor.create_decompressor(folder, reader, scratch).decompress(start)
      end

      decomp.decompress(filelen)
      filelen
    ensure
      sink.close
    end
  ensure
    reader.close
  end
end
102
+
103
# Extract all files from a cabinet
#
# Filenames come from the archive itself, so each destination is resolved
# and required to lie inside +output_dir+ before anything is written; a
# member named e.g. "../../x" is rejected instead of escaping the root.
#
# @param cabinet [Models::Cabinet] Cabinet to extract from
# @param output_dir [String] Directory to extract to
# @param options [Hash] Extraction options (also forwarded to #extract_file)
# @option options [Boolean] :preserve_paths Preserve directory structure (default: true)
# @option options [Boolean] :set_timestamps Set file modification times (default: true)
# @option options [Proc] :progress Progress callback, called with
#   (file, files_done, total_files) after each file
# @return [Integer] Number of files extracted
# @raise [Cabriolet::ArgumentError] if a member's path resolves outside output_dir
def extract_all(cabinet, output_dir, **options)
  preserve_paths = options.fetch(:preserve_paths, true)
  set_timestamps = options.fetch(:set_timestamps, true)
  progress = options[:progress]

  # Create output directory
  FileUtils.mkdir_p(output_dir) unless ::File.directory?(output_dir)

  # Prefix every legitimate destination must start with (handles root == "/").
  root = ::File.expand_path(output_dir)
  root_prefix = root.end_with?(::File::SEPARATOR) ? root : "#{root}#{::File::SEPARATOR}"

  count = 0
  cabinet.files.each do |file|
    relative = preserve_paths ? file.filename : ::File.basename(file.filename)
    output_path = ::File.join(output_dir, relative)

    # Refuse path traversal out of the extraction root.
    unless ::File.expand_path(output_path).start_with?(root_prefix)
      raise Cabriolet::ArgumentError,
            "Refusing to extract outside output directory: #{file.filename}"
    end

    extract_file(file, output_path, **options)

    # Set timestamp if requested
    if set_timestamps && file.modification_time
      ::File.utime(file.modification_time, file.modification_time, output_path)
    end

    # Set file permissions based on attributes
    set_file_attributes(output_path, file)

    count += 1
    progress&.call(file, count, cabinet.files.size)
  end

  count
end
148
+
149
+ private
150
+
151
# Apply permission bits to an extracted file based on its CAB attributes.
# Best-effort: any StandardError is swallowed and nil is returned.
#
# @param path [String] File path
# @param file [Models::File] CAB file
def set_file_attributes(path, file)
  return unless ::File.exist?(path)

  mode = if file.readonly?
           0o444 # read-only
         elsif file.executable?
           0o755 # executable
         else
           0o644 # default permissions
         end
  ::File.chmod(mode, path)
rescue StandardError
  # Ignore errors setting attributes
  nil
end
173
+
174
+ # BlockReader wraps cabinet file handles and reads CFDATA blocks
175
+ # Handles multi-part cabinets by following the FolderData chain
176
+ class BlockReader
177
+ attr_reader :io_system, :current_data, :num_blocks, :salvage,
178
+ :current_block
179
+
180
# Set up reader state and open the cabinet holding the first data segment.
#
# @param io_system [System::IOSystem] I/O system used to open cabinet files
# @param folder_data [Object] first data segment of the folder; must respond
#   to +cabinet+, +offset+ and +next_data+
# @param num_blocks [Integer] number of blocks this reader may serve
# @param salvage [Boolean] when truthy, size and checksum validation is skipped
def initialize(io_system, folder_data, num_blocks, salvage)
  # Collaborators and limits
  @io_system = io_system
  @num_blocks = num_blocks
  @salvage = salvage

  # Position within the folder's data chain
  @current_data = folder_data
  @current_block = 0

  # Payload of the most recently read block and the cursor into it
  @buffer = ""
  @buffer_pos = 0

  # No cabinet is open yet; open the first one and seek to its data offset
  @cab_handle = nil
  open_current_cabinet
end
193
+
194
# Read up to +bytes+ bytes of block payload, pulling in further blocks as
# the current one is exhausted.
#
# @param bytes [Integer] maximum number of bytes to return
# @return [String] the data read; shorter than requested (possibly empty)
#   once no further block can be read
def read(bytes)
  out = +""

  loop do
    wanted = bytes - out.bytesize
    break unless wanted.positive?

    # Refill when the buffered block is used up; stop when none is left.
    break if @buffer_pos >= @buffer.bytesize && !read_next_block

    take = [@buffer.bytesize - @buffer_pos, wanted].min
    out << @buffer[@buffer_pos, take]
    @buffer_pos += take
  end

  out
end
211
+
212
# Seeking is not implemented for the block reader; the arguments are
# ignored and no state changes.
#
# @return [Integer] always 0
def seek(_offset, _whence)
  0
end
216
+
217
# Position reporting is not tracked by the block reader.
#
# @return [Integer] always 0
def tell
  0
end
220
+
221
# Close the currently open cabinet handle, if any, and forget it.
# Calling it again afterwards is a no-op.
#
# @return [nil]
def close
  @cab_handle.close unless @cab_handle.nil?
  @cab_handle = nil
end
225
+
226
+ private
227
+
228
# Load the next CFDATA block into the internal buffer.
#
# A block part whose header reports an uncompressed size of 0 is treated as
# continuing in the next cabinet of the chain; the parts are concatenated
# before being exposed through the buffer.
#
# @return [Boolean] true when a block was buffered; false when all blocks
#   have been served or the cabinet data ends early (short or nil read)
# @raise [DecompressionError] if a block exceeds the size limits (outside
#   salvage mode) or continues with no next cabinet available
# @raise [ChecksumError] on a checksum mismatch (outside salvage mode)
def read_next_block
  return false if @current_block >= @num_blocks

  # Read blocks, potentially spanning multiple cabinets
  accumulated_data = +""

  loop do
    # Read CFDATA header. A handle may report end-of-file as nil, so nil is
    # handled like any other short read instead of raising NoMethodError.
    header_data = @cab_handle.read(Constants::CFDATA_SIZE)
    return false if header_data.nil? || header_data.bytesize != Constants::CFDATA_SIZE

    cfdata = Binary::CFData.read(header_data)

    # Skip reserved block data if present
    block_resv = @current_data.cabinet.block_resv
    @cab_handle.seek(block_resv, Constants::SEEK_CUR) if block_resv.positive?

    # Validate block sizes
    unless @salvage
      total_size = accumulated_data.bytesize + cfdata.compressed_size
      if total_size > Constants::INPUT_MAX
        raise DecompressionError,
              "Compressed block size exceeds maximum"
      end

      if cfdata.uncompressed_size > Constants::BLOCK_MAX
        raise DecompressionError,
              "Uncompressed block size exceeds maximum"
      end
    end

    # Read compressed data (nil-safe for the same reason as the header)
    compressed_data = @cab_handle.read(cfdata.compressed_size)
    return false if compressed_data.nil? || compressed_data.bytesize != cfdata.compressed_size

    # Verify checksum if present and not in salvage mode
    if cfdata.checksum.positive? && !@salvage
      # Checksum of the payload seeds the checksum of the 4 header bytes
      # starting at offset 4.
      data_cksum = calculate_checksum(compressed_data)
      header_cksum = calculate_checksum(header_data[4, 4], data_cksum)

      if header_cksum != cfdata.checksum
        raise ChecksumError,
              "Block checksum mismatch"
      end
    end

    accumulated_data << compressed_data

    # A non-zero uncompressed size marks the final part of the block.
    break unless cfdata.uncompressed_size.zero?

    # Otherwise the block continues in the next cabinet of the chain.
    unless advance_to_next_cabinet
      raise DecompressionError,
            "Block continues but no next cabinet available"
    end
  end

  # Store in buffer
  @buffer = accumulated_data
  @buffer_pos = 0
  @current_block += 1

  true
end
300
+
301
# Open the cabinet file of the current data segment for reading and
# position the handle at the segment's data offset. Any previously open
# handle is closed first.
def open_current_cabinet
  @cab_handle.close unless @cab_handle.nil?

  segment = @current_data
  @cab_handle = @io_system.open(segment.cabinet.filename, Constants::MODE_READ)
  @cab_handle.seek(segment.offset, Constants::SEEK_START)
end
306
+
307
# Step to the next data segment in the chain and open its cabinet.
#
# @return [Boolean] true when a next segment existed and was opened;
#   false when the chain has ended (the current segment is then nil)
def advance_to_next_cabinet
  if (@current_data = @current_data.next_data)
    open_current_cabinet
    true
  else
    false
  end
end
316
+
317
# Compute the CAB block checksum of +data+.
#
# Complete 4-byte groups are XORed in as little-endian 32-bit words. The
# 1-3 leftover bytes are combined with the FIRST leftover byte in the
# highest position (b0 << 16 | b1 << 8 | b2 for three bytes), matching the
# reference CAB checksum routine; the previous implementation had the
# 2- and 3-byte tails reversed.
#
# @param data [String] bytes to checksum
# @param initial [Integer] seed value, e.g. the checksum of a previous span
# @return [Integer] 32-bit checksum
def calculate_checksum(data, initial = 0)
  # "V*" decodes every complete little-endian word and ignores a partial tail.
  cksum = data.unpack("V*").reduce(initial) { |acc, word| acc ^ word }

  remainder = data.bytesize % 4
  if remainder.positive?
    tail = data.byteslice(data.bytesize - remainder, remainder)
    cksum ^= tail.each_byte.reduce(0) { |acc, byte| (acc << 8) | byte }
  end

  cksum & 0xFFFFFFFF
end
354
+ end
355
+ end
356
+ end
357
+ end
@@ -0,0 +1,264 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module CAB
5
+ # Parser reads and parses CAB file headers
6
+ class Parser
7
+ attr_reader :io_system
8
+
9
# Create a parser that performs all of its reads through +io_system+.
#
# @param io_system [System::IOSystem] I/O system for reading
def initialize(io_system)
  @io_system = io_system
end
15
+
16
# Parse a CAB file and return a Cabinet model
#
# The file handle is closed in all cases, including when parsing raises.
#
# @param filename [String] Path to the CAB file
# @return [Models::Cabinet] Parsed cabinet
# @raise [ParseError] if the file is not a valid CAB
def parse(filename)
  handle = @io_system.open(filename, Constants::MODE_READ)
  begin
    parse_handle(handle, filename)
  ensure
    # Previously skipped when parse_handle raised, leaking the handle.
    @io_system.close(handle)
  end
end
27
+
28
# Parse a cabinet from a handle that is already open.
#
# Reads happen in a fixed order: header, optional prev/next cabinet
# strings, folder entries, then file entries.
#
# @param handle [System::FileHandle, System::MemoryHandle] Open handle
# @param filename [String] Filename for reference
# @param offset [Integer] Offset in file where cabinet starts
# @param salvage [Boolean] Enable salvage mode for corrupted files
# @param quiet [Boolean] Suppress error messages
# @return [Models::Cabinet] Parsed cabinet
# @raise [ParseError] if not a valid CAB
def parse_handle(handle, filename, offset = 0, salvage = false,
                 quiet = false)
  @salvage = salvage
  @quiet = quiet

  Models::Cabinet.new(filename).tap do |cabinet|
    cabinet.base_offset = offset

    # Position the handle at the start of the cabinet
    @io_system.seek(handle, offset, Constants::SEEK_START)

    header, folder_resv = read_header(handle, cabinet)
    validate_header(header)
    populate_cabinet_from_header(handle, cabinet, header)

    read_folders(handle, cabinet, header, folder_resv)
    read_files(handle, cabinet, header, salvage)
  end
end
61
+
62
+ private
63
+
64
# Read the fixed-size CAB header and, when flagged, the reserved-size
# extension that follows it.
#
# @param handle [Object] open handle positioned at the cabinet start
# @param cabinet [Models::Cabinet] receives the per-block reserved size via
#   +set_blocks_info+ when a reserve area is present
# @return [Array] two elements: the parsed header and the per-folder
#   reserved size (0 when there is no reserve area)
# @raise [ParseError] if the header or its extension cannot be read in full
def read_header(handle, cabinet)
  header_data = @io_system.read(handle, Constants::CFHEADER_SIZE)
  # A read may yield nil at end of file, so guard before calling bytesize.
  if header_data.nil? || header_data.bytesize < Constants::CFHEADER_SIZE
    raise ParseError, "Cannot read CAB header"
  end

  header = Binary::CFHeader.read(header_data)
  folder_resv = 0

  # Read reserved header if present
  if header.flags.anybits?(Constants::FLAG_RESERVE_PRESENT)
    resv_data = @io_system.read(handle, Constants::CFHEADER_EXT_SIZE)
    if resv_data.nil? || resv_data.bytesize < Constants::CFHEADER_EXT_SIZE
      raise ParseError,
            "Cannot read reserved header"
    end

    # uint16 header_reserved, uint8 folder_reserved, uint8 data_reserved
    header_resv, folder_resv, data_resv = resv_data.unpack("vCC")

    # Store reserved data size in cabinet
    cabinet.set_blocks_info(0, data_resv)

    # Skip reserved header data
    @io_system.seek(handle, header_resv, Constants::SEEK_CUR) if header_resv.positive?
  end

  [header, folder_resv]
end
97
+
98
# Check that a parsed header describes a usable cabinet.
#
# A version other than 1.3 only produces a warning message; a wrong
# signature, zero folders or zero files are fatal. Messages are suppressed
# when the parser is in quiet mode.
#
# @param header [Object] parsed CAB header
# @return [nil]
# @raise [ParseError] on an invalid signature, or when the cabinet has no
#   folders or no files
def validate_header(header)
  raise ParseError, "Invalid CAB signature" if header.signature != "MSCF"

  expected_version = header.major_version == 1 && header.minor_version == 3
  if !expected_version && !@quiet
    @io_system.message(nil, "WARNING; cabinet version is not 1.3")
  end

  if header.num_folders.zero?
    @io_system.message(nil, "no folders in cabinet.") unless @quiet
    raise ParseError, "No folders in cabinet"
  end

  if header.num_files.zero?
    @io_system.message(nil, "no files in cabinet.") unless @quiet
    raise ParseError, "No files in cabinet"
  end
end
118
+
119
# Copy header fields onto the cabinet model and read the optional
# previous/next cabinet name and info strings that follow the header.
#
# For each neighbour the name is read first (must be non-empty), then the
# info string (may be empty).
#
# @param handle [Object] open handle positioned just after the header
# @param cabinet [Models::Cabinet] model to fill in
# @param header [Object] parsed CAB header
def populate_cabinet_from_header(handle, cabinet, header)
  flags = header.flags

  cabinet.length = header.cabinet_size
  cabinet.set_id = header.set_id
  cabinet.set_index = header.cabinet_index
  cabinet.flags = flags

  # Previous cabinet metadata, if flagged
  if flags.anybits?(Constants::FLAG_PREV_CABINET)
    cabinet.prevname = read_string(handle, false)
    cabinet.previnfo = read_string(handle, true)
  end

  # Next cabinet metadata, if flagged
  if flags.anybits?(Constants::FLAG_NEXT_CABINET)
    cabinet.nextname = read_string(handle, false)
    cabinet.nextinfo = read_string(handle, true)
  end
end
137
+
138
# Read +header.num_folders+ folder entries and append a folder model for
# each to +cabinet.folders+.
#
# @param handle [Object] open handle positioned at the first folder entry
# @param cabinet [Models::Cabinet] cabinet the folders belong to
# @param header [Object] parsed CAB header
# @param folder_resv [Integer] reserved bytes following each folder entry
# @raise [ParseError] if a folder entry cannot be read in full
def read_folders(handle, cabinet, header, folder_resv)
  header.num_folders.times do
    folder_data = @io_system.read(handle, Constants::CFFOLDER_SIZE)
    # A read may yield nil at end of file, so guard before calling bytesize.
    if folder_data.nil? || folder_data.bytesize < Constants::CFFOLDER_SIZE
      raise ParseError,
            "Cannot read folder entry"
    end

    cf_folder = Binary::CFFolder.read(folder_data)

    # Skip folder reserved space if present
    @io_system.seek(handle, folder_resv, Constants::SEEK_CUR) if folder_resv.positive?

    # The entry's data offset is relative to the cabinet start.
    folder = Models::Folder.new(cabinet, cabinet.base_offset + cf_folder.data_offset)
    folder.comp_type = cf_folder.comp_type
    folder.num_blocks = cf_folder.num_blocks

    cabinet.folders << folder
  end
end
165
+
166
# Read +header.num_files+ file entries and append a file model for each
# usable one to +cabinet.files+.
#
# In salvage mode, entries whose filename cannot be read or whose folder
# cannot be linked are skipped instead of aborting the parse.
#
# @param handle [Object] open handle positioned at the first file entry
# @param cabinet [Models::Cabinet] cabinet the files belong to
# @param header [Object] parsed CAB header
# @param salvage [Boolean] skip bad entries instead of raising
# @raise [ParseError] if an entry cannot be read in full, if an entry is
#   invalid outside salvage mode, or if no file was collected at all
def read_files(handle, cabinet, header, salvage = false)
  header.num_files.times do
    file_data = @io_system.read(handle, Constants::CFFILE_SIZE)
    # A read may yield nil at end of file, so guard before calling bytesize.
    if file_data.nil? || file_data.bytesize < Constants::CFFILE_SIZE
      raise ParseError, "Cannot read file entry"
    end

    cf_file = Binary::CFFile.read(file_data)

    # Create file model
    file = Models::File.new
    file.length = cf_file.uncompressed_size
    file.offset = cf_file.folder_offset
    file.folder_index = cf_file.folder_index
    file.attribs = cf_file.attribs
    file.parse_datetime(cf_file.date, cf_file.time)

    begin
      file.filename = read_string(handle, false)
      link_file_to_folder(file, cabinet, cf_file.folder_index,
                          header.num_folders)
    rescue ParseError
      # In salvage mode, skip entries with a bad name or folder index
      next if salvage

      raise
    end

    # Skip if folder linkage failed in salvage mode
    next if file.folder.nil? && salvage

    cabinet.files << file
  end

  # Ensure we got at least some files
  raise ParseError, "No valid files found in cabinet" if cabinet.files.empty?
end
217
+
218
# Attach +file+ to the folder that its folder index designates.
#
# Indices below FOLDER_CONTINUED_FROM_PREV are ordinary positions in
# +cabinet.folders+; the special continuation values map to the first or
# last folder of this cabinet.
#
# @param file [Models::File] file to link
# @param cabinet [Models::Cabinet] cabinet providing the folders
# @param folder_index [Integer] folder index from the file entry
# @param num_folders [Integer] folder count declared in the header
# @raise [ParseError] if an ordinary index is not below +num_folders+
def link_file_to_folder(file, cabinet, folder_index, num_folders)
  if folder_index < Constants::FOLDER_CONTINUED_FROM_PREV
    # Normal folder index
    raise ParseError, "Invalid folder index: #{folder_index}" if folder_index >= num_folders

    file.folder = cabinet.folders[folder_index]
  else
    case folder_index
    when Constants::FOLDER_CONTINUED_TO_NEXT, Constants::FOLDER_CONTINUED_PREV_AND_NEXT
      # File continues to next cabinet - use last folder
      file.folder = cabinet.folders.last
    when Constants::FOLDER_CONTINUED_FROM_PREV
      # File continues from previous cabinet - use first folder
      file.folder = cabinet.folders.first
    end
  end
end
236
+
237
# Read a NUL-terminated string starting at the handle's current position
# and leave the handle positioned just after the terminator.
#
# At most 256 bytes are examined for the terminator.
#
# @param handle [Object] open handle
# @param permit_empty [Boolean] whether a zero-length string is acceptable
# @return [String] the string without its terminator
# @raise [ParseError] if nothing can be read, no terminator is found in the
#   bytes read, or the string is empty and +permit_empty+ is false
def read_string(handle, permit_empty)
  # Remember where the string starts so the handle can be repositioned.
  start = @io_system.tell(handle)

  chunk = @io_system.read(handle, 256)
  raise ParseError, "Cannot read string" if chunk.nil? || chunk.empty?

  terminator = chunk.index("\x00")
  raise ParseError, "String not null-terminated" unless terminator
  raise ParseError, "Empty string not permitted" if terminator.zero? && !permit_empty

  text = chunk[0...terminator]

  # The read above overshoots; rewind to the byte after the terminator.
  @io_system.seek(handle, start + terminator + 1, Constants::SEEK_START)

  text
end
262
+ end
263
+ end
264
+ end