cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +703 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +167 -16
  6. data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +108 -84
  13. data/lib/cabriolet/cab/decompressor.rb +16 -20
  14. data/lib/cabriolet/cab/extractor.rb +142 -66
  15. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  16. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  17. data/lib/cabriolet/checksum.rb +49 -0
  18. data/lib/cabriolet/chm/command_handler.rb +227 -0
  19. data/lib/cabriolet/chm/compressor.rb +7 -3
  20. data/lib/cabriolet/chm/decompressor.rb +39 -21
  21. data/lib/cabriolet/chm/parser.rb +5 -2
  22. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  23. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  24. data/lib/cabriolet/cli/command_registry.rb +83 -0
  25. data/lib/cabriolet/cli.rb +356 -607
  26. data/lib/cabriolet/collections/file_collection.rb +175 -0
  27. data/lib/cabriolet/compressors/base.rb +1 -1
  28. data/lib/cabriolet/compressors/lzx.rb +241 -54
  29. data/lib/cabriolet/compressors/mszip.rb +35 -3
  30. data/lib/cabriolet/compressors/quantum.rb +36 -95
  31. data/lib/cabriolet/decompressors/base.rb +1 -1
  32. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  33. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  34. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  35. data/lib/cabriolet/decompressors/quantum.rb +83 -53
  36. data/lib/cabriolet/errors.rb +3 -0
  37. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  38. data/lib/cabriolet/extraction/extractor.rb +171 -0
  39. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  40. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  41. data/lib/cabriolet/file_entry.rb +156 -0
  42. data/lib/cabriolet/file_manager.rb +144 -0
  43. data/lib/cabriolet/format_base.rb +79 -0
  44. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  45. data/lib/cabriolet/hlp/compressor.rb +28 -238
  46. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  47. data/lib/cabriolet/hlp/parser.rb +52 -101
  48. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  49. data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
  50. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  51. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  52. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  53. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  54. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  55. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  56. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  57. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  58. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  59. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  60. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  61. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  62. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  63. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  64. data/lib/cabriolet/huffman/encoder.rb +15 -12
  65. data/lib/cabriolet/huffman/tree.rb +85 -1
  66. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  67. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  68. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  69. data/lib/cabriolet/lit/command_handler.rb +221 -0
  70. data/lib/cabriolet/lit/compressor.rb +119 -168
  71. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  72. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  73. data/lib/cabriolet/lit/decompressor.rb +518 -152
  74. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  75. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  76. data/lib/cabriolet/lit/header_writer.rb +124 -0
  77. data/lib/cabriolet/lit/parser.rb +670 -0
  78. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  79. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  80. data/lib/cabriolet/models/hlp_file.rb +130 -29
  81. data/lib/cabriolet/models/hlp_header.rb +105 -17
  82. data/lib/cabriolet/models/lit_header.rb +212 -25
  83. data/lib/cabriolet/models/szdd_header.rb +10 -2
  84. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  85. data/lib/cabriolet/oab/command_handler.rb +257 -0
  86. data/lib/cabriolet/oab/compressor.rb +17 -8
  87. data/lib/cabriolet/oab/decompressor.rb +41 -10
  88. data/lib/cabriolet/offset_calculator.rb +81 -0
  89. data/lib/cabriolet/plugin.rb +233 -0
  90. data/lib/cabriolet/plugin_manager.rb +453 -0
  91. data/lib/cabriolet/plugin_validator.rb +422 -0
  92. data/lib/cabriolet/quantum_shared.rb +105 -0
  93. data/lib/cabriolet/system/io_system.rb +3 -0
  94. data/lib/cabriolet/system/memory_handle.rb +17 -4
  95. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  96. data/lib/cabriolet/szdd/compressor.rb +15 -11
  97. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  98. data/lib/cabriolet/version.rb +1 -1
  99. data/lib/cabriolet.rb +181 -20
  100. metadata +69 -4
  101. data/lib/cabriolet/auto.rb +0 -173
  102. data/lib/cabriolet/parallel.rb +0 -333
@@ -1,249 +1,615 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "parser"
4
+ require_relative "../decompressors/lzx"
5
+ require_relative "../binary/lit_structures"
6
+ require_relative "../errors"
7
+
3
8
  module Cabriolet
4
9
  module LIT
5
- # Decompressor is the main interface for LIT file operations
10
+ # Decompressor for Microsoft Reader LIT files
11
+ #
12
+ # Handles complete LIT file extraction including:
13
+ # - Parsing complex LIT structure with Parser
14
+ # - DataSpace/Storage sections with transform layers
15
+ # - LZX decompression with ResetTable
16
+ # - Manifest-based filename restoration
17
+ # - Section caching for efficiency
6
18
  #
7
- # LIT files are Microsoft Reader eBook files that use LZX compression.
19
+ # Based on the openclit/SharpLit reference implementation.
8
20
  #
9
- # NOTE: This implementation handles non-encrypted LIT files only.
10
- # DES-encrypted (DRM-protected) LIT files are not supported.
11
- # For encrypted files, use Microsoft Reader or convert to another format
12
- # first.
21
+ # NOTE: DES encryption (DRM) is not supported.
13
22
  class Decompressor
14
- attr_reader :io_system
23
+ attr_reader :io_system, :parser
15
24
  attr_accessor :buffer_size
16
25
 
17
- # Input buffer size for decompression
18
- DEFAULT_BUFFER_SIZE = 32_768
26
+ # Default buffer size for decompression
27
+ DEFAULT_BUFFER_SIZE = 8192
19
28
 
20
- # Initialize a new LIT decompressor
21
- #
22
- # @param io_system [System::IOSystem, nil] Custom I/O system or nil for
23
- # default
24
- def initialize(io_system = nil)
29
+ def initialize(io_system = nil, algorithm_factory = nil)
25
30
  @io_system = io_system || System::IOSystem.new
31
+ @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
32
+ @parser = Parser.new(@io_system)
33
+ @section_cache = {}
26
34
  @buffer_size = DEFAULT_BUFFER_SIZE
27
35
  end
28
36
 
29
37
  # Open and parse a LIT file
30
38
  #
31
- # @param filename [String] Path to the LIT file
32
- # @return [Models::LITHeader] Parsed header with file list
33
- # @raise [Errors::ParseError] if the file is not a valid LIT
34
- # @raise [NotImplementedError] if the file is DES-encrypted
39
+ # @param filename [String] Path to LIT file
40
+ # @return [Models::LITFile] Parsed LIT file structure
41
+ # @raise [Cabriolet::ParseError] if file is invalid
42
+ # @raise [NotImplementedError] if file is DRM-encrypted
35
43
  def open(filename)
36
- header = parse_header(filename)
37
- header.filename = filename
44
+ lit_file = @parser.parse(filename)
38
45
 
39
- # Check for encryption
40
- if header.encrypted?
46
+ # Store filename for later extraction
47
+ lit_file.instance_variable_set(:@filename, filename)
48
+
49
+ # Check for DRM
50
+ if lit_file.encrypted?
41
51
  raise NotImplementedError,
42
- "DES-encrypted LIT files not yet supported. " \
43
- "Use Microsoft Reader or another tool to decrypt first."
52
+ "DES-encrypted LIT files not supported. " \
53
+ "DRM level: #{lit_file.drm_level}"
44
54
  end
45
55
 
46
- header
56
+ lit_file
47
57
  end
48
58
 
49
59
  # Close a LIT file (no-op for compatibility)
50
60
  #
51
- # @param _header [Models::LITHeader] Header to close
61
+ # @param _lit_file [Models::LITFile] LIT file to close
52
62
  # @return [void]
53
- def close(_header)
54
- # No resources to free in the header itself
63
+ def close(_lit_file)
64
+ # No resources to free in the file object itself
55
65
  # File handles are managed separately during extraction
66
+ @section_cache.clear
56
67
  nil
57
68
  end
58
69
 
59
- # Extract a file from LIT archive
70
+ # Extract a file from LIT archive (wrapper for extract_file)
60
71
  #
61
- # @param header [Models::LITHeader] LIT header from open()
62
- # @param file [Models::LITFile] File entry to extract
63
- # @param output_path [String] Where to write the decompressed file
64
- # @return [Integer] Number of bytes written
65
- # @raise [Errors::DecompressionError] if decompression fails
66
- # @raise [NotImplementedError] if the file is encrypted
67
- def extract(header, file, output_path)
68
- raise ArgumentError, "Header must not be nil" unless header
72
+ # @param lit_file [Models::LITFile] Parsed LIT file
73
+ # @param file [Models::LITDirectoryEntry] File entry to extract
74
+ # @param output_path [String] Where to write extracted file
75
+ # @return [Integer] Bytes written
76
+ # @raise [ArgumentError] if parameters are invalid
77
+ # @raise [NotImplementedError] if file is encrypted
78
+ # @raise [Cabriolet::DecompressionError] if extraction fails
79
+ def extract(lit_file, file, output_path)
80
+ raise ArgumentError, "Header must not be nil" unless lit_file
69
81
  raise ArgumentError, "File must not be nil" unless file
70
82
  raise ArgumentError, "Output path must not be nil" unless output_path
71
83
 
72
- if file.encrypted?
84
+ # Check for encryption
85
+ if lit_file.encrypted?
73
86
  raise NotImplementedError,
74
- "DES-encrypted files not yet supported. " \
75
- "Use Microsoft Reader or another tool to decrypt first."
87
+ "Encrypted sections not yet supported. " \
88
+ "DRM level: #{lit_file.drm_level}"
76
89
  end
77
90
 
78
- input_handle = @io_system.open(header.filename, Constants::MODE_READ)
79
- output_handle = @io_system.open(output_path, Constants::MODE_WRITE)
91
+ # Use extract_file with file name
92
+ internal_name = file.respond_to?(:name) ? file.name : file.to_s
93
+ extract_file(lit_file, internal_name, output_path)
94
+ end
95
+
96
+ # Extract a file by name from LIT archive
97
+ #
98
+ # @param lit_file [Models::LITFile] Parsed LIT file
99
+ # @param internal_name [String] Internal filename
100
+ # @param output_path [String] Where to write extracted file
101
+ # @return [Integer] Bytes written
102
+ # @raise [Cabriolet::DecompressionError] if extraction fails
103
+ def extract_file(lit_file, internal_name, output_path)
104
+ raise ArgumentError, "LIT file required" unless lit_file
105
+ raise ArgumentError, "Internal name required" unless internal_name
106
+ raise ArgumentError, "Output path required" unless output_path
107
+
108
+ # Find directory entry
109
+ entry = lit_file.directory.find(internal_name)
110
+ unless entry
111
+ raise Cabriolet::DecompressionError,
112
+ "File not found: #{internal_name}"
113
+ end
114
+
115
+ # Get section data (cached or decompressed)
116
+ section_data = get_section_data(lit_file, entry.section)
117
+
118
+ # Extract file from section
119
+ file_data = section_data[entry.offset, entry.size]
120
+
121
+ # Check if extraction was successful
122
+ unless file_data
123
+ raise Cabriolet::DecompressionError,
124
+ "Failed to extract file #{entry.name}: " \
125
+ "offset=#{entry.offset}, size=#{entry.size}, section_data_size=#{section_data&.bytesize || 0}"
126
+ end
80
127
 
128
+ # Write to output
129
+ output_handle = @io_system.open(output_path, Constants::MODE_WRITE)
81
130
  begin
82
- # Seek to file data
83
- @io_system.seek(input_handle, file.offset, Constants::SEEK_START)
84
-
85
- bytes_written = if file.compressed?
86
- # Decompress using LZX
87
- decompress_lzx(
88
- input_handle, output_handle, file.length
89
- )
90
- else
91
- # Direct copy
92
- copy_data(
93
- input_handle, output_handle, file.length
94
- )
95
- end
96
-
97
- bytes_written
131
+ @io_system.write(output_handle, file_data)
98
132
  ensure
99
- @io_system.close(input_handle) if input_handle
100
- @io_system.close(output_handle) if output_handle
133
+ @io_system.close(output_handle)
101
134
  end
135
+
136
+ file_data.bytesize
102
137
  end
103
138
 
104
139
  # Extract all files from LIT archive
105
140
  #
106
- # @param header [Models::LITHeader] LIT header from open()
107
- # @param output_dir [String] Directory to extract files to
141
+ # @param lit_file [Models::LITFile] Parsed LIT file
142
+ # @param output_dir [String] Directory to extract to
143
+ # @param use_manifest [Boolean] Use manifest for filenames
108
144
  # @return [Integer] Number of files extracted
109
- # @raise [Errors::DecompressionError] if extraction fails
110
- def extract_all(header, output_dir)
111
- raise ArgumentError, "Header must not be nil" unless header
112
- raise ArgumentError, "Output dir must not be nil" unless output_dir
145
+ def extract_all(lit_file, output_dir, use_manifest: true)
146
+ raise ArgumentError, "Header must not be nil" unless lit_file
147
+
148
+ unless output_dir
149
+ raise ArgumentError,
150
+ "Output directory must not be nil"
151
+ end
113
152
 
114
- # Create output directory if it doesn't exist
115
153
  ::FileUtils.mkdir_p(output_dir)
116
154
 
117
155
  extracted = 0
118
- header.files.each do |file|
119
- output_path = ::File.join(output_dir, file.filename)
156
+
157
+ # Extract each directory entry
158
+ lit_file.directory.entries.each do |entry|
159
+ # Skip root entry and directories (ending with /)
160
+ next if entry.root? || entry.name.end_with?("/")
161
+
162
+ # Determine output filename
163
+ if use_manifest && lit_file.manifest
164
+ mapping = lit_file.manifest.find_by_internal(entry.name)
165
+ filename = mapping ? mapping.original_name : entry.name
166
+ else
167
+ filename = entry.name
168
+ end
169
+
170
+ # Sanitize filename and convert path separators
171
+ # Replace :: prefix and convert / to proper path separator
172
+ filename = sanitize_path(filename)
173
+
174
+ # Create output path (join with output_dir)
175
+ output_path = ::File.join(output_dir, filename)
120
176
 
121
177
  # Create subdirectories if needed
122
178
  file_dir = ::File.dirname(output_path)
123
179
  ::FileUtils.mkdir_p(file_dir) unless ::File.directory?(file_dir)
124
180
 
125
- extract(header, file, output_path)
181
+ # Extract file
182
+ extract_file(lit_file, entry.name, output_path)
126
183
  extracted += 1
127
184
  end
128
185
 
129
186
  extracted
130
187
  end
131
188
 
189
# Describe every non-root directory entry of a parsed LIT archive.
#
# Each listed entry becomes a hash with :internal_name, :section, :offset
# and :size. When use_manifest is true, the archive has a manifest, and the
# manifest has a mapping for the entry, :original_name and :content_type
# are added as well.
#
# @param lit_file [Models::LITFile] Parsed LIT file
# @param use_manifest [Boolean] Add manifest details when available
# @return [Array<Hash>] One hash per non-root entry, in directory order
# @raise [ArgumentError] if lit_file is nil
def list_files(lit_file, use_manifest: true)
  raise ArgumentError, "LIT file required" unless lit_file

  lit_file.directory.entries.each_with_object([]) do |entry, listing|
    next if entry.root?

    details = {
      internal_name: entry.name,
      section: entry.section,
      offset: entry.offset,
      size: entry.size,
    }

    # Manifest lookups only happen when requested and a manifest exists.
    manifest = use_manifest && lit_file.manifest
    mapping = manifest && manifest.find_by_internal(entry.name)
    if mapping
      details[:original_name] = mapping.original_name
      details[:content_type] = mapping.content_type
    end

    listing << details
  end
end
216
+
132
217
  private
133
218
 
134
- # Parse LIT file header
219
+ # Sanitize filename for cross-platform compatibility
135
220
  #
136
- # @param filename [String] Path to LIT file
137
- # @return [Models::LITHeader] Parsed header
138
- # @raise [Errors::ParseError] if file is not valid LIT
139
- def parse_header(filename)
221
+ # Windows does not allow: \ / : * ? " < > |
222
+ # LIT internal files often use :: prefix (e.g., ::DataSpace)
223
+ #
224
+ # @param filename [String] Original filename
225
+ # @return [String] Sanitized filename safe for all platforms
226
def sanitize_filename(filename)
  # Swap the characters : < > " | ? * for underscores, then drop any run of
  # underscores left at the very start (e.g. from a "::" prefix).
  cleaned = filename.gsub(/[:<>"|?*]/, "_").sub(/\A_+/, "")

  # Never hand back an empty name.
  cleaned.empty? ? "_unnamed_" : cleaned
end
239
+
240
# Sanitize an archive-supplied path so it is safe to join onto an output
# directory on any platform.
#
# Handles LIT paths like:
# - /data/bill2/content -> data/bill2/content
# - ::DataSpace/NameList -> DataSpace/NameList
# - ::DataSpace/Storage/EbEncryptDS/Content -> DataSpace/Storage/EbEncryptDS/Content
#
# The path comes from untrusted archive contents, so segments that could
# climb out of the output directory are discarded:
# - ../../etc/passwd -> etc/passwd
# - ..\..\evil.txt -> evil.txt
#
# @param path [String] Original path
# @return [String] Relative path using "/" separators, never empty and
#   never containing "." or ".." segments
def sanitize_path(path)
  # Strip leading slashes. \A anchors at the start of the string; ^ would
  # also match after an embedded newline.
  cleaned = path.sub(%r{\A/+}, "")

  # Handle :: prefix (common in LIT files)
  cleaned = cleaned[2..] if cleaned.start_with?("::")

  # Control characters and the characters : < > " | ? * become underscores.
  cleaned = cleaned.gsub(/[\x00-\x1F\x7F:<>"|?*]/, "_")

  # Split on both separator styles and drop empty segments as well as
  # segments made only of dots/spaces (".", "..", "...", " "), which either
  # traverse directories or are not valid names on Windows.
  segments = cleaned.split(%r{[/\\]+}).reject { |part| part.match?(/\A[. ]*\z/) }

  # Ensure we don't return empty string
  segments.empty? ? "_unnamed_" : segments.join("/")
end
271
+
272
# Get section data (cached or freshly decompressed)
#
# Section 0 is read as uncompressed content; any other id is looked up in
# lit_file.sections and decompressed. Results are memoized in
# @section_cache per (archive, section) pair, so one Decompressor can be
# used for several LIT files without returning another file's data.
#
# @param lit_file [Models::LITFile] Parsed LIT file
# @param section_id [Integer] Section ID
# @return [String] Decompressed section data
# @raise [Cabriolet::DecompressionError] if the section does not exist
def get_section_data(lit_file, section_id)
  # Section ids repeat across archives, so the archive's identity must be
  # part of the cache key.
  cache_key = [lit_file.object_id, section_id]
  cached = @section_cache[cache_key]
  return cached if cached

  data =
    if section_id.zero?
      read_uncompressed_content(lit_file)
    else
      # A negative id would index the array from its end; treat it as missing.
      section = lit_file.sections[section_id] unless section_id.negative?
      unless section
        raise Cabriolet::DecompressionError,
              "Section #{section_id} not found"
      end

      decompress_section(lit_file, section)
    end

  # Cache for future use; the assignment's value is the method's result.
  @section_cache[cache_key] = data
end
301
+
302
+ # Read uncompressed content from section 0
303
+ def read_uncompressed_content(lit_file)
304
+ filename = lit_file.instance_variable_get(:@filename)
140
305
  handle = @io_system.open(filename, Constants::MODE_READ)
141
306
 
142
307
  begin
143
- # Read and verify signature
144
- signature = @io_system.read(handle, 8)
145
- unless signature.start_with?(Binary::LITStructures::SIGNATURE[0..3])
146
- raise Errors::ParseError,
147
- "Not a valid LIT file: invalid signature"
308
+ # Section 0 starts at content_offset
309
+ @io_system.seek(handle, lit_file.content_offset, Constants::SEEK_START)
310
+
311
+ # Read all remaining data from content_offset to EOF
312
+ file_size = ::File.size(filename)
313
+ @io_system.read(handle, file_size - lit_file.content_offset)
314
+ ensure
315
+ @io_system.close(handle)
316
+ end
317
+ end
318
+
319
# Decompress a section by applying its transform list to its content.
#
# Looks up the section's transform list, content and (optional) control
# data under the storage path, then applies each transform GUID in order:
# LZX-compressed data is decompressed, identity transforms are skipped and
# DES encryption is rejected.
#
# @param lit_file [Models::LITFile] Parsed LIT file
# @param section [Object] Section descriptor; only its name is read here
# @return [String] Section data after all transforms
# @raise [Cabriolet::DecompressionError] if the transform list or content
#   entry is missing, or a transform GUID is unknown
# @raise [NotImplementedError] if the section is DES-encrypted
def decompress_section(lit_file, section)
  paths = Binary::LITStructures::Paths
  guids = Binary::LITStructures::GUIDs

  # Every lookup below shares the same "<storage><section name><suffix>" shape.
  storage_path = ->(suffix) { paths::STORAGE + section.name + suffix }

  # Read transform list
  transform_entry = lit_file.directory.find(storage_path.call(paths::TRANSFORM_LIST))
  unless transform_entry
    raise Cabriolet::DecompressionError,
          "Transform list not found for section: #{section.name}"
  end

  transforms = read_transforms(lit_file, transform_entry)

  # Read content
  content_entry = lit_file.directory.find(storage_path.call(paths::CONTENT))
  unless content_entry
    raise Cabriolet::DecompressionError,
          "Content not found for section: #{section.name}"
  end

  data = read_entry_data(lit_file, content_entry)

  # If content entry is empty, try reading section data directly from file
  # This handles LIT files where MSCompressed metadata is invalid/empty
  if data.empty? && section.name == "MSCompressed"
    data = read_section_data_from_file(lit_file, section)
  end

  # Control data is optional; it stays nil when the entry is absent.
  control_entry = lit_file.directory.find(storage_path.call(paths::CONTROL_DATA))
  control_data = read_entry_data(lit_file, control_entry) if control_entry

  # Apply transforms in order
  transforms.each do |transform_guid|
    case transform_guid
    when guids::DESENCRYPT
      raise NotImplementedError,
            "DES encryption not supported"
    when guids::LZXCOMPRESS
      data = decompress_lzx_section(lit_file, section, data, control_data)
    when guids::IDENTITY
      # No-op/identity transform - pass data through unchanged
      next
    else
      # A "GUID" containing the AOLL directory-chunk tag indicates invalid
      # transform metadata; the data is passed through as-is.
      next if transform_guid.include?("4F4C") || transform_guid.include?("AOLL")

      raise Cabriolet::DecompressionError,
            "Unknown transform GUID: #{transform_guid}"
    end
  end

  data
end
393
+
394
+ # Read transforms from transform list
395
+ def read_transforms(lit_file, entry)
396
+ data = read_entry_data(lit_file, entry)
149
397
 
150
- # Seek back to start
151
- @io_system.seek(handle, 0, Constants::SEEK_START)
398
+ transforms = []
399
+ pos = 0
152
400
 
153
- # Read header structure
154
- header_data = @io_system.read(handle, 24)
155
- lit_header = Binary::LITStructures::LITHeader.read(header_data)
401
+ while pos + 16 <= data.bytesize
402
+ guid_bytes = data[pos, 16]
403
+ guid = format_guid(guid_bytes)
404
+ transforms << guid
405
+ pos += 16
406
+ end
156
407
 
157
- # Create header model
158
- header = Models::LITHeader.new
159
- header.version = lit_header.version
160
- header.encrypted = lit_header.flags.anybits?(0x01)
408
+ transforms
409
+ end
161
410
 
162
- # Parse file entries
163
- header.files = parse_file_entries(
164
- handle, lit_header.file_count
411
# Render 16 raw GUID bytes in braced, upper-case textual form, e.g.
# {0A9007C6-4076-11D3-8789-0000F8105754}.
#
# The first three fields are unpacked little-endian (32, 16 and 16 bits),
# the fourth as a big-endian 16-bit value, and the final six bytes as hex
# digits in byte order.
#
# @param bytes [String] Binary string holding the GUID
# @return [String] Formatted GUID
def format_guid(bytes)
  data1, data2, data3, data4_head, data4_tail = bytes.unpack("VvvnH12")

  format("{%08X-%04X-%04X-%04X-%s}",
         data1, data2, data3, data4_head, data4_tail.upcase)
end
420
+
421
# Read the raw bytes of one directory entry from the LIT file on disk.
#
# Opens the file recorded in lit_file's @filename, seeks to
# lit_file.content_offset + entry.offset and reads entry.size bytes. The
# handle is closed again even if seeking or reading raises.
#
# @param lit_file [Models::LITFile] Parsed LIT file
# @param entry [Object] Directory entry; its offset and size are read
# @return [String] Whatever the I/O system's read returns for that range
def read_entry_data(lit_file, entry)
  handle = @io_system.open(
    lit_file.instance_variable_get(:@filename),
    Constants::MODE_READ,
  )

  begin
    # Entry offsets are relative to the start of the content area.
    absolute_offset = lit_file.content_offset + entry.offset
    @io_system.seek(handle, absolute_offset, Constants::SEEK_START)
    @io_system.read(handle, entry.size)
  ensure
    @io_system.close(handle)
  end
end
437
+
438
+ # Read section data directly from file (for when Content entry is empty)
439
+ # This calculates where the section data actually starts and reads it
440
+ def read_section_data_from_file(lit_file, section)
441
+ filename = lit_file.instance_variable_get(:@filename)
442
+
443
+ # Find the section ID for this section
444
+ section_id = lit_file.sections.index(section)
445
+ return "" unless section_id
446
+
447
+ # Calculate where section 0 data ends
448
+ section_0_entries = lit_file.directory.entries.select do |e|
449
+ e.section.zero?
450
+ end
451
+ section_0_data = section_0_entries.reject do |e|
452
+ e.name.start_with?("::DataSpace") ||
453
+ e.name.end_with?("/") ||
454
+ e.name.start_with?("/DRM")
455
+ end
456
+ max_end = section_0_data.map { |e| e.offset + e.size }.max
457
+
458
+ # Section data starts after section 0 data
459
+ section_start = lit_file.content_offset + max_end
460
+
461
+ # Calculate section end by finding files in this section
462
+ section_entries = lit_file.directory.entries.select do |e|
463
+ e.section == section_id
464
+ end
465
+ max_section_end = section_entries.map { |e| e.offset + e.size }.max
166
466
 
167
- header
467
+ # Read the section data
468
+ handle = @io_system.open(filename, Constants::MODE_READ)
469
+ begin
470
+ @io_system.seek(handle, section_start, Constants::SEEK_START)
471
+ @io_system.read(handle, max_section_end)
168
472
  ensure
169
- @io_system.close(handle) if handle
473
+ @io_system.close(handle)
170
474
  end
171
475
  end
172
476
 
173
- # Parse file entries from LIT archive
174
- #
175
- # @param handle [System::FileHandle] File handle positioned at file
176
- # entries
177
- # @param file_count [Integer] Number of files to parse
178
- # @return [Array<Models::LITFile>] List of file entries
179
- def parse_file_entries(handle, file_count)
180
- files = []
477
+ # Decompress LZX section with ResetTable
478
+ def decompress_lzx_section(lit_file, section, compressed_data,
479
+ control_data)
480
+ # Parse control data
481
+ unless control_data && control_data.bytesize >= 32
482
+ raise Cabriolet::DecompressionError,
483
+ "Invalid LZX control data"
484
+ end
181
485
 
182
- file_count.times do
183
- # Read filename length
184
- len_data = @io_system.read(handle, 4)
185
- filename_length = len_data.unpack1("V")
486
+ control = Binary::LITStructures::LZXControlData.read(control_data)
186
487
 
187
- # Read filename
188
- filename = @io_system.read(handle, filename_length)
488
+ unless control.tag == Binary::LITStructures::Tags::LZXC
489
+ raise Cabriolet::DecompressionError,
490
+ "Invalid LZXC tag: #{format('0x%08X', control.tag)}"
491
+ end
492
+
493
+ # Calculate window size
494
+ window_size = 15
495
+ size_code = control.window_size_code
496
+ while size_code.positive?
497
+ size_code >>= 1
498
+ window_size += 1
499
+ end
189
500
 
190
- # Read file metadata
191
- metadata = @io_system.read(handle, 28)
192
- offset, _, uncompressed_size, flags =
193
- metadata.unpack("QQQV")
501
+ if window_size < 15 || window_size > 21
502
+ raise Cabriolet::DecompressionError,
503
+ "Invalid LZX window size: #{window_size}"
504
+ end
194
505
 
195
- # Create file entry
196
- file = Models::LITFile.new
197
- file.filename = filename
198
- file.offset = offset
199
- file.length = uncompressed_size
200
- file.compressed = flags.anybits?(Binary::LITStructures::FileFlags::COMPRESSED)
201
- file.encrypted = flags.anybits?(Binary::LITStructures::FileFlags::ENCRYPTED)
506
+ # Read reset table
507
+ reset_table_path = Binary::LITStructures::Paths::STORAGE +
508
+ section.name +
509
+ "/Transform/#{Binary::LITStructures::GUIDs::LZXCOMPRESS}/InstanceData/ResetTable"
202
510
 
203
- files << file
511
+ reset_entry = lit_file.directory.find(reset_table_path)
512
+ unless reset_entry
513
+ raise Cabriolet::DecompressionError,
514
+ "ResetTable not found for section: #{section.name}"
204
515
  end
205
516
 
206
- files
207
- end
517
+ reset_data = read_entry_data(lit_file, reset_entry)
518
+ reset_table = parse_reset_table(reset_data)
208
519
 
209
- # Decompress data using LZX
210
- #
211
- # @param input_handle [System::FileHandle] Input handle
212
- # @param output_handle [System::FileHandle] Output handle
213
- # @param expected_size [Integer] Expected output size
214
- # @return [Integer] Number of bytes written
215
- def decompress_lzx(input_handle, output_handle, expected_size)
216
- decompressor = Decompressors::LZX.new(
217
- @io_system,
218
- input_handle,
219
- output_handle,
220
- @buffer_size,
520
+ # Decompress with reset points
521
+ decompress_with_reset_table(
522
+ compressed_data,
523
+ reset_table,
524
+ window_size,
221
525
  )
526
+ end
527
+
528
+ # Parse reset table
529
+ def parse_reset_table(data)
530
+ header = Binary::LITStructures::ResetTableHeader.read(data[0, 40])
531
+
532
+ unless header.version == 3
533
+ raise Cabriolet::DecompressionError,
534
+ "Unsupported ResetTable version: #{header.version}"
535
+ end
536
+
537
+ # Read reset entries (skip first which is always 0)
538
+ entry_offset = header.header_length + 8
539
+ num_entries = header.num_entries
540
+
541
+ reset_points = []
542
+ (num_entries - 1).times do |_i|
543
+ break if entry_offset + 8 > data.bytesize
222
544
 
223
- decompressor.decompress(expected_size)
545
+ offset_low = data[entry_offset, 4].unpack1("V")
546
+ offset_high = data[entry_offset + 4, 4].unpack1("V")
547
+
548
+ if offset_high != 0
549
+ raise Cabriolet::DecompressionError,
550
+ "64-bit reset point not supported"
551
+ end
552
+
553
+ reset_points << offset_low
554
+ entry_offset += 8
555
+ end
556
+
557
+ {
558
+ uncompressed_length: header.uncompressed_length,
559
+ compressed_length: header.compressed_length,
560
+ reset_interval: header.reset_interval,
561
+ reset_points: reset_points,
562
+ }
224
563
  end
225
564
 
226
- # Copy data directly without decompression
227
- #
228
- # @param input_handle [System::FileHandle] Input handle
229
- # @param output_handle [System::FileHandle] Output handle
230
- # @param size [Integer] Number of bytes to copy
231
- # @return [Integer] Number of bytes written
232
- def copy_data(input_handle, output_handle, size)
233
- bytes_written = 0
234
- remaining = size
565
+ # Decompress with reset table
566
+ def decompress_with_reset_table(compressed_data, reset_table, window_size)
567
+ uncompressed = String.new(capacity: reset_table[:uncompressed_length])
568
+
569
+ # Create LZX decompressor
570
+ input_handle = System::MemoryHandle.new(compressed_data)
571
+ output_handle = System::MemoryHandle.new("", Constants::MODE_WRITE)
572
+
573
+ decompressor = Decompressors::LZX.new(window_size)
235
574
 
236
- while remaining.positive?
237
- chunk_size = [remaining, @buffer_size].min
238
- data = @io_system.read(input_handle, chunk_size)
239
- break if data.empty?
575
+ window_bytes = 1 << window_size
576
+ reset_table[:reset_interval]
577
+ reset_points = [0] + reset_table[:reset_points]
578
+
579
+ bytes_remaining = reset_table[:uncompressed_length]
580
+ compressed_pos = 0
581
+ 0
582
+
583
+ # Process each reset block
584
+ reset_points.each_with_index do |reset_point, idx|
585
+ next_reset = reset_points[idx + 1] || compressed_data.bytesize
586
+
587
+ compressed_size = next_reset - reset_point
588
+ output_size = [bytes_remaining, window_bytes].min
589
+
590
+ if output_size.positive?
591
+ # Decompress this block
592
+ input_chunk = compressed_data[compressed_pos, compressed_size]
593
+ input_handle = System::MemoryHandle.new(input_chunk)
594
+ output_handle = System::MemoryHandle.new("", Constants::MODE_WRITE)
595
+
596
+ decompressor.reset if idx.positive?
597
+ decompressor.decompress_chunk(
598
+ input_handle,
599
+ output_handle,
600
+ compressed_size,
601
+ output_size,
602
+ )
603
+
604
+ uncompressed << output_handle.data
605
+ compressed_pos += compressed_size
606
+ bytes_remaining -= output_size
607
+ end
240
608
 
241
- written = @io_system.write(output_handle, data)
242
- bytes_written += written
243
- remaining -= written
609
+ break if bytes_remaining <= 0
244
610
  end
245
611
 
246
- bytes_written
612
+ uncompressed
247
613
  end
248
614
  end
249
615
  end