cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -1,249 +1,615 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "parser"
|
|
4
|
+
require_relative "../decompressors/lzx"
|
|
5
|
+
require_relative "../binary/lit_structures"
|
|
6
|
+
require_relative "../errors"
|
|
7
|
+
|
|
3
8
|
module Cabriolet
|
|
4
9
|
module LIT
|
|
5
|
-
# Decompressor
|
|
10
|
+
# Decompressor for Microsoft Reader LIT files
|
|
11
|
+
#
|
|
12
|
+
# Handles complete LIT file extraction including:
|
|
13
|
+
# - Parsing complex LIT structure with Parser
|
|
14
|
+
# - DataSpace/Storage sections with transform layers
|
|
15
|
+
# - LZX decompression with ResetTable
|
|
16
|
+
# - Manifest-based filename restoration
|
|
17
|
+
# - Section caching for efficiency
|
|
6
18
|
#
|
|
7
|
-
#
|
|
19
|
+
# Based on the openclit/SharpLit reference implementation.
|
|
8
20
|
#
|
|
9
|
-
# NOTE:
|
|
10
|
-
# DES-encrypted (DRM-protected) LIT files are not supported.
|
|
11
|
-
# For encrypted files, use Microsoft Reader or convert to another format
|
|
12
|
-
# first.
|
|
21
|
+
# NOTE: DES encryption (DRM) is not supported.
|
|
13
22
|
class Decompressor
|
|
14
|
-
attr_reader :io_system
|
|
23
|
+
attr_reader :io_system, :parser
|
|
15
24
|
attr_accessor :buffer_size
|
|
16
25
|
|
|
17
|
-
#
|
|
18
|
-
DEFAULT_BUFFER_SIZE =
|
|
26
|
+
# Default buffer size for decompression
|
|
27
|
+
DEFAULT_BUFFER_SIZE = 8192
|
|
19
28
|
|
|
20
|
-
|
|
21
|
-
#
|
|
22
|
-
# @param io_system [System::IOSystem, nil] Custom I/O system or nil for
|
|
23
|
-
# default
|
|
24
|
-
def initialize(io_system = nil)
|
|
29
|
+
# Set up the decompressor and its collaborators.
#
# @param io_system [System::IOSystem, nil] I/O system to use; a new
#   System::IOSystem is created when nil
# @param algorithm_factory [Object, nil] algorithm factory; falls back to
#   Cabriolet.algorithm_factory when nil
def initialize(io_system = nil, algorithm_factory = nil)
  io_system ||= System::IOSystem.new
  algorithm_factory ||= Cabriolet.algorithm_factory

  @io_system = io_system
  @algorithm_factory = algorithm_factory
  @parser = Parser.new(io_system)
  # Decompressed section data, filled lazily during extraction
  @section_cache = {}
  @buffer_size = DEFAULT_BUFFER_SIZE
end
|
|
28
36
|
|
|
29
37
|
# Open and parse a LIT file
|
|
30
38
|
#
|
|
31
|
-
# @param filename [String] Path to
|
|
32
|
-
# @return [Models::
|
|
33
|
-
# @raise [
|
|
34
|
-
# @raise [NotImplementedError] if
|
|
39
|
+
# @param filename [String] Path to LIT file
|
|
40
|
+
# @return [Models::LITFile] Parsed LIT file structure
|
|
41
|
+
# @raise [Cabriolet::ParseError] if file is invalid
|
|
42
|
+
# @raise [NotImplementedError] if file is DRM-encrypted
|
|
35
43
|
# Parse a LIT file and return its model.
#
# @param filename [String] Path to LIT file
# @return [Object] whatever the parser returns for this file
# @raise [NotImplementedError] if the parsed file reports it is encrypted
def open(filename)
  @parser.parse(filename).tap do |parsed|
    # Remember the source path on the model; the read helpers in this class
    # fetch it back through instance_variable_get.
    parsed.instance_variable_set(:@filename, filename)

    next unless parsed.encrypted?

    raise NotImplementedError,
          "DES-encrypted LIT files not supported. " \
          "DRM level: #{parsed.drm_level}"
  end
end
|
|
48
58
|
|
|
49
59
|
# Close a LIT file (no-op for compatibility)
|
|
50
60
|
#
|
|
51
|
-
# @param
|
|
61
|
+
# @param _lit_file [Models::LITFile] LIT file to close
|
|
52
62
|
# @return [void]
|
|
53
|
-
def close(
|
|
54
|
-
# No resources to free in the
|
|
63
|
+
# Release per-file state held by the decompressor.
#
# @param _lit_file [Object] ignored; kept for API compatibility
# @return [nil]
def close(_lit_file)
  # The file object owns nothing that needs freeing; the only state to
  # drop is the cache of section data built up during extraction.
  @section_cache.clear

  nil
end
|
|
58
69
|
|
|
59
|
-
# Extract a file from LIT archive
|
|
70
|
+
# Extract a file from LIT archive (wrapper for extract_file)
|
|
60
71
|
#
|
|
61
|
-
# @param
|
|
62
|
-
# @param file [Models::
|
|
63
|
-
# @param output_path [String] Where to write
|
|
64
|
-
# @return [Integer]
|
|
65
|
-
# @raise [
|
|
66
|
-
# @raise [NotImplementedError] if
|
|
67
|
-
|
|
68
|
-
|
|
72
|
+
# @param lit_file [Models::LITFile] Parsed LIT file
|
|
73
|
+
# @param file [Models::LITDirectoryEntry] File entry to extract
|
|
74
|
+
# @param output_path [String] Where to write extracted file
|
|
75
|
+
# @return [Integer] Bytes written
|
|
76
|
+
# @raise [ArgumentError] if parameters are invalid
|
|
77
|
+
# @raise [NotImplementedError] if file is encrypted
|
|
78
|
+
# @raise [Cabriolet::DecompressionError] if extraction fails
|
|
79
|
+
# Extract one entry, accepting either an entry-like object or a name.
#
# @param lit_file [Object] Parsed LIT file
# @param file [Object] entry responding to #name, or anything whose #to_s
#   is the internal name
# @param output_path [String] Where to write extracted file
# @return [Object] result of #extract_file
# @raise [ArgumentError] if any argument is nil/false
# @raise [NotImplementedError] if the LIT file reports it is encrypted
def extract(lit_file, file, output_path)
  problem =
    if !lit_file then "Header must not be nil"
    elsif !file then "File must not be nil"
    elsif !output_path then "Output path must not be nil"
    end
  raise ArgumentError, problem if problem

  if lit_file.encrypted?
    raise NotImplementedError,
          "Encrypted sections not yet supported. " \
          "DRM level: #{lit_file.drm_level}"
  end

  # Resolve the internal name, then delegate to the name-based extractor
  internal_name =
    if file.respond_to?(:name)
      file.name
    else
      file.to_s
    end

  extract_file(lit_file, internal_name, output_path)
end
|
|
95
|
+
|
|
96
|
+
# Extract a file by name from LIT archive
|
|
97
|
+
#
|
|
98
|
+
# @param lit_file [Models::LITFile] Parsed LIT file
|
|
99
|
+
# @param internal_name [String] Internal filename
|
|
100
|
+
# @param output_path [String] Where to write extracted file
|
|
101
|
+
# @return [Integer] Bytes written
|
|
102
|
+
# @raise [Cabriolet::DecompressionError] if extraction fails
|
|
103
|
+
# Extract a single file, looked up by its internal name, to output_path.
#
# @param lit_file [Object] Parsed LIT file (must expose #directory)
# @param internal_name [String] Internal filename
# @param output_path [String] Where to write extracted file
# @return [Integer] Bytes written
# @raise [ArgumentError] if any argument is nil/false
# @raise [Cabriolet::DecompressionError] if the entry is unknown or its
#   data cannot be sliced out of the section
def extract_file(lit_file, internal_name, output_path)
  raise ArgumentError, "LIT file required" unless lit_file
  raise ArgumentError, "Internal name required" unless internal_name
  raise ArgumentError, "Output path required" unless output_path

  entry = lit_file.directory.find(internal_name)
  unless entry
    raise Cabriolet::DecompressionError,
          "File not found: #{internal_name}"
  end

  # Cached or freshly decompressed data for the entry's section
  section_data = get_section_data(lit_file, entry.section)

  # Safe navigation: a nil section must end in the DecompressionError below
  # (whose message already allows for nil), not in a NoMethodError here.
  # byteslice keeps the offset/size byte-based whatever the string encoding.
  file_data = section_data&.byteslice(entry.offset, entry.size)

  unless file_data
    raise Cabriolet::DecompressionError,
          "Failed to extract file #{entry.name}: " \
          "offset=#{entry.offset}, size=#{entry.size}, section_data_size=#{section_data&.bytesize || 0}"
  end

  output_handle = @io_system.open(output_path, Constants::MODE_WRITE)
  begin
    @io_system.write(output_handle, file_data)
  ensure
    @io_system.close(output_handle)
  end

  file_data.bytesize
end
|
|
103
138
|
|
|
104
139
|
# Extract all files from LIT archive
|
|
105
140
|
#
|
|
106
|
-
# @param
|
|
107
|
-
# @param output_dir [String] Directory to extract
|
|
141
|
+
# @param lit_file [Models::LITFile] Parsed LIT file
|
|
142
|
+
# @param output_dir [String] Directory to extract to
|
|
143
|
+
# @param use_manifest [Boolean] Use manifest for filenames
|
|
108
144
|
# @return [Integer] Number of files extracted
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
145
|
+
# Extract every non-root, non-directory entry below output_dir.
#
# @param lit_file [Object] Parsed LIT file
# @param output_dir [String] Directory to extract to (created if missing)
# @param use_manifest [Boolean] when true and a manifest is present, name
#   output files after the manifest's original names
# @return [Integer] Number of files extracted
# @raise [ArgumentError] if lit_file or output_dir is nil/false
def extract_all(lit_file, output_dir, use_manifest: true)
  raise ArgumentError, "Header must not be nil" unless lit_file
  raise ArgumentError, "Output directory must not be nil" unless output_dir

  ::FileUtils.mkdir_p(output_dir)

  # Root entry and directory markers (names ending in "/") carry no data
  extractable = lit_file.directory.entries.reject do |entry|
    entry.root? || entry.name.end_with?("/")
  end

  extractable.each do |entry|
    mapping =
      (lit_file.manifest.find_by_internal(entry.name) if use_manifest && lit_file.manifest)
    display_name = mapping ? mapping.original_name : entry.name

    # Clean the name, then anchor it under the output directory
    output_path = ::File.join(output_dir, sanitize_path(display_name))

    parent = ::File.dirname(output_path)
    ::FileUtils.mkdir_p(parent) unless ::File.directory?(parent)

    extract_file(lit_file, entry.name, output_path)
  end

  extractable.size
end
|
|
131
188
|
|
|
189
|
+
# List all files in LIT archive
|
|
190
|
+
#
|
|
191
|
+
# @param lit_file [Models::LITFile] Parsed LIT file
|
|
192
|
+
# @param use_manifest [Boolean] Show original filenames
|
|
193
|
+
# @return [Array<Hash>] File information
|
|
194
|
+
# Describe every non-root directory entry.
#
# @param lit_file [Object] Parsed LIT file
# @param use_manifest [Boolean] when true and a manifest is present, add
#   :original_name and :content_type for entries the manifest knows
# @return [Array<Hash>] one hash per entry with :internal_name, :section,
#   :offset and :size (plus the manifest keys when available)
# @raise [ArgumentError] if lit_file is nil/false
def list_files(lit_file, use_manifest: true)
  raise ArgumentError, "LIT file required" unless lit_file

  listed = lit_file.directory.entries.reject(&:root?)

  listed.each_with_object([]) do |entry, listing|
    record = {
      internal_name: entry.name,
      section: entry.section,
      offset: entry.offset,
      size: entry.size,
    }

    mapping =
      (lit_file.manifest.find_by_internal(entry.name) if use_manifest && lit_file.manifest)
    if mapping
      record[:original_name] = mapping.original_name
      record[:content_type] = mapping.content_type
    end

    listing << record
  end
end
|
|
216
|
+
|
|
132
217
|
private
|
|
133
218
|
|
|
134
|
-
#
|
|
219
|
+
# Sanitize filename for cross-platform compatibility
|
|
135
220
|
#
|
|
136
|
-
#
|
|
137
|
-
#
|
|
138
|
-
#
|
|
139
|
-
|
|
221
|
+
# Windows does not allow: \ / : * ? " < > |
|
|
222
|
+
# LIT internal files often use :: prefix (e.g., ::DataSpace)
|
|
223
|
+
#
|
|
224
|
+
# @param filename [String] Original filename
|
|
225
|
+
# @return [String] Sanitized filename safe for all platforms
|
|
226
|
+
# Make a single filename safe to create on any platform.
#
# The characters : < > " | ? * each become an underscore, and any run of
# underscores left at the very start (e.g. from a "::" prefix) is dropped.
#
# @param filename [String] Original filename
# @return [String] cleaned name, or "_unnamed_" when nothing is left
def sanitize_filename(filename)
  cleaned = filename.tr(':<>"|?*', "_").sub(/\A_+/, "")

  cleaned.empty? ? "_unnamed_" : cleaned
end
|
|
239
|
+
|
|
240
|
+
# Sanitize path for cross-platform compatibility
|
|
241
|
+
#
|
|
242
|
+
# Handles LIT paths like:
|
|
243
|
+
# - /data/bill2/content -> data/bill2/content
|
|
244
|
+
# - ::DataSpace/NameList -> DataSpace/NameList
|
|
245
|
+
# - ::DataSpace/Storage/EbEncryptDS/Content -> DataSpace/Storage/EbEncryptDS/Content
|
|
246
|
+
#
|
|
247
|
+
# @param path [String] Original path
|
|
248
|
+
# @return [String] Sanitized path safe for all platforms
|
|
249
|
+
# Turn an archive-supplied path into a safe relative path.
#
# Steps, in order:
# - backslashes are treated as separators and leading separators removed
# - a leading "::" prefix is dropped
# - control characters and the characters : < > " | ? * become "_"
# - empty, "." and ".." segments are discarded
#
# Dropping ".." segments matters because the result is joined onto an
# output directory by the caller; names come from the archive and could
# otherwise point outside that directory.
#
# @param path [String] Original path
# @return [String] relative path using "/" separators, or "_unnamed_"
#   when no usable segment remains
def sanitize_path(path)
  # \A (not ^) so only the true start of the string is stripped
  cleaned = path.tr("\\", "/").sub(%r{\A/+}, "")
  cleaned = cleaned.delete_prefix("::")

  cleaned = cleaned.gsub(/[\x00-\x1F\x7F]/, "_")
  cleaned = cleaned.gsub(/[:<>"|?*]/, "_")

  segments = cleaned.split("/").reject do |segment|
    segment.empty? || segment == "." || segment == ".."
  end

  segments.empty? ? "_unnamed_" : segments.join("/")
end
|
|
271
|
+
|
|
272
|
+
# Get section data (cached or freshly decompressed)
|
|
273
|
+
#
|
|
274
|
+
# @param lit_file [Models::LITFile] Parsed LIT file
|
|
275
|
+
# @param section_id [Integer] Section ID
|
|
276
|
+
# @return [String] Decompressed section data
|
|
277
|
+
# Return the data of one section, decompressing it on first use.
#
# Results are cached per source file and section id. The cache key
# includes the path stored on lit_file (@filename) so that a decompressor
# used for more than one LIT file never hands back another file's section.
#
# @param lit_file [Object] Parsed LIT file
# @param section_id [Integer] Section ID (0 means uncompressed content)
# @return [String] section data
# @raise [Cabriolet::DecompressionError] if the section id is unknown
def get_section_data(lit_file, section_id)
  cache_key = [lit_file.instance_variable_get(:@filename), section_id]

  cached = @section_cache[cache_key]
  return cached if cached

  data =
    if section_id.zero?
      # Section 0 is uncompressed content
      read_uncompressed_content(lit_file)
    else
      # The sections array is indexed by section id
      section = lit_file.sections[section_id]
      unless section
        raise Cabriolet::DecompressionError,
              "Section #{section_id} not found"
      end

      decompress_section(lit_file, section)
    end

  @section_cache[cache_key] = data
end
|
|
301
|
+
|
|
302
|
+
# Read uncompressed content from section 0
|
|
303
|
+
# Read everything from the file's content offset to the end of the file.
#
# The path is the one stored on lit_file as @filename.
#
# @param lit_file [Object] Parsed LIT file
# @return [Object] whatever the I/O system's read returns for that range
def read_uncompressed_content(lit_file)
  source_path = lit_file.instance_variable_get(:@filename)
  source = @io_system.open(source_path, Constants::MODE_READ)

  begin
    # Section 0 starts at content_offset and runs to EOF
    start = lit_file.content_offset
    @io_system.seek(source, start, Constants::SEEK_START)

    remaining = ::File.size(source_path) - lit_file.content_offset
    @io_system.read(source, remaining)
  ensure
    @io_system.close(source)
  end
end
|
|
318
|
+
|
|
319
|
+
# Decompress a section with transforms
|
|
320
|
+
# Produce the data of a non-zero section by applying its transforms.
#
# Looks up the section's transform list, content and (optional) control
# data under the storage path, then applies each listed transform in order.
#
# @param lit_file [Object] Parsed LIT file
# @param section [Object] section descriptor (must respond to #name)
# @return [String] section data after all transforms
# @raise [Cabriolet::DecompressionError] if the transform list or content
#   entry is missing, or a transform GUID is not recognised
# @raise [NotImplementedError] if the section uses the DES transform
def decompress_section(lit_file, section)
  # All per-section metadata lives under STORAGE + section name + suffix
  lookup = lambda do |suffix|
    lit_file.directory.find(
      Binary::LITStructures::Paths::STORAGE + section.name + suffix,
    )
  end

  transform_entry = lookup.call(Binary::LITStructures::Paths::TRANSFORM_LIST)
  unless transform_entry
    raise Cabriolet::DecompressionError,
          "Transform list not found for section: #{section.name}"
  end
  transforms = read_transforms(lit_file, transform_entry)

  content_entry = lookup.call(Binary::LITStructures::Paths::CONTENT)
  unless content_entry
    raise Cabriolet::DecompressionError,
          "Content not found for section: #{section.name}"
  end
  data = read_entry_data(lit_file, content_entry)

  # Fallback for files whose MSCompressed content entry yields nothing:
  # read the section bytes straight from the file. A nil read is treated
  # like an empty one instead of raising NoMethodError.
  if (data.nil? || data.empty?) && section.name == "MSCompressed"
    data = read_section_data_from_file(lit_file, section)
  end

  control_entry = lookup.call(Binary::LITStructures::Paths::CONTROL_DATA)
  control_data = read_entry_data(lit_file, control_entry) if control_entry

  transforms.each do |transform_guid|
    case transform_guid
    when Binary::LITStructures::GUIDs::DESENCRYPT
      raise NotImplementedError,
            "DES encryption not supported"
    when Binary::LITStructures::GUIDs::LZXCOMPRESS
      data = decompress_lzx_section(lit_file, section, data, control_data)
    when Binary::LITStructures::GUIDs::IDENTITY
      # Identity transform: data passes through unchanged
      next
    else
      # A GUID containing the AOLL chunk tag signals invalid transform
      # metadata; the data is kept as-is rather than failing.
      if transform_guid.include?("4F4C") || transform_guid.include?("AOLL")
        next
      end

      raise Cabriolet::DecompressionError,
            "Unknown transform GUID: #{transform_guid}"
    end
  end

  data
end
|
|
393
|
+
|
|
394
|
+
# Read transforms from transform list
|
|
395
|
+
# Read a transform list entry and return its GUIDs as formatted strings.
#
# The entry data is consumed in 16-byte records; a trailing partial record
# is ignored.
#
# @param lit_file [Object] Parsed LIT file
# @param entry [Object] directory entry of the transform list
# @return [Array<String>] formatted GUIDs in file order
def read_transforms(lit_file, entry)
  raw = read_entry_data(lit_file, entry)
  guid_count = raw.bytesize / 16

  Array.new(guid_count) { |index| format_guid(raw[index * 16, 16]) }
end
|
|
161
410
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
411
|
+
# Format GUID bytes as string
|
|
412
|
+
# Render 16 raw GUID bytes as an upper-case, brace-wrapped string.
#
# The first three fields are unpacked little-endian, the fourth big-endian
# and the last six bytes as a hex string.
#
# @param bytes [String] 16 bytes of GUID data
# @return [String] e.g. "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}"
def format_guid(bytes)
  data1, data2, data3, data4, node = bytes.unpack("VvvnH12")

  fields = {
    part0: data1,
    part1: data2,
    part2: data3,
    part3: data4,
    part4: node.upcase,
  }
  format("{%<part0>08X-%<part1>04X-%<part2>04X-%<part3>04X-%<part4>s}", fields)
end
|
|
420
|
+
|
|
421
|
+
# Read entry data from file
|
|
422
|
+
# Read the bytes of one directory entry from the source file.
#
# The entry's offset is taken relative to the file's content offset; the
# path is the one stored on lit_file as @filename.
#
# @param lit_file [Object] Parsed LIT file
# @param entry [Object] directory entry (must respond to #offset, #size)
# @return [Object] whatever the I/O system's read returns
def read_entry_data(lit_file, entry)
  source_path = lit_file.instance_variable_get(:@filename)
  source = @io_system.open(source_path, Constants::MODE_READ)

  begin
    absolute_offset = lit_file.content_offset + entry.offset
    @io_system.seek(source, absolute_offset, Constants::SEEK_START)
    @io_system.read(source, entry.size)
  ensure
    @io_system.close(source)
  end
end
|
|
437
|
+
|
|
438
|
+
# Read section data directly from file (for when Content entry is empty)
|
|
439
|
+
# This calculates where the section data actually starts and reads it
|
|
440
|
+
# Read a section's bytes straight from the file, bypassing its content
# entry. Used as a fallback when that entry is empty.
#
# The start is estimated as the content offset plus the furthest end of
# the section-0 data entries (ignoring ::DataSpace, /DRM and directory
# entries); the length is the furthest end among this section's entries.
#
# @param lit_file [Object] Parsed LIT file
# @param section [Object] section descriptor to locate in lit_file.sections
# @return [String] bytes read, or "" when the section is unknown or has no
#   entries
def read_section_data_from_file(lit_file, section)
  filename = lit_file.instance_variable_get(:@filename)

  section_id = lit_file.sections.index(section)
  return "" unless section_id

  entries = lit_file.directory.entries

  section_0_data = entries.select do |e|
    e.section.zero? &&
      !e.name.start_with?("::DataSpace") &&
      !e.name.end_with?("/") &&
      !e.name.start_with?("/DRM")
  end
  # `max` is nil for an empty list; with no section-0 data the section
  # starts right at the content offset.
  section_0_end = section_0_data.map { |e| e.offset + e.size }.max || 0
  section_start = lit_file.content_offset + section_0_end

  section_length = entries
    .select { |e| e.section == section_id }
    .map { |e| e.offset + e.size }
    .max
  # No entries in this section means there is nothing to read
  return "" unless section_length

  handle = @io_system.open(filename, Constants::MODE_READ)
  begin
    @io_system.seek(handle, section_start, Constants::SEEK_START)
    @io_system.read(handle, section_length)
  ensure
    @io_system.close(handle)
  end
end
|
|
172
476
|
|
|
173
|
-
#
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
477
|
+
# Decompress LZX section with ResetTable
|
|
478
|
+
# Decompress an LZX-compressed section using its control data and
# ResetTable.
#
# @param lit_file [Object] Parsed LIT file
# @param section [Object] section descriptor (must respond to #name)
# @param compressed_data [String] compressed section bytes
# @param control_data [String, nil] raw LZX control data
# @return [String] result of #decompress_with_reset_table
# @raise [Cabriolet::DecompressionError] on missing/short control data,
#   wrong tag, out-of-range window size or missing ResetTable
def decompress_lzx_section(lit_file, section, compressed_data,
                           control_data)
  if !control_data || control_data.bytesize < 32
    raise Cabriolet::DecompressionError,
          "Invalid LZX control data"
  end

  control = Binary::LITStructures::LZXControlData.read(control_data)
  if control.tag != Binary::LITStructures::Tags::LZXC
    raise Cabriolet::DecompressionError,
          "Invalid LZXC tag: #{format('0x%08X', control.tag)}"
  end

  # Window size in bits: 15 plus one for every shift needed to bring the
  # size code down to zero
  window_size = 15
  remaining_code = control.window_size_code
  loop do
    break unless remaining_code.positive?

    remaining_code >>= 1
    window_size += 1
  end

  unless (15..21).cover?(window_size)
    raise Cabriolet::DecompressionError,
          "Invalid LZX window size: #{window_size}"
  end

  reset_table_path = Binary::LITStructures::Paths::STORAGE +
    section.name +
    "/Transform/#{Binary::LITStructures::GUIDs::LZXCOMPRESS}/InstanceData/ResetTable"

  reset_entry = lit_file.directory.find(reset_table_path)
  unless reset_entry
    raise Cabriolet::DecompressionError,
          "ResetTable not found for section: #{section.name}"
  end

  reset_table = parse_reset_table(read_entry_data(lit_file, reset_entry))

  decompress_with_reset_table(compressed_data, reset_table, window_size)
end
|
|
527
|
+
|
|
528
|
+
# Parse reset table
|
|
529
|
+
# Parse a ResetTable blob into a plain hash.
#
# Only version 3 tables are accepted. The first reset entry is skipped
# (the original comment notes it is always 0); each further entry is read
# as two little-endian 32-bit halves and must have a zero high half.
#
# @param data [String] raw ResetTable bytes
# @return [Hash] :uncompressed_length, :compressed_length,
#   :reset_interval and :reset_points (Array<Integer>)
# @raise [Cabriolet::DecompressionError] on an unsupported version or a
#   reset point that needs more than 32 bits
def parse_reset_table(data)
  header = Binary::LITStructures::ResetTableHeader.read(data[0, 40])

  if header.version != 3
    raise Cabriolet::DecompressionError,
          "Unsupported ResetTable version: #{header.version}"
  end

  # Entries are 8 bytes each; start one entry past the header
  first_entry_at = header.header_length + 8
  points = []

  (header.num_entries - 1).times do |index|
    entry_at = first_entry_at + (index * 8)
    # Stop quietly if the table is shorter than num_entries claims
    break if entry_at + 8 > data.bytesize

    low_word = data[entry_at, 4].unpack1("V")
    high_word = data[entry_at + 4, 4].unpack1("V")

    unless high_word.zero?
      raise Cabriolet::DecompressionError,
            "64-bit reset point not supported"
    end

    points << low_word
  end

  {
    uncompressed_length: header.uncompressed_length,
    compressed_length: header.compressed_length,
    reset_interval: header.reset_interval,
    reset_points: points,
  }
end
|
|
225
564
|
|
|
226
|
-
#
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
565
|
+
# Decompress with reset table
|
|
566
|
+
# Decompress LZX data block by block, resetting the decompressor at each
# reset point.
#
# Each block spans from one reset point to the next (or to the end of the
# compressed data) and produces at most one window of output.
#
# @param compressed_data [String] compressed section bytes
# @param reset_table [Hash] parsed table with :uncompressed_length and
#   :reset_points
# @param window_size [Integer] LZX window size in bits
# @return [String] concatenated output of all blocks
# @raise [Cabriolet::DecompressionError] if reset points are not in
#   increasing order within the compressed data
def decompress_with_reset_table(compressed_data, reset_table, window_size)
  uncompressed = String.new(capacity: reset_table[:uncompressed_length])

  decompressor = Decompressors::LZX.new(window_size)

  window_bytes = 1 << window_size
  # The table omits the implicit first reset point at offset 0
  reset_points = [0] + reset_table[:reset_points]

  bytes_remaining = reset_table[:uncompressed_length]
  compressed_pos = 0

  reset_points.each_with_index do |reset_point, idx|
    next_reset = reset_points[idx + 1] || compressed_data.bytesize

    compressed_size = next_reset - reset_point
    output_size = [bytes_remaining, window_bytes].min

    if output_size.positive?
      # A negative length means a corrupt table; fail with a clear error
      # instead of slicing with a negative size.
      if compressed_size.negative?
        raise Cabriolet::DecompressionError,
              "Invalid reset table: reset point #{reset_point} " \
              "is beyond next boundary #{next_reset}"
      end

      input_chunk = compressed_data[compressed_pos, compressed_size]
      input_handle = System::MemoryHandle.new(input_chunk)
      output_handle = System::MemoryHandle.new("", Constants::MODE_WRITE)

      # The first block starts from the decompressor's initial state
      decompressor.reset if idx.positive?
      decompressor.decompress_chunk(
        input_handle,
        output_handle,
        compressed_size,
        output_size,
      )

      uncompressed << output_handle.data
      compressed_pos += compressed_size
      bytes_remaining -= output_size
    end

    break if bytes_remaining <= 0
  end

  uncompressed
end
|
|
248
614
|
end
|
|
249
615
|
end
|