cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +703 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +167 -16
  6. data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +108 -84
  13. data/lib/cabriolet/cab/decompressor.rb +16 -20
  14. data/lib/cabriolet/cab/extractor.rb +142 -66
  15. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  16. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  17. data/lib/cabriolet/checksum.rb +49 -0
  18. data/lib/cabriolet/chm/command_handler.rb +227 -0
  19. data/lib/cabriolet/chm/compressor.rb +7 -3
  20. data/lib/cabriolet/chm/decompressor.rb +39 -21
  21. data/lib/cabriolet/chm/parser.rb +5 -2
  22. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  23. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  24. data/lib/cabriolet/cli/command_registry.rb +83 -0
  25. data/lib/cabriolet/cli.rb +356 -607
  26. data/lib/cabriolet/collections/file_collection.rb +175 -0
  27. data/lib/cabriolet/compressors/base.rb +1 -1
  28. data/lib/cabriolet/compressors/lzx.rb +241 -54
  29. data/lib/cabriolet/compressors/mszip.rb +35 -3
  30. data/lib/cabriolet/compressors/quantum.rb +36 -95
  31. data/lib/cabriolet/decompressors/base.rb +1 -1
  32. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  33. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  34. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  35. data/lib/cabriolet/decompressors/quantum.rb +83 -53
  36. data/lib/cabriolet/errors.rb +3 -0
  37. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  38. data/lib/cabriolet/extraction/extractor.rb +171 -0
  39. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  40. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  41. data/lib/cabriolet/file_entry.rb +156 -0
  42. data/lib/cabriolet/file_manager.rb +144 -0
  43. data/lib/cabriolet/format_base.rb +79 -0
  44. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  45. data/lib/cabriolet/hlp/compressor.rb +28 -238
  46. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  47. data/lib/cabriolet/hlp/parser.rb +52 -101
  48. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  49. data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
  50. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  51. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  52. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  53. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  54. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  55. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  56. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  57. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  58. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  59. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  60. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  61. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  62. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  63. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  64. data/lib/cabriolet/huffman/encoder.rb +15 -12
  65. data/lib/cabriolet/huffman/tree.rb +85 -1
  66. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  67. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  68. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  69. data/lib/cabriolet/lit/command_handler.rb +221 -0
  70. data/lib/cabriolet/lit/compressor.rb +119 -168
  71. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  72. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  73. data/lib/cabriolet/lit/decompressor.rb +518 -152
  74. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  75. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  76. data/lib/cabriolet/lit/header_writer.rb +124 -0
  77. data/lib/cabriolet/lit/parser.rb +670 -0
  78. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  79. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  80. data/lib/cabriolet/models/hlp_file.rb +130 -29
  81. data/lib/cabriolet/models/hlp_header.rb +105 -17
  82. data/lib/cabriolet/models/lit_header.rb +212 -25
  83. data/lib/cabriolet/models/szdd_header.rb +10 -2
  84. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  85. data/lib/cabriolet/oab/command_handler.rb +257 -0
  86. data/lib/cabriolet/oab/compressor.rb +17 -8
  87. data/lib/cabriolet/oab/decompressor.rb +41 -10
  88. data/lib/cabriolet/offset_calculator.rb +81 -0
  89. data/lib/cabriolet/plugin.rb +233 -0
  90. data/lib/cabriolet/plugin_manager.rb +453 -0
  91. data/lib/cabriolet/plugin_validator.rb +422 -0
  92. data/lib/cabriolet/quantum_shared.rb +105 -0
  93. data/lib/cabriolet/system/io_system.rb +3 -0
  94. data/lib/cabriolet/system/memory_handle.rb +17 -4
  95. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  96. data/lib/cabriolet/szdd/compressor.rb +15 -11
  97. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  98. data/lib/cabriolet/version.rb +1 -1
  99. data/lib/cabriolet.rb +181 -20
  100. metadata +69 -4
  101. data/lib/cabriolet/auto.rb +0 -173
  102. data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,400 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "zeck_lz77"
4
+ require_relative "btree_builder"
5
+
6
+ module Cabriolet
7
+ module HLP
8
+ module WinHelp
9
# Compressor creates Windows Help (.HLP) files
#
# Creates WinHelp 3.x and 4.x format files with Zeck LZ77 compression.
# Supports creating |SYSTEM, |TOPIC, and other internal files.
#
# Output layout: the header and directory are written first and padded
# to a BLOCK_SIZE boundary, then every internal file is written at
# `starting_block * BLOCK_SIZE` and padded to the next block boundary.
class Compressor
  attr_reader :io_system

  # Default block size for WinHelp files (4096 bytes)
  BLOCK_SIZE = 4096

  # Format versions accepted by #generate
  SUPPORTED_VERSIONS = %i[winhelp3 winhelp4].freeze

  # Largest value representable in the 2-byte little-endian length fields
  MAX_UINT16 = 0xFFFF

  # |SYSTEM record type emitted for each supported option key, in the
  # order the records are written
  SYSTEM_RECORD_TYPES = { title: 1, copyright: 2, contents: 3 }.freeze

  # A single NUL byte in binary encoding (terminator / padding unit)
  NUL = "\x00".b.freeze

  private_constant :SUPPORTED_VERSIONS, :MAX_UINT16, :SYSTEM_RECORD_TYPES,
                   :NUL

  # Initialize compressor
  #
  # @param io_system [System::IOSystem, nil] Custom I/O system
  def initialize(io_system = nil)
    @io_system = io_system || System::IOSystem.new
    @internal_files = {}
    @version = :winhelp3
  end

  # Add an internal file to the WinHelp archive
  #
  # Adding a name twice replaces the earlier data.
  #
  # @param name [String] Internal filename (e.g., "|SYSTEM", "|TOPIC")
  # @param data [String] File data
  # @return [void]
  def add_internal_file(name, data)
    @internal_files[name] = data
  end

  # Add |SYSTEM file with metadata
  #
  # @param options [Hash] System file options
  # @option options [String] :title Help file title
  # @option options [String] :copyright Copyright text
  # @option options [String] :contents Contents file path
  # @raise [ArgumentError] if a value does not fit a 16-bit record length
  # @return [void]
  def add_system_file(**options)
    add_internal_file("|SYSTEM", build_system_file(options))
  end

  # Add |TOPIC file with compressed topics
  #
  # @param topics [Array<String>] Array of topic texts
  # @param compress [Boolean] Whether to compress topics
  # @raise [ArgumentError] if a stored topic exceeds 65535 bytes
  # @return [void]
  def add_topic_file(topics, compress: true)
    add_internal_file("|TOPIC", build_topic_file(topics, compress))
  end

  # Generate WinHelp file
  #
  # @param output_file [String] Path to output file
  # @param options [Hash] Generation options
  # @option options [Symbol] :version Format version (:winhelp3 or :winhelp4)
  # @raise [ArgumentError] if no internal files were added, the version is
  #   unknown, or the directory does not fit before the first data block
  # @return [Integer] Bytes written
  def generate(output_file, **options)
    @version = options.fetch(:version, :winhelp3)

    raise ArgumentError, "No internal files added" if @internal_files.empty?
    unless SUPPORTED_VERSIONS.include?(@version)
      raise ArgumentError, "Invalid version"
    end

    structure = build_structure

    output_handle = @io_system.open(output_file, Constants::MODE_WRITE)
    begin
      write_winhelp_file(output_handle, structure)
    ensure
      # Always release the handle, even when writing fails part-way.
      @io_system.close(output_handle)
    end
  end

  private

  # Build complete WinHelp structure
  #
  # Assigns a starting block to every internal file and records the
  # directory offset/size and the total file size.
  #
  # @return [Hash] Complete structure with keys :version, :internal_files,
  #   :directory_offset, :directory_size and :file_size
  def build_structure
    header_size = @version == :winhelp3 ? 28 : 32

    entries = @internal_files.map do |name, data|
      { name: name, data: data, size: data.bytesize }
    end
    dir_size = calculate_directory_size(entries)

    # Header + directory occupy the leading block(s); data starts on the
    # first block boundary after them (never before block 1).
    block_number = [blocks_for(header_size + dir_size), 1].max
    entries.each do |entry|
      entry[:starting_block] = block_number
      block_number += blocks_for(entry[:size])
    end

    {
      version: @version,
      internal_files: entries,
      directory_offset: header_size,
      directory_size: dir_size,
      # Everything is block aligned, so the bytes written equal the
      # number of blocks consumed times the block size.
      file_size: block_number * BLOCK_SIZE,
    }
  end

  # Number of whole blocks needed to hold +byte_count+ bytes (round up)
  #
  # @param byte_count [Integer] Size in bytes
  # @return [Integer] Block count (0 for an empty payload)
  def blocks_for(byte_count)
    (byte_count + BLOCK_SIZE - 1) / BLOCK_SIZE
  end

  # Calculate the size of the simple (WinHelp 3.x style) directory
  #
  # @param files [Array<Hash>] Internal file list (uses :name)
  # @return [Integer] Directory size in bytes
  def calculate_directory_size(files)
    size = files.reduce(0) do |total, file|
      # 4 bytes size + 2 bytes block + filename + null
      total += 4 + 2 + file[:name].bytesize + 1
      # Align to 2-byte boundary
      total.odd? ? total + 1 : total
    end
    # Add end marker (4 bytes of zeros)
    size + 4
  end

  # Write complete WinHelp file
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @raise [ArgumentError] if header + directory extend past the offset
  #   recorded for an internal file
  # @return [Integer] Bytes written
  def write_winhelp_file(output_handle, structure)
    bytes_written = write_header(output_handle, structure)
    bytes_written += write_directory(output_handle, structure)

    # Pad to the next block boundary
    bytes_written += write_padding(output_handle,
                                   (-bytes_written) % BLOCK_SIZE)

    structure[:internal_files].each do |file|
      target_offset = file[:starting_block] * BLOCK_SIZE
      if bytes_written > target_offset
        # Writing here would place the data at a different offset than
        # the directory advertises, producing an unreadable archive.
        raise ArgumentError,
              "Directory overruns data block for #{file[:name]} " \
              "(offset #{target_offset}, already wrote #{bytes_written})"
      end

      bytes_written += write_padding(output_handle,
                                     target_offset - bytes_written)
      bytes_written += @io_system.write(output_handle, file[:data])
      bytes_written += write_padding(output_handle,
                                     (-file[:data].bytesize) % BLOCK_SIZE)
    end

    bytes_written
  end

  # Write +count+ NUL bytes (no-op when +count+ is not positive)
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param count [Integer] Number of padding bytes
  # @return [Integer] Bytes written
  def write_padding(output_handle, count)
    return 0 unless count.positive?

    @io_system.write(output_handle, NUL * count)
  end

  # Write file header for the structure's version
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @return [Integer] Bytes written
  def write_header(output_handle, structure)
    if structure[:version] == :winhelp3
      write_header_3x(output_handle, structure)
    else
      write_header_4x(output_handle, structure)
    end
  end

  # Write WinHelp 3.x header
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @return [Integer] Bytes written
  def write_header_3x(output_handle, structure)
    header = Binary::HLPStructures::WinHelp3Header.new
    header.magic = 0x35F3
    header.unknown = 0x0001
    header.directory_offset = structure[:directory_offset]
    header.free_list_offset = 0
    header.file_size = structure[:file_size]
    header.reserved = "\x00" * 12

    @io_system.write(output_handle, header.to_binary_s)
  end

  # Write WinHelp 4.x header
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @return [Integer] Bytes written
  def write_header_4x(output_handle, structure)
    header = Binary::HLPStructures::WinHelp4Header.new
    header.magic = 0x00033F5F # Magic with low 16 bits = 0x3F5F
    header.directory_offset = structure[:directory_offset]
    header.free_list_offset = 0
    header.file_size = structure[:file_size]
    header.reserved = "\x00" * 16

    @io_system.write(output_handle, header.to_binary_s)
  end

  # Write directory in the format matching the structure's version
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @return [Integer] Bytes written
  def write_directory(output_handle, structure)
    if structure[:version] == :winhelp4
      write_directory_btree(output_handle, structure)
    else
      write_directory_simple(output_handle, structure)
    end
  end

  # Write simple directory (WinHelp 3.x format)
  #
  # Each entry is: 4-byte size, 2-byte starting block, NUL-terminated
  # name, padded to a 2-byte boundary. A 4-byte zero marker ends the list.
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @return [Integer] Bytes written
  def write_directory_simple(output_handle, structure)
    directory = binary_buffer

    structure[:internal_files].each do |file|
      directory << [file[:size], file[:starting_block]].pack("Vv")
      # Names are appended as raw bytes so that any source encoding is
      # accepted next to the packed binary fields.
      directory << file[:name].to_s.b << NUL
      # Align to 2-byte boundary
      directory << NUL if directory.bytesize.odd?
    end

    # End marker
    directory << [0].pack("V")

    @io_system.write(output_handle, directory)
  end

  # Write B+ tree directory (WinHelp 4.x format)
  #
  # @param output_handle [System::FileHandle] Output handle
  # @param structure [Hash] File structure
  # @return [Integer] Bytes written
  def write_directory_btree(output_handle, structure)
    # Build B+ tree from internal files
    btree = BTreeBuilder.new
    structure[:internal_files].each do |file|
      # Add entry with filename, byte offset of its first block, and size
      btree.add_entry(file[:name], file[:starting_block] * BLOCK_SIZE,
                      file[:size])
    end
    tree = btree.build

    # Write FILEHEADER (9 bytes) before BTREEHEADER
    # FILEHEADER structure:
    # - 4 bytes: reserved_space (reserved space in help file incl. FILEHEADER)
    # - 4 bytes: used_space (used space in help file excl. FILEHEADER)
    # - 1 byte: file_flags (normally 4)
    # For directory, we set the space fields to 0 for now
    file_header = Binary::HLPStructures::WinHelpFileHeader.new
    file_header.reserved_space = 0
    file_header.used_space = 0
    file_header.file_flags = 4
    bytes_written = @io_system.write(output_handle,
                                     file_header.to_binary_s)

    # Write BTREEHEADER (38 bytes)
    bytes_written += @io_system.write(output_handle,
                                      tree[:header].to_binary_s)

    # Write pages in page-number order
    tree[:pages].sort_by { |page| page[:page_num] }.each do |page|
      bytes_written += @io_system.write(output_handle, page[:data])
    end

    bytes_written
  end

  # Build |SYSTEM file
  #
  # Emits one record per provided option, in the order title (type 1),
  # copyright (type 2), contents (type 3).
  #
  # @param options [Hash] System file options
  # @raise [ArgumentError] if a value does not fit a 16-bit record length
  # @return [String] System file data (binary encoding)
  def build_system_file(options)
    SYSTEM_RECORD_TYPES.each_with_object(binary_buffer) do |(key, type), data|
      data << build_system_record(type, options[key]) if options[key]
    end
  end

  # Build a system record
  #
  # Layout: 2-byte type, 2-byte length (text bytes + NUL), text, NUL.
  #
  # @param type [Integer] Record type
  # @param text [String] Record text
  # @raise [ArgumentError] if the text plus terminator exceeds 65535 bytes
  # @return [String] Record data (binary encoding)
  def build_system_record(type, text)
    payload = text.b
    length = payload.bytesize + 1 # Length including null
    ensure_fits_uint16(length, "System record #{type}")

    [type, length].pack("vv") << payload << NUL
  end

  # Build |TOPIC file
  #
  # Simplified: each topic is stored as a 2-byte length followed by its
  # (optionally Zeck LZ77 compressed) bytes. Full implementation would
  # include topic headers and blocks.
  #
  # @param topics [Array<String>] Topic texts
  # @param compress [Boolean] Whether to compress
  # @raise [ArgumentError] if a stored topic exceeds 65535 bytes
  # @return [String] Topic file data (binary encoding)
  def build_topic_file(topics, compress)
    zeck = ZeckLZ77.new if compress

    topics.each_with_object(binary_buffer) do |topic_text, data|
      payload = (compress ? zeck.compress(topic_text) : topic_text).b
      ensure_fits_uint16(payload.bytesize, "Topic")

      data << [payload.bytesize].pack("v") << payload
    end
  end

  # New empty, mutable buffer in binary encoding. Used for all on-disk
  # data so packed integers and arbitrary text can be concatenated
  # without encoding compatibility errors.
  #
  # @return [String] Empty binary string
  def binary_buffer
    String.new(encoding: Encoding::BINARY)
  end

  # Guard for the 2-byte length fields, which would otherwise wrap
  #
  # @param length [Integer] Value destined for a 16-bit field
  # @param what [String] Description used in the error message
  # @raise [ArgumentError] if +length+ exceeds 65535
  # @return [void]
  def ensure_fits_uint16(length, what)
    return if length <= MAX_UINT16

    raise ArgumentError,
          "#{what} is too large for a 16-bit length field (#{length} bytes)"
  end
end
398
+ end
399
+ end
400
+ end
@@ -0,0 +1,192 @@
1
+ # frozen_string_literal: true
2
+
3
require "fileutils"

require_relative "parser"
require_relative "zeck_lz77"
require_relative "../../system/io_system"
require_relative "../../constants"
7
+
8
+ module Cabriolet
9
+ module HLP
10
+ module WinHelp
11
# Decompressor for Windows Help files
#
# Extracts and decompresses content from WinHelp files using:
# - WinHelp::Parser for file structure
# - ZeckLZ77 for topic decompression
#
# Handles both WinHelp 3.x and 4.x formats. The file is parsed lazily:
# every method that needs the header calls #parse on first use.
class Decompressor
  attr_reader :io_system, :header

  # Block size used to turn a starting block number into a byte offset
  # when a directory entry carries no explicit :file_offset
  BLOCK_SIZE = 4096

  # Name used when sanitizing leaves nothing usable
  DEFAULT_FILENAME = "_unnamed_file_"

  private_constant :DEFAULT_FILENAME

  # Initialize decompressor
  #
  # @param filename [String] Path to WinHelp file
  # @param io_system [System::IOSystem, nil] Custom I/O system
  def initialize(filename, io_system = nil)
    @filename = filename
    @io_system = io_system || System::IOSystem.new
    @parser = Parser.new(@io_system)
    @zeck = ZeckLZ77.new
    @header = nil
  end

  # Parse the WinHelp file structure
  #
  # @return [Models::WinHelpHeader] Parsed header
  def parse
    @header = @parser.parse(@filename)
  end

  # Extract a specific internal file by name
  #
  # @param filename [String] Internal filename (e.g., "|SYSTEM", "|TOPIC")
  # @return [String, nil] Raw file data or nil if not found
  def extract_internal_file(filename)
    parse unless @header

    file_entry = @header.find_file(filename)
    return nil unless file_entry

    # Use file_offset if available (B+ tree format), otherwise derive the
    # offset from the starting block (WinHelp 3.x format)
    file_offset = file_entry[:file_offset] ||
      (file_entry[:starting_block] * BLOCK_SIZE)

    handle = @io_system.open(@filename, Constants::MODE_READ)
    begin
      @io_system.seek(handle, file_offset, Constants::SEEK_START)
      @io_system.read(handle, file_entry[:file_size])
    ensure
      @io_system.close(handle)
    end
  end

  # Extract |SYSTEM file data
  #
  # @return [String, nil] System file data
  def extract_system_file
    extract_internal_file("|SYSTEM")
  end

  # Extract |TOPIC file data
  #
  # @return [String, nil] Topic file data (compressed)
  def extract_topic_file
    extract_internal_file("|TOPIC")
  end

  # Decompress topic data using Zeck LZ77
  #
  # @param compressed_data [String] Compressed topic data
  # @param output_size [Integer] Expected decompressed size
  # @return [String] Decompressed topic text
  def decompress_topic(compressed_data, output_size)
    @zeck.decompress(compressed_data, output_size)
  end

  # Extract all topics from |TOPIC file
  #
  # This is a simplified implementation that returns the raw |TOPIC data
  # as a single entry flagged `compressed: true`. Full implementation
  # would parse topic block headers and extract individual topics.
  #
  # @return [Array<Hash>] Empty when there is no |TOPIC file, otherwise
  #   one hash with :index, :data and :compressed keys
  def extract_topics
    parse unless @header

    topic_data = extract_topic_file
    return [] unless topic_data

    [{
      index: 0,
      data: topic_data,
      compressed: true,
    }]
  end

  # Extract all files to a directory
  #
  # The directory is created if missing. Each internal file is written
  # under its sanitized name (see #sanitize_filename).
  #
  # @param output_dir [String] Output directory path
  # @return [Integer] Number of files extracted
  def extract_all(output_dir)
    parse unless @header

    FileUtils.mkdir_p(output_dir)

    @header.internal_files.count do |file_entry|
      data = extract_internal_file(file_entry[:filename])
      next false unless data

      output_path = File.join(output_dir,
                              sanitize_filename(file_entry[:filename]))
      File.binwrite(output_path, data)
      true
    end
  end

  # Sanitize filename for file system
  #
  # Internal names come from the archive, so the result must always be a
  # plain file name: ASCII only, no control characters, no path
  # separators, and never "." or "..".
  #
  # @param filename [String] Internal filename
  # @return [String] Safe filename ("_unnamed_file_" if nothing is left)
  def sanitize_filename(filename)
    # Encode to ASCII, replacing non-ASCII characters with _
    sanitized = filename.encode("ASCII", invalid: :replace,
                                         undef: :replace, replace: "_")

    # Replace control characters (including NUL, which File APIs reject)
    sanitized = sanitized.gsub(/[[:cntrl:]]/, "_")

    # Replace | with _pipe_ so the well-known internal names stay readable
    sanitized = sanitized.gsub("|", "_pipe_")

    # Replace remaining invalid filename characters with _
    sanitized = sanitized.gsub(%r{[/\\:<>"?*]}, "_")

    # Collapse runs of underscores and trim them from both ends of the
    # whole string (\A / \z, not per-line ^ / $)
    sanitized = sanitized.squeeze("_").gsub(/\A_+|_+\z/, "")

    # Empty and dot-only names ("." / "..") would not address a regular
    # file inside the output directory
    return DEFAULT_FILENAME if sanitized.empty? || sanitized.match?(/\A\.+\z/)

    sanitized
  end

  # Get list of internal filenames
  #
  # @return [Array<String>] Internal file names
  def internal_filenames
    parse unless @header
    @header.internal_filenames
  end

  # Check if |SYSTEM file exists
  #
  # @return [Boolean] true if |SYSTEM present
  def has_system_file?
    parse unless @header
    @header.has_system_file?
  end

  # Check if |TOPIC file exists
  #
  # @return [Boolean] true if |TOPIC present
  def has_topic_file?
    parse unless @header
    @header.has_topic_file?
  end
end
190
+ end
191
+ end
192
+ end