cabriolet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +7 -0
  2. data/ARCHITECTURE.md +799 -0
  3. data/CHANGELOG.md +44 -0
  4. data/LICENSE +29 -0
  5. data/README.adoc +1207 -0
  6. data/exe/cabriolet +6 -0
  7. data/lib/cabriolet/auto.rb +173 -0
  8. data/lib/cabriolet/binary/bitstream.rb +148 -0
  9. data/lib/cabriolet/binary/bitstream_writer.rb +180 -0
  10. data/lib/cabriolet/binary/chm_structures.rb +213 -0
  11. data/lib/cabriolet/binary/hlp_structures.rb +66 -0
  12. data/lib/cabriolet/binary/kwaj_structures.rb +74 -0
  13. data/lib/cabriolet/binary/lit_structures.rb +107 -0
  14. data/lib/cabriolet/binary/oab_structures.rb +112 -0
  15. data/lib/cabriolet/binary/structures.rb +56 -0
  16. data/lib/cabriolet/binary/szdd_structures.rb +60 -0
  17. data/lib/cabriolet/cab/compressor.rb +382 -0
  18. data/lib/cabriolet/cab/decompressor.rb +510 -0
  19. data/lib/cabriolet/cab/extractor.rb +357 -0
  20. data/lib/cabriolet/cab/parser.rb +264 -0
  21. data/lib/cabriolet/chm/compressor.rb +513 -0
  22. data/lib/cabriolet/chm/decompressor.rb +436 -0
  23. data/lib/cabriolet/chm/parser.rb +254 -0
  24. data/lib/cabriolet/cli.rb +776 -0
  25. data/lib/cabriolet/compressors/base.rb +34 -0
  26. data/lib/cabriolet/compressors/lzss.rb +250 -0
  27. data/lib/cabriolet/compressors/lzx.rb +581 -0
  28. data/lib/cabriolet/compressors/mszip.rb +315 -0
  29. data/lib/cabriolet/compressors/quantum.rb +446 -0
  30. data/lib/cabriolet/constants.rb +75 -0
  31. data/lib/cabriolet/decompressors/base.rb +39 -0
  32. data/lib/cabriolet/decompressors/lzss.rb +138 -0
  33. data/lib/cabriolet/decompressors/lzx.rb +726 -0
  34. data/lib/cabriolet/decompressors/mszip.rb +390 -0
  35. data/lib/cabriolet/decompressors/none.rb +27 -0
  36. data/lib/cabriolet/decompressors/quantum.rb +456 -0
  37. data/lib/cabriolet/errors.rb +39 -0
  38. data/lib/cabriolet/format_detector.rb +156 -0
  39. data/lib/cabriolet/hlp/compressor.rb +272 -0
  40. data/lib/cabriolet/hlp/decompressor.rb +198 -0
  41. data/lib/cabriolet/hlp/parser.rb +131 -0
  42. data/lib/cabriolet/huffman/decoder.rb +79 -0
  43. data/lib/cabriolet/huffman/encoder.rb +108 -0
  44. data/lib/cabriolet/huffman/tree.rb +138 -0
  45. data/lib/cabriolet/kwaj/compressor.rb +479 -0
  46. data/lib/cabriolet/kwaj/decompressor.rb +237 -0
  47. data/lib/cabriolet/kwaj/parser.rb +183 -0
  48. data/lib/cabriolet/lit/compressor.rb +255 -0
  49. data/lib/cabriolet/lit/decompressor.rb +250 -0
  50. data/lib/cabriolet/models/cabinet.rb +81 -0
  51. data/lib/cabriolet/models/chm_file.rb +28 -0
  52. data/lib/cabriolet/models/chm_header.rb +67 -0
  53. data/lib/cabriolet/models/chm_section.rb +38 -0
  54. data/lib/cabriolet/models/file.rb +119 -0
  55. data/lib/cabriolet/models/folder.rb +102 -0
  56. data/lib/cabriolet/models/folder_data.rb +21 -0
  57. data/lib/cabriolet/models/hlp_file.rb +45 -0
  58. data/lib/cabriolet/models/hlp_header.rb +37 -0
  59. data/lib/cabriolet/models/kwaj_header.rb +98 -0
  60. data/lib/cabriolet/models/lit_header.rb +55 -0
  61. data/lib/cabriolet/models/oab_header.rb +95 -0
  62. data/lib/cabriolet/models/szdd_header.rb +72 -0
  63. data/lib/cabriolet/modifier.rb +326 -0
  64. data/lib/cabriolet/oab/compressor.rb +353 -0
  65. data/lib/cabriolet/oab/decompressor.rb +315 -0
  66. data/lib/cabriolet/parallel.rb +333 -0
  67. data/lib/cabriolet/repairer.rb +288 -0
  68. data/lib/cabriolet/streaming.rb +221 -0
  69. data/lib/cabriolet/system/file_handle.rb +107 -0
  70. data/lib/cabriolet/system/io_system.rb +87 -0
  71. data/lib/cabriolet/system/memory_handle.rb +105 -0
  72. data/lib/cabriolet/szdd/compressor.rb +217 -0
  73. data/lib/cabriolet/szdd/decompressor.rb +184 -0
  74. data/lib/cabriolet/szdd/parser.rb +127 -0
  75. data/lib/cabriolet/validator.rb +332 -0
  76. data/lib/cabriolet/version.rb +5 -0
  77. data/lib/cabriolet.rb +104 -0
  78. metadata +157 -0
@@ -0,0 +1,221 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ # Streaming API for memory-efficient processing of large archives
5
+ module Streaming
6
# Stream-oriented archive reader.
#
# Detects the archive format once, at construction time, and then exposes
# the archive's entries one by one, a chunked reader for an entry's payload,
# and a convenience extractor that writes every entry to disk.
class StreamParser
  DEFAULT_CHUNK_SIZE = 65_536 # 64KB chunks

  # @param path [String] Path to the archive
  # @param chunk_size [Integer] Size in bytes of the chunks produced when
  #   streaming entry data
  # @raise [ArgumentError] if chunk_size is not a positive Integer (a zero or
  #   negative size would make the chunking loops never terminate)
  # @raise [UnsupportedFormatError] if FormatDetector returns no format
  def initialize(path, chunk_size: DEFAULT_CHUNK_SIZE)
    unless chunk_size.is_a?(Integer) && chunk_size.positive?
      raise ArgumentError,
            "chunk_size must be a positive Integer, got #{chunk_size.inspect}"
    end

    @path = path
    @chunk_size = chunk_size
    @format = FormatDetector.detect(path)
    raise UnsupportedFormatError, "Unable to detect format" unless @format
  end

  # Iterate over the entries of the archive.
  #
  # For the :cab and :chm formats every entry is wrapped in a LazyFile; any
  # other format is opened through Cabriolet::Auto and its files are yielded
  # unchanged.
  #
  # @yield [file] Yields each file object
  # @yieldparam file [Object] File object from the archive
  # @return [Enumerator] if no block given
  #
  # @example
  #   parser = Cabriolet::Streaming::StreamParser.new('huge.cab')
  #   parser.each_file do |file|
  #     puts "#{file.name}: #{file.size} bytes"
  #   end
  def each_file(&block)
    return enum_for(:each_file) unless block

    case @format
    when :cab then stream_cab_files(&block)
    when :chm then stream_chm_files(&block)
    else
      # Fallback to standard parsing for unsupported streaming formats
      Cabriolet::Auto.open(@path).files.each(&block)
    end
  end

  # Stream the data of one entry in chunks of at most the configured size.
  #
  # Entries that implement +stream_data+ are asked to stream themselves;
  # otherwise the whole payload is read through +file.data+ and sliced here.
  #
  # @param file [Object] File object from archive
  # @yield [chunk] Yields data chunks
  # @yieldparam chunk [String] Binary data chunk
  # @return [Enumerator] if no block given
  #
  # @example
  #   parser.stream_file_data(file) do |chunk|
  #     output.write(chunk)
  #   end
  def stream_file_data(file, &block)
    return enum_for(:stream_file_data, file) unless block

    if file.respond_to?(:stream_data)
      file.stream_data(chunk_size: @chunk_size, &block)
    else
      each_chunk(file.data, &block)
    end
  end

  # Extract every entry below +output_dir+, writing the data chunk by chunk.
  #
  # Backslashes in entry names are treated as directory separators. Entry
  # names come from the archive and are therefore untrusted: an entry whose
  # resolved path would land outside +output_dir+ (for example via "..")
  # is not written and is counted as failed. Per-entry errors are reported
  # with +warn+ and do not abort the extraction.
  #
  # @param output_dir [String] Directory to extract to
  # @param _options [Hash] Extraction options (currently unused)
  # @return [Hash] Extraction statistics (:extracted, :bytes, :failed)
  def extract_streaming(output_dir, **_options)
    FileUtils.mkdir_p(output_dir)
    stats = { extracted: 0, bytes: 0, failed: 0 }

    each_file do |file|
      output_path = safe_output_path(output_dir, file.name)
      FileUtils.mkdir_p(File.dirname(output_path))

      File.open(output_path, "wb") do |out|
        stream_file_data(file) { |chunk| out.write(chunk) }
      end

      stats[:extracted] += 1
      stats[:bytes] += file.size if file.respond_to?(:size)
    rescue StandardError => e
      stats[:failed] += 1
      warn "Failed to extract #{file.name}: #{e.message}"
    end

    stats
  end

  private

  # Parse the CAB at @path and yield each of its files wrapped in a LazyFile.
  def stream_cab_files
    cabinet = Cabriolet::CAB::Parser.new.parse(@path)
    cabinet.files.each { |file| yield LazyFile.new(file, @chunk_size) }
  end

  # Parse the CHM at @path and yield each of its files wrapped in a LazyFile.
  def stream_chm_files
    chm = Cabriolet::CHM::Parser.new.parse(@path)
    chm.files.each { |file| yield LazyFile.new(file, @chunk_size) }
  end

  # Yield consecutive slices of +data+, each at most @chunk_size bytes long.
  # Empty data yields nothing.
  def each_chunk(data)
    0.step(data.bytesize - 1, @chunk_size) do |offset|
      yield data.byteslice(offset, @chunk_size)
    end
  end

  # Resolve an archive entry name to an absolute path inside +output_dir+.
  #
  # @raise [ArgumentError] if the normalized path is not strictly below
  #   +output_dir+
  def safe_output_path(output_dir, entry_name)
    root = File.expand_path(output_dir)
    target = File.expand_path(File.join(root, entry_name.gsub("\\", "/")))
    prefix = root.end_with?(File::SEPARATOR) ? root : "#{root}#{File::SEPARATOR}"
    return target if target.start_with?(prefix)

    raise ArgumentError, "entry path escapes the output directory"
  end
end
124
+
125
# Wrapper around an archive file object that defers reading the file's data
# until #data (or #stream_data) is first called, then memoizes it.
#
# Any public method not defined here is forwarded to the wrapped object.
class LazyFile
  # @param file [Object] Wrapped file object; must respond to name, size
  #   and data
  # @param chunk_size [Integer] Default chunk size for #stream_data
  def initialize(file, chunk_size)
    @file = file
    @chunk_size = chunk_size
  end

  # @return [Object] name reported by the wrapped file
  def name
    @file.name
  end

  # @return [Object] size reported by the wrapped file
  def size
    @file.size
  end

  # @return [Object, nil] wrapped file's attributes, nil when unsupported
  def attributes
    @file.attributes if @file.respond_to?(:attributes)
  end

  # @return [Object, nil] wrapped file's date, nil when unsupported
  def date
    @file.date if @file.respond_to?(:date)
  end

  # @return [Object, nil] wrapped file's time, nil when unsupported
  def time
    @file.time if @file.respond_to?(:time)
  end

  # Load data only when accessed; the result is cached for later calls.
  #
  # @return [Object] whatever the wrapped file's #data returns
  def data
    @data ||= @file.data
  end

  # Stream data in chunks of at most +chunk_size+ bytes.
  #
  # @param chunk_size [Integer] Chunk size in bytes (defaults to the size
  #   given at construction)
  # @yield [chunk] Yields consecutive slices of the data
  # @yieldparam chunk [String] Data chunk
  # @return [Enumerator] if no block given
  # @raise [ArgumentError] if chunk_size is not a positive Integer (such a
  #   value could never advance through the data)
  def stream_data(chunk_size: @chunk_size)
    unless chunk_size.is_a?(Integer) && chunk_size.positive?
      raise ArgumentError,
            "chunk_size must be a positive Integer, got #{chunk_size.inspect}"
    end
    return enum_for(:stream_data, chunk_size: chunk_size) unless block_given?

    payload = data
    0.step(payload.bytesize - 1, chunk_size) do |offset|
      yield payload.byteslice(offset, chunk_size)
    end
  end

  # Forward unknown messages to the wrapped file. public_send keeps the
  # wrapped object's private methods private.
  def method_missing(method, ...)
    @file.public_send(method, ...)
  end

  # Mirrors method_missing: only the wrapped file's public methods are
  # reachable through this wrapper.
  def respond_to_missing?(method, include_private = false)
    @file.respond_to?(method) || super
  end
end
178
+
179
# Runs a caller-supplied block over the files of one or more archives and
# keeps running totals of what was handled.
class BatchProcessor
  # @return [Hash] Running totals (:processed, :failed, :bytes)
  attr_reader :stats

  # @param chunk_size [Integer] Chunk size handed to each StreamParser
  def initialize(chunk_size: StreamParser::DEFAULT_CHUNK_SIZE)
    @stats = { processed: 0, failed: 0, bytes: 0 }
    @chunk_size = chunk_size
  end

  # Process multiple archives in streaming mode
  #
  # @param paths [Array<String>] Array of archive paths
  # @yield [file, archive_path] Yields each file with its archive path
  # @return [Hash] Processing statistics
  def process_archives(paths, &block)
    paths.each { |archive_path| process_archive(archive_path, &block) }
    @stats
  end

  # Process single archive in streaming mode
  #
  # A failure while handling one file is counted in :failed and reported
  # with warn; a failure at the archive level is only reported with warn.
  #
  # @param path [String] Archive path
  # @yield [file, path] Yields each file together with the archive path
  def process_archive(path)
    StreamParser.new(path, chunk_size: @chunk_size).each_file do |entry|
      yield entry, path
      @stats[:processed] += 1
      @stats[:bytes] += entry.size if entry.respond_to?(:size)
    rescue StandardError => file_error
      @stats[:failed] += 1
      warn "Error processing #{entry.name} from #{path}: #{file_error.message}"
    end
  rescue StandardError => archive_error
    warn "Error processing archive #{path}: #{archive_error.message}"
  end
end
220
+ end
221
+ end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module System
5
# FileHandle provides file I/O operations using the Ruby File class
class FileHandle
  attr_reader :filename, :mode

  # Initialize a new file handle; the file is opened immediately.
  #
  # @param filename [String] Path to the file
  # @param mode [Integer] One of MODE_READ, MODE_WRITE, MODE_UPDATE, MODE_APPEND
  # @raise [ArgumentError] if mode is not one of the known constants
  # @raise [IOError] if the file cannot be opened
  def initialize(filename, mode)
    @filename = filename
    @mode = mode
    @file = open_file(mode)
  end

  # Read bytes from the file
  #
  # @param bytes [Integer] Number of bytes to read
  # @return [String] Bytes read (binary encoding); empty at end of file
  def read(bytes)
    # File#read(length) returns nil at EOF. String.new gives a fresh, mutable,
    # binary-encoded empty string, unlike a "" literal, which is frozen under
    # the frozen_string_literal pragma and not binary-encoded.
    @file.read(bytes) || String.new
  end

  # Write bytes to the file
  #
  # @param data [String] Data to write
  # @return [Integer] Number of bytes written
  def write(data)
    @file.write(data)
  end

  # Seek to a position in the file
  #
  # @param offset [Integer] Offset to seek to
  # @param whence [Integer] One of SEEK_START, SEEK_CUR, SEEK_END
  # @return [Integer] New position
  # @raise [ArgumentError] if whence is not one of the known constants
  def seek(offset, whence)
    io_whence = case whence
                when Constants::SEEK_START then ::IO::SEEK_SET
                when Constants::SEEK_CUR then ::IO::SEEK_CUR
                when Constants::SEEK_END then ::IO::SEEK_END
                else
                  raise ArgumentError, "Invalid whence value: #{whence}"
                end
    @file.seek(offset, io_whence)
    @file.pos
  end

  # Get current position in the file
  #
  # @return [Integer] Current position
  def tell
    @file.pos
  end

  # Get the size of the file
  #
  # @return [Integer] File size in bytes
  def size
    @file.size
  end

  # Flush the file buffer (no-op once the handle is closed)
  #
  # @return [void]
  def flush
    @file.flush unless @file.closed?
  end

  # Close the file. Safe to call more than once.
  #
  # @return [void]
  def close
    # IO#close already flushes buffered writes, so no explicit flush is needed.
    @file.close unless @file.closed?
  end

  # Check if the file is closed
  #
  # @return [Boolean]
  def closed?
    @file.closed?
  end

  private

  # Translate the Cabriolet mode constant to a binary File mode string and
  # open @filename with it.
  #
  # @raise [ArgumentError] if mode is not one of the known constants
  # @raise [IOError] for any operating-system level failure to open the file
  def open_file(mode)
    file_mode = case mode
                when Constants::MODE_READ then "rb"
                when Constants::MODE_WRITE then "wb"
                when Constants::MODE_UPDATE then "r+b"
                when Constants::MODE_APPEND then "ab"
                else
                  raise ArgumentError, "Invalid mode: #{mode}"
                end

    ::File.open(@filename, file_mode)
  rescue SystemCallError => e
    # Covers every Errno::* (ENOENT, EACCES, EISDIR, ENOTDIR, ...) so callers
    # only have to handle the documented IOError.
    raise IOError, "Cannot open file #{@filename}: #{e.message}"
  end
end
106
+ end
107
+ end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module System
5
# IOSystem is the indirection point for all file I/O: callers go through
# an IOSystem instance instead of touching handles directly, so a different
# implementation can be injected.
#
# This allows for:
# - Testing with mock I/O
# - In-memory operations
# - Custom I/O sources (network, etc.)
class IOSystem
  # Open a file and wrap it in a handle.
  #
  # @param filename [String] Path to the file
  # @param mode [Integer] One of MODE_READ, MODE_WRITE, MODE_UPDATE, MODE_APPEND
  # @return [FileHandle] Handle for performing I/O operations
  # @raise [IOError] if the file cannot be opened
  def open(filename, mode)
    FileHandle.new(filename, mode)
  end

  # Close the given handle.
  #
  # @param handle [FileHandle, MemoryHandle] Handle to close
  # @return [void]
  def close(handle)
    handle.close
  end

  # Read up to +bytes+ bytes from the handle.
  #
  # @param handle [FileHandle, MemoryHandle] Handle to read from
  # @param bytes [Integer] Number of bytes to read
  # @return [String] Bytes read (may be fewer than requested at EOF)
  def read(handle, bytes)
    handle.read(bytes)
  end

  # Write +data+ to the handle.
  #
  # @param handle [FileHandle, MemoryHandle] Handle to write to
  # @param data [String] Data to write
  # @return [Integer] Number of bytes written
  def write(handle, data)
    handle.write(data)
  end

  # Reposition the handle.
  #
  # @param handle [FileHandle, MemoryHandle] Handle to seek in
  # @param offset [Integer] Offset to seek to
  # @param whence [Integer] One of SEEK_START, SEEK_CUR, SEEK_END
  # @return [Integer] New position
  def seek(handle, offset, whence)
    handle.seek(offset, whence)
  end

  # Report the handle's current position.
  #
  # @param handle [FileHandle, MemoryHandle] Handle to query
  # @return [Integer] Current position
  def tell(handle)
    handle.tell
  end

  # Overwrite the contents of +dest+ with the first +bytes+ bytes of +src+.
  #
  # @param src [String] Source bytes
  # @param dest [String] Destination buffer (mutated in place)
  # @param bytes [Integer] Number of bytes to copy
  # @return [void]
  def copy(src, dest, bytes)
    leading = src.byteslice(0, bytes)
    dest.replace(leading)
  end

  # Emit a diagnostic message through warn; silent unless Cabriolet.verbose
  # is truthy.
  #
  # @param _handle [FileHandle, MemoryHandle, nil] Handle associated with message (unused)
  # @param message [String] Message to output
  # @return [void]
  def message(_handle, message)
    return unless Cabriolet.verbose

    warn "[Cabriolet] #{message}"
  end
end
86
+ end
87
+ end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ module System
5
# MemoryHandle provides in-memory I/O operations using a StringIO-like
# interface. The handle works on its own binary-encoded copy of the data
# passed in.
class MemoryHandle
  attr_reader :data, :mode

  # Initialize a new memory handle
  #
  # @param data [String] Initial data (for reading) or empty string (for writing)
  # @param mode [Integer] One of MODE_READ, MODE_WRITE, MODE_UPDATE, MODE_APPEND
  def initialize(data = "", mode = Constants::MODE_READ)
    @data = data.dup.force_encoding(Encoding::BINARY)
    @mode = mode
    # Append mode starts positioned at the end of the existing data.
    @pos = mode == Constants::MODE_APPEND ? @data.bytesize : 0
    @closed = false
  end

  # Read bytes from memory
  #
  # @param bytes [Integer, nil] Number of bytes to read; nil reads everything
  #   up to the end of the buffer (matching File#read)
  # @return [String] Bytes read (binary encoding); empty at end of data
  def read(bytes)
    remaining = @data.bytesize - @pos
    # String.new is a fresh, mutable, binary-encoded empty string, unlike a
    # "" literal, which is frozen under the frozen_string_literal pragma.
    return String.new if remaining <= 0

    chunk = @data.byteslice(@pos, bytes.nil? ? remaining : bytes) || String.new
    @pos += chunk.bytesize
    chunk
  end

  # Write bytes to memory at the current position, overwriting existing
  # bytes and growing the buffer when the write runs past its end.
  #
  # @param content [String] Data to write
  # @return [Integer] Number of bytes written
  # @raise [IOError] if the handle is closed or was opened with MODE_READ
  def write(content)
    raise IOError, "Handle is closed" if @closed
    raise IOError, "Handle not opened for writing" if @mode == Constants::MODE_READ

    content = content.dup.force_encoding(Encoding::BINARY)

    if @pos >= @data.bytesize
      # Append to end
      @data << content
    else
      # Overwrite in place. @data is binary, so character indices are byte
      # indices; a range running past the end simply extends the buffer.
      @data[@pos, content.bytesize] = content
    end

    @pos += content.bytesize
    content.bytesize
  end

  # Seek to a position in memory. The resulting position is clamped to the
  # range 0..buffer size.
  #
  # @param offset [Integer] Offset to seek to
  # @param whence [Integer] One of SEEK_START, SEEK_CUR, SEEK_END
  # @return [Integer] New position
  # @raise [ArgumentError] if whence is not one of the known constants
  def seek(offset, whence)
    new_pos = case whence
              when Constants::SEEK_START then offset
              when Constants::SEEK_CUR then @pos + offset
              when Constants::SEEK_END then @data.bytesize + offset
              else
                raise ArgumentError, "Invalid whence value: #{whence}"
              end

    @pos = new_pos.clamp(0, @data.bytesize)
  end

  # Get current position in memory
  #
  # @return [Integer] Current position
  def tell
    @pos
  end

  # Close the handle; subsequent writes raise IOError.
  #
  # @return [void]
  def close
    @closed = true
  end

  # Check if the handle is closed
  #
  # @return [Boolean]
  def closed?
    @closed
  end

  # Get the complete data buffer
  #
  # @return [String] All data in the buffer
  def to_s
    @data
  end

  # Alias for to_s
  alias buffer to_s
end
104
+ end
105
+ end