cabriolet 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +703 -38
- data/lib/cabriolet/algorithm_factory.rb +250 -0
- data/lib/cabriolet/base_compressor.rb +206 -0
- data/lib/cabriolet/binary/bitstream.rb +167 -16
- data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
- data/lib/cabriolet/binary/chm_structures.rb +2 -2
- data/lib/cabriolet/binary/hlp_structures.rb +258 -37
- data/lib/cabriolet/binary/lit_structures.rb +231 -65
- data/lib/cabriolet/binary/oab_structures.rb +17 -1
- data/lib/cabriolet/cab/command_handler.rb +226 -0
- data/lib/cabriolet/cab/compressor.rb +108 -84
- data/lib/cabriolet/cab/decompressor.rb +16 -20
- data/lib/cabriolet/cab/extractor.rb +142 -66
- data/lib/cabriolet/cab/file_compression_work.rb +52 -0
- data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
- data/lib/cabriolet/checksum.rb +49 -0
- data/lib/cabriolet/chm/command_handler.rb +227 -0
- data/lib/cabriolet/chm/compressor.rb +7 -3
- data/lib/cabriolet/chm/decompressor.rb +39 -21
- data/lib/cabriolet/chm/parser.rb +5 -2
- data/lib/cabriolet/cli/base_command_handler.rb +127 -0
- data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
- data/lib/cabriolet/cli/command_registry.rb +83 -0
- data/lib/cabriolet/cli.rb +356 -607
- data/lib/cabriolet/collections/file_collection.rb +175 -0
- data/lib/cabriolet/compressors/base.rb +1 -1
- data/lib/cabriolet/compressors/lzx.rb +241 -54
- data/lib/cabriolet/compressors/mszip.rb +35 -3
- data/lib/cabriolet/compressors/quantum.rb +36 -95
- data/lib/cabriolet/decompressors/base.rb +1 -1
- data/lib/cabriolet/decompressors/lzss.rb +13 -3
- data/lib/cabriolet/decompressors/lzx.rb +70 -33
- data/lib/cabriolet/decompressors/mszip.rb +126 -39
- data/lib/cabriolet/decompressors/quantum.rb +83 -53
- data/lib/cabriolet/errors.rb +3 -0
- data/lib/cabriolet/extraction/base_extractor.rb +88 -0
- data/lib/cabriolet/extraction/extractor.rb +171 -0
- data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
- data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
- data/lib/cabriolet/file_entry.rb +156 -0
- data/lib/cabriolet/file_manager.rb +144 -0
- data/lib/cabriolet/format_base.rb +79 -0
- data/lib/cabriolet/hlp/command_handler.rb +282 -0
- data/lib/cabriolet/hlp/compressor.rb +28 -238
- data/lib/cabriolet/hlp/decompressor.rb +107 -147
- data/lib/cabriolet/hlp/parser.rb +52 -101
- data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
- data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
- data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
- data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
- data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
- data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
- data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
- data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
- data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
- data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
- data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
- data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
- data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
- data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
- data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
- data/lib/cabriolet/huffman/encoder.rb +15 -12
- data/lib/cabriolet/huffman/tree.rb +85 -1
- data/lib/cabriolet/kwaj/command_handler.rb +213 -0
- data/lib/cabriolet/kwaj/compressor.rb +7 -3
- data/lib/cabriolet/kwaj/decompressor.rb +18 -12
- data/lib/cabriolet/lit/command_handler.rb +221 -0
- data/lib/cabriolet/lit/compressor.rb +119 -168
- data/lib/cabriolet/lit/content_encoder.rb +76 -0
- data/lib/cabriolet/lit/content_type_detector.rb +50 -0
- data/lib/cabriolet/lit/decompressor.rb +518 -152
- data/lib/cabriolet/lit/directory_builder.rb +153 -0
- data/lib/cabriolet/lit/guid_generator.rb +16 -0
- data/lib/cabriolet/lit/header_writer.rb +124 -0
- data/lib/cabriolet/lit/parser.rb +670 -0
- data/lib/cabriolet/lit/piece_builder.rb +74 -0
- data/lib/cabriolet/lit/structure_builder.rb +252 -0
- data/lib/cabriolet/models/hlp_file.rb +130 -29
- data/lib/cabriolet/models/hlp_header.rb +105 -17
- data/lib/cabriolet/models/lit_header.rb +212 -25
- data/lib/cabriolet/models/szdd_header.rb +10 -2
- data/lib/cabriolet/models/winhelp_header.rb +127 -0
- data/lib/cabriolet/oab/command_handler.rb +257 -0
- data/lib/cabriolet/oab/compressor.rb +17 -8
- data/lib/cabriolet/oab/decompressor.rb +41 -10
- data/lib/cabriolet/offset_calculator.rb +81 -0
- data/lib/cabriolet/plugin.rb +233 -0
- data/lib/cabriolet/plugin_manager.rb +453 -0
- data/lib/cabriolet/plugin_validator.rb +422 -0
- data/lib/cabriolet/quantum_shared.rb +105 -0
- data/lib/cabriolet/system/io_system.rb +3 -0
- data/lib/cabriolet/system/memory_handle.rb +17 -4
- data/lib/cabriolet/szdd/command_handler.rb +217 -0
- data/lib/cabriolet/szdd/compressor.rb +15 -11
- data/lib/cabriolet/szdd/decompressor.rb +18 -9
- data/lib/cabriolet/version.rb +1 -1
- data/lib/cabriolet.rb +181 -20
- metadata +69 -4
- data/lib/cabriolet/auto.rb +0 -173
- data/lib/cabriolet/parallel.rb +0 -333
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Cabriolet
  # Factory for creating and managing compression/decompression algorithms
  #
  # The AlgorithmFactory provides a centralized registry for compression and
  # decompression algorithms. It handles algorithm registration, validation,
  # instantiation, and type normalization.
  #
  # @example Register and create an algorithm
  #   factory = AlgorithmFactory.new
  #   factory.register(:custom, CustomCompressor, category: :compressor)
  #   algorithm = factory.create(:custom, :compressor, io, input, output, 4096)
  #
  # @example Use with integer type constants
  #   # Constants::COMP_TYPE_MSZIP is normalized to :mszip
  #   algorithm = factory.create(Constants::COMP_TYPE_MSZIP, :decompressor,
  #                              io, input, output, 4096)
  class AlgorithmFactory
    # Categories an algorithm may be registered under.
    VALID_CATEGORIES = %i[compressor decompressor].freeze

    # @return [Hash] Live registry, keyed by category and then by type.
    #   Each entry is a Hash with :class, :priority and :format keys.
    attr_reader :algorithms

    # Initialize a new algorithm factory
    #
    # @param auto_register [Boolean] Whether to automatically register
    #   built-in algorithms
    def initialize(auto_register: true)
      @algorithms = { compressor: {}, decompressor: {} }
      register_built_in_algorithms if auto_register
    end

    # Register an algorithm in the factory
    #
    # The type is stored exactly as given (no integer normalization), and a
    # later registration under the same category and type replaces the
    # earlier one.
    #
    # @param type [Symbol] Algorithm type (:none, :mszip, :lzx, :quantum,
    #   :lzss, or a custom symbol)
    # @param algorithm_class [Class] Algorithm class (must inherit from
    #   Compressors::Base or Decompressors::Base, matching the category)
    # @param options [Hash] Registration options
    # @option options [Symbol] :category Required - :compressor or
    #   :decompressor
    # @option options [Integer] :priority Priority for selection (default: 0)
    # @option options [Symbol, nil] :format Format restriction (optional)
    #
    # @return [self] Returns self for method chaining
    #
    # @raise [ArgumentError] If category is missing or invalid
    # @raise [ArgumentError] If algorithm_class is not a Class inheriting
    #   from the category's Base class
    #
    # @example Register a custom compressor
    #   factory.register(:custom, MyCompressor,
    #                    category: :compressor, priority: 10)
    #
    # @example Chain multiple registrations
    #   factory
    #     .register(:algo1, Algo1, category: :compressor)
    #     .register(:algo2, Algo2, category: :decompressor)
    def register(type, algorithm_class, **options)
      category = options[:category]
      validate_category!(category)
      validate_algorithm_class!(algorithm_class, category)

      @algorithms[category][type] = {
        class: algorithm_class,
        priority: options.fetch(:priority, 0),
        format: options[:format],
      }

      self
    end

    # Create an instance of a registered algorithm
    #
    # @param type [Symbol, Integer] Algorithm type (symbol or constant)
    # @param category [Symbol] Category (:compressor or :decompressor)
    # @param io_system [System::IOSystem] I/O system for operations
    # @param input [System::FileHandle, System::MemoryHandle] Input handle
    # @param output [System::FileHandle, System::MemoryHandle] Output handle
    # @param buffer_size [Integer] Buffer size for I/O operations
    # @param kwargs [Hash] Additional keyword arguments forwarded to the
    #   algorithm's constructor
    #
    # @return [Compressors::Base, Decompressors::Base] Algorithm instance
    #
    # @raise [ArgumentError] If category is invalid
    # @raise [UnsupportedFormatError] If the type is an unknown integer
    #   constant or is not registered under the category
    #
    # @example Create a decompressor
    #   decompressor = factory.create(:mszip, :decompressor,
    #                                 io, input, output, 4096)
    def create(type, category, io_system, input, output, buffer_size,
               **kwargs)
      validate_category!(category)

      normalized_type = normalize_type(type)
      algorithm_info = @algorithms[category][normalized_type]

      unless algorithm_info
        raise UnsupportedFormatError,
              "Unknown #{category} algorithm: #{normalized_type}"
      end

      algorithm_info[:class].new(io_system, input, output, buffer_size,
                                 **kwargs)
    end

    # Check if an algorithm is registered
    #
    # The lookup uses the type exactly as given; integer constants are not
    # normalized here. An unknown category simply yields false.
    #
    # @param type [Symbol] Algorithm type
    # @param category [Symbol] Category (:compressor or :decompressor)
    #
    # @return [Boolean] True if registered, false otherwise
    #
    # @example Check registration
    #   factory.registered?(:mszip, :compressor)    #=> true
    #   factory.registered?(:unknown, :compressor)  #=> false
    def registered?(type, category)
      @algorithms[category]&.key?(type) || false
    end

    # List registered algorithms
    #
    # The result is a snapshot: both the per-category hashes and the
    # per-algorithm info hashes are copies, so editing the returned value
    # does not alter the registry.
    #
    # @param category [Symbol, nil] Optional category filter
    #
    # @return [Hash] Hash of registered algorithms; empty for an unknown
    #   category
    #
    # @example List all algorithms
    #   factory.list
    #   #=> { compressor: { mszip: {...}, lzx: {...} },
    #   #     decompressor: { none: {...}, mszip: {...} } }
    #
    # @example List compressors only
    #   factory.list(:compressor)
    #   #=> { mszip: {...}, lzx: {...}, quantum: {...}, lzss: {...} }
    def list(category = nil)
      return snapshot(@algorithms[category]) unless category.nil?

      {
        compressor: snapshot(@algorithms[:compressor]),
        decompressor: snapshot(@algorithms[:decompressor]),
      }
    end

    # Unregister an algorithm
    #
    # @param type [Symbol] Algorithm type to remove
    # @param category [Symbol] Category (:compressor or :decompressor)
    #
    # @return [Boolean] True if removed, false if the type or the category
    #   is not present
    #
    # @example Unregister an algorithm
    #   factory.unregister(:mszip, :compressor)    #=> true
    #   factory.unregister(:unknown, :compressor)  #=> false
    # rubocop:disable Naming/PredicatePrefix
    def unregister(type, category)
      registry = @algorithms[category]
      # Unknown category: nothing to remove. Mirrors registered?/list, which
      # also tolerate categories that are not in the registry.
      return false unless registry

      !registry.delete(type).nil?
    end
    # rubocop:enable Naming/PredicatePrefix

    private

    # Register all built-in compression and decompression algorithms
    #
    # Registers 5 decompressors (none, lzss, mszip, lzx, quantum) and
    # 4 compressors (lzss, mszip, lzx, quantum).
    #
    # @return [void]
    def register_built_in_algorithms
      # Register decompressors (5 total)
      register(:none, Decompressors::None, category: :decompressor)
      register(:lzss, Decompressors::LZSS, category: :decompressor)
      register(:mszip, Decompressors::MSZIP, category: :decompressor)
      register(:lzx, Decompressors::LZX, category: :decompressor)
      register(:quantum, Decompressors::Quantum, category: :decompressor)

      # Register compressors (4 total - no 'none' compressor)
      register(:lzss, Compressors::LZSS, category: :compressor)
      register(:mszip, Compressors::MSZIP, category: :compressor)
      register(:lzx, Compressors::LZX, category: :compressor)
      register(:quantum, Compressors::Quantum, category: :compressor)
    end

    # Normalize algorithm type from integer constant to symbol
    #
    # Symbols pass through untouched; anything else is matched against the
    # Constants::COMP_TYPE_* values.
    #
    # @param type [Symbol, Integer] Type to normalize
    #
    # @return [Symbol] Normalized type symbol
    #
    # @raise [UnsupportedFormatError] If type is neither a Symbol nor one of
    #   the known COMP_TYPE_* constants
    def normalize_type(type)
      return type if type.is_a?(Symbol)

      case type
      when Constants::COMP_TYPE_NONE then :none
      when Constants::COMP_TYPE_MSZIP then :mszip
      when Constants::COMP_TYPE_QUANTUM then :quantum
      when Constants::COMP_TYPE_LZX then :lzx
      else
        raise UnsupportedFormatError,
              "Unsupported compression type: #{type}"
      end
    end

    # Validate that category is valid
    #
    # @param category [Symbol] Category to validate
    #
    # @raise [ArgumentError] If category is not :compressor or :decompressor
    #
    # @return [void]
    def validate_category!(category)
      return if VALID_CATEGORIES.include?(category)

      raise ArgumentError,
            "Invalid category: #{category}. " \
            "Must be :compressor or :decompressor"
    end

    # Validate that algorithm class inherits from appropriate base class
    #
    # @param klass [Class] Algorithm class to validate
    # @param category [Symbol] Category (:compressor or :decompressor)
    #
    # @raise [ArgumentError] If klass is not a Class, or does not inherit
    #   from the base class of the category (the base class itself is
    #   rejected as well)
    #
    # @return [void]
    def validate_algorithm_class!(klass, category)
      base_class = category == :compressor ? Compressors::Base : Decompressors::Base

      # Guard with is_a?(Class) first: calling `<` on nil or another
      # non-class value would raise NoMethodError or an unrelated comparison
      # error instead of the documented ArgumentError.
      return if klass.is_a?(Class) && klass < base_class

      raise ArgumentError,
            "#{klass.inspect} must inherit from #{base_class}"
    end

    # Copy one category's registry, including each algorithm's info hash
    #
    # @param registry [Hash, nil] Per-category registry, or nil when the
    #   category is unknown
    #
    # @return [Hash] Independent copy (empty when registry is nil)
    def snapshot(registry)
      (registry || {}).each_with_object({}) do |(type, info), copy|
        copy[type] = info.dup
      end
    end
  end
end
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "file_manager"
|
|
4
|
+
require_relative "system/io_system"
|
|
5
|
+
|
|
6
|
+
module Cabriolet
  # Common superclass for the per-format archive writers.
  #
  # #generate is a Template Method: it fixes the order of the generation
  # steps and leaves the format-dependent work to hook methods.
  #
  # Hooks a subclass MUST provide (both raise NotImplementedError here):
  # - build_structure(options) - produce the structure to be written
  # - write_format(output_handle, structure) - emit the binary data
  #
  # Hooks a subclass MAY override:
  # - validate_generation_prerequisites!(options) - extra validation
  # - post_generation_hook(output_file, structure, bytes) - cleanup/logging
  #
  # The class also owns a FileManager that collects entries added through
  # #add_file and #add_data, and offers #compress_data as a shared helper
  # built on the algorithm factory.
  #
  # @example Creating a format compressor
  #   class MyFormatCompressor < BaseCompressor
  #     protected
  #
  #     def build_structure(options)
  #       { header: build_header, files: collect_files }
  #     end
  #
  #     def write_format(output_handle, structure)
  #       io_system.write(output_handle, structure[:header].to_binary_s)
  #     end
  #   end
  class BaseCompressor
    attr_reader :io_system, :algorithm_factory, :file_manager

    # Set up the collaborators used during generation.
    #
    # @param io_system [System::IOSystem, nil] I/O system; a new
    #   System::IOSystem is created when nil
    # @param algorithm_factory [AlgorithmFactory, nil] Algorithm factory;
    #   Cabriolet.algorithm_factory is used when nil
    def initialize(io_system = nil, algorithm_factory = nil)
      @io_system = io_system
      @io_system ||= System::IOSystem.new
      @algorithm_factory = algorithm_factory
      @algorithm_factory ||= Cabriolet.algorithm_factory
      @file_manager = FileManager.new
    end

    # Queue a file from disk for inclusion in the archive.
    #
    # Pure delegation to FileManager#add_file.
    #
    # @param source_path [String] Path to source file
    # @param archive_path [String, nil] Path in archive (nil is passed
    #   through to the file manager)
    # @param options [Hash] Format-specific options
    # @return [FileEntry] Whatever the file manager returns for the entry
    def add_file(source_path, archive_path = nil, **options)
      file_manager.add_file(source_path, archive_path, **options)
    end

    # Queue in-memory data for inclusion in the archive.
    #
    # Pure delegation to FileManager#add_data.
    #
    # @param data [String] File data
    # @param archive_path [String] Path in archive
    # @param options [Hash] Format-specific options
    # @return [FileEntry] Whatever the file manager returns for the entry
    def add_data(data, archive_path, **options)
      file_manager.add_data(data, archive_path, **options)
    end

    # Generate archive (Template Method)
    #
    # Steps, in order:
    # 1. validate_generation_prerequisites!(options)
    # 2. build_structure(options)
    # 3. write the structure to output_file
    # 4. post_generation_hook(output_file, structure, bytes_written)
    #
    # The hook's own return value is ignored; the byte count from step 3 is
    # what callers receive. Subclasses customize the hooks rather than
    # overriding this method.
    #
    # @param output_file [String] Path to output file
    # @param options [Hash] Format-specific options
    # @return [Integer] Bytes written to output file
    # @raise [ArgumentError] if validation fails
    def generate(output_file, **options)
      validate_generation_prerequisites!(options)

      layout = build_structure(options)

      write_to_file(output_file, layout).tap do |written|
        post_generation_hook(output_file, layout, written)
      end
    end

    protected

    # Hook: Build format-specific structure
    #
    # Mandatory for subclasses. The returned value is handed unchanged to
    # write_format and post_generation_hook.
    #
    # @param options [Hash] Generation options from generate() call
    # @return [Hash] Format structure ready for writing
    # @raise [NotImplementedError] if not implemented by subclass
    def build_structure(options)
      message = "#{self.class.name} must implement build_structure(options)"
      raise NotImplementedError, message
    end

    # Hook: Write format to output handle
    #
    # Mandatory for subclasses. Receives the handle opened by the base class
    # (which also closes it afterwards) and the structure produced by
    # build_structure.
    #
    # @param output_handle [System::FileHandle] Open output handle
    # @param structure [Hash] Format structure from build_structure()
    # @return [Integer] Bytes written
    # @raise [NotImplementedError] if not implemented by subclass
    def write_format(output_handle, structure)
      message = "#{self.class.name} must implement write_format(output_handle, structure)"
      raise NotImplementedError, message
    end

    # Hook: Validate pre-generation requirements
    #
    # Optional override point. The default only insists that the file
    # manager is not empty.
    #
    # @param options [Hash] Generation options (unused by the default)
    # @raise [ArgumentError] if validation fails
    def validate_generation_prerequisites!(_options)
      return unless file_manager.empty?

      raise ArgumentError, "No files added to archive"
    end

    # Hook: Post-generation callback
    #
    # Optional override point, invoked after the output has been written.
    #
    # @param output_file [String] Path to generated file
    # @param structure [Hash] Generated structure
    # @param bytes_written [Integer] Bytes written to file
    # @return [void]
    def post_generation_hook(_output_file, _structure, _bytes_written)
      # Intentionally empty: the default does nothing and returns nil.
    end

    # Helper: Compress data using specified algorithm
    #
    # Wraps the data in an in-memory input handle, asks the algorithm
    # factory for a compressor writing to an in-memory output handle, runs
    # it, and returns what was collected. The input's byte size is passed as
    # the buffer size.
    #
    # @param data [String] Data to compress
    # @param algorithm [Symbol] Algorithm type (:lzss, :mszip, :lzx, :quantum)
    # @param options [Hash] Extra keyword arguments forwarded to the factory
    # @option options [Integer] :window_bits Window size in bits
    # @option options [Integer] :mode Algorithm mode
    # @return [String] Compressed data
    def compress_data(data, algorithm:, **options)
      source = System::MemoryHandle.new(data)
      sink = System::MemoryHandle.new("", Constants::MODE_WRITE)

      algorithm_factory
        .create(algorithm, :compressor, io_system, source, sink,
                data.bytesize, **options)
        .compress

      sink.data
    end

    private

    # Open the output file, run write_format on it, and always close it.
    #
    # The handle is opened before the protected region, so a failure to open
    # propagates without a close attempt; once opened, the handle is closed
    # even when write_format raises.
    #
    # @param output_file [String] Path to output file
    # @param structure [Hash] Format structure
    # @return [Integer] Bytes written, as reported by write_format
    def write_to_file(output_file, structure)
      handle = io_system.open(output_file, Constants::MODE_WRITE)

      begin
        write_format(handle, structure)
      ensure
        io_system.close(handle) if handle
      end
    end
  end
end
|
|
@@ -4,22 +4,31 @@ module Cabriolet
|
|
|
4
4
|
module Binary
|
|
5
5
|
# Bitstream provides bit-level I/O operations for reading compressed data
|
|
6
6
|
class Bitstream
|
|
7
|
-
attr_reader :io_system, :handle, :buffer_size
|
|
7
|
+
attr_reader :io_system, :handle, :buffer_size, :bit_order
|
|
8
8
|
|
|
9
9
|
# Initialize a new bitstream
|
|
10
10
|
#
|
|
11
11
|
# @param io_system [System::IOSystem] I/O system for reading data
|
|
12
12
|
# @param handle [System::FileHandle, System::MemoryHandle] Handle to read from
|
|
13
13
|
# @param buffer_size [Integer] Size of the input buffer
|
|
14
|
+
# @param bit_order [Symbol] Bit order (:lsb or :msb)
|
|
15
|
+
# @param salvage [Boolean] Salvage mode - return 0 on EOF instead of raising
|
|
14
16
|
def initialize(io_system, handle,
|
|
15
|
-
buffer_size = Cabriolet.default_buffer_size)
|
|
17
|
+
buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
|
|
16
18
|
@io_system = io_system
|
|
17
19
|
@handle = handle
|
|
18
20
|
@buffer_size = buffer_size
|
|
21
|
+
@bit_order = bit_order
|
|
22
|
+
@salvage = salvage
|
|
19
23
|
@buffer = ""
|
|
20
24
|
@buffer_pos = 0
|
|
21
25
|
@bit_buffer = 0
|
|
22
26
|
@bits_left = 0
|
|
27
|
+
@input_end = false # Track EOF state (matches libmspack's input_end flag)
|
|
28
|
+
|
|
29
|
+
# For MSB mode, we need to know the bit width of the buffer
|
|
30
|
+
# Ruby integers are arbitrary precision, so we use 32 bits as standard
|
|
31
|
+
@bitbuf_width = 32
|
|
23
32
|
end
|
|
24
33
|
|
|
25
34
|
# Read specified number of bits from the stream
|
|
@@ -33,31 +42,129 @@ buffer_size = Cabriolet.default_buffer_size)
|
|
|
33
42
|
"Can only read 1-32 bits at a time"
|
|
34
43
|
end
|
|
35
44
|
|
|
45
|
+
if @bit_order == :msb
|
|
46
|
+
read_bits_msb(num_bits)
|
|
47
|
+
else
|
|
48
|
+
read_bits_lsb(num_bits)
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
# Read 2 bytes as a little-endian 16-bit word for MSB mode
#
# Helper for the MSB read path. read_byte returns nil to request zero
# padding (on the first EOF, and on every EOF in salvage mode) and raises
# DecompressionError on a later EOF when not in salvage mode.
#
# When the first byte of the word is already missing, a whole zero word is
# returned without touching the input again. Asking for the second byte at
# that point would count as the second EOF and raise before the padding
# described for the MSB reader was ever delivered.
#
# @return [Integer] 16-bit word; missing bytes at end of input read as 0
# @raise [DecompressionError] propagated from read_byte once the padding
#   has been used up (never in salvage mode)
def read_msb_word
  low = read_byte
  # End of input on a word boundary: hand out the zero padding word.
  return 0 if low.nil?

  # End of input in the middle of a word: the high byte is padded with 0.
  high = read_byte || 0

  low | (high << 8)
end
|
|
71
|
+
|
|
72
|
+
# Read bits in LSB-first order
|
|
73
|
+
#
|
|
74
|
+
# Per libmspack: EOF handling allows padding to avoid bitstream overrun.
|
|
75
|
+
# First EOF: pad with zeros (2 bytes worth). Second EOF: raise error.
|
|
76
|
+
def read_bits_lsb(num_bits)
|
|
36
77
|
# Ensure we have enough bits in the buffer
|
|
37
78
|
while @bits_left < num_bits
|
|
38
79
|
byte = read_byte
|
|
39
|
-
|
|
80
|
+
# First EOF: pad with zeros (matches libmspack read_input behavior)
|
|
81
|
+
# On second EOF, read_byte will raise DecompressionError
|
|
82
|
+
# In salvage mode, pad indefinitely; otherwise pad on first EOF
|
|
83
|
+
byte = 0 if byte.nil?
|
|
84
|
+
|
|
85
|
+
# DEBUG
|
|
86
|
+
if ENV["DEBUG_BITSTREAM"]
|
|
87
|
+
warn "DEBUG LSB read_byte: buffer_pos=#{@buffer_pos} byte=#{byte} (#{byte.to_s(2).rjust(
|
|
88
|
+
8, '0'
|
|
89
|
+
)}) bits_left=#{@bits_left}"
|
|
90
|
+
end
|
|
40
91
|
|
|
92
|
+
# INJECT_BITS (LSB): append to the right
|
|
41
93
|
@bit_buffer |= (byte << @bits_left)
|
|
42
94
|
@bits_left += 8
|
|
43
95
|
end
|
|
44
96
|
|
|
45
|
-
#
|
|
97
|
+
# PEEK_BITS (LSB): extract from the right
|
|
46
98
|
result = @bit_buffer & ((1 << num_bits) - 1)
|
|
99
|
+
# REMOVE_BITS (LSB): shift right
|
|
47
100
|
@bit_buffer >>= num_bits
|
|
48
101
|
@bits_left -= num_bits
|
|
49
102
|
|
|
103
|
+
# DEBUG
|
|
104
|
+
warn "DEBUG LSB read_bits(#{num_bits}): result=#{result} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
|
|
105
|
+
|
|
50
106
|
result
|
|
51
107
|
end
|
|
52
108
|
|
|
109
|
+
# Read bits in MSB-first order (libmspack LZX/Quantum style)
#
# The bit buffer is refilled one little-endian 16-bit word at a time via
# read_msb_word. Each new word is injected directly below the bits already
# held, and results are taken from the top of the @bitbuf_width-wide buffer.
# End-of-input handling is delegated to read_msb_word.
#
# Requests wider than 16 bits are served as two narrower reads (high part
# first, since the stream is MSB-first). A single refill loop is not safe
# for them: once more than 16 bits are pending, the injection shift
# (@bitbuf_width - 16 - @bits_left) becomes negative with the 32-bit buffer
# width, and a negative `<<` in Ruby is a right shift that silently drops
# input bits.
#
# @param num_bits [Integer] Number of bits to read (read_bits accepts 1-32)
# @return [Integer] The bits, first-read bit most significant
def read_bits_msb(num_bits)
  if num_bits > 16
    high = read_bits_msb(num_bits - 16)
    return (high << 16) | read_bits_msb(16)
  end

  # Ensure we have enough bits in the buffer. With num_bits <= 16 a refill
  # only happens while at most 15 bits are pending, so the shift below stays
  # non-negative for the 32-bit buffer.
  while @bits_left < num_bits
    word = read_msb_word

    # DEBUG
    warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]

    # INJECT_BITS (MSB): inject at the left side
    @bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
    @bits_left += 16
  end

  # PEEK_BITS (MSB): extract from the left
  result = @bit_buffer >> (@bitbuf_width - num_bits)

  # REMOVE_BITS (MSB): shift left, keeping the buffer at its fixed width
  @bit_buffer = (@bit_buffer << num_bits) & ((1 << @bitbuf_width) - 1)
  @bits_left -= num_bits

  # DEBUG
  warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]

  result
end
|
|
138
|
+
|
|
139
|
+
public
|
|
140
|
+
|
|
53
141
|
# Read a single byte from the input
|
|
54
142
|
#
|
|
55
|
-
#
|
|
143
|
+
# Per libmspack readbits.h: On first EOF, we pad with zeros.
|
|
144
|
+
# On second EOF, we raise an error (unless salvage mode).
|
|
145
|
+
#
|
|
146
|
+
# @return [Integer, nil] Byte value or nil to signal EOF padding needed
|
|
147
|
+
# @raise [DecompressionError] on second EOF attempt (unless salvage mode)
|
|
56
148
|
def read_byte
|
|
57
149
|
if @buffer_pos >= @buffer.bytesize
|
|
58
150
|
@buffer = @io_system.read(@handle, @buffer_size)
|
|
59
151
|
@buffer_pos = 0
|
|
60
|
-
|
|
152
|
+
|
|
153
|
+
if @buffer.empty?
|
|
154
|
+
# Hit EOF - check if this is first or second EOF
|
|
155
|
+
if @input_end
|
|
156
|
+
# Second EOF: raise error unless salvage mode
|
|
157
|
+
unless @salvage
|
|
158
|
+
raise DecompressionError, "Unexpected end of input stream"
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
# In salvage mode, keep returning nil
|
|
162
|
+
else
|
|
163
|
+
# First EOF: signal to pad with zeros (return nil)
|
|
164
|
+
@input_end = true
|
|
165
|
+
end
|
|
166
|
+
return nil
|
|
167
|
+
end
|
|
61
168
|
end
|
|
62
169
|
|
|
63
170
|
byte = @buffer.getbyte(@buffer_pos)
|
|
@@ -84,16 +191,50 @@ buffer_size = Cabriolet.default_buffer_size)
|
|
|
84
191
|
"Can only peek 1-32 bits at a time"
|
|
85
192
|
end
|
|
86
193
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
194
|
+
if @bit_order == :msb
|
|
195
|
+
# Ensure we have enough bits
|
|
196
|
+
while @bits_left < num_bits
|
|
197
|
+
# Read 2 bytes at a time (little-endian), like libmspack
|
|
198
|
+
byte0 = read_byte
|
|
199
|
+
if byte0.nil?
|
|
200
|
+
# At EOF: break and work with remaining bits
|
|
201
|
+
break
|
|
202
|
+
end
|
|
91
203
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
204
|
+
byte1 = read_byte
|
|
205
|
+
byte1 = 0 if byte1.nil?
|
|
206
|
+
|
|
207
|
+
# Combine as little-endian 16-bit value
|
|
208
|
+
word = byte0 | (byte1 << 8)
|
|
209
|
+
|
|
210
|
+
# INJECT_BITS (MSB): inject at the left side
|
|
211
|
+
@bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
|
|
212
|
+
@bits_left += 16
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
# PEEK_BITS (MSB): extract from the left
|
|
216
|
+
# If we have fewer than num_bits available, result may be incorrect
|
|
217
|
+
# but this matches EOF handling behavior
|
|
218
|
+
@bit_buffer >> (@bitbuf_width - num_bits)
|
|
219
|
+
else
|
|
220
|
+
# Ensure we have enough bits (LSB mode)
|
|
221
|
+
while @bits_left < num_bits
|
|
222
|
+
byte = read_byte
|
|
223
|
+
if byte.nil?
|
|
224
|
+
# At EOF: pad remaining bits with zeros and continue
|
|
225
|
+
# This matches libmspack behavior where peek can use partial bits
|
|
226
|
+
# The missing high bits are implicitly 0
|
|
227
|
+
break
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
@bit_buffer |= (byte << @bits_left)
|
|
231
|
+
@bits_left += 8
|
|
232
|
+
end
|
|
95
233
|
|
|
96
|
-
|
|
234
|
+
# Extract num_bits from bit_buffer
|
|
235
|
+
# If we have fewer than num_bits, the high bits will be 0
|
|
236
|
+
@bit_buffer & ((1 << num_bits) - 1)
|
|
237
|
+
end
|
|
97
238
|
end
|
|
98
239
|
|
|
99
240
|
# Skip specified number of bits
|
|
@@ -111,9 +252,19 @@ buffer_size = Cabriolet.default_buffer_size)
|
|
|
111
252
|
# @return [Integer] Bits as an integer
|
|
112
253
|
def read_bits_be(num_bits)
|
|
113
254
|
result = 0
|
|
114
|
-
num_bits
|
|
115
|
-
|
|
255
|
+
full_bytes = num_bits / 8
|
|
256
|
+
remaining_bits = num_bits % 8
|
|
257
|
+
|
|
258
|
+
# Read full bytes first (more efficient than bit-by-bit)
|
|
259
|
+
full_bytes.times do
|
|
260
|
+
result = (result << 8) | read_bits(8)
|
|
116
261
|
end
|
|
262
|
+
|
|
263
|
+
# Read remaining bits
|
|
264
|
+
if remaining_bits.positive?
|
|
265
|
+
result = (result << remaining_bits) | read_bits(remaining_bits)
|
|
266
|
+
end
|
|
267
|
+
|
|
117
268
|
result
|
|
118
269
|
end
|
|
119
270
|
|