cabriolet 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/README.adoc +700 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +154 -14
  6. data/lib/cabriolet/binary/bitstream_writer.rb +129 -17
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +35 -43
  13. data/lib/cabriolet/cab/decompressor.rb +14 -19
  14. data/lib/cabriolet/cab/extractor.rb +140 -31
  15. data/lib/cabriolet/chm/command_handler.rb +227 -0
  16. data/lib/cabriolet/chm/compressor.rb +7 -3
  17. data/lib/cabriolet/chm/decompressor.rb +39 -21
  18. data/lib/cabriolet/chm/parser.rb +5 -2
  19. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  20. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  21. data/lib/cabriolet/cli/command_registry.rb +83 -0
  22. data/lib/cabriolet/cli.rb +356 -607
  23. data/lib/cabriolet/compressors/base.rb +1 -1
  24. data/lib/cabriolet/compressors/lzx.rb +241 -54
  25. data/lib/cabriolet/compressors/mszip.rb +35 -3
  26. data/lib/cabriolet/compressors/quantum.rb +34 -45
  27. data/lib/cabriolet/decompressors/base.rb +1 -1
  28. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  29. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  30. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  31. data/lib/cabriolet/decompressors/quantum.rb +3 -2
  32. data/lib/cabriolet/errors.rb +3 -0
  33. data/lib/cabriolet/file_entry.rb +156 -0
  34. data/lib/cabriolet/file_manager.rb +144 -0
  35. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  36. data/lib/cabriolet/hlp/compressor.rb +28 -238
  37. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  38. data/lib/cabriolet/hlp/parser.rb +52 -101
  39. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  40. data/lib/cabriolet/hlp/quickhelp/compressor.rb +626 -0
  41. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  42. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  43. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  44. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  45. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  46. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  47. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  48. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  49. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  50. data/lib/cabriolet/huffman/tree.rb +85 -1
  51. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  52. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  53. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  54. data/lib/cabriolet/lit/command_handler.rb +221 -0
  55. data/lib/cabriolet/lit/compressor.rb +633 -38
  56. data/lib/cabriolet/lit/decompressor.rb +518 -152
  57. data/lib/cabriolet/lit/parser.rb +670 -0
  58. data/lib/cabriolet/models/hlp_file.rb +130 -29
  59. data/lib/cabriolet/models/hlp_header.rb +105 -17
  60. data/lib/cabriolet/models/lit_header.rb +212 -25
  61. data/lib/cabriolet/models/szdd_header.rb +10 -2
  62. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  63. data/lib/cabriolet/oab/command_handler.rb +257 -0
  64. data/lib/cabriolet/oab/compressor.rb +17 -8
  65. data/lib/cabriolet/oab/decompressor.rb +41 -10
  66. data/lib/cabriolet/offset_calculator.rb +81 -0
  67. data/lib/cabriolet/plugin.rb +233 -0
  68. data/lib/cabriolet/plugin_manager.rb +453 -0
  69. data/lib/cabriolet/plugin_validator.rb +422 -0
  70. data/lib/cabriolet/system/io_system.rb +3 -0
  71. data/lib/cabriolet/system/memory_handle.rb +17 -4
  72. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  73. data/lib/cabriolet/szdd/compressor.rb +15 -11
  74. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  75. data/lib/cabriolet/version.rb +1 -1
  76. data/lib/cabriolet.rb +67 -17
  77. metadata +33 -2
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cabriolet
4
+ # Factory for creating and managing compression/decompression algorithms
5
+ #
6
+ # The AlgorithmFactory provides a centralized registry for compression and
7
+ # decompression algorithms. It handles algorithm registration, validation,
8
+ # instantiation, and type normalization.
9
+ #
10
+ # @example Register and create an algorithm
11
+ # factory = AlgorithmFactory.new
12
+ # factory.register(:custom, CustomCompressor, category: :compressor)
13
+ # algorithm = factory.create(:custom, :compressor, io, input, output, 4096)
14
+ #
15
+ # @example Use with integer type constants
16
+ # # Constants::COMP_TYPE_MSZIP (1) is normalized to :mszip
17
+ # algorithm = factory.create(1, :decompressor, io, input, output, 4096)
18
+ class AlgorithmFactory
19
+ # @return [Hash] Registry of algorithms by category and type
20
+ attr_reader :algorithms
21
+
22
+ # Initialize a new algorithm factory
23
+ #
24
+ # @param auto_register [Boolean] Whether to automatically register
25
+ # built-in algorithms
26
+ def initialize(auto_register: true)
27
+ @algorithms = { compressor: {}, decompressor: {} }
28
+ register_built_in_algorithms if auto_register
29
+ end
30
+
31
+ # Register an algorithm in the factory
32
+ #
33
+ # @param type [Symbol] Algorithm type (:none, :mszip, :lzx, :quantum,
34
+ # :lzss)
35
+ # @param algorithm_class [Class] Algorithm class (must inherit from
36
+ # Compressors::Base or Decompressors::Base)
37
+ # @param options [Hash] Registration options
38
+ # @option options [Symbol] :category Required - :compressor or
39
+ # :decompressor
40
+ # @option options [Integer] :priority Priority for selection (default: 0)
41
+ # @option options [Symbol, nil] :format Format restriction (optional)
42
+ #
43
+ # @return [self] Returns self for method chaining
44
+ #
45
+ # @raise [ArgumentError] If category is invalid
46
+ # @raise [ArgumentError] If algorithm_class doesn't inherit from Base
47
+ #
48
+ # @example Register a custom compressor
49
+ # factory.register(:custom, MyCompressor,
50
+ # category: :compressor, priority: 10)
51
+ #
52
+ # @example Chain multiple registrations
53
+ # factory
54
+ # .register(:algo1, Algo1, category: :compressor)
55
+ # .register(:algo2, Algo2, category: :decompressor)
56
+ def register(type, algorithm_class, **options)
57
+ category = options[:category]
58
+ validate_category!(category)
59
+ validate_algorithm_class!(algorithm_class, category)
60
+
61
+ @algorithms[category][type] = {
62
+ class: algorithm_class,
63
+ priority: options.fetch(:priority, 0),
64
+ format: options[:format],
65
+ }
66
+
67
+ self
68
+ end
69
+
70
+ # Create an instance of a registered algorithm
71
+ #
72
+ # @param type [Symbol, Integer] Algorithm type (symbol or constant)
73
+ # @param category [Symbol] Category (:compressor or :decompressor)
74
+ # @param io_system [System::IOSystem] I/O system for operations
75
+ # @param input [System::FileHandle, System::MemoryHandle] Input handle
76
+ # @param output [System::FileHandle, System::MemoryHandle] Output handle
77
+ # @param buffer_size [Integer] Buffer size for I/O operations
78
+ # @param kwargs [Hash] Additional keyword arguments for algorithm
79
+ #
80
+ # @return [Compressors::Base, Decompressors::Base] Algorithm instance
81
+ #
82
+ # @raise [ArgumentError] If category is invalid
83
+ # @raise [UnsupportedFormatError] If algorithm type not registered
84
+ #
85
+ # @example Create a decompressor
86
+ # decompressor = factory.create(:mszip, :decompressor,
87
+ # io, input, output, 4096)
88
+ #
89
+ # @example Create with integer constant
90
+ # # Constants::COMP_TYPE_LZX (3) -> :lzx
91
+ # compressor = factory.create(3, :compressor,
92
+ # io, input, output, 8192)
93
+ def create(type, category, io_system, input, output, buffer_size,
94
+ **kwargs)
95
+ validate_category!(category)
96
+
97
+ normalized_type = normalize_type(type)
98
+ algorithm_info = @algorithms[category][normalized_type]
99
+
100
+ unless algorithm_info
101
+ raise UnsupportedFormatError,
102
+ "Unknown #{category} algorithm: #{normalized_type}"
103
+ end
104
+
105
+ algorithm_info[:class].new(io_system, input, output, buffer_size,
106
+ **kwargs)
107
+ end
108
+
109
+ # Check if an algorithm is registered
110
+ #
111
+ # @param type [Symbol] Algorithm type
112
+ # @param category [Symbol] Category (:compressor or :decompressor)
113
+ #
114
+ # @return [Boolean] True if registered, false otherwise
115
+ #
116
+ # @example Check registration
117
+ # factory.registered?(:mszip, :compressor) #=> true
118
+ # factory.registered?(:unknown, :compressor) #=> false
119
+ def registered?(type, category)
120
+ @algorithms[category]&.key?(type) || false
121
+ end
122
+
123
+ # List registered algorithms
124
+ #
125
+ # @param category [Symbol, nil] Optional category filter
126
+ #
127
+ # @return [Hash] Hash of registered algorithms
128
+ #
129
+ # @example List all algorithms
130
+ # factory.list
131
+ # #=> { compressor: { mszip: {...}, lzx: {...} },
132
+ # # decompressor: { none: {...}, mszip: {...} } }
133
+ #
134
+ # @example List compressors only
135
+ # factory.list(:compressor)
136
+ # #=> { mszip: {...}, lzx: {...}, quantum: {...}, lzss: {...} }
137
+ def list(category = nil)
138
+ if category.nil?
139
+ {
140
+ compressor: @algorithms[:compressor].dup,
141
+ decompressor: @algorithms[:decompressor].dup,
142
+ }
143
+ else
144
+ @algorithms[category]&.dup || {}
145
+ end
146
+ end
147
+
148
+ # Unregister an algorithm
149
+ #
150
+ # @param type [Symbol] Algorithm type to remove
151
+ # @param category [Symbol] Category (:compressor or :decompressor)
152
+ #
153
+ # @return [Boolean] True if removed, false if not found
154
+ #
155
+ # @example Unregister an algorithm
156
+ # factory.unregister(:mszip, :compressor) #=> true
157
+ # factory.unregister(:unknown, :compressor) #=> false
158
+ # rubocop:disable Naming/PredicatePrefix
159
+ def unregister(type, category)
160
+ !@algorithms[category].delete(type).nil?
161
+ end
162
+ # rubocop:enable Naming/PredicatePrefix
163
+
164
+ private
165
+
166
+ # Register all built-in compression and decompression algorithms
167
+ #
168
+ # Registers 5 decompressors (none, lzss, mszip, lzx, quantum) and
169
+ # 4 compressors (lzss, mszip, lzx, quantum).
170
+ #
171
+ # @return [void]
172
+ def register_built_in_algorithms
173
+ # Register decompressors (5 total)
174
+ register(:none, Decompressors::None, category: :decompressor)
175
+ register(:lzss, Decompressors::LZSS, category: :decompressor)
176
+ register(:mszip, Decompressors::MSZIP, category: :decompressor)
177
+ register(:lzx, Decompressors::LZX, category: :decompressor)
178
+ register(:quantum, Decompressors::Quantum, category: :decompressor)
179
+
180
+ # Register compressors (4 total - no 'none' compressor)
181
+ register(:lzss, Compressors::LZSS, category: :compressor)
182
+ register(:mszip, Compressors::MSZIP, category: :compressor)
183
+ register(:lzx, Compressors::LZX, category: :compressor)
184
+ register(:quantum, Compressors::Quantum, category: :compressor)
185
+ end
186
+
187
+ # Normalize algorithm type from integer constant to symbol
188
+ #
189
+ # @param type [Symbol, Integer] Type to normalize
190
+ #
191
+ # @return [Symbol] Normalized type symbol
192
+ #
193
+ # @example Normalize integer constants
194
+ # normalize_type(0) #=> :none
195
+ # normalize_type(1) #=> :mszip
196
+ # normalize_type(2) #=> :quantum
197
+ # normalize_type(3) #=> :lzx
198
+ # normalize_type(:lzss) #=> :lzss
199
+ def normalize_type(type)
200
+ return type if type.is_a?(Symbol)
201
+
202
+ case type
203
+ when Constants::COMP_TYPE_NONE then :none
204
+ when Constants::COMP_TYPE_MSZIP then :mszip
205
+ when Constants::COMP_TYPE_QUANTUM then :quantum
206
+ when Constants::COMP_TYPE_LZX then :lzx
207
+ else
208
+ raise UnsupportedFormatError,
209
+ "Unsupported compression type: #{type}"
210
+ end
211
+ end
212
+
213
+ # Validate that category is valid
214
+ #
215
+ # @param category [Symbol] Category to validate
216
+ #
217
+ # @raise [ArgumentError] If category is not :compressor or :decompressor
218
+ #
219
+ # @return [void]
220
+ def validate_category!(category)
221
+ valid_categories = %i[compressor decompressor]
222
+ return if valid_categories.include?(category)
223
+
224
+ raise ArgumentError,
225
+ "Invalid category: #{category}. " \
226
+ "Must be :compressor or :decompressor"
227
+ end
228
+
229
+ # Validate that algorithm class inherits from appropriate base class
230
+ #
231
+ # @param klass [Class] Algorithm class to validate
232
+ # @param category [Symbol] Category (:compressor or :decompressor)
233
+ #
234
+ # @raise [ArgumentError] If class doesn't inherit from correct base
235
+ #
236
+ # @return [void]
237
+ def validate_algorithm_class!(klass, category)
238
+ base_class = if category == :compressor
239
+ Compressors::Base
240
+ else
241
+ Decompressors::Base
242
+ end
243
+
244
+ return if klass < base_class
245
+
246
+ raise ArgumentError,
247
+ "#{klass} must inherit from #{base_class}"
248
+ end
249
+ end
250
+ end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "file_manager"
4
+ require_relative "system/io_system"
5
+
6
+ module Cabriolet
7
+ # Abstract base class for all format compressors
8
+ #
9
+ # Implements Template Method pattern:
10
+ # - Defines common compression workflow in generate()
11
+ # - Subclasses implement format-specific hooks
12
+ #
13
+ # Provides:
14
+ # - File management via FileManager
15
+ # - Common initialization pattern
16
+ # - Template method for generation workflow
17
+ # - Hook methods for format customization
18
+ # - Helper methods for common operations
19
+ #
20
+ # Subclasses must implement:
21
+ # - build_structure(options) - Create format-specific structure
22
+ # - write_format(output_handle, structure) - Write binary data
23
+ #
24
+ # Subclasses may override:
25
+ # - validate_generation_prerequisites!(options) - Custom validation
26
+ # - post_generation_hook(output_file, structure, bytes) - Cleanup/logging
27
+ #
28
+ # @example Creating a format compressor
29
+ # class MyFormatCompressor < BaseCompressor
30
+ # protected
31
+ #
32
+ # def build_structure(options)
33
+ # { header: build_header, files: collect_files }
34
+ # end
35
+ #
36
+ # def write_format(output_handle, structure)
37
+ # io_system.write(output_handle, structure[:header].to_binary_s)
38
+ # end
39
+ # end
40
+ class BaseCompressor
41
+ attr_reader :io_system, :algorithm_factory, :file_manager
42
+
43
+ # Initialize compressor with I/O and algorithm dependencies
44
+ #
45
+ # @param io_system [System::IOSystem, nil] I/O system or nil for default
46
+ # @param algorithm_factory [AlgorithmFactory, nil] Algorithm factory or nil
47
+ def initialize(io_system = nil, algorithm_factory = nil)
48
+ @io_system = io_system || System::IOSystem.new
49
+ @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
50
+ @file_manager = FileManager.new
51
+ end
52
+
53
+ # Add file from disk to archive
54
+ #
55
+ # @param source_path [String] Path to source file
56
+ # @param archive_path [String, nil] Path in archive (nil = use basename)
57
+ # @param options [Hash] Format-specific options
58
+ # @return [FileEntry] Added entry
59
+ # @raise [ArgumentError] if file doesn't exist
60
+ def add_file(source_path, archive_path = nil, **options)
61
+ @file_manager.add_file(source_path, archive_path, **options)
62
+ end
63
+
64
+ # Add file from memory to archive
65
+ #
66
+ # @param data [String] File data
67
+ # @param archive_path [String] Path in archive
68
+ # @param options [Hash] Format-specific options
69
+ # @return [FileEntry] Added entry
70
+ def add_data(data, archive_path, **options)
71
+ @file_manager.add_data(data, archive_path, **options)
72
+ end
73
+
74
+ # Generate archive (Template Method)
75
+ #
76
+ # This method defines the compression workflow:
77
+ # 1. Validate prerequisites
78
+ # 2. Build format-specific structure
79
+ # 3. Write to output file
80
+ # 4. Post-generation hook
81
+ # 5. Return bytes written
82
+ #
83
+ # Subclasses customize via hook methods, not by overriding this method.
84
+ #
85
+ # @param output_file [String] Path to output file
86
+ # @param options [Hash] Format-specific options
87
+ # @return [Integer] Bytes written to output file
88
+ # @raise [ArgumentError] if validation fails
89
+ def generate(output_file, **options)
90
+ validate_generation_prerequisites!(options)
91
+
92
+ structure = build_structure(options)
93
+
94
+ bytes_written = write_to_file(output_file, structure)
95
+
96
+ post_generation_hook(output_file, structure, bytes_written)
97
+
98
+ bytes_written
99
+ end
100
+
101
+ protected
102
+
103
+ # Hook: Build format-specific structure
104
+ #
105
+ # Subclasses MUST implement this method to create the archive structure
106
+ # ready for writing. The structure should contain all necessary metadata,
107
+ # compressed data, headers, and calculated offsets.
108
+ #
109
+ # @param options [Hash] Generation options from generate() call
110
+ # @return [Hash] Format structure ready for writing
111
+ # @raise [NotImplementedError] if not implemented by subclass
112
+ def build_structure(options)
113
+ raise NotImplementedError,
114
+ "#{self.class.name} must implement build_structure(options)"
115
+ end
116
+
117
+ # Hook: Write format to output handle
118
+ #
119
+ # Subclasses MUST implement this method to write the format-specific
120
+ # binary data to the output handle. Should write headers, directory,
121
+ # and file data according to format specification.
122
+ #
123
+ # @param output_handle [System::FileHandle] Open output handle
124
+ # @param structure [Hash] Format structure from build_structure()
125
+ # @return [Integer] Bytes written
126
+ # @raise [NotImplementedError] if not implemented by subclass
127
+ def write_format(output_handle, structure)
128
+ raise NotImplementedError,
129
+ "#{self.class.name} must implement write_format(output_handle, structure)"
130
+ end
131
+
132
+ # Hook: Validate pre-generation requirements
133
+ #
134
+ # Subclasses CAN override this for format-specific validation.
135
+ # Default implementation checks that files have been added.
136
+ #
137
+ # @param options [Hash] Generation options
138
+ # @raise [ArgumentError] if validation fails
139
+ def validate_generation_prerequisites!(_options)
140
+ raise ArgumentError, "No files added to archive" if @file_manager.empty?
141
+ end
142
+
143
+ # Hook: Post-generation callback
144
+ #
145
+ # Subclasses CAN override this for cleanup, logging, or additional
146
+ # processing after successful generation.
147
+ #
148
+ # @param output_file [String] Path to generated file
149
+ # @param structure [Hash] Generated structure
150
+ # @param bytes_written [Integer] Bytes written to file
151
+ # @return [void]
152
+ def post_generation_hook(_output_file, _structure, _bytes_written)
153
+ # Default: no-op
154
+ nil
155
+ end
156
+
157
+ # Helper: Compress data using specified algorithm
158
+ #
159
+ # Provides unified interface for compression across all formats.
160
+ # Uses algorithm factory for extensibility.
161
+ #
162
+ # @param data [String] Data to compress
163
+ # @param algorithm [Symbol] Algorithm type (:lzss, :mszip, :lzx, :quantum)
164
+ # @param options [Hash] Compression options
165
+ # @option options [Integer] :window_bits Window size in bits
166
+ # @option options [Integer] :mode Algorithm mode
167
+ # @return [String] Compressed data
168
+ def compress_data(data, algorithm:, **options)
169
+ input = System::MemoryHandle.new(data)
170
+ output = System::MemoryHandle.new("", Constants::MODE_WRITE)
171
+
172
+ compressor = @algorithm_factory.create(
173
+ algorithm,
174
+ :compressor,
175
+ @io_system,
176
+ input,
177
+ output,
178
+ data.bytesize,
179
+ **options,
180
+ )
181
+
182
+ compressor.compress
183
+ output.data
184
+ end
185
+
186
+ private
187
+
188
+ # Write structure to file
189
+ #
190
+ # Handles file opening/closing and delegates to write_format hook
191
+ #
192
+ # @param output_file [String] Path to output file
193
+ # @param structure [Hash] Format structure
194
+ # @return [Integer] Bytes written
195
+ def write_to_file(output_file, structure)
196
+ output_handle = @io_system.open(output_file, Constants::MODE_WRITE)
197
+
198
+ begin
199
+ bytes = write_format(output_handle, structure)
200
+ bytes
201
+ ensure
202
+ @io_system.close(output_handle) if output_handle
203
+ end
204
+ end
205
+ end
206
+ end
@@ -4,22 +4,31 @@ module Cabriolet
4
4
  module Binary
5
5
  # Bitstream provides bit-level I/O operations for reading compressed data
6
6
  class Bitstream
7
- attr_reader :io_system, :handle, :buffer_size
7
+ attr_reader :io_system, :handle, :buffer_size, :bit_order
8
8
 
9
9
  # Initialize a new bitstream
10
10
  #
11
11
  # @param io_system [System::IOSystem] I/O system for reading data
12
12
  # @param handle [System::FileHandle, System::MemoryHandle] Handle to read from
13
13
  # @param buffer_size [Integer] Size of the input buffer
14
+ # @param bit_order [Symbol] Bit order (:lsb or :msb)
15
+ # @param salvage [Boolean] Salvage mode - return 0 on EOF instead of raising
14
16
  def initialize(io_system, handle,
15
- buffer_size = Cabriolet.default_buffer_size)
17
+ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
16
18
  @io_system = io_system
17
19
  @handle = handle
18
20
  @buffer_size = buffer_size
21
+ @bit_order = bit_order
22
+ @salvage = salvage
19
23
  @buffer = ""
20
24
  @buffer_pos = 0
21
25
  @bit_buffer = 0
22
26
  @bits_left = 0
27
+ @input_end = false # Track EOF state (matches libmspack's input_end flag)
28
+
29
+ # For MSB mode, we need to know the bit width of the buffer
30
+ # Ruby integers are arbitrary precision, so we use 32 bits as standard
31
+ @bitbuf_width = 32
23
32
  end
24
33
 
25
34
  # Read specified number of bits from the stream
@@ -33,31 +42,128 @@ buffer_size = Cabriolet.default_buffer_size)
33
42
  "Can only read 1-32 bits at a time"
34
43
  end
35
44
 
45
+ if @bit_order == :msb
46
+ read_bits_msb(num_bits)
47
+ else
48
+ read_bits_lsb(num_bits)
49
+ end
50
+ end
51
+
52
+ private
53
+
54
+ # Read bits in LSB-first order
55
+ #
56
+ # Per libmspack: EOF handling allows padding to avoid bitstream overrun.
57
+ # First EOF: pad with zeros (2 bytes worth). Second EOF: raise error.
58
+ def read_bits_lsb(num_bits)
36
59
  # Ensure we have enough bits in the buffer
37
60
  while @bits_left < num_bits
38
61
  byte = read_byte
39
- return 0 if byte.nil? # EOF
62
+ # First EOF: pad with zeros (matches libmspack read_input behavior)
63
+ # On second EOF, read_byte will raise DecompressionError
64
+ # In salvage mode, pad indefinitely; otherwise pad on first EOF
65
+ byte = 0 if byte.nil?
66
+
67
+ # DEBUG
68
+ if ENV["DEBUG_BITSTREAM"]
69
+ warn "DEBUG LSB read_byte: buffer_pos=#{@buffer_pos} byte=#{byte} (#{byte.to_s(2).rjust(
70
+ 8, '0'
71
+ )}) bits_left=#{@bits_left}"
72
+ end
40
73
 
74
+ # INJECT_BITS (LSB): append to the right
41
75
  @bit_buffer |= (byte << @bits_left)
42
76
  @bits_left += 8
43
77
  end
44
78
 
45
- # Extract the requested bits
79
+ # PEEK_BITS (LSB): extract from the right
46
80
  result = @bit_buffer & ((1 << num_bits) - 1)
81
+ # REMOVE_BITS (LSB): shift right
47
82
  @bit_buffer >>= num_bits
48
83
  @bits_left -= num_bits
49
84
 
85
+ # DEBUG
86
+ warn "DEBUG LSB read_bits(#{num_bits}): result=#{result} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
87
+
88
+ result
89
+ end
90
+
91
+ # Read bits in MSB-first order (libmspack LZX/Quantum style)
92
+ #
93
+ # Per libmspack readbits.h: Reads 2 bytes at a time (little-endian 16-bit word).
94
+ # EOF handling: First EOF pads with zeros, second EOF raises error.
95
+ def read_bits_msb(num_bits)
96
+ # Ensure we have enough bits in the buffer
97
+ while @bits_left < num_bits
98
+ # Read 2 bytes at a time (little-endian), like libmspack
99
+ byte0 = read_byte
100
+ if byte0.nil? && (@salvage || @input_end)
101
+ # First EOF: pad with zeros
102
+ # Second EOF: read_byte will raise DecompressionError
103
+ byte0 = 0
104
+ end
105
+
106
+ byte1 = read_byte
107
+ if byte1.nil?
108
+ # Pad with 0 if only 1 byte left (or EOF)
109
+ byte1 = 0
110
+ end
111
+
112
+ # Combine as little-endian 16-bit value
113
+ word = byte0 | (byte1 << 8)
114
+
115
+ # DEBUG
116
+ warn "DEBUG MSB read_bytes: byte0=0x#{byte0.to_s(16)} byte1=0x#{byte1.to_s(16)} word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
117
+
118
+ # INJECT_BITS (MSB): inject at the left side
119
+ # bit_buffer |= word << (BITBUF_WIDTH -16 - bits_left)
120
+ @bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
121
+ @bits_left += 16
122
+ end
123
+
124
+ # PEEK_BITS (MSB): extract from the left
125
+ # result = bit_buffer >> (BITBUF_WIDTH - num_bits)
126
+ result = @bit_buffer >> (@bitbuf_width - num_bits)
127
+
128
+ # REMOVE_BITS (MSB): shift left
129
+ @bit_buffer = (@bit_buffer << num_bits) & ((1 << @bitbuf_width) - 1)
130
+ @bits_left -= num_bits
131
+
132
+ # DEBUG
133
+ warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]
134
+
50
135
  result
51
136
  end
52
137
 
138
+ public
139
+
53
140
  # Read a single byte from the input
54
141
  #
55
- # @return [Integer, nil] Byte value or nil at EOF
142
+ # Per libmspack readbits.h: On first EOF, we pad with zeros.
143
+ # On second EOF, we raise an error (unless salvage mode).
144
+ #
145
+ # @return [Integer, nil] Byte value or nil to signal EOF padding needed
146
+ # @raise [DecompressionError] on second EOF attempt (unless salvage mode)
56
147
  def read_byte
57
148
  if @buffer_pos >= @buffer.bytesize
58
149
  @buffer = @io_system.read(@handle, @buffer_size)
59
150
  @buffer_pos = 0
60
- return nil if @buffer.empty?
151
+
152
+ if @buffer.empty?
153
+ # Hit EOF - check if this is first or second EOF
154
+ if @input_end
155
+ # Second EOF: raise error unless salvage mode
156
+ unless @salvage
157
+ raise DecompressionError, "Unexpected end of input stream"
158
+ end
159
+
160
+ # In salvage mode, keep returning nil
161
+ else
162
+ # First EOF: signal to pad with zeros (return nil)
163
+ @input_end = true
164
+ end
165
+ return nil
166
+ end
61
167
  end
62
168
 
63
169
  byte = @buffer.getbyte(@buffer_pos)
@@ -84,16 +190,50 @@ buffer_size = Cabriolet.default_buffer_size)
84
190
  "Can only peek 1-32 bits at a time"
85
191
  end
86
192
 
87
- # Ensure we have enough bits
88
- while @bits_left < num_bits
89
- byte = read_byte
90
- return 0 if byte.nil?
193
+ if @bit_order == :msb
194
+ # Ensure we have enough bits
195
+ while @bits_left < num_bits
196
+ # Read 2 bytes at a time (little-endian), like libmspack
197
+ byte0 = read_byte
198
+ if byte0.nil?
199
+ # At EOF: break and work with remaining bits
200
+ break
201
+ end
91
202
 
92
- @bit_buffer |= (byte << @bits_left)
93
- @bits_left += 8
94
- end
203
+ byte1 = read_byte
204
+ byte1 = 0 if byte1.nil?
205
+
206
+ # Combine as little-endian 16-bit value
207
+ word = byte0 | (byte1 << 8)
95
208
 
96
- @bit_buffer & ((1 << num_bits) - 1)
209
+ # INJECT_BITS (MSB): inject at the left side
210
+ @bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
211
+ @bits_left += 16
212
+ end
213
+
214
+ # PEEK_BITS (MSB): extract from the left
215
+ # If we have fewer than num_bits available, result may be incorrect
216
+ # but this matches EOF handling behavior
217
+ @bit_buffer >> (@bitbuf_width - num_bits)
218
+ else
219
+ # Ensure we have enough bits (LSB mode)
220
+ while @bits_left < num_bits
221
+ byte = read_byte
222
+ if byte.nil?
223
+ # At EOF: pad remaining bits with zeros and continue
224
+ # This matches libmspack behavior where peek can use partial bits
225
+ # The missing high bits are implicitly 0
226
+ break
227
+ end
228
+
229
+ @bit_buffer |= (byte << @bits_left)
230
+ @bits_left += 8
231
+ end
232
+
233
+ # Extract num_bits from bit_buffer
234
+ # If we have fewer than num_bits, the high bits will be 0
235
+ @bit_buffer & ((1 << num_bits) - 1)
236
+ end
97
237
  end
98
238
 
99
239
  # Skip specified number of bits