cabriolet 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/README.adoc +703 -38
  3. data/lib/cabriolet/algorithm_factory.rb +250 -0
  4. data/lib/cabriolet/base_compressor.rb +206 -0
  5. data/lib/cabriolet/binary/bitstream.rb +167 -16
  6. data/lib/cabriolet/binary/bitstream_writer.rb +150 -21
  7. data/lib/cabriolet/binary/chm_structures.rb +2 -2
  8. data/lib/cabriolet/binary/hlp_structures.rb +258 -37
  9. data/lib/cabriolet/binary/lit_structures.rb +231 -65
  10. data/lib/cabriolet/binary/oab_structures.rb +17 -1
  11. data/lib/cabriolet/cab/command_handler.rb +226 -0
  12. data/lib/cabriolet/cab/compressor.rb +108 -84
  13. data/lib/cabriolet/cab/decompressor.rb +16 -20
  14. data/lib/cabriolet/cab/extractor.rb +142 -66
  15. data/lib/cabriolet/cab/file_compression_work.rb +52 -0
  16. data/lib/cabriolet/cab/file_compression_worker.rb +89 -0
  17. data/lib/cabriolet/checksum.rb +49 -0
  18. data/lib/cabriolet/chm/command_handler.rb +227 -0
  19. data/lib/cabriolet/chm/compressor.rb +7 -3
  20. data/lib/cabriolet/chm/decompressor.rb +39 -21
  21. data/lib/cabriolet/chm/parser.rb +5 -2
  22. data/lib/cabriolet/cli/base_command_handler.rb +127 -0
  23. data/lib/cabriolet/cli/command_dispatcher.rb +140 -0
  24. data/lib/cabriolet/cli/command_registry.rb +83 -0
  25. data/lib/cabriolet/cli.rb +356 -607
  26. data/lib/cabriolet/collections/file_collection.rb +175 -0
  27. data/lib/cabriolet/compressors/base.rb +1 -1
  28. data/lib/cabriolet/compressors/lzx.rb +241 -54
  29. data/lib/cabriolet/compressors/mszip.rb +35 -3
  30. data/lib/cabriolet/compressors/quantum.rb +36 -95
  31. data/lib/cabriolet/decompressors/base.rb +1 -1
  32. data/lib/cabriolet/decompressors/lzss.rb +13 -3
  33. data/lib/cabriolet/decompressors/lzx.rb +70 -33
  34. data/lib/cabriolet/decompressors/mszip.rb +126 -39
  35. data/lib/cabriolet/decompressors/quantum.rb +83 -53
  36. data/lib/cabriolet/errors.rb +3 -0
  37. data/lib/cabriolet/extraction/base_extractor.rb +88 -0
  38. data/lib/cabriolet/extraction/extractor.rb +171 -0
  39. data/lib/cabriolet/extraction/file_extraction_work.rb +60 -0
  40. data/lib/cabriolet/extraction/file_extraction_worker.rb +106 -0
  41. data/lib/cabriolet/file_entry.rb +156 -0
  42. data/lib/cabriolet/file_manager.rb +144 -0
  43. data/lib/cabriolet/format_base.rb +79 -0
  44. data/lib/cabriolet/hlp/command_handler.rb +282 -0
  45. data/lib/cabriolet/hlp/compressor.rb +28 -238
  46. data/lib/cabriolet/hlp/decompressor.rb +107 -147
  47. data/lib/cabriolet/hlp/parser.rb +52 -101
  48. data/lib/cabriolet/hlp/quickhelp/compression_stream.rb +138 -0
  49. data/lib/cabriolet/hlp/quickhelp/compressor.rb +151 -0
  50. data/lib/cabriolet/hlp/quickhelp/decompressor.rb +558 -0
  51. data/lib/cabriolet/hlp/quickhelp/file_writer.rb +125 -0
  52. data/lib/cabriolet/hlp/quickhelp/huffman_stream.rb +74 -0
  53. data/lib/cabriolet/hlp/quickhelp/huffman_tree.rb +167 -0
  54. data/lib/cabriolet/hlp/quickhelp/offset_calculator.rb +61 -0
  55. data/lib/cabriolet/hlp/quickhelp/parser.rb +274 -0
  56. data/lib/cabriolet/hlp/quickhelp/structure_builder.rb +93 -0
  57. data/lib/cabriolet/hlp/quickhelp/topic_builder.rb +52 -0
  58. data/lib/cabriolet/hlp/quickhelp/topic_compressor.rb +83 -0
  59. data/lib/cabriolet/hlp/winhelp/btree_builder.rb +289 -0
  60. data/lib/cabriolet/hlp/winhelp/compressor.rb +400 -0
  61. data/lib/cabriolet/hlp/winhelp/decompressor.rb +192 -0
  62. data/lib/cabriolet/hlp/winhelp/parser.rb +484 -0
  63. data/lib/cabriolet/hlp/winhelp/zeck_lz77.rb +271 -0
  64. data/lib/cabriolet/huffman/encoder.rb +15 -12
  65. data/lib/cabriolet/huffman/tree.rb +85 -1
  66. data/lib/cabriolet/kwaj/command_handler.rb +213 -0
  67. data/lib/cabriolet/kwaj/compressor.rb +7 -3
  68. data/lib/cabriolet/kwaj/decompressor.rb +18 -12
  69. data/lib/cabriolet/lit/command_handler.rb +221 -0
  70. data/lib/cabriolet/lit/compressor.rb +119 -168
  71. data/lib/cabriolet/lit/content_encoder.rb +76 -0
  72. data/lib/cabriolet/lit/content_type_detector.rb +50 -0
  73. data/lib/cabriolet/lit/decompressor.rb +518 -152
  74. data/lib/cabriolet/lit/directory_builder.rb +153 -0
  75. data/lib/cabriolet/lit/guid_generator.rb +16 -0
  76. data/lib/cabriolet/lit/header_writer.rb +124 -0
  77. data/lib/cabriolet/lit/parser.rb +670 -0
  78. data/lib/cabriolet/lit/piece_builder.rb +74 -0
  79. data/lib/cabriolet/lit/structure_builder.rb +252 -0
  80. data/lib/cabriolet/models/hlp_file.rb +130 -29
  81. data/lib/cabriolet/models/hlp_header.rb +105 -17
  82. data/lib/cabriolet/models/lit_header.rb +212 -25
  83. data/lib/cabriolet/models/szdd_header.rb +10 -2
  84. data/lib/cabriolet/models/winhelp_header.rb +127 -0
  85. data/lib/cabriolet/oab/command_handler.rb +257 -0
  86. data/lib/cabriolet/oab/compressor.rb +17 -8
  87. data/lib/cabriolet/oab/decompressor.rb +41 -10
  88. data/lib/cabriolet/offset_calculator.rb +81 -0
  89. data/lib/cabriolet/plugin.rb +233 -0
  90. data/lib/cabriolet/plugin_manager.rb +453 -0
  91. data/lib/cabriolet/plugin_validator.rb +422 -0
  92. data/lib/cabriolet/quantum_shared.rb +105 -0
  93. data/lib/cabriolet/system/io_system.rb +3 -0
  94. data/lib/cabriolet/system/memory_handle.rb +17 -4
  95. data/lib/cabriolet/szdd/command_handler.rb +217 -0
  96. data/lib/cabriolet/szdd/compressor.rb +15 -11
  97. data/lib/cabriolet/szdd/decompressor.rb +18 -9
  98. data/lib/cabriolet/version.rb +1 -1
  99. data/lib/cabriolet.rb +181 -20
  100. metadata +69 -4
  101. data/lib/cabriolet/auto.rb +0 -173
  102. data/lib/cabriolet/parallel.rb +0 -333
@@ -0,0 +1,250 @@
1
+ # frozen_string_literal: true
2
+
3
module Cabriolet
  # Factory for creating and managing compression/decompression algorithms
  #
  # The AlgorithmFactory provides a centralized registry for compression and
  # decompression algorithms. It handles algorithm registration, validation,
  # instantiation, and type normalization.
  #
  # @example Register and create an algorithm
  #   factory = AlgorithmFactory.new
  #   factory.register(:custom, CustomCompressor, category: :compressor)
  #   algorithm = factory.create(:custom, :compressor, io, input, output, 4096)
  #
  # @example Use with integer type constants
  #   # Constants::COMP_TYPE_MSZIP (1) is normalized to :mszip
  #   algorithm = factory.create(1, :decompressor, io, input, output, 4096)
  class AlgorithmFactory
    # Categories an algorithm may be registered under
    VALID_CATEGORIES = %i[compressor decompressor].freeze
    private_constant :VALID_CATEGORIES

    # @return [Hash] Registry of algorithms by category and type
    attr_reader :algorithms

    # Initialize a new algorithm factory
    #
    # @param auto_register [Boolean] Whether to automatically register
    #   built-in algorithms
    def initialize(auto_register: true)
      @algorithms = { compressor: {}, decompressor: {} }
      register_built_in_algorithms if auto_register
    end

    # Register an algorithm in the factory
    #
    # Registering a type that already exists in the category replaces the
    # previous entry.
    #
    # @param type [Symbol] Algorithm type (:none, :mszip, :lzx, :quantum,
    #   :lzss)
    # @param algorithm_class [Class] Algorithm class (must inherit from
    #   Compressors::Base or Decompressors::Base)
    # @param options [Hash] Registration options
    # @option options [Symbol] :category Required - :compressor or
    #   :decompressor
    # @option options [Integer] :priority Priority for selection (default: 0)
    # @option options [Symbol, nil] :format Format restriction (optional)
    #
    # @return [self] Returns self for method chaining
    #
    # @raise [ArgumentError] If category is invalid
    # @raise [ArgumentError] If algorithm_class is not a class inheriting
    #   from the category's Base
    #
    # @example Register a custom compressor
    #   factory.register(:custom, MyCompressor,
    #                    category: :compressor, priority: 10)
    #
    # @example Chain multiple registrations
    #   factory
    #     .register(:algo1, Algo1, category: :compressor)
    #     .register(:algo2, Algo2, category: :decompressor)
    def register(type, algorithm_class, **options)
      category = options[:category]
      validate_category!(category)
      validate_algorithm_class!(algorithm_class, category)

      @algorithms[category][type] = {
        class: algorithm_class,
        priority: options.fetch(:priority, 0),
        format: options[:format],
      }

      self
    end

    # Create an instance of a registered algorithm
    #
    # @param type [Symbol, Integer] Algorithm type (symbol or constant)
    # @param category [Symbol] Category (:compressor or :decompressor)
    # @param io_system [System::IOSystem] I/O system for operations
    # @param input [System::FileHandle, System::MemoryHandle] Input handle
    # @param output [System::FileHandle, System::MemoryHandle] Output handle
    # @param buffer_size [Integer] Buffer size for I/O operations
    # @param kwargs [Hash] Additional keyword arguments for algorithm
    #
    # @return [Compressors::Base, Decompressors::Base] Algorithm instance
    #
    # @raise [ArgumentError] If category is invalid
    # @raise [UnsupportedFormatError] If algorithm type not registered
    #
    # @example Create a decompressor
    #   decompressor = factory.create(:mszip, :decompressor,
    #                                 io, input, output, 4096)
    #
    # @example Create with integer constant
    #   # Constants::COMP_TYPE_LZX (3) -> :lzx
    #   compressor = factory.create(3, :compressor,
    #                               io, input, output, 8192)
    def create(type, category, io_system, input, output, buffer_size,
               **kwargs)
      validate_category!(category)

      normalized_type = normalize_type(type)
      algorithm_info = @algorithms[category][normalized_type]

      unless algorithm_info
        raise UnsupportedFormatError,
              "Unknown #{category} algorithm: #{normalized_type}"
      end

      algorithm_info[:class].new(io_system, input, output, buffer_size,
                                 **kwargs)
    end

    # Check if an algorithm is registered
    #
    # The type is looked up exactly as given; integer constants are not
    # normalized here. An unknown category yields false.
    #
    # @param type [Symbol] Algorithm type
    # @param category [Symbol] Category (:compressor or :decompressor)
    #
    # @return [Boolean] True if registered, false otherwise
    #
    # @example Check registration
    #   factory.registered?(:mszip, :compressor)   #=> true
    #   factory.registered?(:unknown, :compressor) #=> false
    def registered?(type, category)
      @algorithms[category]&.key?(type) || false
    end

    # List registered algorithms
    #
    # The returned hashes are shallow copies: adding or removing keys does
    # not affect the registry, but the per-algorithm entry hashes are shared.
    #
    # @param category [Symbol, nil] Optional category filter
    #
    # @return [Hash] Hash of registered algorithms ({} for an unknown
    #   category)
    #
    # @example List all algorithms
    #   factory.list
    #   #=> { compressor: { mszip: {...}, lzx: {...} },
    #   #     decompressor: { none: {...}, mszip: {...} } }
    #
    # @example List compressors only
    #   factory.list(:compressor)
    #   #=> { mszip: {...}, lzx: {...}, quantum: {...}, lzss: {...} }
    def list(category = nil)
      if category.nil?
        {
          compressor: @algorithms[:compressor].dup,
          decompressor: @algorithms[:decompressor].dup,
        }
      else
        @algorithms[category]&.dup || {}
      end
    end

    # Unregister an algorithm
    #
    # An unknown category is treated like a missing algorithm and returns
    # false, consistent with registered? and list.
    #
    # @param type [Symbol] Algorithm type to remove
    # @param category [Symbol] Category (:compressor or :decompressor)
    #
    # @return [Boolean] True if removed, false if not found
    #
    # @example Unregister an algorithm
    #   factory.unregister(:mszip, :compressor)   #=> true
    #   factory.unregister(:unknown, :compressor) #=> false
    # rubocop:disable Naming/PredicatePrefix
    def unregister(type, category)
      registry = @algorithms[category]
      return false unless registry

      !registry.delete(type).nil?
    end
    # rubocop:enable Naming/PredicatePrefix

    private

    # Register all built-in compression and decompression algorithms
    #
    # Registers 5 decompressors (none, lzss, mszip, lzx, quantum) and
    # 4 compressors (lzss, mszip, lzx, quantum).
    #
    # @return [void]
    def register_built_in_algorithms
      # Register decompressors (5 total)
      register(:none, Decompressors::None, category: :decompressor)
      register(:lzss, Decompressors::LZSS, category: :decompressor)
      register(:mszip, Decompressors::MSZIP, category: :decompressor)
      register(:lzx, Decompressors::LZX, category: :decompressor)
      register(:quantum, Decompressors::Quantum, category: :decompressor)

      # Register compressors (4 total - no 'none' compressor)
      register(:lzss, Compressors::LZSS, category: :compressor)
      register(:mszip, Compressors::MSZIP, category: :compressor)
      register(:lzx, Compressors::LZX, category: :compressor)
      register(:quantum, Compressors::Quantum, category: :compressor)
    end

    # Normalize algorithm type from integer constant to symbol
    #
    # Symbols are returned unchanged, whether or not they are registered.
    #
    # @param type [Symbol, Integer] Type to normalize
    #
    # @return [Symbol] Normalized type symbol
    #
    # @raise [UnsupportedFormatError] If a non-symbol type matches none of
    #   the known compression type constants
    #
    # @example Normalize integer constants
    #   normalize_type(0)     #=> :none
    #   normalize_type(1)     #=> :mszip
    #   normalize_type(2)     #=> :quantum
    #   normalize_type(3)     #=> :lzx
    #   normalize_type(:lzss) #=> :lzss
    def normalize_type(type)
      return type if type.is_a?(Symbol)

      case type
      when Constants::COMP_TYPE_NONE then :none
      when Constants::COMP_TYPE_MSZIP then :mszip
      when Constants::COMP_TYPE_QUANTUM then :quantum
      when Constants::COMP_TYPE_LZX then :lzx
      else
        raise UnsupportedFormatError,
              "Unsupported compression type: #{type}"
      end
    end

    # Validate that category is valid
    #
    # @param category [Symbol] Category to validate
    #
    # @raise [ArgumentError] If category is not :compressor or :decompressor
    #
    # @return [void]
    def validate_category!(category)
      return if VALID_CATEGORIES.include?(category)

      raise ArgumentError,
            "Invalid category: #{category}. " \
            "Must be :compressor or :decompressor"
    end

    # Validate that algorithm class inherits from appropriate base class
    #
    # The base class itself is rejected; only strict subclasses pass.
    #
    # @param klass [Class] Algorithm class to validate
    # @param category [Symbol] Category (:compressor or :decompressor)
    #
    # @raise [ArgumentError] If klass is not a class or doesn't inherit
    #   from the correct base
    #
    # @return [void]
    def validate_algorithm_class!(klass, category)
      base_class = if category == :compressor
                     Compressors::Base
                   else
                     Decompressors::Base
                   end

      # Check is_a?(Class) first: `<` on nil or an arbitrary object would
      # raise NoMethodError instead of the documented ArgumentError.
      return if klass.is_a?(Class) && klass < base_class

      raise ArgumentError,
            "#{klass.inspect} must inherit from #{base_class}"
    end
  end
end
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "file_manager"
4
+ require_relative "system/io_system"
5
+
6
module Cabriolet
  # Abstract parent for the per-format archive compressors.
  #
  # Follows the Template Method pattern: #generate fixes the order of the
  # steps, and subclasses fill in the format-specific hooks.
  #
  # What this class supplies:
  # - a FileManager that collects the entries to be archived
  # - a shared constructor taking the I/O system and algorithm factory
  # - the #generate workflow
  # - overridable hooks and a #compress_data helper
  #
  # Hooks a subclass must define:
  # - build_structure(options) - assemble the format-specific structure
  # - write_format(output_handle, structure) - emit the binary data
  #
  # Hooks a subclass may redefine:
  # - validate_generation_prerequisites!(options) - extra validation
  # - post_generation_hook(output_file, structure, bytes) - cleanup/logging
  #
  # @example Creating a format compressor
  #   class MyFormatCompressor < BaseCompressor
  #     protected
  #
  #     def build_structure(options)
  #       { header: build_header, files: collect_files }
  #     end
  #
  #     def write_format(output_handle, structure)
  #       io_system.write(output_handle, structure[:header].to_binary_s)
  #     end
  #   end
  class BaseCompressor
    attr_reader :io_system, :algorithm_factory, :file_manager

    # Set up the compressor's collaborators.
    #
    # @param io_system [System::IOSystem, nil] I/O system; a new
    #   System::IOSystem is created when nil
    # @param algorithm_factory [AlgorithmFactory, nil] Algorithm factory;
    #   Cabriolet.algorithm_factory is used when nil
    def initialize(io_system = nil, algorithm_factory = nil)
      @io_system = io_system || System::IOSystem.new
      @algorithm_factory = algorithm_factory || Cabriolet.algorithm_factory
      @file_manager = FileManager.new
    end

    # Queue a file on disk for inclusion in the archive.
    # Delegates to FileManager#add_file.
    #
    # @param source_path [String] Path to source file
    # @param archive_path [String, nil] Path in archive (nil = use basename)
    # @param options [Hash] Format-specific options
    # @return [FileEntry] Added entry
    # @raise [ArgumentError] if file doesn't exist
    def add_file(source_path, archive_path = nil, **options)
      @file_manager.add_file(source_path, archive_path, **options)
    end

    # Queue in-memory data for inclusion in the archive.
    # Delegates to FileManager#add_data.
    #
    # @param data [String] File data
    # @param archive_path [String] Path in archive
    # @param options [Hash] Format-specific options
    # @return [FileEntry] Added entry
    def add_data(data, archive_path, **options)
      @file_manager.add_data(data, archive_path, **options)
    end

    # Produce the archive (Template Method).
    #
    # Steps, in order:
    # 1. validate_generation_prerequisites!
    # 2. build_structure
    # 3. open the output file, run write_format, close the file
    # 4. post_generation_hook
    # 5. return the byte count reported by write_format
    #
    # Customize through the hooks rather than by overriding this method.
    #
    # @param output_file [String] Path to output file
    # @param options [Hash] Format-specific options
    # @return [Integer] Bytes written to output file
    # @raise [ArgumentError] if validation fails
    def generate(output_file, **options)
      validate_generation_prerequisites!(options)

      archive = build_structure(options)

      write_to_file(output_file, archive).tap do |written|
        post_generation_hook(output_file, archive, written)
      end
    end

    protected

    # Hook: assemble the format-specific structure.
    #
    # Subclasses MUST implement this. The returned structure is handed
    # unchanged to write_format and post_generation_hook.
    #
    # @param options [Hash] Generation options from generate() call
    # @return [Hash] Format structure ready for writing
    # @raise [NotImplementedError] if not implemented by subclass
    def build_structure(options)
      raise NotImplementedError,
            "#{self.class.name} must implement build_structure(options)"
    end

    # Hook: write the format's binary data to the open output handle.
    #
    # Subclasses MUST implement this. Its return value becomes the return
    # value of generate().
    #
    # @param output_handle [System::FileHandle] Open output handle
    # @param structure [Hash] Format structure from build_structure()
    # @return [Integer] Bytes written
    # @raise [NotImplementedError] if not implemented by subclass
    def write_format(output_handle, structure)
      raise NotImplementedError,
            "#{self.class.name} must implement write_format(output_handle, structure)"
    end

    # Hook: check that generation may proceed.
    #
    # Subclasses CAN override this for format-specific validation. The
    # default only requires that the file manager is not empty.
    #
    # @param options [Hash] Generation options
    # @raise [ArgumentError] if validation fails
    def validate_generation_prerequisites!(_options)
      return unless @file_manager.empty?

      raise ArgumentError, "No files added to archive"
    end

    # Hook: called after the output file has been written and closed.
    #
    # Subclasses CAN override this for cleanup, logging, or additional
    # processing. The default does nothing.
    #
    # @param output_file [String] Path to generated file
    # @param structure [Hash] Generated structure
    # @param bytes_written [Integer] Bytes written to file
    # @return [void]
    def post_generation_hook(_output_file, _structure, _bytes_written); end

    # Helper: compress a string in memory with the given algorithm.
    #
    # Wraps the data and an empty output buffer in memory handles, asks the
    # algorithm factory for a compressor (buffer size = data.bytesize), runs
    # it, and returns the output buffer's contents.
    #
    # @param data [String] Data to compress
    # @param algorithm [Symbol] Algorithm type (:lzss, :mszip, :lzx, :quantum)
    # @param options [Hash] Compression options
    # @option options [Integer] :window_bits Window size in bits
    # @option options [Integer] :mode Algorithm mode
    # @return [String] Compressed data
    def compress_data(data, algorithm:, **options)
      source = System::MemoryHandle.new(data)
      sink = System::MemoryHandle.new("", Constants::MODE_WRITE)

      @algorithm_factory
        .create(algorithm, :compressor, @io_system, source, sink,
                data.bytesize, **options)
        .compress

      sink.data
    end

    private

    # Open the output file, delegate to the write_format hook, and close the
    # handle even when the hook raises.
    #
    # @param output_file [String] Path to output file
    # @param structure [Hash] Format structure
    # @return [Integer] Bytes written
    def write_to_file(output_file, structure)
      handle = @io_system.open(output_file, Constants::MODE_WRITE)

      begin
        write_format(handle, structure)
      ensure
        @io_system.close(handle) if handle
      end
    end
  end
end
@@ -4,22 +4,31 @@ module Cabriolet
4
4
  module Binary
5
5
  # Bitstream provides bit-level I/O operations for reading compressed data
6
6
  class Bitstream
7
- attr_reader :io_system, :handle, :buffer_size
7
+ attr_reader :io_system, :handle, :buffer_size, :bit_order
8
8
 
9
9
  # Initialize a new bitstream
10
10
  #
11
11
  # @param io_system [System::IOSystem] I/O system for reading data
12
12
  # @param handle [System::FileHandle, System::MemoryHandle] Handle to read from
13
13
  # @param buffer_size [Integer] Size of the input buffer
14
+ # @param bit_order [Symbol] Bit order (:lsb or :msb)
15
+ # @param salvage [Boolean] Salvage mode - return 0 on EOF instead of raising
14
16
  def initialize(io_system, handle,
15
- buffer_size = Cabriolet.default_buffer_size)
17
+ buffer_size = Cabriolet.default_buffer_size, bit_order: :lsb, salvage: false)
16
18
  @io_system = io_system
17
19
  @handle = handle
18
20
  @buffer_size = buffer_size
21
+ @bit_order = bit_order
22
+ @salvage = salvage
19
23
  @buffer = ""
20
24
  @buffer_pos = 0
21
25
  @bit_buffer = 0
22
26
  @bits_left = 0
27
+ @input_end = false # Track EOF state (matches libmspack's input_end flag)
28
+
29
+ # For MSB mode, we need to know the bit width of the buffer
30
+ # Ruby integers are arbitrary precision, so we use 32 bits as standard
31
+ @bitbuf_width = 32
23
32
  end
24
33
 
25
34
  # Read specified number of bits from the stream
@@ -33,31 +42,129 @@ buffer_size = Cabriolet.default_buffer_size)
33
42
  "Can only read 1-32 bits at a time"
34
43
  end
35
44
 
45
+ if @bit_order == :msb
46
+ read_bits_msb(num_bits)
47
+ else
48
+ read_bits_lsb(num_bits)
49
+ end
50
+ end
51
+
52
+ private
53
+
54
# Read 2 bytes as a little-endian 16-bit word for MSB mode.
# Used by read_bits_msb to refill the bit buffer.
#
# read_byte returns nil to signal that EOF padding is needed; either byte
# that comes back nil is replaced by 0, so this method always returns an
# Integer. Any error raised by read_byte propagates unchanged.
#
# @return [Integer] 16-bit word (first byte read is the low byte)
def read_msb_word
  # Pad both bytes the same way: a nil first byte must never reach `|`.
  low_byte = read_byte || 0
  high_byte = read_byte || 0

  low_byte | (high_byte << 8)
end
71
+
72
# Consume bits in LSB-first order.
#
# Bytes from read_byte are stacked above the bits already buffered. When
# read_byte returns nil (its signal that EOF padding is needed) a zero byte
# is used instead, so a read that runs into EOF is padded with zero bits.
# Set DEBUG_BITSTREAM in the environment to trace every refill and read.
#
# @param num_bits [Integer] Number of bits to read
# @return [Integer] Bits as an integer
def read_bits_lsb(num_bits)
  # Refill one byte at a time until the request can be satisfied
  until @bits_left >= num_bits
    byte = read_byte || 0

    if ENV["DEBUG_BITSTREAM"]
      warn "DEBUG LSB read_byte: buffer_pos=#{@buffer_pos} byte=#{byte} (#{byte.to_s(2).rjust(8, '0')}) bits_left=#{@bits_left}"
    end

    # INJECT_BITS (LSB): new byte goes above the bits already held
    @bit_buffer |= (byte << @bits_left)
    @bits_left += 8
  end

  # PEEK_BITS (LSB): the requested bits are the lowest ones
  mask = (1 << num_bits) - 1
  value = @bit_buffer & mask

  # REMOVE_BITS (LSB): drop what was just consumed
  @bit_buffer >>= num_bits
  @bits_left -= num_bits

  if ENV["DEBUG_BITSTREAM"]
    warn "DEBUG LSB read_bits(#{num_bits}): result=#{value} buffer=#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}"
  end

  value
end
52
108
 
109
# Read bits in MSB-first order (libmspack LZX/Quantum style)
#
# Per libmspack readbits.h: the buffer is refilled 2 bytes at a time
# (little-endian 16-bit word from read_msb_word) and kept left-aligned in a
# window of @bitbuf_width bits. EOF handling is whatever read_msb_word and
# read_byte provide.
#
# A refill is injected at offset (@bitbuf_width - 16 - @bits_left). That
# offset is only non-negative while at most (@bitbuf_width - 16) bits are
# buffered; with a negative count Ruby's << shifts right and silently drops
# bits. Requests wider than (@bitbuf_width - 16) bits are therefore served
# as a high part followed by a low part, which consumes the same stream
# bits and leaves the same buffer state as a single correct wide read.
# Assumes @bitbuf_width > 16 (initialize sets it to 32).
#
# @param num_bits [Integer] Number of bits to read
# @return [Integer] Bits as an integer, first bit read most significant
def read_bits_msb(num_bits)
  chunk_limit = @bitbuf_width - 16
  if num_bits > chunk_limit
    upper = read_bits_msb(num_bits - chunk_limit)
    return (upper << chunk_limit) | read_bits_msb(chunk_limit)
  end

  # Ensure we have enough bits in the buffer
  while @bits_left < num_bits
    word = read_msb_word

    # DEBUG
    warn "DEBUG MSB read_bytes: word=0x#{word.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]

    # INJECT_BITS (MSB): inject at the left side, just below the held bits
    @bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
    @bits_left += 16
  end

  # PEEK_BITS (MSB): extract from the left
  result = @bit_buffer >> (@bitbuf_width - num_bits)

  # REMOVE_BITS (MSB): shift left, keeping the buffer within its width
  @bit_buffer = (@bit_buffer << num_bits) & ((1 << @bitbuf_width) - 1)
  @bits_left -= num_bits

  # DEBUG
  warn "DEBUG MSB read_bits(#{num_bits}) result=#{result} (0x#{result.to_s(16)}) buffer=0x#{@bit_buffer.to_s(16)} bits_left=#{@bits_left}" if ENV["DEBUG_BITSTREAM"]

  result
end
138
+
139
+ public
140
+
53
141
  # Read a single byte from the input
54
142
  #
55
- # @return [Integer, nil] Byte value or nil at EOF
143
+ # Per libmspack readbits.h: On first EOF, we pad with zeros.
144
+ # On second EOF, we raise an error (unless salvage mode).
145
+ #
146
+ # @return [Integer, nil] Byte value or nil to signal EOF padding needed
147
+ # @raise [DecompressionError] on second EOF attempt (unless salvage mode)
56
148
  def read_byte
57
149
  if @buffer_pos >= @buffer.bytesize
58
150
  @buffer = @io_system.read(@handle, @buffer_size)
59
151
  @buffer_pos = 0
60
- return nil if @buffer.empty?
152
+
153
+ if @buffer.empty?
154
+ # Hit EOF - check if this is first or second EOF
155
+ if @input_end
156
+ # Second EOF: raise error unless salvage mode
157
+ unless @salvage
158
+ raise DecompressionError, "Unexpected end of input stream"
159
+ end
160
+
161
+ # In salvage mode, keep returning nil
162
+ else
163
+ # First EOF: signal to pad with zeros (return nil)
164
+ @input_end = true
165
+ end
166
+ return nil
167
+ end
61
168
  end
62
169
 
63
170
  byte = @buffer.getbyte(@buffer_pos)
@@ -84,16 +191,50 @@ buffer_size = Cabriolet.default_buffer_size)
84
191
  "Can only peek 1-32 bits at a time"
85
192
  end
86
193
 
87
- # Ensure we have enough bits
88
- while @bits_left < num_bits
89
- byte = read_byte
90
- return 0 if byte.nil?
194
+ if @bit_order == :msb
195
+ # Ensure we have enough bits
196
+ while @bits_left < num_bits
197
+ # Read 2 bytes at a time (little-endian), like libmspack
198
+ byte0 = read_byte
199
+ if byte0.nil?
200
+ # At EOF: break and work with remaining bits
201
+ break
202
+ end
91
203
 
92
- @bit_buffer |= (byte << @bits_left)
93
- @bits_left += 8
94
- end
204
+ byte1 = read_byte
205
+ byte1 = 0 if byte1.nil?
206
+
207
+ # Combine as little-endian 16-bit value
208
+ word = byte0 | (byte1 << 8)
209
+
210
+ # INJECT_BITS (MSB): inject at the left side
211
+ @bit_buffer |= (word << (@bitbuf_width - 16 - @bits_left))
212
+ @bits_left += 16
213
+ end
214
+
215
+ # PEEK_BITS (MSB): extract from the left
216
+ # If we have fewer than num_bits available, result may be incorrect
217
+ # but this matches EOF handling behavior
218
+ @bit_buffer >> (@bitbuf_width - num_bits)
219
+ else
220
+ # Ensure we have enough bits (LSB mode)
221
+ while @bits_left < num_bits
222
+ byte = read_byte
223
+ if byte.nil?
224
+ # At EOF: pad remaining bits with zeros and continue
225
+ # This matches libmspack behavior where peek can use partial bits
226
+ # The missing high bits are implicitly 0
227
+ break
228
+ end
229
+
230
+ @bit_buffer |= (byte << @bits_left)
231
+ @bits_left += 8
232
+ end
95
233
 
96
- @bit_buffer & ((1 << num_bits) - 1)
234
+ # Extract num_bits from bit_buffer
235
+ # If we have fewer than num_bits, the high bits will be 0
236
+ @bit_buffer & ((1 << num_bits) - 1)
237
+ end
97
238
  end
98
239
 
99
240
  # Skip specified number of bits
@@ -111,9 +252,19 @@ buffer_size = Cabriolet.default_buffer_size)
111
252
  # @return [Integer] Bits as an integer
112
253
  def read_bits_be(num_bits)
113
254
  result = 0
114
- num_bits.times do
115
- result = (result << 1) | read_bits(1)
255
+ full_bytes = num_bits / 8
256
+ remaining_bits = num_bits % 8
257
+
258
+ # Read full bytes first (more efficient than bit-by-bit)
259
+ full_bytes.times do
260
+ result = (result << 8) | read_bits(8)
116
261
  end
262
+
263
+ # Read remaining bits
264
+ if remaining_bits.positive?
265
+ result = (result << remaining_bits) | read_bits(remaining_bits)
266
+ end
267
+
117
268
  result
118
269
  end
119
270