RubyGems - omnizip - Versions diffs - 0.3.2 → 0.3.4 - Mend

omnizip 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (86)

checksums.yaml +4 -4
data/.rubocop_todo.yml +243 -368
data/README.adoc +101 -5
data/docs/guides/archive-formats/index.adoc +31 -1
data/docs/guides/archive-formats/ole-format.adoc +316 -0
data/docs/guides/archive-formats/rpm-format.adoc +249 -0
data/docs/index.adoc +12 -2
data/lib/omnizip/algorithms/lzma/distance_coder.rb +29 -18
data/lib/omnizip/algorithms/lzma/encoder.rb +2 -1
data/lib/omnizip/algorithms/lzma/length_coder.rb +6 -3
data/lib/omnizip/algorithms/lzma/literal_decoder.rb +2 -1
data/lib/omnizip/algorithms/lzma/lzip_decoder.rb +40 -13
data/lib/omnizip/algorithms/lzma/range_decoder.rb +36 -2
data/lib/omnizip/algorithms/lzma/range_encoder.rb +19 -0
data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb +2 -1
data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb +148 -112
data/lib/omnizip/algorithms/lzma.rb +20 -5
data/lib/omnizip/algorithms/ppmd7/decoder.rb +25 -21
data/lib/omnizip/algorithms/ppmd7/encoder.rb +4 -11
data/lib/omnizip/algorithms/sevenzip_lzma2.rb +2 -1
data/lib/omnizip/algorithms/xz_lzma2.rb +2 -1
data/lib/omnizip/algorithms/zstandard/constants.rb +125 -9
data/lib/omnizip/algorithms/zstandard/decoder.rb +202 -17
data/lib/omnizip/algorithms/zstandard/encoder.rb +197 -17
data/lib/omnizip/algorithms/zstandard/frame/block.rb +128 -0
data/lib/omnizip/algorithms/zstandard/frame/header.rb +224 -0
data/lib/omnizip/algorithms/zstandard/fse/bitstream.rb +186 -0
data/lib/omnizip/algorithms/zstandard/fse/encoder.rb +325 -0
data/lib/omnizip/algorithms/zstandard/fse/table.rb +269 -0
data/lib/omnizip/algorithms/zstandard/huffman.rb +272 -0
data/lib/omnizip/algorithms/zstandard/huffman_encoder.rb +339 -0
data/lib/omnizip/algorithms/zstandard/literals.rb +178 -0
data/lib/omnizip/algorithms/zstandard/literals_encoder.rb +251 -0
data/lib/omnizip/algorithms/zstandard/sequences.rb +346 -0
data/lib/omnizip/buffer/memory_extractor.rb +3 -3
data/lib/omnizip/buffer.rb +2 -2
data/lib/omnizip/filters/delta.rb +2 -1
data/lib/omnizip/filters/registry.rb +6 -6
data/lib/omnizip/formats/cpio/bounded_io.rb +66 -0
data/lib/omnizip/formats/lzip.rb +2 -1
data/lib/omnizip/formats/lzma_alone.rb +2 -1
data/lib/omnizip/formats/ole/allocation_table.rb +244 -0
data/lib/omnizip/formats/ole/constants.rb +61 -0
data/lib/omnizip/formats/ole/dirent.rb +380 -0
data/lib/omnizip/formats/ole/header.rb +198 -0
data/lib/omnizip/formats/ole/ranges_io.rb +264 -0
data/lib/omnizip/formats/ole/storage.rb +305 -0
data/lib/omnizip/formats/ole/types/variant.rb +328 -0
data/lib/omnizip/formats/ole.rb +145 -0
data/lib/omnizip/formats/rar/compression/ppmd/decoder.rb +92 -49
data/lib/omnizip/formats/rar/compression/ppmd/encoder.rb +13 -20
data/lib/omnizip/formats/rar/rar5/compression/lzss.rb +6 -2
data/lib/omnizip/formats/rar3/reader.rb +6 -2
data/lib/omnizip/formats/rar5/reader.rb +4 -1
data/lib/omnizip/formats/rpm/constants.rb +58 -0
data/lib/omnizip/formats/rpm/entry.rb +102 -0
data/lib/omnizip/formats/rpm/header.rb +113 -0
data/lib/omnizip/formats/rpm/lead.rb +122 -0
data/lib/omnizip/formats/rpm/tag.rb +230 -0
data/lib/omnizip/formats/rpm.rb +434 -0
data/lib/omnizip/formats/seven_zip/bcj2_stream_decompressor.rb +239 -0
data/lib/omnizip/formats/seven_zip/coder_chain.rb +32 -8
data/lib/omnizip/formats/seven_zip/constants.rb +1 -1
data/lib/omnizip/formats/seven_zip/reader.rb +84 -8
data/lib/omnizip/formats/seven_zip/stream_compressor.rb +2 -1
data/lib/omnizip/formats/seven_zip/stream_decompressor.rb +6 -0
data/lib/omnizip/formats/seven_zip/writer.rb +21 -9
data/lib/omnizip/formats/seven_zip.rb +10 -0
data/lib/omnizip/formats/xar/entry.rb +18 -5
data/lib/omnizip/formats/xar/header.rb +34 -6
data/lib/omnizip/formats/xar/reader.rb +43 -10
data/lib/omnizip/formats/xar/toc.rb +34 -21
data/lib/omnizip/formats/xar/writer.rb +15 -5
data/lib/omnizip/formats/xz_impl/block_decoder.rb +45 -33
data/lib/omnizip/formats/xz_impl/block_encoder.rb +2 -1
data/lib/omnizip/formats/xz_impl/index_decoder.rb +3 -1
data/lib/omnizip/formats/xz_impl/stream_header_parser.rb +2 -1
data/lib/omnizip/formats/zip/end_of_central_directory.rb +4 -3
data/lib/omnizip/implementations/seven_zip/lzma/decoder.rb +14 -6
data/lib/omnizip/implementations/seven_zip/lzma/encoder.rb +2 -1
data/lib/omnizip/implementations/seven_zip/lzma2/encoder.rb +28 -13
data/lib/omnizip/implementations/xz_utils/lzma2/encoder.rb +13 -6
data/lib/omnizip/pipe/stream_compressor.rb +1 -1
data/lib/omnizip/version.rb +1 -1
data/readme-docs/compression-algorithms.adoc +6 -2
metadata +30 -2

data/lib/omnizip/algorithms/lzma/range_encoder.rb CHANGED

@@ -109,6 +109,25 @@ module Omnizip
           end
         end
+        # Encode a symbol using cumulative frequency range
+        #
+        # This is used by PPMd for encoding symbols based on their
+        # frequency distribution in the current context.
+        #
+        # @param cum_freq [Integer] Cumulative frequency up to this symbol
+        # @param freq [Integer] Frequency of this symbol
+        # @param total_freq [Integer] Total frequency of all symbols in context
+        # @return [void]
+        def encode_freq(cum_freq, freq, total_freq)
+          normalize
+          range_freq = @range / total_freq
+          low_bound = range_freq * cum_freq
+          high_bound = range_freq * (cum_freq + freq)
+          @low = (@low + low_bound) & 0xFFFFFFFFFFFFFFFF
+          @range = (high_bound - low_bound) & 0xFFFFFFFF
+        end
         # Flush remaining bytes to output stream
         #
         # Ported from XZ Utils rc_flush().

data/lib/omnizip/algorithms/lzma/xz_encoder_fast.rb CHANGED

@@ -487,7 +487,8 @@ module Omnizip
             subcoder_index = offset + match_bit + (symbol >> 8)
             bit = (symbol >> 7) & 1
-            @encoder.queue_bit(@models.literal[literal_base + subcoder_index], bit)
+            @encoder.queue_bit(@models.literal[literal_base + subcoder_index],
+                               bit)
             symbol <<= 1
             offset &= ~(match_byte ^ symbol)

data/lib/omnizip/algorithms/lzma/xz_utils_decoder.rb CHANGED

@@ -167,12 +167,15 @@ module Omnizip
       # @param preserve_dict [Boolean] Whether to preserve dictionary from previous decode
       # @param check_rc_finished [Boolean] Whether to check if range decoder is finished
       # @return [String, Integer] Decompressed data or bytes written
-      def decode_stream(output = nil, preserve_dict: false, check_rc_finished: true)
+      def decode_stream(output = nil, preserve_dict: false,
+check_rc_finished: true)
         @decode_stream_call_count ||= 0
         @decode_stream_call_count += 1
         call_num = @decode_stream_call_count
-        puts "DEBUG decode_stream START (call ##{call_num}): @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, preserve_dict=#{preserve_dict}, @uncompressed_size=#{@uncompressed_size.inspect}" if ENV.fetch("LZMA_DEBUG", nil) && @dict_full && @dict_full >= 200 && @dict_full <= 230
+        puts "DEBUG decode_stream START (call ##{call_num}): @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, preserve_dict=#{preserve_dict}, @uncompressed_size=#{@uncompressed_size.inspect}" if ENV.fetch(
+          "LZMA_DEBUG", nil
+        ) && @dict_full && @dict_full >= 200 && @dict_full <= 230
         if ENV["LZMA_DEBUG_DECODE_STREAM"]
           warn "DEBUG decode_stream[#{@decoder_id}] START: preserve_dict=#{preserve_dict}, @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, @dict_buf.object_id=#{@dict_buf&.object_id || 'nil'}, @dict_buf.size=#{@dict_buf&.size || 'nil'}"
         end
@@ -183,10 +186,10 @@ module Omnizip
         # See: /Users/mulgogi/src/external/xz/src/liblzma/lzma/lzma2_decoder.c:140-141
         if ENV["LZMA_DEBUG"]
           warn "DEBUG: decode_stream - reusing range decoder @input.pos=#{begin
-                 @input.pos
-               rescue StandardError
-                 'N/A'
-               end}, @range_decoder.class=#{@range_decoder.class}"
+            @input.pos
+          rescue StandardError
+            'N/A'
+          end}, @range_decoder.class=#{@range_decoder.class}"
         end
         # Create range decoder if it doesn't exist (first chunk)
@@ -200,7 +203,7 @@ module Omnizip
         # Special case: empty input (uncompressed_size == 0)
         # Return immediately without trying to decode anything
-        if @uncompressed_size != 0xFFFFFFFFFFFFFFFF && @uncompressed_size == 0
+        if @uncompressed_size != 0xFFFFFFFFFFFFFFFF && @uncompressed_size.zero?
           if ENV["LZMA_DEBUG"]
             warn "DEBUG: decode_stream - empty input (uncompressed_size=0), returning immediately"
           end
@@ -216,7 +219,9 @@ module Omnizip
         @chunk_bytes_decoded = 0
         # DEBUG: Show chunk_bytes_decoded initialization
-        if @dict_full && @dict_full >= 220 && @dict_full <= 240 && ENV.fetch("LZMA_DEBUG", nil)
+        if @dict_full && @dict_full >= 220 && @dict_full <= 240 && ENV.fetch(
+          "LZMA_DEBUG", nil
+        )
           puts "DEBUG: chunk_bytes_decoded reset to 0 for chunk (call_num=#{call_num}, dict_full=#{@dict_full})"
         end
@@ -274,9 +279,13 @@ module Omnizip
         # properly reflected in start_pos, so we only return NEW bytes.
         # For LZMA2, we need to return only the NEW bytes, not all bytes from LZ_DICT_INIT_POS
         start_pos = @pos || LZ_DICT_INIT_POS
-        puts "DEBUG: start_pos=#{start_pos}, @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, preserve_dict=#{preserve_dict}, @decoder_id=#{@decoder_id}" if ENV.fetch("LZMA_DEBUG", nil) && @dict_full && @dict_full >= 200 && @dict_full <= 230
+        puts "DEBUG: start_pos=#{start_pos}, @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, preserve_dict=#{preserve_dict}, @decoder_id=#{@decoder_id}" if ENV.fetch(
+          "LZMA_DEBUG", nil
+        ) && @dict_full && @dict_full >= 200 && @dict_full <= 230
         # Also show for chunk #1 start (dict_full around 227)
-        puts "DEBUG: start_pos=#{start_pos}, @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, @uncompressed_size=#{@uncompressed_size}, @decoder_id=#{@decoder_id}" if ENV.fetch("LZMA_DEBUG", nil) && @dict_full && @dict_full >= 225 && @dict_full <= 230
+        puts "DEBUG: start_pos=#{start_pos}, @pos=#{@pos.inspect}, @dict_full=#{@dict_full.inspect}, @uncompressed_size=#{@uncompressed_size}, @decoder_id=#{@decoder_id}" if ENV.fetch(
+          "LZMA_DEBUG", nil
+        ) && @dict_full && @dict_full >= 225 && @dict_full <= 230
         # Initialize rep distances (XZ Utils initializes to 0)
         # See: /Users/mulgogi/src/external/xz/src/liblzma/lzma/lzma_decoder.c:1054-1055
@@ -285,7 +294,9 @@ module Omnizip
         #
         # IMPORTANT: Initialize rep distances if they're nil OR not preserving dict
         if @rep0.nil? || @rep1.nil? || @rep2.nil? || @rep3.nil? || !preserve_dict
-          puts "DEBUG: Resetting rep distances to 0 (rep0.nil?=#{@rep0.nil?}, preserve_dict=#{preserve_dict})" if ENV.fetch("LZMA_DEBUG", nil) && @dict_full && @dict_full >= 200 && @dict_full <= 230
+          puts "DEBUG: Resetting rep distances to 0 (rep0.nil?=#{@rep0.nil?}, preserve_dict=#{preserve_dict})" if ENV.fetch(
+            "LZMA_DEBUG", nil
+          ) && @dict_full && @dict_full >= 200 && @dict_full <= 230
           @rep0 = 0
           @rep1 = 0
           @rep2 = 0
@@ -301,17 +312,19 @@ module Omnizip
         # XZ Utils uses dict->pos (current position) + uncompressed_size
         # We use start_pos (current position) + @uncompressed_size
         limit = if @uncompressed_size == 0xFFFFFFFFFFFFFFFF
-            nil # No limit for unknown size
-          else
-            start_pos + @uncompressed_size
-          end
+                  nil # No limit for unknown size
+                else
+                  start_pos + @uncompressed_size
+                end
         # DEBUG: Show limit calculation for chunk #1
-        if ENV.fetch("LZMA_DEBUG_LIMIT", nil) && @dict_full && @dict_full >= 220 && @dict_full <= 240
+        if ENV.fetch("LZMA_DEBUG_LIMIT",
+                     nil) && @dict_full && @dict_full >= 220 && @dict_full <= 240
           puts "DEBUG LIMIT CALCULATION: start_pos=#{start_pos}, @uncompressed_size=#{@uncompressed_size}, limit=#{limit.inspect}"
         end
         # DEBUG: Also show for dict_full around 293 (where the error occurs)
-        if ENV.fetch("LZMA_DEBUG_LIMIT", nil) && @dict_full && @dict_full >= 290 && @dict_full <= 300
+        if ENV.fetch("LZMA_DEBUG_LIMIT",
+                     nil) && @dict_full && @dict_full >= 290 && @dict_full <= 300
           puts "DEBUG LIMIT CALCULATION at dict_full=#{@dict_full}: start_pos=#{start_pos}, @uncompressed_size=#{@uncompressed_size}, limit=#{limit.inspect}, @decoder_id=#{@decoder_id}"
         end
@@ -319,17 +332,18 @@ module Omnizip
         loop do
           iteration += 1
           # DEBUG: Show every iteration after position 200
-          if ENV.fetch("LZMA_DEBUG_ITER", nil) && @dict_full && @dict_full >= 200 && @dict_full <= 500
+          if ENV.fetch("LZMA_DEBUG_ITER",
+                       nil) && @dict_full && @dict_full >= 200 && @dict_full <= 500
             puts "DEBUG ITERATION ##{iteration}: pos=#{@pos}, dict_full=#{@dict_full}, limit=#{limit.inspect}"
           end
           # Check if we've reached the expected size (if known)
           # XZ Utils: checks dict.pos < dict.limit
           if ENV["LZMA_DEBUG_LIMIT"]
             compare_result = begin
-                limit && @pos >= limit
-              rescue StandardError
-                "ERROR"
-              end
+              limit && @pos >= limit
+            rescue StandardError
+              "ERROR"
+            end
             XzUtilsDecoderDebug.debug_puts "DEBUG LIMIT: iter=#{iteration}, pos=#{@pos.inspect}, dict_full=#{@dict_full}, limit=#{limit.inspect}, pos >= limit: #{compare_result}"
           end
@@ -504,16 +518,16 @@ module Omnizip
             # If explicitly set to false, it allows EOPM even when uncompressed size is known
             # Reference: alone_decoder.c:127 (LZMA_LZMA1EXT_ALLOW_EOPM)
             should_check = if @allow_eopm == true
-                # EOPM is explicitly allowed, skip the check
-                false
-              elsif @allow_eopm == false
-                # LZMA2 mode: always check (EOPM is not allowed)
-                true
-              else
-                # @allow_eopm is nil (not set, first chunk or legacy mode)
-                # Use check_rc_finished parameter as default
-                check_rc_finished
-              end
+                             # EOPM is explicitly allowed, skip the check
+                             false
+                           elsif @allow_eopm == false
+                             # LZMA2 mode: always check (EOPM is not allowed)
+                             true
+                           else
+                             # @allow_eopm is nil (not set, first chunk or legacy mode)
+                             # Use check_rc_finished parameter as default
+                             check_rc_finished
+                           end
             if should_check
               # If EOPM is not allowed, range decoder MUST be finished
@@ -522,12 +536,10 @@ module Omnizip
                       "LZMA stream finished with leftover compressed data (range_decoder.code=#{@range_decoder.code}, expected 0). This indicates corruption in the compressed stream or an invalid EOPM for LZMA2."
               end
               break
-            else
+            elsif @range_decoder.code.zero?
               # EOPM is allowed (e.g., LZMA_Alone format)
               # If range decoder is finished, we're done
-              if @range_decoder.code.zero?
-                break
-              end
+              break
               # Otherwise, continue decoding to find EOPM marker
               # XZ Utils sets eopm_is_valid = true and continues
               # Reference: lzma_decoder.c:704
@@ -535,12 +547,14 @@ module Omnizip
           end
           # DEBUG: Show when approaching limit for chunk #1
-          if ENV.fetch("LZMA_DEBUG_LIMIT", nil) && limit && @pos >= limit - 10 && @pos < limit + 10
+          if ENV.fetch("LZMA_DEBUG_LIMIT",
+                       nil) && limit && @pos >= limit - 10 && @pos < limit + 10
             puts "DEBUG NEAR LIMIT (call #{call_num}): pos=#{@pos}, limit=#{limit}, dict_full=#{@dict_full}, chunk_bytes_decoded=#{@chunk_bytes_decoded}, remaining=#{@uncompressed_size ? @uncompressed_size - @chunk_bytes_decoded : 'N/A'}"
           end
           # DEBUG: Show when we've passed the expected limit
-          if ENV.fetch("LZMA_DEBUG_LIMIT", nil) && limit && @pos >= limit && @pos < limit + 10
+          if ENV.fetch("LZMA_DEBUG_LIMIT",
+                       nil) && limit && @pos >= limit && @pos < limit + 10
             puts "DEBUG PASSED LIMIT: pos=#{@pos}, limit=#{limit}, dict_full=#{@dict_full}, delta=#{@pos - limit}"
           end
@@ -614,16 +628,18 @@ module Omnizip
         end
         valid_bytes = @dict_buf[start_pos...@pos]
         # DEBUG: Show return value calculation
-        puts "DEBUG RETURN CALCULATION: call_num=#{call_num}, start_pos=#{start_pos}, @pos=#{@pos}, valid_bytes.size=#{@dict_buf[start_pos...@pos].size}, dict_full=#{@dict_full}, chunk_bytes_decoded=#{@chunk_bytes_decoded}" if ENV.fetch("LZMA_DEBUG", nil) && @dict_full && @dict_full >= 220 && @dict_full <= 240 && call_num == 2
+        puts "DEBUG RETURN CALCULATION: call_num=#{call_num}, start_pos=#{start_pos}, @pos=#{@pos}, valid_bytes.size=#{@dict_buf[start_pos...@pos].size}, dict_full=#{@dict_full}, chunk_bytes_decoded=#{@chunk_bytes_decoded}" if ENV.fetch(
+          "LZMA_DEBUG", nil
+        ) && @dict_full && @dict_full >= 220 && @dict_full <= 240 && call_num == 2
         puts "DEBUG RETURN CALCULATION: call_num=#{call_num}, start_pos=#{start_pos}, @pos=#{@pos}, valid_bytes.size=#{@dict_buf[start_pos...@pos].size}, dict_full=#{@dict_full}, chunk_bytes_decoded=#{@chunk_bytes_decoded}" if ENV["LZMA_DEBUG"] && call_num == 2
         # Filter out nil values (can happen during dictionary reset transitions)
         valid_bytes = valid_bytes.map { |b| b.nil? ? 0 : b }
         if ENV["DEBUG_DICT_BUF"]
           XzUtilsDecoderDebug.debug_puts "DEBUG: valid_bytes=#{begin
-                                           valid_bytes.size
-                                         rescue StandardError
-                                           valid_bytes.inspect
-                                         end}"
+            valid_bytes.size
+          rescue StandardError
+            valid_bytes.inspect
+          end}"
         end
         valid_data = valid_bytes.pack("C*")
         if output
@@ -863,24 +879,24 @@ module Omnizip
         if @range_decoder
           if ENV["LZMA_DEBUG"]
             input_pos = begin
-                @input.pos
-              rescue StandardError
-                "N/A"
-              end
+              @input.pos
+            rescue StandardError
+              "N/A"
+            end
             input_size = begin
-                @input.size
-              rescue StandardError
-                "N/A"
-              end
+              @input.size
+            rescue StandardError
+              "N/A"
+            end
             XzUtilsDecoderDebug.debug_puts "=== finish_state_reset: resetting range_decoder, input pos=#{input_pos}, size=#{input_size}"
           end
           @range_decoder.reset
           if ENV["LZMA_DEBUG"]
             input_pos_after = begin
-                @input.pos
-              rescue StandardError
-                "N/A"
-              end
+              @input.pos
+            rescue StandardError
+              "N/A"
+            end
             XzUtilsDecoderDebug.debug_puts "=== finish_state_reset: after reset, input pos=#{input_pos_after}, range_decoder.code=0x#{@range_decoder.code.to_s(16)}"
           end
         end
@@ -920,7 +936,8 @@ module Omnizip
         @input = new_input
         # DEBUG: Trace input stream contents
-        if ENV.fetch("LZMA_DEBUG", nil) && @dict_full && @dict_full >= 220 && @dict_full <= 230
+        if ENV.fetch("LZMA_DEBUG",
+                     nil) && @dict_full && @dict_full >= 220 && @dict_full <= 230
           puts "\n=== set_input at dict_full=#{@dict_full} ==="
           puts "  new_input.size=#{new_input.size}"
           puts "  new_input.pos=#{new_input.pos}"
@@ -934,7 +951,9 @@ module Omnizip
             first_bytes << byte
           end
-          puts "  First 10 bytes: #{first_bytes.map { |b| "0x#{b.to_s(16).upcase}" }.join(' ')}"
+          puts "  First 10 bytes: #{first_bytes.map do |b|
+            "0x#{b.to_s(16).upcase}"
+          end.join(' ')}"
           new_input.rewind
           test_byte = new_input.getbyte
@@ -1268,22 +1287,22 @@ module Omnizip
           if @dict_full == 233
             XzUtilsDecoderDebug.debug_puts "  DETAILED TRACE at dict_full=233 (pos=#{@pos}):"
             XzUtilsDecoderDebug.debug_puts "    byte=0x#{byte.to_s(16)} ('#{begin
-                   byte.chr
-                 rescue StandardError
-                   '?'
-                 end}')"
+              byte.chr
+            rescue StandardError
+              '?'
+            end}')"
             XzUtilsDecoderDebug.debug_puts "    state.value=#{@state.value}, lit_state=#{lit_state}"
             XzUtilsDecoderDebug.debug_puts "    use_matched_literal?=#{@state.use_matched_literal?}"
             prev_byte_val = @dict_full.positive? ? @dict_buf[@pos - 1] : "N/A"
             XzUtilsDecoderDebug.debug_puts "    prev_byte=#{prev_byte_val.inspect} (#{if prev_byte_val.is_a?(Integer)
-                   "0x#{prev_byte_val.to_s(16)} ('#{begin
-                     prev_byte_val.chr
-                   rescue StandardError
-                     '?'
-                   end}')"
-                 else
-                   'N/A'
-                 end})"
+                                                                                        "0x#{prev_byte_val.to_s(16)} ('#{begin
+                                                                                          prev_byte_val.chr
+                                                                                        rescue StandardError
+                                                                                          '?'
+                                                                                        end}')"
+                                                                                      else
+                                                                                        'N/A'
+                                                                                      end})"
             XzUtilsDecoderDebug.debug_puts "    range_decoder.range=0x#{@range_decoder.range.to_s(16)}, range_decoder.code=0x#{@range_decoder.code.to_s(16)}"
             XzUtilsDecoderDebug.debug_puts "    input.pos=#{@input.pos}, input.size=#{@input.size}"
           end
@@ -1335,14 +1354,17 @@ module Omnizip
         if ENV["TRACE_ARM64_BYTES"]
           @arm64_trace ||= []
           if @arm64_trace.size < 20
-            @arm64_trace << [@dict_full, @pos, byte.class, byte.is_a?(Integer) ? byte : byte.ord, @dict_buf[@pos]]
+            @arm64_trace << [@dict_full, @pos, byte.class,
+                             byte.is_a?(Integer) ? byte : byte.ord, @dict_buf[@pos]]
             if @arm64_trace.size == 20
               # Dump the trace
               puts "\n=== ARM64 BYTE TRACE (first 20 bytes) ==="
               puts "Decoder ID: #{@decoder_id}"
               @arm64_trace.each_with_index do |entry, i|
                 df, p, _, val, stored = entry
-                puts "  [#{i + 1}] dict_full=#{df.to_s.rjust(6)}, pos=#{p.to_s.rjust(6)}, byte=#{val.to_s.rjust(3)} (0x#{val.to_s(16).upcase.rjust(2, '0')}) stored=#{stored.inspect}"
+                puts "  [#{i + 1}] dict_full=#{df.to_s.rjust(6)}, pos=#{p.to_s.rjust(6)}, byte=#{val.to_s.rjust(3)} (0x#{val.to_s(16).upcase.rjust(
+                  2, '0'
+                )}) stored=#{stored.inspect}"
               end
               puts "=========================================\n"
               $stderr.flush
@@ -1482,24 +1504,36 @@ module Omnizip
         # ((len) < DIST_STATES + MATCH_LEN_MIN ? (len) - MATCH_LEN_MIN : DIST_STATES - 1)
         # This gives: len=2→0, len=3→1, len=4→2, len=5→3, len=6+→3
         len_state = if length < NUM_LEN_TO_POS_STATES + MATCH_LEN_MIN
-            length - MATCH_LEN_MIN
-          else
-            NUM_LEN_TO_POS_STATES - 1
-          end
+                      length - MATCH_LEN_MIN
+                    else
+                      NUM_LEN_TO_POS_STATES - 1
+                    end
         # DEBUG: Show bytes being copied
         if old_dict_full.between?(210, 230) || ENV["LZMA_DEBUG_DISTANCE"]
-          XzUtilsDecoderDebug.debug_puts "\n=== decode_regular_match at dict_full=#{old_dict_full} ===" if old_dict_full.between?(210, 230)
+          XzUtilsDecoderDebug.debug_puts "\n=== decode_regular_match at dict_full=#{old_dict_full} ===" if old_dict_full.between?(
+            210, 230
+          )
           puts "[DISTANCE_DECODER] decode_regular_match at dict_full=#{old_dict_full}" if ENV["LZMA_DEBUG_DISTANCE"]
-          XzUtilsDecoderDebug.debug_puts "  pos_state=#{pos_state}" if old_dict_full.between?(210, 230)
+          XzUtilsDecoderDebug.debug_puts "  pos_state=#{pos_state}" if old_dict_full.between?(
+            210, 230
+          )
           puts "[DISTANCE_DECODER]   pos_state=#{pos_state}" if ENV["LZMA_DEBUG_DISTANCE"]
-          XzUtilsDecoderDebug.debug_puts "  state=#{old_state}" if old_dict_full.between?(210, 230)
+          XzUtilsDecoderDebug.debug_puts "  state=#{old_state}" if old_dict_full.between?(
+            210, 230
+          )
           puts "[DISTANCE_DECODER]   state=#{old_state}" if ENV["LZMA_DEBUG_DISTANCE"]
-          XzUtilsDecoderDebug.debug_puts "  length_encoded=#{length_encoded} length=#{length}" if old_dict_full.between?(210, 230)
+          XzUtilsDecoderDebug.debug_puts "  length_encoded=#{length_encoded} length=#{length}" if old_dict_full.between?(
+            210, 230
+          )
           puts "[DISTANCE_DECODER]   length_encoded=#{length_encoded} length=#{length}" if ENV["LZMA_DEBUG_DISTANCE"]
-          XzUtilsDecoderDebug.debug_puts "  len_state=#{len_state}" if old_dict_full.between?(210, 230)
+          XzUtilsDecoderDebug.debug_puts "  len_state=#{len_state}" if old_dict_full.between?(
+            210, 230
+          )
           puts "[DISTANCE_DECODER]   len_state=#{len_state}" if ENV["LZMA_DEBUG_DISTANCE"]
-          XzUtilsDecoderDebug.debug_puts "  rep0_before=#{old_rep0}" if old_dict_full.between?(210, 230)
+          XzUtilsDecoderDebug.debug_puts "  rep0_before=#{old_rep0}" if old_dict_full.between?(
+            210, 230
+          )
           puts "[DISTANCE_DECODER]   rep0_before=#{old_rep0}" if ENV["LZMA_DEBUG_DISTANCE"]
         end
@@ -1513,7 +1547,9 @@ module Omnizip
         rep0 = @distance_coder.decode(@range_decoder, len_state)
         # DEBUG
-        if (ENV.fetch("LZMA_DEBUG", nil) && old_dict_full.between?(210, 230)) || old_dict_full == 293
+        if (ENV.fetch("LZMA_DEBUG",
+                      nil) && old_dict_full.between?(210,
+                                                     230)) || old_dict_full == 293
           puts "  rep0_decoded=#{rep0} (distance = #{rep0})"
           puts "  buffer_back calculation: back=#{@dict_full - rep0 - 1}"
         end
@@ -1609,14 +1645,14 @@ module Omnizip
           XzUtilsDecoderDebug.debug_puts "  buffer_back=#{buffer_back}, back=#{back}"
           bytes_at_back = @dict_buf[buffer_back, 3]
           bytes_hex = if bytes_at_back.is_a?(String)
-              bytes_at_back.bytes.map do |b|
-                "%02x" % b
-              end.join(" ")
-            else
-              bytes_at_back.map do |b|
-                "%02x" % b
-              end.join(" ")
-            end
+                        bytes_at_back.bytes.map do |b|
+                          "%02x" % b
+                        end.join(" ")
+                      else
+                        bytes_at_back.map do |b|
+                          "%02x" % b
+                        end.join(" ")
+                      end
           XzUtilsDecoderDebug.debug_puts "  First 3 bytes at buffer_back: #{bytes_hex} (#{bytes_at_back.inspect})"
         end
@@ -1916,14 +1952,14 @@ module Omnizip
           XzUtilsDecoderDebug.debug_puts "  back=#{old_back}, wrapped_back=#{back}, buffer_back=#{buffer_back}"
           bytes_at_back = @dict_buf[buffer_back, 3]
           bytes_hex = if bytes_at_back.is_a?(String)
-              bytes_at_back.bytes.map do |b|
-                "%02x" % b
-              end.join(" ")
-            else
-              [bytes_at_back].flatten.map do |b|
-                "%02x" % b
-              end.join(" ")
-            end
+                        bytes_at_back.bytes.map do |b|
+                          "%02x" % b
+                        end.join(" ")
+                      else
+                        [bytes_at_back].flatten.map do |b|
+                          "%02x" % b
+                        end.join(" ")
+                      end
           XzUtilsDecoderDebug.debug_puts "  First 3 bytes at buffer_back: #{bytes_hex} (#{bytes_at_back.inspect})"
         end
@@ -1933,10 +1969,10 @@ module Omnizip
           source_val = @dict_buf[@pos - 1]
           puts "  Rep match copy at dict_full=#{@dict_full}: length=#{length}, distance=#{distance}, @pos=#{@pos} (will write to #{@pos}...#{@pos + length - 1})"
           puts "  Reading from @pos-1=#{@pos - 1}, source byte = #{source_val} (0x#{source_val.to_s(16)} '#{begin
-                 source_val.chr
-               rescue StandardError
-                 '?'
-               end}')"
+            source_val.chr
+          rescue StandardError
+            '?'
+          end}')"
           puts "  Before copy: @dict_buf[#{@pos}...#{@pos + length - 1}] = #{@dict_buf[@pos,
                                                                                        length].inspect}"
         end
@@ -1950,17 +1986,17 @@ module Omnizip
                                                                          10].inspect}"
             # DEBUG: Check if buffer_back+1 has the correct byte
             puts "  dict_buf[buffer_back+1=#{buffer_back + 1}] = #{@dict_buf[buffer_back + 1].inspect} ('#{begin
-                   @dict_buf[buffer_back + 1].chr
-                 rescue StandardError
-                   '?'
-                 end}')"
+              @dict_buf[buffer_back + 1].chr
+            rescue StandardError
+              '?'
+            end}')"
             prev_5 = if buffer_back > 4
-                @dict_buf[(buffer_back - 5)..(buffer_back - 1)].map do |b|
-                  "0x#{b.to_s(16).upcase} (#{b.chr})"
-                end.join(", ")
-              else
-                "N/A"
-              end
+                       @dict_buf[(buffer_back - 5)..(buffer_back - 1)].map do |b|
+                         "0x#{b.to_s(16).upcase} (#{b.chr})"
+                       end.join(", ")
+                     else
+                       "N/A"
+                     end
             puts "  Previous 5 bytes: [#{prev_5}]"
             puts "  Current dict_full=#{@dict_full}, @pos=#{@pos}"
           end

data/lib/omnizip/algorithms/lzma.rb CHANGED

@@ -97,12 +97,21 @@ module Omnizip
         # Build decoder options, merging with instance variables as fallbacks
         decoder_opts = build_decoder_options(options)
-        decoder_opts[:lzma2_mode] = @lzma2_mode if @lzma2_mode && !decoder_opts.key?(:lzma2_mode)
+        if @lzma2_mode && !decoder_opts.key?(:lzma2_mode)
+          decoder_opts[:lzma2_mode] =
+            @lzma2_mode
+        end
         decoder_opts[:lc] = @lc if @lc && !decoder_opts.key?(:lc)
         decoder_opts[:lp] = @lp if @lp && !decoder_opts.key?(:lp)
         decoder_opts[:pb] = @pb if @pb && !decoder_opts.key?(:pb)
-        decoder_opts[:dict_size] = @dict_size if @dict_size && !decoder_opts.key?(:dict_size)
-        decoder_opts[:uncompressed_size] = @uncompressed_size if @uncompressed_size && !decoder_opts.key?(:uncompressed_size)
+        if @dict_size && !decoder_opts.key?(:dict_size)
+          decoder_opts[:dict_size] =
+            @dict_size
+        end
+        if @uncompressed_size && !decoder_opts.key?(:uncompressed_size)
+          decoder_opts[:uncompressed_size] =
+            @uncompressed_size
+        end
         decoder_opts[:uncompressed_size] ||= options[:size] if options.respond_to?(:key?) && options.key?(:size)
         decoder = Decoder.new(input_stream, decoder_opts)
@@ -167,13 +176,19 @@ module Omnizip
         # Handle Hash-like options - pass through all decoder-relevant options
         if options.respond_to?(:key?)
-          opts[:sdk_compatible] = options[:sdk_compatible] if options.key?(:sdk_compatible)
+          if options.key?(:sdk_compatible)
+            opts[:sdk_compatible] =
+              options[:sdk_compatible]
+          end
           opts[:lzma2_mode] = options[:lzma2_mode] if options.key?(:lzma2_mode)
           opts[:lc] = options[:lc] if options.key?(:lc)
           opts[:lp] = options[:lp] if options.key?(:lp)
           opts[:pb] = options[:pb] if options.key?(:pb)
           opts[:dict_size] = options[:dict_size] if options.key?(:dict_size)
-          opts[:uncompressed_size] = options[:uncompressed_size] if options.key?(:uncompressed_size)
+          if options.key?(:uncompressed_size)
+            opts[:uncompressed_size] =
+              options[:uncompressed_size]
+          end
           opts[:size] = options[:size] if options.key?(:size)
         end

data/lib/omnizip/algorithms/ppmd7/decoder.rb CHANGED Viewed

@@ -74,46 +74,50 @@ module Omnizip
         # Decode a single symbol using the model
         #
         # Uses the model and range decoder to extract the
-        # original symbol value.
+        # original symbol value using proper range decoding.
         #
         # @return [Integer, nil] Decoded byte or nil if end
         def decode_symbol
-          # Decode range value
-          value = @range_decoder.decode_direct_bits(16)
+          # Get context for decoding
+          context = @model.root_context
+          total_freq = context.total_freq
+          return nil if total_freq.zero?
+          # Decode cumulative frequency using proper range decoding
+          cum_freq_value = @range_decoder.decode_freq(total_freq)
-          # Find symbol from range
-          symbol = find_symbol_from_range(value)
+          # Find symbol from cumulative frequency value
+          symbol, cum_freq, freq = find_symbol_from_cum_freq(context,
+                                                             cum_freq_value)
           return nil if symbol.nil?
+          # Normalize the range decoder state
+          @range_decoder.normalize_freq(cum_freq, freq, total_freq)
           # Update model to stay in sync with encoder
           @model.update(symbol)
           symbol
         end
-        # Find symbol from decoded range value
+        # Find symbol from cumulative frequency value
         #
-        # Converts the range value back to a symbol using
-        # the current context's probability distribution.
+        # Maps the decoded cumulative frequency back to a symbol
+        # using the context's probability distribution.
         #
-        # @param value [Integer] Decoded range value
-        # @return [Integer, nil] The symbol
-        def find_symbol_from_range(value)
-          # This is simplified - real implementation would
-          # properly decode using context probabilities
-          context = @model.root_context
-          # Find symbol whose cumulative range contains value
-          scale = 0x10000
+        # @param context [Context] The current context
+        # @param cum_freq_value [Integer] Decoded cumulative frequency
+        # @return [Array<Integer, Integer, Integer>] symbol, cum_freq, freq
+        def find_symbol_from_cum_freq(context, cum_freq_value)
           cum_freq = 0
           context.states.keys.sort.each do |symbol|
             state = context.states[symbol]
-            next_cum = cum_freq + state.freq
-            sym_low = (cum_freq * scale) / context.total_freq
-            sym_high = (next_cum * scale) / context.total_freq
+            freq = state.freq
+            next_cum = cum_freq + freq
-            return symbol if value >= sym_low && value < sym_high
+            return [symbol, cum_freq, freq] if cum_freq_value < next_cum
             cum_freq = next_cum
           end