RubyGems - pure_jpeg - Versions diffs - 0.1.0 → 0.3.0 - Mend

pure_jpeg 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +42 -0
data/LICENSE +1 -1
data/README.md +73 -16
data/lib/pure_jpeg/bit_reader.rb +8 -1
data/lib/pure_jpeg/bit_writer.rb +4 -4
data/lib/pure_jpeg/decoder.rb +337 -15
data/lib/pure_jpeg/encoder.rb +217 -68
data/lib/pure_jpeg/huffman/decoder.rb +1 -1
data/lib/pure_jpeg/huffman/encoder.rb +73 -45
data/lib/pure_jpeg/huffman/tables.rb +93 -1
data/lib/pure_jpeg/image.rb +40 -8
data/lib/pure_jpeg/info.rb +6 -0
data/lib/pure_jpeg/jfif_reader.rb +74 -21
data/lib/pure_jpeg/source/chunky_png_source.rb +8 -5
data/lib/pure_jpeg/source/raw_source.rb +2 -1
data/lib/pure_jpeg/version.rb +1 -1
data/lib/pure_jpeg.rb +30 -0
metadata +3 -2

data/lib/pure_jpeg/encoder.rb CHANGED Viewed

@@ -15,6 +15,8 @@ module PureJPEG
     attr_reader :quality
     # @return [Boolean] whether grayscale mode is enabled
     attr_reader :grayscale
+    # @return [Boolean] whether image-specific Huffman tables are generated
+    attr_reader :optimize_huffman
     # Create a new encoder for the given pixel source.
     #
@@ -34,13 +36,19 @@ module PureJPEG
     # @param scramble_quantization [Boolean] write quantization tables in raster
     #   order instead of zigzag (non-spec-compliant; recreates the "early digicam"
     #   artifact look when decoded by standard viewers)
+    # @param optimize_huffman [Boolean] build image-specific Huffman tables with
+    #   an additional analysis pass (default false)
     def initialize(source, quality: 85, grayscale: false, chroma_quality: nil,
                    luminance_table: nil, chrominance_table: nil,
-                   quantization_modifier: nil, scramble_quantization: false)
+                   quantization_modifier: nil, scramble_quantization: false,
+                   optimize_huffman: false)
       @source = source
       @quality = quality
       @grayscale = grayscale
+      @optimize_huffman = optimize_huffman
       @chroma_quality = chroma_quality || quality
+      validate_qtable!(luminance_table, "luminance_table") if luminance_table
+      validate_qtable!(chrominance_table, "chrominance_table") if chrominance_table
       @luminance_table = luminance_table
       @chrominance_table = chrominance_table
       @quantization_modifier = quantization_modifier
@@ -52,7 +60,7 @@ module PureJPEG
     # @param path [String] output file path
     # @return [void]
     def write(path)
-      File.open(path, "wb") { |f| encode(f) }
+      File.binwrite(path, to_bytes)
     end
     # Return the encoded JPEG as a binary string.
@@ -78,65 +86,138 @@ module PureJPEG
       table
     end
+    def validate_qtable!(table, name)
+      raise ArgumentError, "#{name} must have exactly 64 elements (got #{table.length})" unless table.length == 64
+      unless table.all? { |v| v.is_a?(Integer) && v >= 1 && v <= 255 }
+        raise ArgumentError, "#{name} elements must be integers between 1 and 255"
+      end
+    end
     def encode(io)
       width = source.width
       height = source.height
+      raise ArgumentError, "Width must be a positive integer (got #{width.inspect})" unless width.is_a?(Integer) && width > 0
+      raise ArgumentError, "Height must be a positive integer (got #{height.inspect})" unless height.is_a?(Integer) && height > 0
+      raise ArgumentError, "Width #{width} exceeds maximum of #{MAX_DIMENSION}" if width > MAX_DIMENSION
+      raise ArgumentError, "Height #{height} exceeds maximum of #{MAX_DIMENSION}" if height > MAX_DIMENSION
       lum_qtable = build_lum_qtable
-      lum_dc = Huffman.build_table(Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES)
-      lum_ac = Huffman.build_table(Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES)
-      lum_huff = Huffman::Encoder.new(lum_dc, lum_ac)
       if grayscale
-        scan_data = encode_grayscale(width, height, lum_qtable, lum_huff)
-        write_grayscale_jfif(io, width, height, lum_qtable, scan_data)
+        y_data = extract_luminance(width, height)
+        lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values =
+          if optimize_huffman
+            counter = collect_grayscale_frequencies(y_data, width, height, lum_qtable)
+            dc_bits, dc_values = Huffman.optimize_table(counter.dc_frequencies)
+            ac_bits, ac_values = Huffman.optimize_table(counter.ac_frequencies)
+            [dc_bits, dc_values, ac_bits, ac_values]
+          else
+            [Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES,
+             Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES]
+          end
+        lum_huff = Huffman::Encoder.new(
+          Huffman.build_table(lum_dc_bits, lum_dc_values),
+          Huffman.build_table(lum_ac_bits, lum_ac_values)
+        )
+        scan_data = encode_grayscale_data(y_data, width, height, lum_qtable, lum_huff)
+        write_grayscale_jfif(io, width, height, lum_qtable, scan_data,
+                             lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values)
       else
         chr_qtable = build_chr_qtable
-        chr_dc = Huffman.build_table(Huffman::DC_CHROMINANCE_BITS, Huffman::DC_CHROMINANCE_VALUES)
-        chr_ac = Huffman.build_table(Huffman::AC_CHROMINANCE_BITS, Huffman::AC_CHROMINANCE_VALUES)
-        chr_huff = Huffman::Encoder.new(chr_dc, chr_ac)
-        scan_data = encode_color(width, height, lum_qtable, chr_qtable, lum_huff, chr_huff)
-        write_color_jfif(io, width, height, lum_qtable, chr_qtable, scan_data)
+        y_data, cb_data, cr_data = extract_ycbcr(width, height)
+        sub_w = (width + 1) / 2
+        sub_h = (height + 1) / 2
+        cb_sub = downsample(cb_data, width, height, sub_w, sub_h)
+        cr_sub = downsample(cr_data, width, height, sub_w, sub_h)
+        lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values,
+          chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values =
+          if optimize_huffman
+            lum_counter, chr_counter = collect_color_frequencies(
+              y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qtable, chr_qtable
+            )
+            dc_bits, dc_values = Huffman.optimize_table(lum_counter.dc_frequencies)
+            ac_bits, ac_values = Huffman.optimize_table(lum_counter.ac_frequencies)
+            chr_dc_bits, chr_dc_values = Huffman.optimize_table(chr_counter.dc_frequencies)
+            chr_ac_bits, chr_ac_values = Huffman.optimize_table(chr_counter.ac_frequencies)
+            [dc_bits, dc_values, ac_bits, ac_values, chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values]
+          else
+            [Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES,
+             Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES,
+             Huffman::DC_CHROMINANCE_BITS, Huffman::DC_CHROMINANCE_VALUES,
+             Huffman::AC_CHROMINANCE_BITS, Huffman::AC_CHROMINANCE_VALUES]
+          end
+        lum_huff = Huffman::Encoder.new(
+          Huffman.build_table(lum_dc_bits, lum_dc_values),
+          Huffman.build_table(lum_ac_bits, lum_ac_values)
+        )
+        chr_huff = Huffman::Encoder.new(
+          Huffman.build_table(chr_dc_bits, chr_dc_values),
+          Huffman.build_table(chr_ac_bits, chr_ac_values)
+        )
+        scan_data = encode_color_data(
+          y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qtable, chr_qtable, lum_huff, chr_huff
+        )
+        write_color_jfif(io, width, height, lum_qtable, chr_qtable, scan_data,
+                         lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values,
+                         chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values)
       end
     end
     # --- Grayscale encoding ---
-    def encode_grayscale(width, height, qtable, huff)
-      y_data = extract_luminance(width, height)
+    def collect_grayscale_frequencies(y_data, width, height, qtable)
+      counter = Huffman::FrequencyCounter.new
+      each_grayscale_block(y_data, width, height, qtable) do |zbuf|
+        counter.observe_block(zbuf, :y)
+      end
+      counter
+    end
+    def encode_grayscale_data(y_data, width, height, qtable, huff)
+      bit_writer = BitWriter.new
+      prev_dc = 0
+      each_grayscale_block(y_data, width, height, qtable) do |zbuf|
+        prev_dc = huff.encode_block(zbuf, prev_dc, bit_writer)
+      end
+      bit_writer.flush
+      bit_writer.bytes
+    end
+    def each_grayscale_block(y_data, width, height, qtable)
       padded_w = (width + 7) & ~7
       padded_h = (height + 7) & ~7
-      # Reusable buffers
       block = Array.new(64, 0.0)
       temp  = Array.new(64, 0.0)
       dct   = Array.new(64, 0.0)
       qbuf  = Array.new(64, 0)
       zbuf  = Array.new(64, 0)
-      bit_writer = BitWriter.new
-      prev_dc = 0
       (0...padded_h).step(8) do |by|
         (0...padded_w).step(8) do |bx|
           extract_block_into(y_data, width, height, bx, by, block)
-          prev_dc = encode_block(block, temp, dct, qbuf, zbuf, qtable, huff, prev_dc, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, qtable)
+          yield zbuf
         end
       end
-      bit_writer.flush
-      bit_writer.bytes
     end
-    def write_grayscale_jfif(io, width, height, qtable, scan_data)
+    def write_grayscale_jfif(io, width, height, qtable, scan_data, dc_bits, dc_values, ac_bits, ac_values)
       jfif = JFIFWriter.new(io, scramble_quantization: @scramble_quantization)
       jfif.write_soi
       jfif.write_app0
       jfif.write_dqt(qtable, 0)
       jfif.write_sof0(width, height, [[1, 1, 1, 0]])
-      jfif.write_dht(0, 0, Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES)
-      jfif.write_dht(1, 0, Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES)
+      jfif.write_dht(0, 0, dc_bits, dc_values)
+      jfif.write_dht(1, 0, ac_bits, ac_values)
       jfif.write_sos([[1, 0, 0]])
       jfif.write_scan_data(scan_data)
       jfif.write_eoi
@@ -144,69 +225,97 @@ module PureJPEG
     # --- Color encoding (YCbCr 4:2:0) ---
-    def encode_color(width, height, lum_qt, chr_qt, lum_huff, chr_huff)
-      y_data, cb_data, cr_data = extract_ycbcr(width, height)
+    def collect_color_frequencies(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt)
+      lum_counter = Huffman::FrequencyCounter.new
+      chr_counter = Huffman::FrequencyCounter.new
+      each_color_block(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt) do |component, zbuf|
+        case component
+        when :y
+          lum_counter.observe_block(zbuf, :y)
+        when :cb
+          chr_counter.observe_block(zbuf, :cb)
+        when :cr
+          chr_counter.observe_block(zbuf, :cr)
+        end
+      end
+      [lum_counter, chr_counter]
+    end
+    def encode_color_data(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt, lum_huff, chr_huff)
+      bit_writer = BitWriter.new
+      prev_dc_y = 0
+      prev_dc_cb = 0
+      prev_dc_cr = 0
+      each_color_block(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt) do |component, zbuf|
+        case component
+        when :y
+          prev_dc_y = lum_huff.encode_block(zbuf, prev_dc_y, bit_writer)
+        when :cb
+          prev_dc_cb = chr_huff.encode_block(zbuf, prev_dc_cb, bit_writer)
+        when :cr
+          prev_dc_cr = chr_huff.encode_block(zbuf, prev_dc_cr, bit_writer)
+        end
+      end
-      sub_w = (width + 1) / 2
-      sub_h = (height + 1) / 2
-      cb_sub = downsample(cb_data, width, height, sub_w, sub_h)
-      cr_sub = downsample(cr_data, width, height, sub_w, sub_h)
+      bit_writer.flush
+      bit_writer.bytes
+    end
+    def each_color_block(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt)
       mcu_w = (width + 15) & ~15
       mcu_h = (height + 15) & ~15
-      # Reusable buffers
       block = Array.new(64, 0.0)
       temp  = Array.new(64, 0.0)
       dct   = Array.new(64, 0.0)
       qbuf  = Array.new(64, 0)
       zbuf  = Array.new(64, 0)
-      bit_writer = BitWriter.new
-      prev_dc_y = 0
-      prev_dc_cb = 0
-      prev_dc_cr = 0
       (0...mcu_h).step(16) do |my|
         (0...mcu_w).step(16) do |mx|
-          # 4 luminance blocks
           extract_block_into(y_data, width, height, mx, my, block)
-          prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
+          yield :y, zbuf
           extract_block_into(y_data, width, height, mx + 8, my, block)
-          prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
+          yield :y, zbuf
           extract_block_into(y_data, width, height, mx, my + 8, block)
-          prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
+          yield :y, zbuf
           extract_block_into(y_data, width, height, mx + 8, my + 8, block)
-          prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
+          yield :y, zbuf
-          # 1 Cb block
           extract_block_into(cb_sub, sub_w, sub_h, mx >> 1, my >> 1, block)
-          prev_dc_cb = encode_block(block, temp, dct, qbuf, zbuf, chr_qt, chr_huff, prev_dc_cb, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, chr_qt)
+          yield :cb, zbuf
-          # 1 Cr block
           extract_block_into(cr_sub, sub_w, sub_h, mx >> 1, my >> 1, block)
-          prev_dc_cr = encode_block(block, temp, dct, qbuf, zbuf, chr_qt, chr_huff, prev_dc_cr, bit_writer)
+          transform_block(block, temp, dct, qbuf, zbuf, chr_qt)
+          yield :cr, zbuf
         end
       end
-      bit_writer.flush
-      bit_writer.bytes
     end
-    def write_color_jfif(io, width, height, lum_qt, chr_qt, scan_data)
+    def write_color_jfif(io, width, height, lum_qt, chr_qt, scan_data,
+                         lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values,
+                         chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values)
       jfif = JFIFWriter.new(io, scramble_quantization: @scramble_quantization)
       jfif.write_soi
       jfif.write_app0
       jfif.write_dqt(lum_qt, 0)
       jfif.write_dqt(chr_qt, 1)
       jfif.write_sof0(width, height, [[1, 2, 2, 0], [2, 1, 1, 1], [3, 1, 1, 1]])
-      jfif.write_dht(0, 0, Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES)
-      jfif.write_dht(1, 0, Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES)
-      jfif.write_dht(0, 1, Huffman::DC_CHROMINANCE_BITS, Huffman::DC_CHROMINANCE_VALUES)
-      jfif.write_dht(1, 1, Huffman::AC_CHROMINANCE_BITS, Huffman::AC_CHROMINANCE_VALUES)
+      jfif.write_dht(0, 0, lum_dc_bits, lum_dc_values)
+      jfif.write_dht(1, 0, lum_ac_bits, lum_ac_values)
+      jfif.write_dht(0, 1, chr_dc_bits, chr_dc_values)
+      jfif.write_dht(1, 1, chr_ac_bits, chr_ac_values)
       jfif.write_sos([[1, 0, 0], [2, 1, 1], [3, 1, 1]])
       jfif.write_scan_data(scan_data)
       jfif.write_eoi
@@ -214,22 +323,46 @@ module PureJPEG
     # --- Shared block pipeline (all buffers pre-allocated) ---
-    def encode_block(block, temp, dct, qbuf, zbuf, qtable, huff, prev_dc, bit_writer)
+    def transform_block(block, temp, dct, qbuf, zbuf, qtable)
       DCT.forward!(block, temp, dct)
       Quantization.quantize!(dct, qtable, qbuf)
       Zigzag.reorder!(qbuf, zbuf)
-      huff.encode_block(zbuf, prev_dc, bit_writer)
+      zbuf
     end
     # --- Pixel extraction ---
+    # Determine RGB bit shifts for a packed_pixels source.
+    # ChunkyPNG uses (r<<24 | g<<16 | b<<8 | a), Image uses (r<<16 | g<<8 | b).
+    def packed_shifts
+      if source.is_a?(Image)
+        [16, 8, 0]
+      else
+        [24, 16, 8]
+      end
+    end
     def extract_luminance(width, height)
       luminance = Array.new(width * height)
-      height.times do |y|
-        row = y * width
-        width.times do |x|
-          pixel = source[x, y]
-          luminance[row + x] = (0.299 * pixel.r + 0.587 * pixel.g + 0.114 * pixel.b).round.clamp(0, 255)
+      if source.respond_to?(:packed_pixels)
+        packed = source.packed_pixels
+        r_shift, g_shift, b_shift = packed_shifts
+        i = 0
+        (width * height).times do
+          color = packed[i]
+          r = (color >> r_shift) & 0xFF
+          g = (color >> g_shift) & 0xFF
+          b = (color >> b_shift) & 0xFF
+          luminance[i] = (0.299 * r + 0.587 * g + 0.114 * b).round.clamp(0, 255)
+          i += 1
+        end
+      else
+        height.times do |y|
+          row = y * width
+          width.times do |x|
+            pixel = source[x, y]
+            luminance[row + x] = (0.299 * pixel.r + 0.587 * pixel.g + 0.114 * pixel.b).round.clamp(0, 255)
+          end
         end
       end
       luminance
@@ -241,15 +374,31 @@ module PureJPEG
       cb_data = Array.new(size)
       cr_data = Array.new(size)
-      height.times do |py|
-        row = py * width
-        width.times do |px|
-          pixel = source[px, py]
-          r = pixel.r; g = pixel.g; b = pixel.b
-          i = row + px
+      if source.respond_to?(:packed_pixels)
+        packed = source.packed_pixels
+        r_shift, g_shift, b_shift = packed_shifts
+        i = 0
+        size.times do
+          color = packed[i]
+          r = (color >> r_shift) & 0xFF
+          g = (color >> g_shift) & 0xFF
+          b = (color >> b_shift) & 0xFF
           y_data[i]  = ( 0.299    * r + 0.587    * g + 0.114    * b).round.clamp(0, 255)
           cb_data[i] = (-0.168736 * r - 0.331264 * g + 0.5      * b + 128.0).round.clamp(0, 255)
           cr_data[i] = ( 0.5      * r - 0.418688 * g - 0.081312 * b + 128.0).round.clamp(0, 255)
+          i += 1
+        end
+      else
+        height.times do |py|
+          row = py * width
+          width.times do |px|
+            pixel = source[px, py]
+            r = pixel.r; g = pixel.g; b = pixel.b
+            i = row + px
+            y_data[i]  = ( 0.299    * r + 0.587    * g + 0.114    * b).round.clamp(0, 255)
+            cb_data[i] = (-0.168736 * r - 0.331264 * g + 0.5      * b + 128.0).round.clamp(0, 255)
+            cr_data[i] = ( 0.5      * r - 0.418688 * g - 0.081312 * b + 128.0).round.clamp(0, 255)
+          end
         end
       end

data/lib/pure_jpeg/huffman/decoder.rb CHANGED Viewed

@@ -33,7 +33,7 @@ module PureJPEG
             return @values[@val_ptr[len] + code - @min_code[len]]
           end
         end
-        raise "Invalid Huffman code"
+        raise PureJPEG::DecodeError, "Invalid Huffman code"
       end
     end
   end

data/lib/pure_jpeg/huffman/encoder.rb CHANGED Viewed

@@ -3,6 +3,56 @@
 module PureJPEG
   module Huffman
     class Encoder
+      def self.category_and_bits(value)
+        return [0, 0] if value == 0
+        abs_val = value.abs
+        cat = 0
+        v = abs_val
+        while v > 0
+          cat += 1
+          v >>= 1
+        end
+        bits = value > 0 ? value : value + (1 << cat) - 1
+        [cat, bits]
+      end
+      def self.each_ac_item(zigzag)
+        last_nonzero = 63
+        last_nonzero -= 1 while last_nonzero > 0 && zigzag[last_nonzero] == 0
+        if last_nonzero == 0
+          yield 0x00, 0
+          return
+        end
+        i = 1
+        while i <= last_nonzero
+          run = 0
+          while i <= last_nonzero && zigzag[i] == 0
+            run += 1
+            i += 1
+          end
+          while run >= 16
+            yield 0xF0, 0
+            run -= 16
+          end
+          value = zigzag[i]
+          cat, = category_and_bits(value)
+          yield (run << 4) | cat, value
+          i += 1
+        end
+        yield 0x00, 0 if last_nonzero < 63
+      end
+      def self.each_ac_symbol(zigzag)
+        each_ac_item(zigzag) do |symbol, _value|
+          yield symbol
+        end
+      end
       def initialize(dc_table, ac_table)
         @dc_table = dc_table
         @ac_table = ac_table
@@ -23,65 +73,43 @@ module PureJPEG
       private
       def encode_dc(diff, writer)
-        cat, bits = category_and_bits(diff)
+        cat, bits = self.class.category_and_bits(diff)
         code, length = @dc_table[cat]
         writer.write_bits(code, length)
         writer.write_bits(bits, cat) if cat > 0
       end
       def encode_ac(zigzag, writer)
-        last_nonzero = 63
-        last_nonzero -= 1 while last_nonzero > 0 && zigzag[last_nonzero] == 0
+        self.class.each_ac_item(zigzag) do |symbol, value|
+          code, length = @ac_table[symbol]
+          writer.write_bits(code, length)
+          next if symbol == 0x00 || symbol == 0xF0
-        if last_nonzero == 0 && zigzag[0] == zigzag[0]  # AC starts at index 1
-          # All AC coefficients are zero
-          eob = @ac_table[0x00]
-          writer.write_bits(eob[0], eob[1])
-          return
+          cat, bits = self.class.category_and_bits(value)
+          writer.write_bits(bits, cat)
         end
+      end
+    end
-        i = 1
-        while i <= last_nonzero
-          run = 0
-          while i <= last_nonzero && zigzag[i] == 0
-            run += 1
-            i += 1
-          end
+    class FrequencyCounter
+      attr_reader :dc_frequencies, :ac_frequencies
-          # Emit ZRL (16 zeros) symbols as needed
-          while run >= 16
-            zrl = @ac_table[0xF0]
-            writer.write_bits(zrl[0], zrl[1])
-            run -= 16
-          end
+      def initialize
+        @dc_frequencies = Array.new(256, 0)
+        @ac_frequencies = Array.new(256, 0)
+        @prev_dc = Hash.new(0)
+      end
-          cat, bits = category_and_bits(zigzag[i])
-          symbol = (run << 4) | cat
-          code, length = @ac_table[symbol]
-          writer.write_bits(code, length)
-          writer.write_bits(bits, cat) if cat > 0
-          i += 1
-        end
+      def observe_block(zigzag, state_key)
+        diff = zigzag[0] - @prev_dc[state_key]
+        @prev_dc[state_key] = zigzag[0]
-        # EOB if we didn't reach position 63
-        if last_nonzero < 63
-          eob = @ac_table[0x00]
-          writer.write_bits(eob[0], eob[1])
-        end
-      end
+        cat, = Encoder.category_and_bits(diff)
+        @dc_frequencies[cat] += 1
-      # Returns [category, encoded_bits] for a coefficient value.
-      def category_and_bits(value)
-        return [0, 0] if value == 0
-        abs_val = value.abs
-        cat = 0
-        v = abs_val
-        while v > 0
-          cat += 1
-          v >>= 1
+        Encoder.each_ac_symbol(zigzag) do |symbol|
+          @ac_frequencies[symbol] += 1
         end
-        bits = value > 0 ? value : value + (1 << cat) - 1
-        [cat, bits]
       end
     end
   end

data/lib/pure_jpeg/huffman/tables.rb CHANGED Viewed

@@ -64,8 +64,9 @@ module PureJPEG
     # Build a lookup table: symbol -> [code, code_length]
     # from the bits/values specification.
+    # Returns an Array indexed by symbol value for O(1) lookup.
     def self.build_table(bits, values)
-      table = {}
+      table = Array.new(256)
       code = 0
       k = 0
@@ -80,5 +81,96 @@ module PureJPEG
       table
     end
+    # Build a JPEG canonical Huffman table definition from symbol frequencies.
+    # Returns [bits, values], where bits has 16 entries for code lengths 1..16.
+    def self.optimize_table(frequencies)
+      lengths = build_code_lengths(frequencies)
+      counts = length_counts(lengths)
+      trim_counts_to_jpeg_limit!(counts)
+      symbols = (0...256).select { |symbol| frequencies[symbol].positive? }
+      symbols.sort_by! { |symbol| [-frequencies[symbol], symbol] }
+      bits = Array.new(16, 0)
+      values = []
+      index = 0
+      1.upto(16) do |length|
+        count = counts[length]
+        bits[length - 1] = count
+        count.times do
+          values << symbols[index]
+          index += 1
+        end
+      end
+      [bits.freeze, values.freeze]
+    end
+    def self.build_code_lengths(frequencies)
+      nodes = []
+      256.times do |symbol|
+        freq = frequencies[symbol]
+        nodes << { freq: freq, symbol: symbol } if freq.positive?
+      end
+      nodes << { freq: 1, symbol: 256 }
+      while nodes.length > 1
+        nodes.sort_by! do |node|
+          [node[:freq], node[:symbol] || 257]
+        end
+        left = nodes.shift
+        right = nodes.shift
+        nodes << { freq: left[:freq] + right[:freq], left: left, right: right }
+      end
+      lengths = Array.new(257, 0)
+      assign_code_lengths(nodes.first, 0, lengths)
+      lengths
+    end
+    private_class_method :build_code_lengths
+    def self.assign_code_lengths(node, depth, lengths)
+      if node[:symbol]
+        lengths[node[:symbol]] = depth.zero? ? 1 : depth
+        return
+      end
+      assign_code_lengths(node[:left], depth + 1, lengths)
+      assign_code_lengths(node[:right], depth + 1, lengths)
+    end
+    private_class_method :assign_code_lengths
+    def self.length_counts(lengths)
+      counts = Array.new([lengths.max + 1, 33].max, 0)
+      lengths.each do |length|
+        counts[length] += 1 if length.positive?
+      end
+      counts
+    end
+    private_class_method :length_counts
+    def self.trim_counts_to_jpeg_limit!(counts)
+      max_length = counts.length - 1
+      while max_length > 16
+        while counts[max_length].positive?
+          j = max_length - 2
+          j -= 1 while j.positive? && counts[j].zero?
+          raise ArgumentError, "Unable to limit Huffman code lengths" unless j.positive?
+          counts[max_length] -= 2
+          counts[max_length - 1] += 1
+          counts[j + 1] += 2
+          counts[j] -= 1
+        end
+        max_length -= 1
+      end
+      max_length = 16
+      max_length -= 1 while max_length.positive? && counts[max_length].zero?
+      counts[max_length] -= 1
+    end
+    private_class_method :trim_counts_to_jpeg_limit!
   end
 end