pure_jpeg 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,8 @@ module PureJPEG
15
15
  attr_reader :quality
16
16
  # @return [Boolean] whether grayscale mode is enabled
17
17
  attr_reader :grayscale
18
+ # @return [Boolean] whether image-specific Huffman tables are generated
19
+ attr_reader :optimize_huffman
18
20
 
19
21
  # Create a new encoder for the given pixel source.
20
22
  #
@@ -34,13 +36,19 @@ module PureJPEG
34
36
  # @param scramble_quantization [Boolean] write quantization tables in raster
35
37
  # order instead of zigzag (non-spec-compliant; recreates the "early digicam"
36
38
  # artifact look when decoded by standard viewers)
39
+ # @param optimize_huffman [Boolean] build image-specific Huffman tables with
40
+ # an additional analysis pass (default false)
37
41
  def initialize(source, quality: 85, grayscale: false, chroma_quality: nil,
38
42
  luminance_table: nil, chrominance_table: nil,
39
- quantization_modifier: nil, scramble_quantization: false)
43
+ quantization_modifier: nil, scramble_quantization: false,
44
+ optimize_huffman: false)
40
45
  @source = source
41
46
  @quality = quality
42
47
  @grayscale = grayscale
48
+ @optimize_huffman = optimize_huffman
43
49
  @chroma_quality = chroma_quality || quality
50
+ validate_qtable!(luminance_table, "luminance_table") if luminance_table
51
+ validate_qtable!(chrominance_table, "chrominance_table") if chrominance_table
44
52
  @luminance_table = luminance_table
45
53
  @chrominance_table = chrominance_table
46
54
  @quantization_modifier = quantization_modifier
@@ -52,7 +60,7 @@ module PureJPEG
52
60
  # @param path [String] output file path
53
61
  # @return [void]
54
62
  def write(path)
55
- File.open(path, "wb") { |f| encode(f) }
63
+ File.binwrite(path, to_bytes)
56
64
  end
57
65
 
58
66
  # Return the encoded JPEG as a binary string.
@@ -78,65 +86,138 @@ module PureJPEG
78
86
  table
79
87
  end
80
88
 
89
+ def validate_qtable!(table, name)
90
+ raise ArgumentError, "#{name} must have exactly 64 elements (got #{table.length})" unless table.length == 64
91
+ unless table.all? { |v| v.is_a?(Integer) && v >= 1 && v <= 255 }
92
+ raise ArgumentError, "#{name} elements must be integers between 1 and 255"
93
+ end
94
+ end
95
+
81
96
  def encode(io)
82
97
  width = source.width
83
98
  height = source.height
84
99
 
100
+ raise ArgumentError, "Width must be a positive integer (got #{width.inspect})" unless width.is_a?(Integer) && width > 0
101
+ raise ArgumentError, "Height must be a positive integer (got #{height.inspect})" unless height.is_a?(Integer) && height > 0
102
+ raise ArgumentError, "Width #{width} exceeds maximum of #{MAX_DIMENSION}" if width > MAX_DIMENSION
103
+ raise ArgumentError, "Height #{height} exceeds maximum of #{MAX_DIMENSION}" if height > MAX_DIMENSION
104
+
85
105
  lum_qtable = build_lum_qtable
86
- lum_dc = Huffman.build_table(Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES)
87
- lum_ac = Huffman.build_table(Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES)
88
- lum_huff = Huffman::Encoder.new(lum_dc, lum_ac)
89
106
 
90
107
  if grayscale
91
- scan_data = encode_grayscale(width, height, lum_qtable, lum_huff)
92
- write_grayscale_jfif(io, width, height, lum_qtable, scan_data)
108
+ y_data = extract_luminance(width, height)
109
+ lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values =
110
+ if optimize_huffman
111
+ counter = collect_grayscale_frequencies(y_data, width, height, lum_qtable)
112
+ dc_bits, dc_values = Huffman.optimize_table(counter.dc_frequencies)
113
+ ac_bits, ac_values = Huffman.optimize_table(counter.ac_frequencies)
114
+ [dc_bits, dc_values, ac_bits, ac_values]
115
+ else
116
+ [Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES,
117
+ Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES]
118
+ end
119
+
120
+ lum_huff = Huffman::Encoder.new(
121
+ Huffman.build_table(lum_dc_bits, lum_dc_values),
122
+ Huffman.build_table(lum_ac_bits, lum_ac_values)
123
+ )
124
+
125
+ scan_data = encode_grayscale_data(y_data, width, height, lum_qtable, lum_huff)
126
+ write_grayscale_jfif(io, width, height, lum_qtable, scan_data,
127
+ lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values)
93
128
  else
94
129
  chr_qtable = build_chr_qtable
95
- chr_dc = Huffman.build_table(Huffman::DC_CHROMINANCE_BITS, Huffman::DC_CHROMINANCE_VALUES)
96
- chr_ac = Huffman.build_table(Huffman::AC_CHROMINANCE_BITS, Huffman::AC_CHROMINANCE_VALUES)
97
- chr_huff = Huffman::Encoder.new(chr_dc, chr_ac)
98
-
99
- scan_data = encode_color(width, height, lum_qtable, chr_qtable, lum_huff, chr_huff)
100
- write_color_jfif(io, width, height, lum_qtable, chr_qtable, scan_data)
130
+ y_data, cb_data, cr_data = extract_ycbcr(width, height)
131
+ sub_w = (width + 1) / 2
132
+ sub_h = (height + 1) / 2
133
+ cb_sub = downsample(cb_data, width, height, sub_w, sub_h)
134
+ cr_sub = downsample(cr_data, width, height, sub_w, sub_h)
135
+
136
+ lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values,
137
+ chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values =
138
+ if optimize_huffman
139
+ lum_counter, chr_counter = collect_color_frequencies(
140
+ y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qtable, chr_qtable
141
+ )
142
+ dc_bits, dc_values = Huffman.optimize_table(lum_counter.dc_frequencies)
143
+ ac_bits, ac_values = Huffman.optimize_table(lum_counter.ac_frequencies)
144
+ chr_dc_bits, chr_dc_values = Huffman.optimize_table(chr_counter.dc_frequencies)
145
+ chr_ac_bits, chr_ac_values = Huffman.optimize_table(chr_counter.ac_frequencies)
146
+ [dc_bits, dc_values, ac_bits, ac_values, chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values]
147
+ else
148
+ [Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES,
149
+ Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES,
150
+ Huffman::DC_CHROMINANCE_BITS, Huffman::DC_CHROMINANCE_VALUES,
151
+ Huffman::AC_CHROMINANCE_BITS, Huffman::AC_CHROMINANCE_VALUES]
152
+ end
153
+
154
+ lum_huff = Huffman::Encoder.new(
155
+ Huffman.build_table(lum_dc_bits, lum_dc_values),
156
+ Huffman.build_table(lum_ac_bits, lum_ac_values)
157
+ )
158
+ chr_huff = Huffman::Encoder.new(
159
+ Huffman.build_table(chr_dc_bits, chr_dc_values),
160
+ Huffman.build_table(chr_ac_bits, chr_ac_values)
161
+ )
162
+
163
+ scan_data = encode_color_data(
164
+ y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qtable, chr_qtable, lum_huff, chr_huff
165
+ )
166
+ write_color_jfif(io, width, height, lum_qtable, chr_qtable, scan_data,
167
+ lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values,
168
+ chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values)
101
169
  end
102
170
  end
103
171
 
104
172
  # --- Grayscale encoding ---
105
173
 
106
- def encode_grayscale(width, height, qtable, huff)
107
- y_data = extract_luminance(width, height)
174
+ def collect_grayscale_frequencies(y_data, width, height, qtable)
175
+ counter = Huffman::FrequencyCounter.new
176
+ each_grayscale_block(y_data, width, height, qtable) do |zbuf|
177
+ counter.observe_block(zbuf, :y)
178
+ end
179
+ counter
180
+ end
181
+
182
+ def encode_grayscale_data(y_data, width, height, qtable, huff)
183
+ bit_writer = BitWriter.new
184
+ prev_dc = 0
185
+
186
+ each_grayscale_block(y_data, width, height, qtable) do |zbuf|
187
+ prev_dc = huff.encode_block(zbuf, prev_dc, bit_writer)
188
+ end
189
+
190
+ bit_writer.flush
191
+ bit_writer.bytes
192
+ end
193
+
194
+ def each_grayscale_block(y_data, width, height, qtable)
108
195
  padded_w = (width + 7) & ~7
109
196
  padded_h = (height + 7) & ~7
110
197
 
111
- # Reusable buffers
112
198
  block = Array.new(64, 0.0)
113
199
  temp = Array.new(64, 0.0)
114
200
  dct = Array.new(64, 0.0)
115
201
  qbuf = Array.new(64, 0)
116
202
  zbuf = Array.new(64, 0)
117
203
 
118
- bit_writer = BitWriter.new
119
- prev_dc = 0
120
-
121
204
  (0...padded_h).step(8) do |by|
122
205
  (0...padded_w).step(8) do |bx|
123
206
  extract_block_into(y_data, width, height, bx, by, block)
124
- prev_dc = encode_block(block, temp, dct, qbuf, zbuf, qtable, huff, prev_dc, bit_writer)
207
+ transform_block(block, temp, dct, qbuf, zbuf, qtable)
208
+ yield zbuf
125
209
  end
126
210
  end
127
-
128
- bit_writer.flush
129
- bit_writer.bytes
130
211
  end
131
212
 
132
- def write_grayscale_jfif(io, width, height, qtable, scan_data)
213
+ def write_grayscale_jfif(io, width, height, qtable, scan_data, dc_bits, dc_values, ac_bits, ac_values)
133
214
  jfif = JFIFWriter.new(io, scramble_quantization: @scramble_quantization)
134
215
  jfif.write_soi
135
216
  jfif.write_app0
136
217
  jfif.write_dqt(qtable, 0)
137
218
  jfif.write_sof0(width, height, [[1, 1, 1, 0]])
138
- jfif.write_dht(0, 0, Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES)
139
- jfif.write_dht(1, 0, Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES)
219
+ jfif.write_dht(0, 0, dc_bits, dc_values)
220
+ jfif.write_dht(1, 0, ac_bits, ac_values)
140
221
  jfif.write_sos([[1, 0, 0]])
141
222
  jfif.write_scan_data(scan_data)
142
223
  jfif.write_eoi
@@ -144,69 +225,97 @@ module PureJPEG
144
225
 
145
226
  # --- Color encoding (YCbCr 4:2:0) ---
146
227
 
147
- def encode_color(width, height, lum_qt, chr_qt, lum_huff, chr_huff)
148
- y_data, cb_data, cr_data = extract_ycbcr(width, height)
228
+ def collect_color_frequencies(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt)
229
+ lum_counter = Huffman::FrequencyCounter.new
230
+ chr_counter = Huffman::FrequencyCounter.new
231
+
232
+ each_color_block(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt) do |component, zbuf|
233
+ case component
234
+ when :y
235
+ lum_counter.observe_block(zbuf, :y)
236
+ when :cb
237
+ chr_counter.observe_block(zbuf, :cb)
238
+ when :cr
239
+ chr_counter.observe_block(zbuf, :cr)
240
+ end
241
+ end
242
+
243
+ [lum_counter, chr_counter]
244
+ end
245
+
246
+ def encode_color_data(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt, lum_huff, chr_huff)
247
+ bit_writer = BitWriter.new
248
+ prev_dc_y = 0
249
+ prev_dc_cb = 0
250
+ prev_dc_cr = 0
251
+
252
+ each_color_block(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt) do |component, zbuf|
253
+ case component
254
+ when :y
255
+ prev_dc_y = lum_huff.encode_block(zbuf, prev_dc_y, bit_writer)
256
+ when :cb
257
+ prev_dc_cb = chr_huff.encode_block(zbuf, prev_dc_cb, bit_writer)
258
+ when :cr
259
+ prev_dc_cr = chr_huff.encode_block(zbuf, prev_dc_cr, bit_writer)
260
+ end
261
+ end
149
262
 
150
- sub_w = (width + 1) / 2
151
- sub_h = (height + 1) / 2
152
- cb_sub = downsample(cb_data, width, height, sub_w, sub_h)
153
- cr_sub = downsample(cr_data, width, height, sub_w, sub_h)
263
+ bit_writer.flush
264
+ bit_writer.bytes
265
+ end
154
266
 
267
+ def each_color_block(y_data, cb_sub, cr_sub, width, height, sub_w, sub_h, lum_qt, chr_qt)
155
268
  mcu_w = (width + 15) & ~15
156
269
  mcu_h = (height + 15) & ~15
157
270
 
158
- # Reusable buffers
159
271
  block = Array.new(64, 0.0)
160
272
  temp = Array.new(64, 0.0)
161
273
  dct = Array.new(64, 0.0)
162
274
  qbuf = Array.new(64, 0)
163
275
  zbuf = Array.new(64, 0)
164
276
 
165
- bit_writer = BitWriter.new
166
- prev_dc_y = 0
167
- prev_dc_cb = 0
168
- prev_dc_cr = 0
169
-
170
277
  (0...mcu_h).step(16) do |my|
171
278
  (0...mcu_w).step(16) do |mx|
172
- # 4 luminance blocks
173
279
  extract_block_into(y_data, width, height, mx, my, block)
174
- prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
280
+ transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
281
+ yield :y, zbuf
175
282
 
176
283
  extract_block_into(y_data, width, height, mx + 8, my, block)
177
- prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
284
+ transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
285
+ yield :y, zbuf
178
286
 
179
287
  extract_block_into(y_data, width, height, mx, my + 8, block)
180
- prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
288
+ transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
289
+ yield :y, zbuf
181
290
 
182
291
  extract_block_into(y_data, width, height, mx + 8, my + 8, block)
183
- prev_dc_y = encode_block(block, temp, dct, qbuf, zbuf, lum_qt, lum_huff, prev_dc_y, bit_writer)
292
+ transform_block(block, temp, dct, qbuf, zbuf, lum_qt)
293
+ yield :y, zbuf
184
294
 
185
- # 1 Cb block
186
295
  extract_block_into(cb_sub, sub_w, sub_h, mx >> 1, my >> 1, block)
187
- prev_dc_cb = encode_block(block, temp, dct, qbuf, zbuf, chr_qt, chr_huff, prev_dc_cb, bit_writer)
296
+ transform_block(block, temp, dct, qbuf, zbuf, chr_qt)
297
+ yield :cb, zbuf
188
298
 
189
- # 1 Cr block
190
299
  extract_block_into(cr_sub, sub_w, sub_h, mx >> 1, my >> 1, block)
191
- prev_dc_cr = encode_block(block, temp, dct, qbuf, zbuf, chr_qt, chr_huff, prev_dc_cr, bit_writer)
300
+ transform_block(block, temp, dct, qbuf, zbuf, chr_qt)
301
+ yield :cr, zbuf
192
302
  end
193
303
  end
194
-
195
- bit_writer.flush
196
- bit_writer.bytes
197
304
  end
198
305
 
199
- def write_color_jfif(io, width, height, lum_qt, chr_qt, scan_data)
306
+ def write_color_jfif(io, width, height, lum_qt, chr_qt, scan_data,
307
+ lum_dc_bits, lum_dc_values, lum_ac_bits, lum_ac_values,
308
+ chr_dc_bits, chr_dc_values, chr_ac_bits, chr_ac_values)
200
309
  jfif = JFIFWriter.new(io, scramble_quantization: @scramble_quantization)
201
310
  jfif.write_soi
202
311
  jfif.write_app0
203
312
  jfif.write_dqt(lum_qt, 0)
204
313
  jfif.write_dqt(chr_qt, 1)
205
314
  jfif.write_sof0(width, height, [[1, 2, 2, 0], [2, 1, 1, 1], [3, 1, 1, 1]])
206
- jfif.write_dht(0, 0, Huffman::DC_LUMINANCE_BITS, Huffman::DC_LUMINANCE_VALUES)
207
- jfif.write_dht(1, 0, Huffman::AC_LUMINANCE_BITS, Huffman::AC_LUMINANCE_VALUES)
208
- jfif.write_dht(0, 1, Huffman::DC_CHROMINANCE_BITS, Huffman::DC_CHROMINANCE_VALUES)
209
- jfif.write_dht(1, 1, Huffman::AC_CHROMINANCE_BITS, Huffman::AC_CHROMINANCE_VALUES)
315
+ jfif.write_dht(0, 0, lum_dc_bits, lum_dc_values)
316
+ jfif.write_dht(1, 0, lum_ac_bits, lum_ac_values)
317
+ jfif.write_dht(0, 1, chr_dc_bits, chr_dc_values)
318
+ jfif.write_dht(1, 1, chr_ac_bits, chr_ac_values)
210
319
  jfif.write_sos([[1, 0, 0], [2, 1, 1], [3, 1, 1]])
211
320
  jfif.write_scan_data(scan_data)
212
321
  jfif.write_eoi
@@ -214,22 +323,46 @@ module PureJPEG
214
323
 
215
324
  # --- Shared block pipeline (all buffers pre-allocated) ---
216
325
 
217
- def encode_block(block, temp, dct, qbuf, zbuf, qtable, huff, prev_dc, bit_writer)
326
+ def transform_block(block, temp, dct, qbuf, zbuf, qtable)
218
327
  DCT.forward!(block, temp, dct)
219
328
  Quantization.quantize!(dct, qtable, qbuf)
220
329
  Zigzag.reorder!(qbuf, zbuf)
221
- huff.encode_block(zbuf, prev_dc, bit_writer)
330
+ zbuf
222
331
  end
223
332
 
224
333
  # --- Pixel extraction ---
225
334
 
335
+ # Determine RGB bit shifts for a packed_pixels source.
336
+ # ChunkyPNG uses (r<<24 | g<<16 | b<<8 | a), Image uses (r<<16 | g<<8 | b).
337
+ def packed_shifts
338
+ if source.is_a?(Image)
339
+ [16, 8, 0]
340
+ else
341
+ [24, 16, 8]
342
+ end
343
+ end
344
+
226
345
  def extract_luminance(width, height)
227
346
  luminance = Array.new(width * height)
228
- height.times do |y|
229
- row = y * width
230
- width.times do |x|
231
- pixel = source[x, y]
232
- luminance[row + x] = (0.299 * pixel.r + 0.587 * pixel.g + 0.114 * pixel.b).round.clamp(0, 255)
347
+ if source.respond_to?(:packed_pixels)
348
+ packed = source.packed_pixels
349
+ r_shift, g_shift, b_shift = packed_shifts
350
+ i = 0
351
+ (width * height).times do
352
+ color = packed[i]
353
+ r = (color >> r_shift) & 0xFF
354
+ g = (color >> g_shift) & 0xFF
355
+ b = (color >> b_shift) & 0xFF
356
+ luminance[i] = (0.299 * r + 0.587 * g + 0.114 * b).round.clamp(0, 255)
357
+ i += 1
358
+ end
359
+ else
360
+ height.times do |y|
361
+ row = y * width
362
+ width.times do |x|
363
+ pixel = source[x, y]
364
+ luminance[row + x] = (0.299 * pixel.r + 0.587 * pixel.g + 0.114 * pixel.b).round.clamp(0, 255)
365
+ end
233
366
  end
234
367
  end
235
368
  luminance
@@ -241,15 +374,31 @@ module PureJPEG
241
374
  cb_data = Array.new(size)
242
375
  cr_data = Array.new(size)
243
376
 
244
- height.times do |py|
245
- row = py * width
246
- width.times do |px|
247
- pixel = source[px, py]
248
- r = pixel.r; g = pixel.g; b = pixel.b
249
- i = row + px
377
+ if source.respond_to?(:packed_pixels)
378
+ packed = source.packed_pixels
379
+ r_shift, g_shift, b_shift = packed_shifts
380
+ i = 0
381
+ size.times do
382
+ color = packed[i]
383
+ r = (color >> r_shift) & 0xFF
384
+ g = (color >> g_shift) & 0xFF
385
+ b = (color >> b_shift) & 0xFF
250
386
  y_data[i] = ( 0.299 * r + 0.587 * g + 0.114 * b).round.clamp(0, 255)
251
387
  cb_data[i] = (-0.168736 * r - 0.331264 * g + 0.5 * b + 128.0).round.clamp(0, 255)
252
388
  cr_data[i] = ( 0.5 * r - 0.418688 * g - 0.081312 * b + 128.0).round.clamp(0, 255)
389
+ i += 1
390
+ end
391
+ else
392
+ height.times do |py|
393
+ row = py * width
394
+ width.times do |px|
395
+ pixel = source[px, py]
396
+ r = pixel.r; g = pixel.g; b = pixel.b
397
+ i = row + px
398
+ y_data[i] = ( 0.299 * r + 0.587 * g + 0.114 * b).round.clamp(0, 255)
399
+ cb_data[i] = (-0.168736 * r - 0.331264 * g + 0.5 * b + 128.0).round.clamp(0, 255)
400
+ cr_data[i] = ( 0.5 * r - 0.418688 * g - 0.081312 * b + 128.0).round.clamp(0, 255)
401
+ end
253
402
  end
254
403
  end
255
404
 
@@ -33,7 +33,7 @@ module PureJPEG
33
33
  return @values[@val_ptr[len] + code - @min_code[len]]
34
34
  end
35
35
  end
36
- raise "Invalid Huffman code"
36
+ raise PureJPEG::DecodeError, "Invalid Huffman code"
37
37
  end
38
38
  end
39
39
  end
@@ -3,6 +3,56 @@
3
3
  module PureJPEG
4
4
  module Huffman
5
5
  class Encoder
6
+ def self.category_and_bits(value)
7
+ return [0, 0] if value == 0
8
+ abs_val = value.abs
9
+ cat = 0
10
+ v = abs_val
11
+ while v > 0
12
+ cat += 1
13
+ v >>= 1
14
+ end
15
+ bits = value > 0 ? value : value + (1 << cat) - 1
16
+ [cat, bits]
17
+ end
18
+
19
+ def self.each_ac_item(zigzag)
20
+ last_nonzero = 63
21
+ last_nonzero -= 1 while last_nonzero > 0 && zigzag[last_nonzero] == 0
22
+
23
+ if last_nonzero == 0
24
+ yield 0x00, 0
25
+ return
26
+ end
27
+
28
+ i = 1
29
+ while i <= last_nonzero
30
+ run = 0
31
+ while i <= last_nonzero && zigzag[i] == 0
32
+ run += 1
33
+ i += 1
34
+ end
35
+
36
+ while run >= 16
37
+ yield 0xF0, 0
38
+ run -= 16
39
+ end
40
+
41
+ value = zigzag[i]
42
+ cat, = category_and_bits(value)
43
+ yield (run << 4) | cat, value
44
+ i += 1
45
+ end
46
+
47
+ yield 0x00, 0 if last_nonzero < 63
48
+ end
49
+
50
+ def self.each_ac_symbol(zigzag)
51
+ each_ac_item(zigzag) do |symbol, _value|
52
+ yield symbol
53
+ end
54
+ end
55
+
6
56
  def initialize(dc_table, ac_table)
7
57
  @dc_table = dc_table
8
58
  @ac_table = ac_table
@@ -23,65 +73,43 @@ module PureJPEG
23
73
  private
24
74
 
25
75
  def encode_dc(diff, writer)
26
- cat, bits = category_and_bits(diff)
76
+ cat, bits = self.class.category_and_bits(diff)
27
77
  code, length = @dc_table[cat]
28
78
  writer.write_bits(code, length)
29
79
  writer.write_bits(bits, cat) if cat > 0
30
80
  end
31
81
 
32
82
  def encode_ac(zigzag, writer)
33
- last_nonzero = 63
34
- last_nonzero -= 1 while last_nonzero > 0 && zigzag[last_nonzero] == 0
83
+ self.class.each_ac_item(zigzag) do |symbol, value|
84
+ code, length = @ac_table[symbol]
85
+ writer.write_bits(code, length)
86
+ next if symbol == 0x00 || symbol == 0xF0
35
87
 
36
- if last_nonzero == 0 && zigzag[0] == zigzag[0] # AC starts at index 1
37
- # All AC coefficients are zero
38
- eob = @ac_table[0x00]
39
- writer.write_bits(eob[0], eob[1])
40
- return
88
+ cat, bits = self.class.category_and_bits(value)
89
+ writer.write_bits(bits, cat)
41
90
  end
91
+ end
92
+ end
42
93
 
43
- i = 1
44
- while i <= last_nonzero
45
- run = 0
46
- while i <= last_nonzero && zigzag[i] == 0
47
- run += 1
48
- i += 1
49
- end
94
+ class FrequencyCounter
95
+ attr_reader :dc_frequencies, :ac_frequencies
50
96
 
51
- # Emit ZRL (16 zeros) symbols as needed
52
- while run >= 16
53
- zrl = @ac_table[0xF0]
54
- writer.write_bits(zrl[0], zrl[1])
55
- run -= 16
56
- end
97
+ def initialize
98
+ @dc_frequencies = Array.new(256, 0)
99
+ @ac_frequencies = Array.new(256, 0)
100
+ @prev_dc = Hash.new(0)
101
+ end
57
102
 
58
- cat, bits = category_and_bits(zigzag[i])
59
- symbol = (run << 4) | cat
60
- code, length = @ac_table[symbol]
61
- writer.write_bits(code, length)
62
- writer.write_bits(bits, cat) if cat > 0
63
- i += 1
64
- end
103
+ def observe_block(zigzag, state_key)
104
+ diff = zigzag[0] - @prev_dc[state_key]
105
+ @prev_dc[state_key] = zigzag[0]
65
106
 
66
- # EOB if we didn't reach position 63
67
- if last_nonzero < 63
68
- eob = @ac_table[0x00]
69
- writer.write_bits(eob[0], eob[1])
70
- end
71
- end
107
+ cat, = Encoder.category_and_bits(diff)
108
+ @dc_frequencies[cat] += 1
72
109
 
73
- # Returns [category, encoded_bits] for a coefficient value.
74
- def category_and_bits(value)
75
- return [0, 0] if value == 0
76
- abs_val = value.abs
77
- cat = 0
78
- v = abs_val
79
- while v > 0
80
- cat += 1
81
- v >>= 1
110
+ Encoder.each_ac_symbol(zigzag) do |symbol|
111
+ @ac_frequencies[symbol] += 1
82
112
  end
83
- bits = value > 0 ? value : value + (1 << cat) - 1
84
- [cat, bits]
85
113
  end
86
114
  end
87
115
  end
@@ -64,8 +64,9 @@ module PureJPEG
64
64
 
65
65
  # Build a lookup table: symbol -> [code, code_length]
66
66
  # from the bits/values specification.
67
+ # Returns an Array indexed by symbol value for O(1) lookup.
67
68
  def self.build_table(bits, values)
68
- table = {}
69
+ table = Array.new(256)
69
70
  code = 0
70
71
  k = 0
71
72
 
@@ -80,5 +81,96 @@ module PureJPEG
80
81
 
81
82
  table
82
83
  end
84
+
85
+ # Build a JPEG canonical Huffman table definition from symbol frequencies.
86
+ # Returns [bits, values], where bits has 16 entries for code lengths 1..16.
87
+ def self.optimize_table(frequencies)
88
+ lengths = build_code_lengths(frequencies)
89
+ counts = length_counts(lengths)
90
+ trim_counts_to_jpeg_limit!(counts)
91
+
92
+ symbols = (0...256).select { |symbol| frequencies[symbol].positive? }
93
+ symbols.sort_by! { |symbol| [-frequencies[symbol], symbol] }
94
+
95
+ bits = Array.new(16, 0)
96
+ values = []
97
+ index = 0
98
+
99
+ 1.upto(16) do |length|
100
+ count = counts[length]
101
+ bits[length - 1] = count
102
+ count.times do
103
+ values << symbols[index]
104
+ index += 1
105
+ end
106
+ end
107
+
108
+ [bits.freeze, values.freeze]
109
+ end
110
+
111
+ def self.build_code_lengths(frequencies)
112
+ nodes = []
113
+ 256.times do |symbol|
114
+ freq = frequencies[symbol]
115
+ nodes << { freq: freq, symbol: symbol } if freq.positive?
116
+ end
117
+ nodes << { freq: 1, symbol: 256 }
118
+
119
+ while nodes.length > 1
120
+ nodes.sort_by! do |node|
121
+ [node[:freq], node[:symbol] || 257]
122
+ end
123
+ left = nodes.shift
124
+ right = nodes.shift
125
+ nodes << { freq: left[:freq] + right[:freq], left: left, right: right }
126
+ end
127
+
128
+ lengths = Array.new(257, 0)
129
+ assign_code_lengths(nodes.first, 0, lengths)
130
+ lengths
131
+ end
132
+ private_class_method :build_code_lengths
133
+
134
+ def self.assign_code_lengths(node, depth, lengths)
135
+ if node[:symbol]
136
+ lengths[node[:symbol]] = depth.zero? ? 1 : depth
137
+ return
138
+ end
139
+
140
+ assign_code_lengths(node[:left], depth + 1, lengths)
141
+ assign_code_lengths(node[:right], depth + 1, lengths)
142
+ end
143
+ private_class_method :assign_code_lengths
144
+
145
+ def self.length_counts(lengths)
146
+ counts = Array.new([lengths.max + 1, 33].max, 0)
147
+ lengths.each do |length|
148
+ counts[length] += 1 if length.positive?
149
+ end
150
+ counts
151
+ end
152
+ private_class_method :length_counts
153
+
154
+ def self.trim_counts_to_jpeg_limit!(counts)
155
+ max_length = counts.length - 1
156
+ while max_length > 16
157
+ while counts[max_length].positive?
158
+ j = max_length - 2
159
+ j -= 1 while j.positive? && counts[j].zero?
160
+ raise ArgumentError, "Unable to limit Huffman code lengths" unless j.positive?
161
+
162
+ counts[max_length] -= 2
163
+ counts[max_length - 1] += 1
164
+ counts[j + 1] += 2
165
+ counts[j] -= 1
166
+ end
167
+ max_length -= 1
168
+ end
169
+
170
+ max_length = 16
171
+ max_length -= 1 while max_length.positive? && counts[max_length].zero?
172
+ counts[max_length] -= 1
173
+ end
174
+ private_class_method :trim_counts_to_jpeg_limit!
83
175
  end
84
176
  end