sjpeg 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,427 @@
1
+ // Copyright 2017 Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Fast & simple JPEG encoder. Internal header.
16
+ //
17
+ // Author: Skal (pascal.massimino@gmail.com)
18
+
19
+ #ifndef SJPEG_JPEGI_H_
20
+ #define SJPEG_JPEGI_H_
21
+
22
+ #include "sjpeg.h"
23
+ #include "bit_writer.h"
24
+
25
+ #ifndef NULL
26
+ #define NULL 0
27
+ #endif
28
+
29
+ #if defined(__SSE2__)
30
+ #define SJPEG_USE_SSE2
31
+ #endif
32
+
33
+ #if defined(__ARM_NEON__) || defined(__aarch64__)
34
+ #define SJPEG_USE_NEON
35
+ #endif
36
+
37
+ #if defined(SJPEG_NEED_ASM_HEADERS)
38
+ #if defined(SJPEG_USE_SSE2)
39
+ #include <emmintrin.h>
40
+ #endif
41
+
42
+ #if defined(SJPEG_USE_NEON)
43
+ #include <arm_neon.h>
44
+ #endif
45
+ #endif // SJPEG_NEED_ASM_HEADERS
46
+
47
+ #include <assert.h>
48
+
49
+ ////////////////////////////////////////////////////////////////////////////////
50
+
51
+ namespace sjpeg {
52
+
53
+ extern bool SupportsSSE2(); // defined elsewhere; presumably reports whether the SSE2 code paths can be used -- TODO confirm
54
+ extern bool SupportsNEON(); // defined elsewhere; presumably reports whether the NEON code paths can be used -- TODO confirm
55
+
56
+ // Constants below are the two-byte marker codes (0xff prefix + marker id) defined in the JPEG spec
57
+ // ISO/IEC 10918-1 : 1993(E) Table B.1
58
+ // See also: http://www.w3.org/Graphics/JPEG/itu-t81.pdf
59
+
60
+ #define M_SOF0 0xffc0 // start of frame, baseline DCT
61
+ #define M_SOF1 0xffc1 // start of frame, extended sequential DCT
62
+ #define M_DHT 0xffc4 // define Huffman table(s)
63
+ #define M_SOI 0xffd8 // start of image
64
+ #define M_EOI 0xffd9 // end of image
65
+ #define M_SOS 0xffda // start of scan
66
+ #define M_DQT 0xffdb // define quantization table(s)
67
+
68
+ // Forward 8x8 transforms, in-place on coeffs[] (NOTE(review): comment originally said "Fourier"; the type name suggests a forward DCT -- confirm). num_blocks presumably counts 8x8 blocks.
69
+ typedef void (*FdctFunc)(int16_t *coeffs, int num_blocks);
70
+ FdctFunc GetFdct(); // returns the transform function to use (selection logic not visible in this header)
71
+
72
+ // Default luma/chroma matrices, two tables of 64 entries (JPEG spec section K.1)
73
+ extern const uint8_t kDefaultMatrices[2][64];
74
+ extern const uint8_t kZigzag[64]; // 64-entry table; presumably the zigzag scan order -- TODO confirm
75
+
76
+ // Scoring tables, defined in score_7.cc
77
+ extern const int kRGBSize;
78
+ extern const uint8_t kSharpnessScore[]; // size not visible from this header
79
+
80
+ // Internal riskiness scoring functions (declared here, defined elsewhere):
81
+ extern double DCTRiskinessScore(const int16_t yuv[3 * 8], // input: 3 * 8 int16_t values
82
+ int16_t scores[8 * 8]); // presumably an output of 8 * 8 scores -- TODO confirm
83
+ extern double BlockRiskinessScore(const uint8_t* rgb, int stride, // input samples and their stride
84
+ int16_t scores[8 * 8]); // presumably an output of 8 * 8 scores -- TODO confirm
85
+ extern int YUVToRiskIdx(int16_t y, int16_t u, int16_t v); // maps a (y, u, v) triplet to an int index
86
+
87
+ ///////////////////////////////////////////////////////////////////////////////
88
+ // RGB->YUV conversion
89
+
90
+ // Converts a 16x16 RGB block into YUV420, or an 8x8 RGB block into YUV444; result is written to blocks[]
91
+ typedef void (*RGBToYUVBlockFunc)(const uint8_t* src, int src_stride,
92
+ int16_t* blocks);
93
+ extern RGBToYUVBlockFunc GetBlockFunc(bool use_444); // use_444 selects which of the two conversions is returned
94
+
95
+ // convert a row of RGB samples to YUV444 (NOTE(review): the type is named RGBToIndexRowFunc and writes uint16_t values -- confirm what dst[] actually holds)
96
+ typedef void (*RGBToIndexRowFunc)(const uint8_t* src, int width,
97
+ uint16_t* dst);
98
+ extern RGBToIndexRowFunc GetRowFunc();
99
+
100
+ // Enhanced, slower RGB->YUV conversion:
101
+ // y_plane[] has dimension W x H, whereas u_plane[] and v_plane[] have
102
+ // dimension (W + 1)/2 x (H + 1)/2.
103
+ void ApplySharpYUVConversion(const uint8_t* const rgb,
104
+ int W, int H, int stride,
105
+ uint8_t* y_plane,
106
+ uint8_t* u_plane, uint8_t* v_plane);
107
+
108
+ ///////////////////////////////////////////////////////////////////////////////
109
+ // some useful helper functions around quant matrices
110
+
111
+ extern float GetQFactor(float q); // converts quality factor 'q' -> scale factor
112
+ extern void CopyQuantMatrix(const uint8_t in[64], uint8_t out[64]); // 64-entry matrices: in[] is the source, out[] the destination
113
+ extern void SetQuantMatrix(const uint8_t in[64], float q_factor, // q_factor: presumably the scale returned by GetQFactor() -- TODO confirm
114
+ uint8_t out[64]);
115
+ extern void SetMinQuantMatrix(const uint8_t* const m, uint8_t out[64], // fills out[] from m; exact rule not visible here
116
+ int tolerance);
117
+
118
+ ////////////////////////////////////////////////////////////////////////////////
119
+ // main structs
120
+
121
+ // Huffman table: per-bit-length symbol counts plus the symbol map
122
+ struct HuffmanTable {
123
+ uint8_t bits_[16]; // number of symbols per bit count (16 entries)
124
+ const uint8_t* syms_; // symbol map, in increasing bit length (points to const data; ownership not visible here)
125
+ uint8_t nb_syms_; // cached value of sum(bits_[]), i.e. the total number of symbols
126
+ };
127
+
128
+ // Quantizer matrices and derived per-coefficient tables (64 entries each)
129
+ struct Quantizer {
130
+ uint8_t quant_[64]; // direct quantizer matrix
131
+ uint8_t min_quant_[64]; // min quantizer value allowed
132
+ uint16_t iquant_[64]; // precalc'd reciprocal for divisor
133
+ uint16_t qthresh_[64]; // minimal absolute value that produces a non-zero coeff
134
+ uint16_t bias_[64]; // bias, for coring
135
+ const uint32_t* codes_; // codes for bit-cost calculation (points to const data owned elsewhere)
136
+ };
137
+
138
+ // Compact Run/Level storage, kept separate from the DCTCoeffs infos.
139
+ // Run/Level information is not yet entropy-coded at this point, just stored.
140
+ struct RunLevel {
141
+ int16_t run_; // run value (presumably the number of zero coeffs preceding the level -- TODO confirm)
142
+ uint16_t level_; // 4bits for length, 12bits for mantissa
143
+ };
144
+
145
+ // Short summary about one block of quantized coefficients
146
+ struct DCTCoeffs {
147
+ int16_t last_; // last position (inclusive) of non-zero coeff
148
+ int16_t nb_coeffs_; // total number of non-zero AC coeffs
149
+ uint16_t dc_code_; // DC code (4bits for length, 12bits for suffix)
150
+ int8_t idx_; // component idx
151
+ int8_t bias_; // perceptual bias
152
+ };
153
+
154
+ // Histogram of transform coefficients, for adaptive quant matrices
155
+ // * HSHIFT controls the trade-off between storage size for counts[]
156
+ // and precision: the fdct doesn't descale and returns coefficients as
157
+ // signed 16bit values. We are only interested in the absolute values
158
+ // of coefficients that are less than MAX_HISTO_DCT_COEFF, which are our
159
+ // best contributors.
160
+ // Still, storing histogram up to MAX_HISTO_DCT_COEFF can be costly, so
161
+ // we further aggregate the statistics in bins of size 1 << HSHIFT to save
162
+ // space.
163
+ // * HLAMBDA roughly measures how much you are willing to trade in distortion
164
+ // for a 1-bit gain in filesize.
165
+ // * QDELTA_MIN / QDELTA_MAX control how much we allow wandering around the
166
+ // initial point. This helps reduce the CPU cost, while keeping the
167
+ // optimization around the initial desired quality-factor (HLAMBDA also
168
+ // serves this purpose).
169
+ enum { HSHIFT = 2, // size of bins is (1 << HSHIFT)
170
+ HHALF = 1 << (HSHIFT - 1), // half of the bin size
171
+ MAX_HISTO_DCT_COEFF = (1 << 7), // max coefficient, descaled by HSHIFT
172
+ HLAMBDA = 0x80,
173
+ // Limits on range of alternate quantizers explored around
174
+ // the initial value. (see details in AnalyseHisto())
175
+ QDELTA_MIN = -12, QDELTA_MAX = 12,
176
+ QSIZE = QDELTA_MAX + 1 - QDELTA_MIN, // number of deltas in [QDELTA_MIN, QDELTA_MAX], i.e. 25
177
+ };
178
+
179
+ struct Histo { // histogram counters for transform coefficients
180
+ // Reserve one extra entry for counting all coeffs greater than
181
+ // MAX_HISTO_DCT_COEFF. Result isn't used, but it makes the loop easier.
182
+ int counts_[64][MAX_HISTO_DCT_COEFF + 1]; // presumably indexed as [coefficient position][bin] -- TODO confirm
183
+ };
184
+
185
+ ////////////////////////////////////////////////////////////////////////////////
186
+
187
+ struct Encoder { // encoder state; abstract base class (InitComponents() and GetSamples() are pure virtual)
188
+ public:
189
+ Encoder(int W, int H, int step, const uint8_t* rgb, ByteSink* sink); // W, H, step: width, height, stride (see W_, H_, step_ below)
190
+ virtual ~Encoder();
191
+ bool Ok() const { return ok_; } // returns the current value of the ok_ flag
192
+
193
+ // setters
194
+ void SetQuality(float q);
195
+ void SetCompressionMethod(int method);
196
+
197
+ // all-in-one init from EncoderParam.
198
+ bool InitFromParam(const EncoderParam& param);
199
+
200
+ // Main call. Returns false in case of parameter error (setting empty output).
201
+ bool Encode();
202
+
203
+ // these are colorspace-dependent.
204
+ virtual void InitComponents() = 0;
205
+ // Returns MCU samples at macroblock position (mb_x, mb_y) in out_blocks.
206
+ // 'clipped' is true if the MCU is clipped and needs replication.
207
+ virtual void GetSamples(int mb_x, int mb_y, bool clipped,
208
+ int16_t* out_blocks) = 0;
209
+
210
+ private:
211
+ // setters
212
+ void SetQuantMatrices(const uint8_t m[2][64]);
213
+ void SetMinQuantMatrices(const uint8_t m[2][64], int tolerance);
214
+ void SetDefaultMinQuantMatrices();
215
+
216
+ void SetQuantizationBias(int bias, bool use_adaptive);
217
+ void SetQuantizationDeltas(int qdelta_luma, int qdelta_chroma);
218
+
219
+ typedef enum { ICC, EXIF, XMP, MARKERS } MetadataType;
220
+ void SetMetadata(const std::string& data, MetadataType type);
221
+
222
+ private:
223
+ bool CheckBuffers(); // returns false in case of memory alloc error
224
+
225
+ void WriteAPP0();
226
+ bool WriteAPPMarkers(const std::string& data);
227
+ bool WriteEXIF(const std::string& data);
228
+ bool WriteICCP(const std::string& data);
229
+ bool WriteXMP(const std::string& data);
230
+ void WriteDQT();
231
+ void WriteSOF();
232
+ void WriteDHT();
233
+ void WriteSOS();
234
+ void WriteEOI();
235
+
236
+ void ResetDCs();
237
+
238
+ // collect transformed coeffs (unquantized) only
239
+ void CollectCoeffs();
240
+
241
+ // 2-pass Huffman optimizing scan
242
+ void ResetEntropyStats();
243
+ void AddEntropyStats(const DCTCoeffs* const coeffs,
244
+ const RunLevel* const run_levels);
245
+ void CompileEntropyStats();
246
+ void StoreOptimalHuffmanTables(size_t nb_mbs, const DCTCoeffs* coeffs);
247
+
248
+ void SinglePassScan(); // finalizing scan
249
+ void SinglePassScanOptimized(); // optimize the Huffman table + finalize scan
250
+
251
+ // quantize and compute run/levels from already stored coeffs
252
+ void StoreRunLevels(DCTCoeffs* coeffs);
253
+ // just write already stored run_levels & coeffs:
254
+ void FinalPassScan(size_t nb_mbs, const DCTCoeffs* coeffs);
255
+
256
+ // dichotomy loop
257
+ void LoopScan();
258
+
259
+ // Histogram pass
260
+ void CollectHistograms();
261
+
262
+ void BuildHuffmanCodes(const HuffmanTable* const tab,
263
+ uint32_t* const codes);
264
+
265
+ typedef int (*QuantizeBlockFunc)(const int16_t in[64], int idx,
266
+ const Quantizer* const Q,
267
+ DCTCoeffs* const out, RunLevel* const rl);
268
+ static QuantizeBlockFunc quantize_block_;
269
+ static QuantizeBlockFunc GetQuantizeBlockFunc();
270
+
271
+ static int TrellisQuantizeBlock(const int16_t in[64], int idx, // same signature as QuantizeBlockFunc
272
+ const Quantizer* const Q,
273
+ DCTCoeffs* const out,
274
+ RunLevel* const rl);
275
+
276
+ typedef uint32_t (*QuantizeErrorFunc)(const int16_t in[64],
277
+ const Quantizer* const Q);
278
+ static QuantizeErrorFunc quantize_error_;
279
+ static QuantizeErrorFunc GetQuantizeErrorFunc();
280
+
281
+ void CodeBlock(const DCTCoeffs* const coeffs, const RunLevel* const rl);
282
+ // returns DC code (4bits for length, 12bits for suffix), updates DC_predictor
283
+ static uint16_t GenerateDCDiffCode(int DC, int* const DC_predictor);
284
+
285
+ static void FinalizeQuantMatrix(Quantizer* const q, int bias);
286
+ void SetCostCodes(int idx);
287
+ void InitCodes(bool only_ac);
288
+
289
+ size_t HeaderSize() const;
290
+ void BlocksSize(int nb_mbs, const DCTCoeffs* coeffs,
291
+ const RunLevel* rl, sjpeg::BitCounter* const bc) const;
292
+ float ComputeSize(const DCTCoeffs* coeffs);
293
+ float ComputePSNR() const;
294
+
295
+ protected:
296
+ bool SetError(); // records an error. NOTE(review): the comment originally read "sets ok_ to true", but ok_ is documented as "set to false" on failure and Alloc() calls SetError() on a null allocation -- presumably this sets ok_ to false; confirm in the definition
297
+
298
+ // format-specific parameters, set by virtual InitComponents()
299
+ enum { MAX_COMP = 3 };
300
+ int nb_comps_;
301
+ int quant_idx_[MAX_COMP]; // indices for quantization matrices
302
+ int nb_blocks_[MAX_COMP]; // number of 8x8 blocks per components
303
+ uint8_t block_dims_[MAX_COMP]; // component dimensions (8-pixels units)
304
+ int block_w_, block_h_; // maximum mcu width / height
305
+ int mcu_blocks_; // total blocks in mcu (= sum of nb_blocks_[])
306
+
307
+ // data accessible to sub-classes implementing alternate input format
308
+ int W_, H_, step_; // width, height, stride
309
+ int mb_w_, mb_h_; // width / height in units of mcu
310
+ const uint8_t* const rgb_; // samples
311
+
312
+ // Replicate an RGB source sub_w x sub_h block, expanding it to w x h size.
313
+ const uint8_t* GetReplicatedSamples(const uint8_t* rgb, // block source
314
+ int rgb_step, // stride in source
315
+ int sub_w, int sub_h, // sub-block size
316
+ int w, int h); // size of mcu
317
+ // Replicate a YUV sub-block similarly.
318
+ const uint8_t* GetReplicatedYUVSamples(const uint8_t* in, int step,
319
+ int sub_w, int sub_h, int w, int h);
320
+ // set blocks that are totally outside of the picture to an average value
321
+ void AverageExtraLuma(int sub_w, int sub_h, int16_t* out);
322
+ uint8_t replicated_buffer_[3 * 16 * 16]; // tmp buffer for replication
323
+
324
+ sjpeg::RGBToYUVBlockFunc get_yuv_block_;
325
+ static sjpeg::RGBToYUVBlockFunc get_yuv444_block_;
326
+ void SetYUVFormat(bool use_444) { // picks the block conversion function matching use_444
327
+ get_yuv_block_ = sjpeg::GetBlockFunc(use_444);
328
+ }
329
+ bool adaptive_bias_; // if true, use per-block perceptual bias modulation
330
+
331
+ // Memory management
332
+ template<class T> T* Alloc(size_t num) { // allocates room for 'num' objects of type T through memory_hook_; on failure calls SetError() and returns nullptr
333
+ assert(memory_hook_ != nullptr);
334
+ T* const ptr = reinterpret_cast<T*>(memory_hook_->Alloc(sizeof(T) * num)); // NOTE(review): sizeof(T) * num is not checked for overflow
335
+ if (ptr == nullptr) SetError();
336
+ return ptr;
337
+ }
338
+ template<class T> void Free(T* const ptr) { // releases 'ptr' through memory_hook_ (unlike Alloc(), memory_hook_ is not asserted non-null here)
339
+ memory_hook_->Free(reinterpret_cast<void*>(ptr));
340
+ }
341
+
342
+ private:
343
+ bool ok_; // set to false if a memory allocation fails (value returned by Ok())
344
+ sjpeg::BitWriter bw_; // output buffer
345
+
346
+ std::string iccp_, xmp_, exif_, app_markers_; // metadata
347
+
348
+ // compression tools. See sjpeg.h for description of methods.
349
+ bool optimize_size_; // Huffman-optimize the codes (method 0, 3)
350
+ bool use_adaptive_quant_; // modulate the quant matrix (method 3-8)
351
+ bool use_extra_memory_; // save the unquantized coeffs (method 3, 4)
352
+ bool reuse_run_levels_; // save quantized run/levels (method 1, 4, 5)
353
+ bool use_trellis_; // use trellis-quantization (method 7, 8)
354
+
355
+ int q_bias_; // [0..255]: rounding bias for quant. of AC coeffs.
356
+ Quantizer quants_[2]; // quant matrices
357
+ int DCs_[3]; // DC predictors
358
+
359
+ // DCT coefficients storage, aligned
360
+ static const size_t ALIGN_CST = 15;
361
+ uint8_t* in_blocks_base_; // base memory for blocks
362
+ int16_t* in_blocks_; // aligned pointer to in_blocks_base_
363
+ bool have_coeffs_; // true if the DCT coefficients are stored
364
+ bool AllocateBlocks(size_t num_blocks); // returns false in case of error
365
+ void DesallocateBlocks();
366
+
367
+ // these are for regular compression methods 0 or 2.
368
+ RunLevel base_run_levels_[64];
369
+
370
+ // this is the extra memory for compression method 1
371
+ RunLevel* all_run_levels_;
372
+ size_t nb_run_levels_, max_run_levels_;
373
+
374
+ // Huffman_tables_ indices:
375
+ // 0: luma dc, 1: chroma dc, 2: luma ac, 3: chroma ac
376
+ const HuffmanTable *Huffman_tables_[4];
377
+ uint32_t ac_codes_[2][256];
378
+ uint32_t dc_codes_[2][12];
379
+
380
+ // histograms for dynamic codes. Could be temporaries.
381
+ uint32_t freq_ac_[2][256 + 1]; // frequency distribution for AC coeffs
382
+ uint32_t freq_dc_[2][12 + 1]; // frequency distribution for DC coeffs
383
+ uint8_t opt_syms_ac_[2][256]; // optimal table for AC symbols
384
+ uint8_t opt_syms_dc_[2][12]; // optimal table for DC symbols
385
+ HuffmanTable opt_tables_ac_[2];
386
+ HuffmanTable opt_tables_dc_[2];
387
+
388
+ // Limits on how much we will decrease the bitrate in the luminance
389
+ // and chrominance channels (respectively).
390
+ int qdelta_max_luma_;
391
+ int qdelta_max_chroma_;
392
+
393
+ // Histogram handling
394
+
395
+ // This function aggregates the 63 unquantized AC coefficients (per block) into a
396
+ // histogram for further analysis.
397
+ typedef void (*StoreHistoFunc)(const int16_t in[64], Histo* const histos,
398
+ int nb_blocks);
399
+ static StoreHistoFunc store_histo_;
400
+ static StoreHistoFunc GetStoreHistoFunc(); // select between the above.
401
+
402
+ // Provided the AC histograms have been stored with StoreHisto(), this
403
+ // function will analyze impact of varying the quantization scales around
404
+ // initial values, trading distortion for bit-rate in a controlled way.
405
+ void AnalyseHisto();
406
+ void ResetHisto(); // initialize histos_[]
407
+ Histo histos_[2];
408
+
409
+ // multi-pass parameters
410
+ int passes_;
411
+ SearchHook default_hook_;
412
+ SearchHook* search_hook_;
413
+
414
+ // lower memory management
415
+ MemoryManager* memory_hook_;
416
+
417
+ static const float kHistoWeight[QSIZE];
418
+
419
+ static void (*fDCT_)(int16_t* in, int num_blocks);
420
+ static void InitializeStaticPointers();
421
+ };
422
+
423
+ ////////////////////////////////////////////////////////////////////////////////
424
+
425
+ } // namespace sjpeg
426
+
427
+ #endif // SJPEG_JPEGI_H_