sjpeg 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,427 @@
1
+ // Copyright 2017 Google Inc.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+ // Fast & simple JPEG encoder. Internal header.
16
+ //
17
+ // Author: Skal (pascal.massimino@gmail.com)
18
+
19
+ #ifndef SJPEG_JPEGI_H_
20
+ #define SJPEG_JPEGI_H_
21
+
22
+ #include "sjpeg.h"
23
+ #include "bit_writer.h"
24
+
25
+ #ifndef NULL
26
+ #define NULL 0
27
+ #endif
28
+
29
+ #if defined(__SSE2__)
30
+ #define SJPEG_USE_SSE2
31
+ #endif
32
+
33
+ #if defined(__ARM_NEON__) || defined(__aarch64__)
34
+ #define SJPEG_USE_NEON
35
+ #endif
36
+
37
+ #if defined(SJPEG_NEED_ASM_HEADERS)
38
+ #if defined(SJPEG_USE_SSE2)
39
+ #include <emmintrin.h>
40
+ #endif
41
+
42
+ #if defined(SJPEG_USE_NEON)
43
+ #include <arm_neon.h>
44
+ #endif
45
+ #endif // SJPEG_NEED_ASM_HEADERS
46
+
47
+ #include <assert.h>
48
+
49
+ ////////////////////////////////////////////////////////////////////////////////
50
+
51
+ namespace sjpeg {
52
+
53
+ extern bool SupportsSSE2();
54
+ extern bool SupportsNEON();
55
+
56
+ // Constants below are marker codes defined in JPEG spec
57
+ // ISO/IEC 10918-1 : 1993(E) Table B.1
58
+ // See also: http://www.w3.org/Graphics/JPEG/itu-t81.pdf
59
+
60
+ #define M_SOF0 0xffc0 // Start Of Frame, baseline DCT
61
+ #define M_SOF1 0xffc1 // Start Of Frame, extended sequential DCT
62
+ #define M_DHT 0xffc4 // Define Huffman Table(s)
63
+ #define M_SOI 0xffd8 // Start Of Image
64
+ #define M_EOI 0xffd9 // End Of Image
65
+ #define M_SOS 0xffda // Start Of Scan
66
+ #define M_DQT 0xffdb // Define Quantization Table(s)
67
+
68
+ // Forward 8x8 Fourier transforms, in-place.
69
+ typedef void (*FdctFunc)(int16_t *coeffs, int num_blocks);
70
+ FdctFunc GetFdct();
71
+
72
+ // these are the default luma/chroma matrices (JPEG spec section K.1)
73
+ extern const uint8_t kDefaultMatrices[2][64];
74
+ extern const uint8_t kZigzag[64];
75
+
76
+ // scoring tables in score_7.cc
77
+ extern const int kRGBSize;
78
+ extern const uint8_t kSharpnessScore[];
79
+
80
+ // internal riskiness scoring functions:
81
+ extern double DCTRiskinessScore(const int16_t yuv[3 * 8],
82
+ int16_t scores[8 * 8]);
83
+ extern double BlockRiskinessScore(const uint8_t* rgb, int stride,
84
+ int16_t scores[8 * 8]);
85
+ extern int YUVToRiskIdx(int16_t y, int16_t u, int16_t v);
86
+
87
+ ///////////////////////////////////////////////////////////////////////////////
88
+ // RGB->YUV conversion
89
+
90
+ // convert 16x16 RGB block into YUV420, or 8x8 RGB block into YUV444
91
+ typedef void (*RGBToYUVBlockFunc)(const uint8_t* src, int src_stride,
92
+ int16_t* blocks);
93
+ extern RGBToYUVBlockFunc GetBlockFunc(bool use_444);
94
+
95
+ // convert a row of 'width' RGB samples from src[] into dst[]. The original note says the output is YUV444; NOTE(review): the typedef name and the uint16_t* destination suggest per-pixel indices are written instead -- confirm against the implementation.
96
+ typedef void (*RGBToIndexRowFunc)(const uint8_t* src, int width,
97
+ uint16_t* dst);
98
+ extern RGBToIndexRowFunc GetRowFunc();
99
+
100
+ // Enhanced slower RGB->YUV conversion:
101
+ // y_plane[] has dimension W x H, whereas u_plane[] and v_plane[] have
102
+ // dimension (W + 1)/2 x (H + 1)/2.
103
+ void ApplySharpYUVConversion(const uint8_t* const rgb,
104
+ int W, int H, int stride,
105
+ uint8_t* y_plane,
106
+ uint8_t* u_plane, uint8_t* v_plane);
107
+
108
+ ///////////////////////////////////////////////////////////////////////////////
109
+ // some useful helper functions around quant matrices
110
+
111
+ extern float GetQFactor(float q); // convert quality factor -> scale factor
112
+ extern void CopyQuantMatrix(const uint8_t in[64], uint8_t out[64]);
113
+ extern void SetQuantMatrix(const uint8_t in[64], float q_factor,
114
+ uint8_t out[64]);
115
+ extern void SetMinQuantMatrix(const uint8_t* const m, uint8_t out[64],
116
+ int tolerance);
117
+
118
+ ////////////////////////////////////////////////////////////////////////////////
119
+ // main structs
120
+
121
+ // Huffman table description: per-bit-length symbol counts (bits_) plus the symbol list (syms_).
122
+ struct HuffmanTable {
123
+ uint8_t bits_[16]; // number of symbols per bit count (16 entries, one per code length)
124
+ const uint8_t* syms_; // symbol map, in increasing bit length (pointer only: the symbols are stored elsewhere)
125
+ uint8_t nb_syms_; // cached value of sum(bits_[])
126
+ };
127
+
128
+ // Quantizer matrices: one 64-entry table per quantity, plus derived per-coefficient helper tables.
128
+ struct Quantizer {
129
+ uint8_t quant_[64]; // direct quantizer matrix
130
+ uint8_t min_quant_[64]; // minimum quantizer value allowed
131
+ uint16_t iquant_[64]; // precalculated reciprocal, used in place of a division
132
+ uint16_t qthresh_[64]; // minimal absolute value that produces a non-zero coeff
133
+ uint16_t bias_[64]; // bias, for coring
134
+ const uint32_t* codes_; // codes for bit-cost calculation (pointer only, storage lives elsewhere)
135
+ };
137
+
138
+ // Compact Run/Level storage, kept separate from the DCTCoeffs information.
138
+ // The Run/Level information is not yet entropy-coded at this stage, just stored.
139
+ struct RunLevel {
140
+ int16_t run_; // run part of the pair (presumably the count of zero coeffs skipped -- TODO confirm)
141
+ uint16_t level_; // packed level: 4 bits for length, 12 bits for mantissa
142
+ };
144
+
145
+ // Summary information about one block of quantized coefficients.
145
+ struct DCTCoeffs {
146
+ int16_t last_; // last position (inclusive) of a non-zero coeff
147
+ int16_t nb_coeffs_; // total number of non-zero AC coeffs
148
+ uint16_t dc_code_; // packed DC code (4 bits for length, 12 bits for suffix)
149
+ int8_t idx_; // component index
150
+ int8_t bias_; // perceptual bias
151
+ };
153
+
154
+ // Histogram of transform coefficients, for adaptive quant matrices
155
+ // * HSHIFT controls the trade-off between storage size for counts[]
156
+ // and precision: the fdct doesn't descale and returns coefficients as
157
+ // signed 16bit values. We are only interested in the absolute values
158
+ // of coefficients that are less than MAX_HISTO_DCT_COEFF, which are our
159
+ // best contributors.
160
+ // Still, storing a histogram up to MAX_HISTO_DCT_COEFF can be costly, so
161
+ // we further aggregate the statistics in bins of size 1 << HSHIFT to save
162
+ // space.
163
+ // * HLAMBDA roughly measures how much you are willing to trade in distortion
164
+ // for a 1-bit gain in filesize.
165
+ // * QDELTA_MIN / QDELTA_MAX control how much we allow wandering around the
166
+ // initial point. This helps reduce the CPU cost, as well as keeping the
167
+ // optimization around the initial desired quality-factor (HLAMBDA also
168
+ // serves this purpose).
169
+ enum { HSHIFT = 2, // size of bins is (1 << HSHIFT)
170
+ HHALF = 1 << (HSHIFT - 1), // half of a bin's size
171
+ MAX_HISTO_DCT_COEFF = (1 << 7), // max coefficient, descaled by HSHIFT
172
+ HLAMBDA = 0x80, // distortion vs. size trade-off, see the note above
173
+ // Limits on the range of alternate quantizers explored around
174
+ // the initial value. (see details in AnalyseHisto())
175
+ QDELTA_MIN = -12, QDELTA_MAX = 12,
176
+ QSIZE = QDELTA_MAX + 1 - QDELTA_MIN, // number of deltas in [QDELTA_MIN, QDELTA_MAX], i.e. 25
177
+ };
178
+
179
+ struct Histo { // coefficient histogram storage, see the HSHIFT / MAX_HISTO_DCT_COEFF constants
180
+ // Reserve one extra entry for counting all coeffs greater than
181
+ // MAX_HISTO_DCT_COEFF. The result isn't used, but it makes the loop easier.
182
+ int counts_[64][MAX_HISTO_DCT_COEFF + 1]; // presumably indexed as [coeff position][bin] -- TODO confirm
183
+ };
184
+
185
+ ////////////////////////////////////////////////////////////////////////////////
186
+
187
+ struct Encoder { // abstract encoder core: subclasses supply InitComponents() and GetSamples()
188
+ public:
189
+ Encoder(int W, int H, int step, const uint8_t* rgb, ByteSink* sink);
190
+ virtual ~Encoder();
191
+ bool Ok() const { return ok_; } // returns the ok_ status flag
192
+
193
+ // setters
194
+ void SetQuality(float q);
195
+ void SetCompressionMethod(int method);
196
+
197
+ // all-in-one init from EncoderParam.
198
+ bool InitFromParam(const EncoderParam& param);
199
+
200
+ // Main call. Returns false in case of parameter error (setting empty output).
201
+ bool Encode();
202
+
203
+ // these are colorspace-dependent.
204
+ virtual void InitComponents() = 0;
205
+ // return MCU samples at macroblock position (mb_x, mb_y)
206
+ // clipped is true if the MCU is clipped and needs replication
207
+ virtual void GetSamples(int mb_x, int mb_y, bool clipped,
208
+ int16_t* out_blocks) = 0;
209
+
210
+ private:
211
+ // setters
212
+ void SetQuantMatrices(const uint8_t m[2][64]);
213
+ void SetMinQuantMatrices(const uint8_t m[2][64], int tolerance);
214
+ void SetDefaultMinQuantMatrices();
215
+
216
+ void SetQuantizationBias(int bias, bool use_adaptive);
217
+ void SetQuantizationDeltas(int qdelta_luma, int qdelta_chroma);
218
+
219
+ typedef enum { ICC, EXIF, XMP, MARKERS } MetadataType;
220
+ void SetMetadata(const std::string& data, MetadataType type);
221
+
222
+ private:
223
+ bool CheckBuffers(); // returns false in case of memory alloc error
224
+
225
+ void WriteAPP0();
226
+ bool WriteAPPMarkers(const std::string& data);
227
+ bool WriteEXIF(const std::string& data);
228
+ bool WriteICCP(const std::string& data);
229
+ bool WriteXMP(const std::string& data);
230
+ void WriteDQT();
231
+ void WriteSOF();
232
+ void WriteDHT();
233
+ void WriteSOS();
234
+ void WriteEOI();
235
+
236
+ void ResetDCs();
237
+
238
+ // collect transformed coeffs (unquantized) only
239
+ void CollectCoeffs();
240
+
241
+ // 2-pass Huffman optimizing scan
242
+ void ResetEntropyStats();
243
+ void AddEntropyStats(const DCTCoeffs* const coeffs,
244
+ const RunLevel* const run_levels);
245
+ void CompileEntropyStats();
246
+ void StoreOptimalHuffmanTables(size_t nb_mbs, const DCTCoeffs* coeffs);
247
+
248
+ void SinglePassScan(); // finalizing scan
249
+ void SinglePassScanOptimized(); // optimize the Huffman table + finalize scan
250
+
251
+ // quantize and compute run/levels from already stored coeffs
252
+ void StoreRunLevels(DCTCoeffs* coeffs);
253
+ // just write already stored run_levels & coeffs:
254
+ void FinalPassScan(size_t nb_mbs, const DCTCoeffs* coeffs);
255
+
256
+ // dichotomy loop
257
+ void LoopScan();
258
+
259
+ // Histogram pass
260
+ void CollectHistograms();
261
+
262
+ void BuildHuffmanCodes(const HuffmanTable* const tab,
263
+ uint32_t* const codes);
264
+
265
+ typedef int (*QuantizeBlockFunc)(const int16_t in[64], int idx,
266
+ const Quantizer* const Q,
267
+ DCTCoeffs* const out, RunLevel* const rl);
268
+ static QuantizeBlockFunc quantize_block_;
269
+ static QuantizeBlockFunc GetQuantizeBlockFunc();
270
+
271
+ static int TrellisQuantizeBlock(const int16_t in[64], int idx,
272
+ const Quantizer* const Q,
273
+ DCTCoeffs* const out,
274
+ RunLevel* const rl);
275
+
276
+ typedef uint32_t (*QuantizeErrorFunc)(const int16_t in[64],
277
+ const Quantizer* const Q);
278
+ static QuantizeErrorFunc quantize_error_;
279
+ static QuantizeErrorFunc GetQuantizeErrorFunc();
280
+
281
+ void CodeBlock(const DCTCoeffs* const coeffs, const RunLevel* const rl);
282
+ // returns DC code (4bits for length, 12bits for suffix), updates DC_predictor
283
+ static uint16_t GenerateDCDiffCode(int DC, int* const DC_predictor);
284
+
285
+ static void FinalizeQuantMatrix(Quantizer* const q, int bias);
286
+ void SetCostCodes(int idx);
287
+ void InitCodes(bool only_ac);
288
+
289
+ size_t HeaderSize() const;
290
+ void BlocksSize(int nb_mbs, const DCTCoeffs* coeffs,
291
+ const RunLevel* rl, sjpeg::BitCounter* const bc) const;
292
+ float ComputeSize(const DCTCoeffs* coeffs);
293
+ float ComputePSNR() const;
294
+
295
+ protected:
296
+ bool SetError(); // flags an error: sets ok_ to false (called by Alloc() when an allocation fails)
297
+
298
+ // format-specific parameters, set by virtual InitComponents()
299
+ enum { MAX_COMP = 3 };
300
+ int nb_comps_;
301
+ int quant_idx_[MAX_COMP]; // indices for quantization matrices
302
+ int nb_blocks_[MAX_COMP]; // number of 8x8 blocks per component
303
+ uint8_t block_dims_[MAX_COMP]; // component dimensions (in units of 8 pixels)
304
+ int block_w_, block_h_; // maximum mcu width / height
305
+ int mcu_blocks_; // total blocks in mcu (= sum of nb_blocks_[])
306
+
307
+ // data accessible to sub-classes implementing alternate input format
308
+ int W_, H_, step_; // width, height, stride
309
+ int mb_w_, mb_h_; // width / height in units of mcu
310
+ const uint8_t* const rgb_; // samples
311
+
312
+ // Replicate an RGB source sub_w x sub_h block, expanding it to w x h size.
313
+ const uint8_t* GetReplicatedSamples(const uint8_t* rgb, // block source
314
+ int rgb_step, // stride in source
315
+ int sub_w, int sub_h, // sub-block size
316
+ int w, int h); // size of mcu
317
+ // Replicate a YUV sub-block similarly.
318
+ const uint8_t* GetReplicatedYUVSamples(const uint8_t* in, int step,
319
+ int sub_w, int sub_h, int w, int h);
320
+ // set blocks that are totally outside of the picture to an average value
321
+ void AverageExtraLuma(int sub_w, int sub_h, int16_t* out);
322
+ uint8_t replicated_buffer_[3 * 16 * 16]; // tmp buffer for replication
323
+
324
+ sjpeg::RGBToYUVBlockFunc get_yuv_block_;
325
+ static sjpeg::RGBToYUVBlockFunc get_yuv444_block_;
326
+ void SetYUVFormat(bool use_444) { // selects the RGB->YUV block converter for the requested format
327
+ get_yuv_block_ = sjpeg::GetBlockFunc(use_444);
328
+ }
329
+ bool adaptive_bias_; // if true, use per-block perceptual bias modulation
330
+
331
+ // Memory management
332
+ template<class T> T* Alloc(size_t num) { // raw storage for 'num' objects of type T, obtained from memory_hook_ (no constructor is run)
333
+ assert(memory_hook_ != nullptr);
334
+ T* const ptr = reinterpret_cast<T*>(memory_hook_->Alloc(sizeof(T) * num)); // NOTE(review): sizeof(T) * num is not checked for overflow
335
+ if (ptr == nullptr) SetError(); // record the failure; the null pointer is still returned to the caller
336
+ return ptr;
337
+ }
338
+ template<class T> void Free(T* const ptr) { // hands ptr back to memory_hook_; unlike Alloc(), memory_hook_ is not asserted non-null here
339
+ memory_hook_->Free(reinterpret_cast<void*>(ptr));
340
+ }
341
+
342
+ private:
343
+ bool ok_; // error status, reported by Ok(); SetError() is called e.g. when Alloc() fails
344
+ sjpeg::BitWriter bw_; // output buffer
345
+
346
+ std::string iccp_, xmp_, exif_, app_markers_; // metadata
347
+
348
+ // compression tools. See sjpeg.h for description of methods.
349
+ bool optimize_size_; // Huffman-optimize the codes (method 0, 3)
350
+ bool use_adaptive_quant_; // modulate the quant matrix (method 3-8)
351
+ bool use_extra_memory_; // save the unquantized coeffs (method 3, 4)
352
+ bool reuse_run_levels_; // save quantized run/levels (method 1, 4, 5)
353
+ bool use_trellis_; // use trellis-quantization (method 7, 8)
354
+
355
+ int q_bias_; // [0..255]: rounding bias for quant. of AC coeffs.
356
+ Quantizer quants_[2]; // quant matrices
357
+ int DCs_[3]; // DC predictors
358
+
359
+ // DCT coefficients storage, aligned
360
+ static const size_t ALIGN_CST = 15; // presumably the alignment mask (16-byte alignment) for in_blocks_ -- TODO confirm
361
+ uint8_t* in_blocks_base_; // base memory for blocks
362
+ int16_t* in_blocks_; // aligned pointer to in_blocks_base_
363
+ bool have_coeffs_; // true if the Fourier coefficients are stored
364
+ bool AllocateBlocks(size_t num_blocks); // returns false in case of error
365
+ void DesallocateBlocks();
366
+
367
+ // these are for regular compression methods 0 or 2.
368
+ RunLevel base_run_levels_[64];
369
+
370
+ // this is the extra memory for compression method 1
371
+ RunLevel* all_run_levels_;
372
+ size_t nb_run_levels_, max_run_levels_;
373
+
374
+ // Huffman_tables_ indices:
375
+ // 0: luma dc, 1: chroma dc, 2: luma ac, 3: chroma ac
376
+ const HuffmanTable *Huffman_tables_[4];
377
+ uint32_t ac_codes_[2][256];
378
+ uint32_t dc_codes_[2][12];
379
+
380
+ // histograms for dynamic codes. Could be temporaries.
381
+ uint32_t freq_ac_[2][256 + 1]; // frequency distribution for AC coeffs
382
+ uint32_t freq_dc_[2][12 + 1]; // frequency distribution for DC coeffs
383
+ uint8_t opt_syms_ac_[2][256]; // optimal table for AC symbols
384
+ uint8_t opt_syms_dc_[2][12]; // optimal table for DC symbols
385
+ HuffmanTable opt_tables_ac_[2];
386
+ HuffmanTable opt_tables_dc_[2];
387
+
388
+ // Limits on how much we will decrease the bitrate in the luminance
389
+ // and chrominance channels (respectively).
390
+ int qdelta_max_luma_;
391
+ int qdelta_max_chroma_;
392
+
393
+ // Histogram handling
394
+
395
+ // This function aggregates each 63 unquantized AC coefficients into a
396
+ // histogram for further analysis.
397
+ typedef void (*StoreHistoFunc)(const int16_t in[64], Histo* const histos,
398
+ int nb_blocks);
399
+ static StoreHistoFunc store_histo_;
400
+ static StoreHistoFunc GetStoreHistoFunc(); // select between the above.
401
+
402
+ // Provided the AC histograms have been stored with StoreHisto(), this
403
+ // function will analyze the impact of varying the quantization scales around
404
+ // initial values, trading distortion for bit-rate in a controlled way.
405
+ void AnalyseHisto();
406
+ void ResetHisto(); // initialize histos_[]
407
+ Histo histos_[2];
408
+
409
+ // multi-pass parameters
410
+ int passes_;
411
+ SearchHook default_hook_;
412
+ SearchHook* search_hook_;
413
+
414
+ // lower memory management
415
+ MemoryManager* memory_hook_;
416
+
417
+ static const float kHistoWeight[QSIZE];
418
+
419
+ static void (*fDCT_)(int16_t* in, int num_blocks);
420
+ static void InitializeStaticPointers();
421
+ };
422
+
423
+ ////////////////////////////////////////////////////////////////////////////////
424
+
425
+ } // namespace sjpeg
426
+
427
+ #endif // SJPEG_JPEGI_H_