image_intensities 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/ext/image_intensities/Makefile +73 -4
  3. data/ext/image_intensities/turbojpeg/cderror.h +137 -0
  4. data/ext/image_intensities/turbojpeg/cdjpeg.h +157 -0
  5. data/ext/image_intensities/turbojpeg/cmyk.h +61 -0
  6. data/ext/image_intensities/turbojpeg/jaricom.c +157 -0
  7. data/ext/image_intensities/turbojpeg/jcapimin.c +295 -0
  8. data/ext/image_intensities/turbojpeg/jcapistd.c +162 -0
  9. data/ext/image_intensities/turbojpeg/jcarith.c +932 -0
  10. data/ext/image_intensities/turbojpeg/jccoefct.c +449 -0
  11. data/ext/image_intensities/turbojpeg/jccolext.c +144 -0
  12. data/ext/image_intensities/turbojpeg/jccolor.c +710 -0
  13. data/ext/image_intensities/turbojpeg/jcdctmgr.c +721 -0
  14. data/ext/image_intensities/turbojpeg/jchuff.c +1096 -0
  15. data/ext/image_intensities/turbojpeg/jchuff.h +42 -0
  16. data/ext/image_intensities/turbojpeg/jcicc.c +105 -0
  17. data/ext/image_intensities/turbojpeg/jcinit.c +77 -0
  18. data/ext/image_intensities/turbojpeg/jcmainct.c +162 -0
  19. data/ext/image_intensities/turbojpeg/jcmarker.c +664 -0
  20. data/ext/image_intensities/turbojpeg/jcmaster.c +640 -0
  21. data/ext/image_intensities/turbojpeg/jcomapi.c +109 -0
  22. data/ext/image_intensities/turbojpeg/jconfig.h +73 -0
  23. data/ext/image_intensities/turbojpeg/jconfigint.h +31 -0
  24. data/ext/image_intensities/turbojpeg/jcparam.c +541 -0
  25. data/ext/image_intensities/turbojpeg/jcphuff.c +1105 -0
  26. data/ext/image_intensities/turbojpeg/jcprepct.c +351 -0
  27. data/ext/image_intensities/turbojpeg/jcsample.c +539 -0
  28. data/ext/image_intensities/turbojpeg/jctrans.c +400 -0
  29. data/ext/image_intensities/turbojpeg/jdapimin.c +407 -0
  30. data/ext/image_intensities/turbojpeg/jdapistd.c +639 -0
  31. data/ext/image_intensities/turbojpeg/jdarith.c +773 -0
  32. data/ext/image_intensities/turbojpeg/jdatadst-tj.c +203 -0
  33. data/ext/image_intensities/turbojpeg/jdatadst.c +293 -0
  34. data/ext/image_intensities/turbojpeg/jdatasrc-tj.c +194 -0
  35. data/ext/image_intensities/turbojpeg/jdatasrc.c +295 -0
  36. data/ext/image_intensities/turbojpeg/jdcoefct.c +692 -0
  37. data/ext/image_intensities/turbojpeg/jdcoefct.h +82 -0
  38. data/ext/image_intensities/turbojpeg/jdcol565.c +384 -0
  39. data/ext/image_intensities/turbojpeg/jdcolext.c +143 -0
  40. data/ext/image_intensities/turbojpeg/jdcolor.c +883 -0
  41. data/ext/image_intensities/turbojpeg/jdct.h +208 -0
  42. data/ext/image_intensities/turbojpeg/jddctmgr.c +352 -0
  43. data/ext/image_intensities/turbojpeg/jdhuff.c +831 -0
  44. data/ext/image_intensities/turbojpeg/jdhuff.h +238 -0
  45. data/ext/image_intensities/turbojpeg/jdicc.c +171 -0
  46. data/ext/image_intensities/turbojpeg/jdinput.c +408 -0
  47. data/ext/image_intensities/turbojpeg/jdmainct.c +460 -0
  48. data/ext/image_intensities/turbojpeg/jdmainct.h +71 -0
  49. data/ext/image_intensities/turbojpeg/jdmarker.c +1377 -0
  50. data/ext/image_intensities/turbojpeg/jdmaster.c +737 -0
  51. data/ext/image_intensities/turbojpeg/jdmaster.h +28 -0
  52. data/ext/image_intensities/turbojpeg/jdmerge.c +617 -0
  53. data/ext/image_intensities/turbojpeg/jdmrg565.c +354 -0
  54. data/ext/image_intensities/turbojpeg/jdmrgext.c +184 -0
  55. data/ext/image_intensities/turbojpeg/jdphuff.c +687 -0
  56. data/ext/image_intensities/turbojpeg/jdpostct.c +294 -0
  57. data/ext/image_intensities/turbojpeg/jdsample.c +518 -0
  58. data/ext/image_intensities/turbojpeg/jdsample.h +50 -0
  59. data/ext/image_intensities/turbojpeg/jdtrans.c +155 -0
  60. data/ext/image_intensities/turbojpeg/jerror.c +251 -0
  61. data/ext/image_intensities/turbojpeg/jfdctflt.c +169 -0
  62. data/ext/image_intensities/turbojpeg/jfdctfst.c +227 -0
  63. data/ext/image_intensities/turbojpeg/jfdctint.c +288 -0
  64. data/ext/image_intensities/turbojpeg/jidctflt.c +240 -0
  65. data/ext/image_intensities/turbojpeg/jidctfst.c +371 -0
  66. data/ext/image_intensities/turbojpeg/jidctint.c +2627 -0
  67. data/ext/image_intensities/turbojpeg/jidctred.c +409 -0
  68. data/ext/image_intensities/turbojpeg/jinclude.h +88 -0
  69. data/ext/image_intensities/turbojpeg/jmemmgr.c +1179 -0
  70. data/ext/image_intensities/turbojpeg/jmemnobs.c +115 -0
  71. data/ext/image_intensities/turbojpeg/jmemsys.h +178 -0
  72. data/ext/image_intensities/turbojpeg/jpeg_nbits_table.h +4098 -0
  73. data/ext/image_intensities/turbojpeg/jpegcomp.h +31 -0
  74. data/ext/image_intensities/turbojpeg/jquant1.c +859 -0
  75. data/ext/image_intensities/turbojpeg/jquant2.c +1285 -0
  76. data/ext/image_intensities/turbojpeg/jsimd.h +117 -0
  77. data/ext/image_intensities/turbojpeg/jsimd_none.c +418 -0
  78. data/ext/image_intensities/turbojpeg/jsimddct.h +70 -0
  79. data/ext/image_intensities/turbojpeg/jstdhuff.c +143 -0
  80. data/ext/image_intensities/turbojpeg/jutils.c +133 -0
  81. data/ext/image_intensities/turbojpeg/jversion.h +52 -0
  82. data/ext/image_intensities/turbojpeg/libturbojpeg.a +0 -0
  83. data/ext/image_intensities/turbojpeg/rdbmp.c +689 -0
  84. data/ext/image_intensities/turbojpeg/rdppm.c +766 -0
  85. data/ext/image_intensities/turbojpeg/tjutil.h +47 -0
  86. data/ext/image_intensities/turbojpeg/transupp.c +1628 -0
  87. data/ext/image_intensities/turbojpeg/transupp.h +210 -0
  88. data/ext/image_intensities/turbojpeg/turbojpeg.c +2150 -0
  89. data/ext/image_intensities/turbojpeg/wrbmp.c +558 -0
  90. data/ext/image_intensities/turbojpeg/wrppm.c +365 -0
  91. data/image_intensities.gemspec +1 -1
  92. data/lib/image_intensities/version.rb +1 -1
  93. metadata +91 -3
@@ -0,0 +1,240 @@
1
+ /*
2
+ * jidctflt.c
3
+ *
4
+ * This file was part of the Independent JPEG Group's software:
5
+ * Copyright (C) 1994-1998, Thomas G. Lane.
6
+ * Modified 2010 by Guido Vollbeding.
7
+ * libjpeg-turbo Modifications:
8
+ * Copyright (C) 2014, D. R. Commander.
9
+ * For conditions of distribution and use, see the accompanying README.ijg
10
+ * file.
11
+ *
12
+ * This file contains a floating-point implementation of the
13
+ * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
14
+ * must also perform dequantization of the input coefficients.
15
+ *
16
+ * This implementation should be more accurate than either of the integer
17
+ * IDCT implementations. However, it may not give the same results on all
18
+ * machines because of differences in roundoff behavior. Speed will depend
19
+ * on the hardware's floating point capacity.
20
+ *
21
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
22
+ * on each row (or vice versa, but it's more convenient to emit a row at
23
+ * a time). Direct algorithms are also available, but they are much more
24
+ * complex and seem not to be any faster when reduced to code.
25
+ *
26
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
27
+ * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
28
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
29
+ * JPEG textbook (see REFERENCES section in file README.ijg). The following
30
+ * code is based directly on figure 4-8 in P&M.
31
+ * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
32
+ * possible to arrange the computation so that many of the multiplies are
33
+ * simple scalings of the final outputs. These multiplies can then be
34
+ * folded into the multiplications or divisions by the JPEG quantization
35
+ * table entries. The AA&N method leaves only 5 multiplies and 29 adds
36
+ * to be done in the DCT itself.
37
+ * The primary disadvantage of this method is that with a fixed-point
38
+ * implementation, accuracy is lost due to imprecise representation of the
39
+ * scaled quantization values. However, that problem does not arise if
40
+ * we use floating point arithmetic.
41
+ */
42
+
43
+ #define JPEG_INTERNALS
44
+ #include "jinclude.h"
45
+ #include "jpeglib.h"
46
+ #include "jdct.h" /* Private declarations for DCT subsystem */
47
+
48
+ #ifdef DCT_FLOAT_SUPPORTED
49
+
50
+
51
+ /*
52
+ * This module is specialized to the case DCTSIZE = 8.
53
+ */
54
+
55
+ #if DCTSIZE != 8
56
+ Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
57
+ #endif
58
+
59
+
60
+ /* Dequantize a coefficient by multiplying it by the multiplier-table
61
+ * entry; produce a float result.
62
+ */
63
+
64
+ #define DEQUANTIZE(coef, quantval) (((FAST_FLOAT)(coef)) * (quantval))
65
+
66
+
67
+ /*
68
+ * Perform dequantization and inverse DCT on one block of coefficients: coef_block is scaled by the multipliers in compptr->dct_table, transformed in two 1-D passes (columns, then rows), and written as 8 rows of 8 samples to output_buf[0..7] starting at column output_col.
69
+ */
70
+
71
+ GLOBAL(void)
72
+ jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
73
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
74
+ JDIMENSION output_col)
75
+ {
76
+ FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
77
+ FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
78
+ FAST_FLOAT z5, z10, z11, z12, z13;
79
+ JCOEFPTR inptr; /* current column of coef_block (pass 1) */
80
+ FLOAT_MULT_TYPE *quantptr; /* matching column of the multiplier table (pass 1) */
81
+ FAST_FLOAT *wsptr; /* cursor into workspace: column in pass 1, row in pass 2 */
82
+ JSAMPROW outptr; /* output row being written in pass 2 */
83
+ JSAMPLE *range_limit = cinfo->sample_range_limit; /* sample lookup table, indexed with (value & RANGE_MASK) */
84
+ int ctr; /* column countdown in pass 1, row index in pass 2 */
85
+ FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
86
+ #define _0_125 ((FLOAT_MULT_TYPE)0.125) /* folded into every dequantization multiplier in pass 1 */
87
+
88
+ /* Pass 1: process columns from input, store into work array. */
89
+
90
+ inptr = coef_block;
91
+ quantptr = (FLOAT_MULT_TYPE *)compptr->dct_table;
92
+ wsptr = workspace;
93
+ for (ctr = DCTSIZE; ctr > 0; ctr--) {
94
+ /* Due to quantization, we will usually find that many of the input
95
+ * coefficients are zero, especially the AC terms. We can exploit this
96
+ * by short-circuiting the IDCT calculation for any column in which all
97
+ * the AC terms are zero. In that case each output is equal to the
98
+ * DC coefficient (with scale factor as needed).
99
+ * With typical images and quantization tables, half or more of the
100
+ * column DCT calculations can be simplified this way.
101
+ */
102
+
103
+ if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
104
+ inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
105
+ inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
106
+ inptr[DCTSIZE * 7] == 0) {
107
+ /* AC terms all zero */
108
+ FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE * 0],
109
+ quantptr[DCTSIZE * 0] * _0_125);
110
+
111
+ wsptr[DCTSIZE * 0] = dcval;
112
+ wsptr[DCTSIZE * 1] = dcval;
113
+ wsptr[DCTSIZE * 2] = dcval;
114
+ wsptr[DCTSIZE * 3] = dcval;
115
+ wsptr[DCTSIZE * 4] = dcval;
116
+ wsptr[DCTSIZE * 5] = dcval;
117
+ wsptr[DCTSIZE * 6] = dcval;
118
+ wsptr[DCTSIZE * 7] = dcval;
119
+
120
+ inptr++; /* advance pointers to next column */
121
+ quantptr++;
122
+ wsptr++;
123
+ continue;
124
+ }
125
+
126
+ /* Even part */
127
+
128
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0] * _0_125);
129
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2] * _0_125);
130
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4] * _0_125);
131
+ tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6] * _0_125);
132
+
133
+ tmp10 = tmp0 + tmp2; /* phase 3 */
134
+ tmp11 = tmp0 - tmp2;
135
+
136
+ tmp13 = tmp1 + tmp3; /* phases 5-3 */
137
+ tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT)1.414213562) - tmp13; /* 2*c4 */
138
+
139
+ tmp0 = tmp10 + tmp13; /* phase 2 */
140
+ tmp3 = tmp10 - tmp13;
141
+ tmp1 = tmp11 + tmp12;
142
+ tmp2 = tmp11 - tmp12;
143
+
144
+ /* Odd part */
145
+
146
+ tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1] * _0_125);
147
+ tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3] * _0_125);
148
+ tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5] * _0_125);
149
+ tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7] * _0_125);
150
+
151
+ z13 = tmp6 + tmp5; /* phase 6 */
152
+ z10 = tmp6 - tmp5;
153
+ z11 = tmp4 + tmp7;
154
+ z12 = tmp4 - tmp7;
155
+
156
+ tmp7 = z11 + z13; /* phase 5 */
157
+ tmp11 = (z11 - z13) * ((FAST_FLOAT)1.414213562); /* 2*c4 */
158
+
159
+ z5 = (z10 + z12) * ((FAST_FLOAT)1.847759065); /* 2*c2 */
160
+ tmp10 = z5 - z12 * ((FAST_FLOAT)1.082392200); /* 2*(c2-c6) */
161
+ tmp12 = z5 - z10 * ((FAST_FLOAT)2.613125930); /* 2*(c2+c6) */
162
+
163
+ tmp6 = tmp12 - tmp7; /* phase 2 */
164
+ tmp5 = tmp11 - tmp6;
165
+ tmp4 = tmp10 - tmp5;
166
+
167
+ wsptr[DCTSIZE * 0] = tmp0 + tmp7;
168
+ wsptr[DCTSIZE * 7] = tmp0 - tmp7;
169
+ wsptr[DCTSIZE * 1] = tmp1 + tmp6;
170
+ wsptr[DCTSIZE * 6] = tmp1 - tmp6;
171
+ wsptr[DCTSIZE * 2] = tmp2 + tmp5;
172
+ wsptr[DCTSIZE * 5] = tmp2 - tmp5;
173
+ wsptr[DCTSIZE * 3] = tmp3 + tmp4;
174
+ wsptr[DCTSIZE * 4] = tmp3 - tmp4;
175
+
176
+ inptr++; /* advance pointers to next column */
177
+ quantptr++;
178
+ wsptr++;
179
+ }
180
+
181
+ /* Pass 2: process rows from work array, store into output array. */
182
+
183
+ wsptr = workspace;
184
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
185
+ outptr = output_buf[ctr] + output_col;
186
+ /* Rows of zeroes can be exploited in the same way as we did with columns.
187
+ * However, the column calculation has created many nonzero AC terms, so
188
+ * the simplification applies less often (typically 5% to 10% of the time).
189
+ * And testing floats for zero is relatively expensive, so we don't bother.
190
+ */
191
+
192
+ /* Even part */
193
+
194
+ /* Apply signed->unsigned and prepare float->int conversion; adding the offset to wsptr[0] here carries it into all eight outputs via tmp10/tmp11 */
195
+ z5 = wsptr[0] + ((FAST_FLOAT)CENTERJSAMPLE + (FAST_FLOAT)0.5);
196
+ tmp10 = z5 + wsptr[4];
197
+ tmp11 = z5 - wsptr[4];
198
+
199
+ tmp13 = wsptr[2] + wsptr[6];
200
+ tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT)1.414213562) - tmp13; /* 2*c4 */
201
+
202
+ tmp0 = tmp10 + tmp13;
203
+ tmp3 = tmp10 - tmp13;
204
+ tmp1 = tmp11 + tmp12;
205
+ tmp2 = tmp11 - tmp12;
206
+
207
+ /* Odd part */
208
+
209
+ z13 = wsptr[5] + wsptr[3];
210
+ z10 = wsptr[5] - wsptr[3];
211
+ z11 = wsptr[1] + wsptr[7];
212
+ z12 = wsptr[1] - wsptr[7];
213
+
214
+ tmp7 = z11 + z13;
215
+ tmp11 = (z11 - z13) * ((FAST_FLOAT)1.414213562); /* 2*c4 */
216
+
217
+ z5 = (z10 + z12) * ((FAST_FLOAT)1.847759065); /* 2*c2 */
218
+ tmp10 = z5 - z12 * ((FAST_FLOAT)1.082392200); /* 2*(c2-c6) */
219
+ tmp12 = z5 - z10 * ((FAST_FLOAT)2.613125930); /* 2*(c2+c6) */
220
+
221
+ tmp6 = tmp12 - tmp7;
222
+ tmp5 = tmp11 - tmp6;
223
+ tmp4 = tmp10 - tmp5;
224
+
225
+ /* Final output stage: float->int conversion and range-limit */
226
+
227
+ outptr[0] = range_limit[((int)(tmp0 + tmp7)) & RANGE_MASK];
228
+ outptr[7] = range_limit[((int)(tmp0 - tmp7)) & RANGE_MASK];
229
+ outptr[1] = range_limit[((int)(tmp1 + tmp6)) & RANGE_MASK];
230
+ outptr[6] = range_limit[((int)(tmp1 - tmp6)) & RANGE_MASK];
231
+ outptr[2] = range_limit[((int)(tmp2 + tmp5)) & RANGE_MASK];
232
+ outptr[5] = range_limit[((int)(tmp2 - tmp5)) & RANGE_MASK];
233
+ outptr[3] = range_limit[((int)(tmp3 + tmp4)) & RANGE_MASK];
234
+ outptr[4] = range_limit[((int)(tmp3 - tmp4)) & RANGE_MASK];
235
+
236
+ wsptr += DCTSIZE; /* advance pointer to next row */
237
+ }
238
+ }
239
+
240
+ #endif /* DCT_FLOAT_SUPPORTED */
@@ -0,0 +1,371 @@
1
+ /*
2
+ * jidctfst.c
3
+ *
4
+ * This file was part of the Independent JPEG Group's software:
5
+ * Copyright (C) 1994-1998, Thomas G. Lane.
6
+ * libjpeg-turbo Modifications:
7
+ * Copyright (C) 2015, D. R. Commander.
8
+ * For conditions of distribution and use, see the accompanying README.ijg
9
+ * file.
10
+ *
11
+ * This file contains a fast, not so accurate integer implementation of the
12
+ * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
13
+ * must also perform dequantization of the input coefficients.
14
+ *
15
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
16
+ * on each row (or vice versa, but it's more convenient to emit a row at
17
+ * a time). Direct algorithms are also available, but they are much more
18
+ * complex and seem not to be any faster when reduced to code.
19
+ *
20
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
21
+ * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
22
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
23
+ * JPEG textbook (see REFERENCES section in file README.ijg). The following
24
+ * code is based directly on figure 4-8 in P&M.
25
+ * While an 8-point DCT cannot be done in fewer than 11 multiplies, it is
26
+ * possible to arrange the computation so that many of the multiplies are
27
+ * simple scalings of the final outputs. These multiplies can then be
28
+ * folded into the multiplications or divisions by the JPEG quantization
29
+ * table entries. The AA&N method leaves only 5 multiplies and 29 adds
30
+ * to be done in the DCT itself.
31
+ * The primary disadvantage of this method is that with fixed-point math,
32
+ * accuracy is lost due to imprecise representation of the scaled
33
+ * quantization values. The smaller the quantization table entry, the less
34
+ * precise the scaled value, so this implementation does worse with high-
35
+ * quality-setting files than with low-quality ones.
36
+ */
37
+
38
+ #define JPEG_INTERNALS
39
+ #include "jinclude.h"
40
+ #include "jpeglib.h"
41
+ #include "jdct.h" /* Private declarations for DCT subsystem */
42
+
43
+ #ifdef DCT_IFAST_SUPPORTED
44
+
45
+
46
+ /*
47
+ * This module is specialized to the case DCTSIZE = 8.
48
+ */
49
+
50
+ #if DCTSIZE != 8
51
+ Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
52
+ #endif
53
+
54
+
55
+ /* Scaling decisions are generally the same as in the LL&M algorithm;
56
+ * see jidctint.c for more details. However, we choose to descale
57
+ * (right shift) multiplication products as soon as they are formed,
58
+ * rather than carrying additional fractional bits into subsequent additions.
59
+ * This compromises accuracy slightly, but it lets us save a few shifts.
60
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
61
+ * everywhere except in the multiplications proper; this saves a good deal
62
+ * of work on 16-bit-int machines.
63
+ *
64
+ * The dequantized coefficients are not integers because the AA&N scaling
65
+ * factors have been incorporated. We represent them scaled up by PASS1_BITS,
66
+ * so that the first and second IDCT rounds have the same input scaling.
67
+ * For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
68
+ * avoid a descaling shift; this compromises accuracy rather drastically
69
+ * for small quantization table entries, but it saves a lot of shifts.
70
+ * For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
71
+ * so we use a much larger scaling factor to preserve accuracy.
72
+ *
73
+ * A final compromise is to represent the multiplicative constants to only
74
+ * 8 fractional bits, rather than 13. This saves some shifting work on some
75
+ * machines, and may also reduce the cost of multiplication (since there
76
+ * are fewer one-bits in the constants).
77
+ */
78
+
79
+ #if BITS_IN_JSAMPLE == 8
80
+ #define CONST_BITS 8
81
+ #define PASS1_BITS 2
82
+ #else
83
+ #define CONST_BITS 8
84
+ #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
85
+ #endif
86
+
87
+ /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
88
+ * causing a lot of useless floating-point operations at run time.
89
+ * To get around this we use the following pre-calculated constants.
90
+ * If you change CONST_BITS you may want to add appropriate values.
91
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
92
+ */
93
+
94
+ #if CONST_BITS == 8
95
+ #define FIX_1_082392200 ((JLONG)277) /* FIX(1.082392200) */
96
+ #define FIX_1_414213562 ((JLONG)362) /* FIX(1.414213562) */
97
+ #define FIX_1_847759065 ((JLONG)473) /* FIX(1.847759065) */
98
+ #define FIX_2_613125930 ((JLONG)669) /* FIX(2.613125930) */
99
+ #else
100
+ #define FIX_1_082392200 FIX(1.082392200)
101
+ #define FIX_1_414213562 FIX(1.414213562)
102
+ #define FIX_1_847759065 FIX(1.847759065)
103
+ #define FIX_2_613125930 FIX(2.613125930)
104
+ #endif
105
+
106
+
107
+ /* We can gain a little more speed, with a further compromise in accuracy,
108
+ * by omitting the addition in a descaling shift. This yields an incorrectly
109
+ * rounded result half the time...
110
+ */
111
+
112
+ #ifndef USE_ACCURATE_ROUNDING
113
+ #undef DESCALE
114
+ #define DESCALE(x, n) RIGHT_SHIFT(x, n)
115
+ #endif
116
+
117
+
118
+ /* Multiply a DCTELEM variable by an JLONG constant, and immediately
119
+ * descale to yield a DCTELEM result.
120
+ */
121
+
122
+ #define MULTIPLY(var, const) ((DCTELEM)DESCALE((var) * (const), CONST_BITS))
123
+
124
+
125
+ /* Dequantize a coefficient by multiplying it by the multiplier-table
126
+ * entry; produce a DCTELEM result. For 8-bit data a 16x16->16
127
+ * multiplication will do. For 12-bit data, the multiplier table is
128
+ * declared JLONG, so a 32-bit multiply will be used.
129
+ */
130
+
131
+ #if BITS_IN_JSAMPLE == 8
132
+ #define DEQUANTIZE(coef, quantval) (((IFAST_MULT_TYPE)(coef)) * (quantval))
133
+ #else
134
+ #define DEQUANTIZE(coef, quantval) \
135
+ DESCALE((coef) * (quantval), IFAST_SCALE_BITS - PASS1_BITS)
136
+ #endif
137
+
138
+
139
+ /* Like DESCALE, but applies to a DCTELEM and produces an int.
140
+ * We assume that int right shift is unsigned if JLONG right shift is.
141
+ */
142
+
143
+ #ifdef RIGHT_SHIFT_IS_UNSIGNED
144
+ #define ISHIFT_TEMPS DCTELEM ishift_temp;
145
+ #if BITS_IN_JSAMPLE == 8
146
+ #define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */
147
+ #else
148
+ #define DCTELEMBITS 32 /* DCTELEM must be 32 bits */
149
+ #endif
150
+ #define IRIGHT_SHIFT(x, shft) \
151
+ ((ishift_temp = (x)) < 0 ? \
152
+ (ishift_temp >> (shft)) | ((~((DCTELEM)0)) << (DCTELEMBITS - (shft))) : \
153
+ (ishift_temp >> (shft)))
154
+ #else
155
+ #define ISHIFT_TEMPS
156
+ #define IRIGHT_SHIFT(x, shft) ((x) >> (shft))
157
+ #endif
158
+
159
+ #ifdef USE_ACCURATE_ROUNDING
160
+ #define IDESCALE(x, n) ((int)IRIGHT_SHIFT((x) + (1 << ((n) - 1)), n))
161
+ #else
162
+ #define IDESCALE(x, n) ((int)IRIGHT_SHIFT(x, n))
163
+ #endif
164
+
165
+
166
+ /*
167
+ * Perform dequantization and inverse DCT on one block of coefficients.
168
+ */
169
+
170
+ GLOBAL(void)
171
+ jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
172
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
173
+ JDIMENSION output_col)
174
+ {
175
+ DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
176
+ DCTELEM tmp10, tmp11, tmp12, tmp13;
177
+ DCTELEM z5, z10, z11, z12, z13;
178
+ JCOEFPTR inptr;
179
+ IFAST_MULT_TYPE *quantptr;
180
+ int *wsptr;
181
+ JSAMPROW outptr;
182
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
183
+ int ctr;
184
+ int workspace[DCTSIZE2]; /* buffers data between passes */
185
+ SHIFT_TEMPS /* for DESCALE */
186
+ ISHIFT_TEMPS /* for IDESCALE */
187
+
188
+ /* Pass 1: process columns from input, store into work array. */
189
+
190
+ inptr = coef_block;
191
+ quantptr = (IFAST_MULT_TYPE *)compptr->dct_table;
192
+ wsptr = workspace;
193
+ for (ctr = DCTSIZE; ctr > 0; ctr--) {
194
+ /* Due to quantization, we will usually find that many of the input
195
+ * coefficients are zero, especially the AC terms. We can exploit this
196
+ * by short-circuiting the IDCT calculation for any column in which all
197
+ * the AC terms are zero. In that case each output is equal to the
198
+ * DC coefficient (with scale factor as needed).
199
+ * With typical images and quantization tables, half or more of the
200
+ * column DCT calculations can be simplified this way.
201
+ */
202
+
203
+ if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
204
+ inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
205
+ inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
206
+ inptr[DCTSIZE * 7] == 0) {
207
+ /* AC terms all zero */
208
+ int dcval = (int)DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
209
+
210
+ wsptr[DCTSIZE * 0] = dcval;
211
+ wsptr[DCTSIZE * 1] = dcval;
212
+ wsptr[DCTSIZE * 2] = dcval;
213
+ wsptr[DCTSIZE * 3] = dcval;
214
+ wsptr[DCTSIZE * 4] = dcval;
215
+ wsptr[DCTSIZE * 5] = dcval;
216
+ wsptr[DCTSIZE * 6] = dcval;
217
+ wsptr[DCTSIZE * 7] = dcval;
218
+
219
+ inptr++; /* advance pointers to next column */
220
+ quantptr++;
221
+ wsptr++;
222
+ continue;
223
+ }
224
+
225
+ /* Even part */
226
+
227
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
228
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
229
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
230
+ tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
231
+
232
+ tmp10 = tmp0 + tmp2; /* phase 3 */
233
+ tmp11 = tmp0 - tmp2;
234
+
235
+ tmp13 = tmp1 + tmp3; /* phases 5-3 */
236
+ tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
237
+
238
+ tmp0 = tmp10 + tmp13; /* phase 2 */
239
+ tmp3 = tmp10 - tmp13;
240
+ tmp1 = tmp11 + tmp12;
241
+ tmp2 = tmp11 - tmp12;
242
+
243
+ /* Odd part */
244
+
245
+ tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
246
+ tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
247
+ tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
248
+ tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
249
+
250
+ z13 = tmp6 + tmp5; /* phase 6 */
251
+ z10 = tmp6 - tmp5;
252
+ z11 = tmp4 + tmp7;
253
+ z12 = tmp4 - tmp7;
254
+
255
+ tmp7 = z11 + z13; /* phase 5 */
256
+ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
257
+
258
+ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
259
+ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
260
+ tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
261
+
262
+ tmp6 = tmp12 - tmp7; /* phase 2 */
263
+ tmp5 = tmp11 - tmp6;
264
+ tmp4 = tmp10 + tmp5;
265
+
266
+ wsptr[DCTSIZE * 0] = (int)(tmp0 + tmp7);
267
+ wsptr[DCTSIZE * 7] = (int)(tmp0 - tmp7);
268
+ wsptr[DCTSIZE * 1] = (int)(tmp1 + tmp6);
269
+ wsptr[DCTSIZE * 6] = (int)(tmp1 - tmp6);
270
+ wsptr[DCTSIZE * 2] = (int)(tmp2 + tmp5);
271
+ wsptr[DCTSIZE * 5] = (int)(tmp2 - tmp5);
272
+ wsptr[DCTSIZE * 4] = (int)(tmp3 + tmp4);
273
+ wsptr[DCTSIZE * 3] = (int)(tmp3 - tmp4);
274
+
275
+ inptr++; /* advance pointers to next column */
276
+ quantptr++;
277
+ wsptr++;
278
+ }
279
+
280
+ /* Pass 2: process rows from work array, store into output array. */
281
+ /* Note that we must descale the results by a factor of 8 == 2**3, */
282
+ /* and also undo the PASS1_BITS scaling. */
283
+
284
+ wsptr = workspace;
285
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
286
+ outptr = output_buf[ctr] + output_col;
287
+ /* Rows of zeroes can be exploited in the same way as we did with columns.
288
+ * However, the column calculation has created many nonzero AC terms, so
289
+ * the simplification applies less often (typically 5% to 10% of the time).
290
+ * On machines with very fast multiplication, it's possible that the
291
+ * test takes more time than it's worth. In that case this section
292
+ * may be commented out.
293
+ */
294
+
295
+ #ifndef NO_ZERO_ROW_TEST
296
+ if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
297
+ wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
298
+ /* AC terms all zero */
299
+ JSAMPLE dcval =
300
+ range_limit[IDESCALE(wsptr[0], PASS1_BITS + 3) & RANGE_MASK];
301
+
302
+ outptr[0] = dcval;
303
+ outptr[1] = dcval;
304
+ outptr[2] = dcval;
305
+ outptr[3] = dcval;
306
+ outptr[4] = dcval;
307
+ outptr[5] = dcval;
308
+ outptr[6] = dcval;
309
+ outptr[7] = dcval;
310
+
311
+ wsptr += DCTSIZE; /* advance pointer to next row */
312
+ continue;
313
+ }
314
+ #endif
315
+
316
+ /* Even part */
317
+
318
+ tmp10 = ((DCTELEM)wsptr[0] + (DCTELEM)wsptr[4]);
319
+ tmp11 = ((DCTELEM)wsptr[0] - (DCTELEM)wsptr[4]);
320
+
321
+ tmp13 = ((DCTELEM)wsptr[2] + (DCTELEM)wsptr[6]);
322
+ tmp12 =
323
+ MULTIPLY((DCTELEM)wsptr[2] - (DCTELEM)wsptr[6], FIX_1_414213562) - tmp13;
324
+
325
+ tmp0 = tmp10 + tmp13;
326
+ tmp3 = tmp10 - tmp13;
327
+ tmp1 = tmp11 + tmp12;
328
+ tmp2 = tmp11 - tmp12;
329
+
330
+ /* Odd part */
331
+
332
+ z13 = (DCTELEM)wsptr[5] + (DCTELEM)wsptr[3];
333
+ z10 = (DCTELEM)wsptr[5] - (DCTELEM)wsptr[3];
334
+ z11 = (DCTELEM)wsptr[1] + (DCTELEM)wsptr[7];
335
+ z12 = (DCTELEM)wsptr[1] - (DCTELEM)wsptr[7];
336
+
337
+ tmp7 = z11 + z13; /* phase 5 */
338
+ tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
339
+
340
+ z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
341
+ tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
342
+ tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
343
+
344
+ tmp6 = tmp12 - tmp7; /* phase 2 */
345
+ tmp5 = tmp11 - tmp6;
346
+ tmp4 = tmp10 + tmp5;
347
+
348
+ /* Final output stage: scale down by a factor of 8 and range-limit */
349
+
350
+ outptr[0] =
351
+ range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS + 3) & RANGE_MASK];
352
+ outptr[7] =
353
+ range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS + 3) & RANGE_MASK];
354
+ outptr[1] =
355
+ range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS + 3) & RANGE_MASK];
356
+ outptr[6] =
357
+ range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS + 3) & RANGE_MASK];
358
+ outptr[2] =
359
+ range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS + 3) & RANGE_MASK];
360
+ outptr[5] =
361
+ range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS + 3) & RANGE_MASK];
362
+ outptr[4] =
363
+ range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS + 3) & RANGE_MASK];
364
+ outptr[3] =
365
+ range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS + 3) & RANGE_MASK];
366
+
367
+ wsptr += DCTSIZE; /* advance pointer to next row */
368
+ }
369
+ }
370
+
371
+ #endif /* DCT_IFAST_SUPPORTED */