image_pack 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -0
  3. data/README.md +23 -4
  4. data/ext/image_pack/extconf.rb +35 -124
  5. data/ext/image_pack/image_pack.c +638 -595
  6. data/ext/image_pack/mozjpeg_sources.rb +178 -0
  7. data/ext/image_pack/vendor/mozjpeg/BUILDING.md +744 -0
  8. data/ext/image_pack/vendor/mozjpeg/CODE_OF_CONDUCT.md +15 -0
  9. data/ext/image_pack/vendor/mozjpeg/ChangeLog.md +1996 -0
  10. data/lib/image_pack/configuration.rb +54 -8
  11. data/lib/image_pack/version.rb +1 -1
  12. data/lib/image_pack.rb +65 -18
  13. metadata +13 -78
  14. data/ext/image_pack/vendor/mozjpeg/README.ijg +0 -258
  15. data/ext/image_pack/vendor/mozjpeg/cdjpeg.c +0 -156
  16. data/ext/image_pack/vendor/mozjpeg/cjpeg.c +0 -961
  17. data/ext/image_pack/vendor/mozjpeg/djpeg.c +0 -855
  18. data/ext/image_pack/vendor/mozjpeg/jaricom.c +0 -157
  19. data/ext/image_pack/vendor/mozjpeg/jcarith.c +0 -972
  20. data/ext/image_pack/vendor/mozjpeg/jcstest.c +0 -126
  21. data/ext/image_pack/vendor/mozjpeg/jdarith.c +0 -782
  22. data/ext/image_pack/vendor/mozjpeg/jdatadst-tj.c +0 -198
  23. data/ext/image_pack/vendor/mozjpeg/jdatasrc-tj.c +0 -194
  24. data/ext/image_pack/vendor/mozjpeg/jpegtran.c +0 -827
  25. data/ext/image_pack/vendor/mozjpeg/jpegyuv.c +0 -172
  26. data/ext/image_pack/vendor/mozjpeg/rdbmp.c +0 -690
  27. data/ext/image_pack/vendor/mozjpeg/rdcolmap.c +0 -253
  28. data/ext/image_pack/vendor/mozjpeg/rdgif.c +0 -720
  29. data/ext/image_pack/vendor/mozjpeg/rdjpeg.c +0 -160
  30. data/ext/image_pack/vendor/mozjpeg/rdjpgcom.c +0 -494
  31. data/ext/image_pack/vendor/mozjpeg/rdpng.c +0 -194
  32. data/ext/image_pack/vendor/mozjpeg/rdppm.c +0 -781
  33. data/ext/image_pack/vendor/mozjpeg/rdswitch.c +0 -642
  34. data/ext/image_pack/vendor/mozjpeg/rdtarga.c +0 -508
  35. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jccolext-neon.c +0 -148
  36. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jchuff-neon.c +0 -334
  37. data/ext/image_pack/vendor/mozjpeg/simd/arm/aarch32/jsimd.c +0 -976
  38. data/ext/image_pack/vendor/mozjpeg/simd/i386/jsimd.c +0 -1312
  39. data/ext/image_pack/vendor/mozjpeg/simd/mips/jsimd.c +0 -1143
  40. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolext-mmi.c +0 -455
  41. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jccolor-mmi.c +0 -148
  42. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgray-mmi.c +0 -132
  43. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcgryext-mmi.c +0 -374
  44. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jcsample-mmi.c +0 -98
  45. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolext-mmi.c +0 -415
  46. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdcolor-mmi.c +0 -139
  47. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmerge-mmi.c +0 -149
  48. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdmrgext-mmi.c +0 -615
  49. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jdsample-mmi.c +0 -304
  50. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctfst-mmi.c +0 -255
  51. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jfdctint-mmi.c +0 -398
  52. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctfst-mmi.c +0 -395
  53. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jidctint-mmi.c +0 -571
  54. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jquanti-mmi.c +0 -124
  55. data/ext/image_pack/vendor/mozjpeg/simd/mips64/jsimd.c +0 -866
  56. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolext-altivec.c +0 -269
  57. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jccolor-altivec.c +0 -116
  58. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgray-altivec.c +0 -111
  59. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcgryext-altivec.c +0 -228
  60. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jcsample-altivec.c +0 -159
  61. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolext-altivec.c +0 -276
  62. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdcolor-altivec.c +0 -106
  63. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmerge-altivec.c +0 -130
  64. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdmrgext-altivec.c +0 -329
  65. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jdsample-altivec.c +0 -400
  66. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctfst-altivec.c +0 -154
  67. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jfdctint-altivec.c +0 -258
  68. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctfst-altivec.c +0 -255
  69. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jidctint-altivec.c +0 -357
  70. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jquanti-altivec.c +0 -250
  71. data/ext/image_pack/vendor/mozjpeg/simd/powerpc/jsimd.c +0 -884
  72. data/ext/image_pack/vendor/mozjpeg/strtest.c +0 -170
  73. data/ext/image_pack/vendor/mozjpeg/tjbench.c +0 -1044
  74. data/ext/image_pack/vendor/mozjpeg/tjexample.c +0 -406
  75. data/ext/image_pack/vendor/mozjpeg/tjunittest.c +0 -961
  76. data/ext/image_pack/vendor/mozjpeg/tjutil.c +0 -70
  77. data/ext/image_pack/vendor/mozjpeg/transupp.c +0 -2373
  78. data/ext/image_pack/vendor/mozjpeg/turbojpeg-jni.c +0 -1259
  79. data/ext/image_pack/vendor/mozjpeg/turbojpeg.c +0 -2320
  80. data/ext/image_pack/vendor/mozjpeg/wrbmp.c +0 -552
  81. data/ext/image_pack/vendor/mozjpeg/wrgif.c +0 -580
  82. data/ext/image_pack/vendor/mozjpeg/wrjpgcom.c +0 -577
  83. data/ext/image_pack/vendor/mozjpeg/wrppm.c +0 -366
  84. data/ext/image_pack/vendor/mozjpeg/wrtarga.c +0 -258
  85. data/ext/image_pack/vendor/mozjpeg/yuvjpeg.c +0 -268
@@ -1,228 +0,0 @@
1
- /*
2
- * AltiVec optimizations for libjpeg-turbo
3
- *
4
- * Copyright (C) 2014-2015, D. R. Commander. All Rights Reserved.
5
- * Copyright (C) 2014, Jay Foad. All Rights Reserved.
6
- *
7
- * This software is provided 'as-is', without any express or implied
8
- * warranty. In no event will the authors be held liable for any damages
9
- * arising from the use of this software.
10
- *
11
- * Permission is granted to anyone to use this software for any purpose,
12
- * including commercial applications, and to alter it and redistribute it
13
- * freely, subject to the following restrictions:
14
- *
15
- * 1. The origin of this software must not be misrepresented; you must not
16
- * claim that you wrote the original software. If you use this software
17
- * in a product, an acknowledgment in the product documentation would be
18
- * appreciated but is not required.
19
- * 2. Altered source versions must be plainly marked as such, and must not be
20
- * misrepresented as being the original software.
21
- * 3. This notice may not be removed or altered from any source distribution.
22
- */
23
-
24
- /* This file is included by jcgray-altivec.c */
25
-
26
-
27
- void jsimd_rgb_gray_convert_altivec(JDIMENSION img_width, JSAMPARRAY input_buf,
28
- JSAMPIMAGE output_buf,
29
- JDIMENSION output_row, int num_rows)
30
- {
31
- JSAMPROW inptr, outptr;
32
- int pitch = img_width * RGB_PIXELSIZE, num_cols;
33
- #if __BIG_ENDIAN__
34
- int offset;
35
- unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16];
36
- #endif
37
-
38
- __vector unsigned char rgb0, rgb1 = { 0 }, rgb2 = { 0 },
39
- rgbg0, rgbg1, rgbg2, rgbg3, y;
40
- #if __BIG_ENDIAN__ || RGB_PIXELSIZE == 4
41
- __vector unsigned char rgb3 = { 0 };
42
- #endif
43
- #if __BIG_ENDIAN__ && RGB_PIXELSIZE == 4
44
- __vector unsigned char rgb4 = { 0 };
45
- #endif
46
- __vector short rg0, rg1, rg2, rg3, bg0, bg1, bg2, bg3;
47
- __vector unsigned short yl, yh;
48
- __vector int y0, y1, y2, y3;
49
-
50
- /* Constants */
51
- __vector short pw_f0299_f0337 = { __4X2(F_0_299, F_0_337) },
52
- pw_f0114_f0250 = { __4X2(F_0_114, F_0_250) };
53
- __vector int pd_onehalf = { __4X(ONE_HALF) };
54
- __vector unsigned char pb_zero = { __16X(0) },
55
- #if __BIG_ENDIAN__
56
- shift_pack_index =
57
- { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
58
- #else
59
- shift_pack_index =
60
- { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
61
- #endif
62
-
63
- while (--num_rows >= 0) {
64
- inptr = *input_buf++;
65
- outptr = output_buf[0][output_row];
66
- output_row++;
67
-
68
- for (num_cols = pitch; num_cols > 0;
69
- num_cols -= RGB_PIXELSIZE * 16, inptr += RGB_PIXELSIZE * 16,
70
- outptr += 16) {
71
-
72
- #if __BIG_ENDIAN__
73
- /* Load 16 pixels == 48 or 64 bytes */
74
- offset = (size_t)inptr & 15;
75
- if (offset) {
76
- __vector unsigned char unaligned_shift_index;
77
- int bytes = num_cols + offset;
78
-
79
- if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) {
80
- /* Slow path to prevent buffer overread. Since there is no way to
81
- * read a partial AltiVec register, overread would occur on the last
82
- * chunk of the last image row if the right edge is not on a 16-byte
83
- * boundary. It could also occur on other rows if the bytes per row
84
- * is low enough. Since we can't determine whether we're on the last
85
- * image row, we have to assume every row is the last.
86
- */
87
- memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
88
- rgb0 = vec_ld(0, tmpbuf);
89
- rgb1 = vec_ld(16, tmpbuf);
90
- rgb2 = vec_ld(32, tmpbuf);
91
- #if RGB_PIXELSIZE == 4
92
- rgb3 = vec_ld(48, tmpbuf);
93
- #endif
94
- } else {
95
- /* Fast path */
96
- rgb0 = vec_ld(0, inptr);
97
- if (bytes > 16)
98
- rgb1 = vec_ld(16, inptr);
99
- if (bytes > 32)
100
- rgb2 = vec_ld(32, inptr);
101
- if (bytes > 48)
102
- rgb3 = vec_ld(48, inptr);
103
- #if RGB_PIXELSIZE == 4
104
- if (bytes > 64)
105
- rgb4 = vec_ld(64, inptr);
106
- #endif
107
- unaligned_shift_index = vec_lvsl(0, inptr);
108
- rgb0 = vec_perm(rgb0, rgb1, unaligned_shift_index);
109
- rgb1 = vec_perm(rgb1, rgb2, unaligned_shift_index);
110
- rgb2 = vec_perm(rgb2, rgb3, unaligned_shift_index);
111
- #if RGB_PIXELSIZE == 4
112
- rgb3 = vec_perm(rgb3, rgb4, unaligned_shift_index);
113
- #endif
114
- }
115
- } else {
116
- if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) {
117
- /* Slow path */
118
- memcpy(tmpbuf, inptr, min(num_cols, RGB_PIXELSIZE * 16));
119
- rgb0 = vec_ld(0, tmpbuf);
120
- rgb1 = vec_ld(16, tmpbuf);
121
- rgb2 = vec_ld(32, tmpbuf);
122
- #if RGB_PIXELSIZE == 4
123
- rgb3 = vec_ld(48, tmpbuf);
124
- #endif
125
- } else {
126
- /* Fast path */
127
- rgb0 = vec_ld(0, inptr);
128
- if (num_cols > 16)
129
- rgb1 = vec_ld(16, inptr);
130
- if (num_cols > 32)
131
- rgb2 = vec_ld(32, inptr);
132
- #if RGB_PIXELSIZE == 4
133
- if (num_cols > 48)
134
- rgb3 = vec_ld(48, inptr);
135
- #endif
136
- }
137
- }
138
- #else
139
- /* Little endian */
140
- rgb0 = vec_vsx_ld(0, inptr);
141
- if (num_cols > 16)
142
- rgb1 = vec_vsx_ld(16, inptr);
143
- if (num_cols > 32)
144
- rgb2 = vec_vsx_ld(32, inptr);
145
- #if RGB_PIXELSIZE == 4
146
- if (num_cols > 48)
147
- rgb3 = vec_vsx_ld(48, inptr);
148
- #endif
149
- #endif
150
-
151
- #if RGB_PIXELSIZE == 3
152
- /* rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
153
- * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
154
- * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf
155
- *
156
- * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3
157
- * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7
158
- * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb
159
- * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf
160
- */
161
- rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX0);
162
- rgbg1 = vec_perm(rgb0, rgb1, (__vector unsigned char)RGBG_INDEX1);
163
- rgbg2 = vec_perm(rgb1, rgb2, (__vector unsigned char)RGBG_INDEX2);
164
- rgbg3 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX3);
165
- #else
166
- /* rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3
167
- * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7
168
- * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb
169
- * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf
170
- *
171
- * rgbg0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 G0 B1 G1 B2 G2 B3 G3
172
- * rgbg1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 G4 B5 G5 B6 G6 B7 G7
173
- * rgbg2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 G8 B9 G9 Ba Ga Bb Gb
174
- * rgbg3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Gc Bd Gd Be Ge Bf Gf
175
- */
176
- rgbg0 = vec_perm(rgb0, rgb0, (__vector unsigned char)RGBG_INDEX);
177
- rgbg1 = vec_perm(rgb1, rgb1, (__vector unsigned char)RGBG_INDEX);
178
- rgbg2 = vec_perm(rgb2, rgb2, (__vector unsigned char)RGBG_INDEX);
179
- rgbg3 = vec_perm(rgb3, rgb3, (__vector unsigned char)RGBG_INDEX);
180
- #endif
181
-
182
- /* rg0 = R0 G0 R1 G1 R2 G2 R3 G3
183
- * bg0 = B0 G0 B1 G1 B2 G2 B3 G3
184
- * ...
185
- *
186
- * NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
187
- * support unsigned vectors.
188
- */
189
- rg0 = (__vector signed short)VEC_UNPACKHU(rgbg0);
190
- bg0 = (__vector signed short)VEC_UNPACKLU(rgbg0);
191
- rg1 = (__vector signed short)VEC_UNPACKHU(rgbg1);
192
- bg1 = (__vector signed short)VEC_UNPACKLU(rgbg1);
193
- rg2 = (__vector signed short)VEC_UNPACKHU(rgbg2);
194
- bg2 = (__vector signed short)VEC_UNPACKLU(rgbg2);
195
- rg3 = (__vector signed short)VEC_UNPACKHU(rgbg3);
196
- bg3 = (__vector signed short)VEC_UNPACKLU(rgbg3);
197
-
198
- /* (Original)
199
- * Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
200
- *
201
- * (This implementation)
202
- * Y = 0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
203
- */
204
-
205
- /* Calculate Y values */
206
-
207
- y0 = vec_msums(rg0, pw_f0299_f0337, pd_onehalf);
208
- y1 = vec_msums(rg1, pw_f0299_f0337, pd_onehalf);
209
- y2 = vec_msums(rg2, pw_f0299_f0337, pd_onehalf);
210
- y3 = vec_msums(rg3, pw_f0299_f0337, pd_onehalf);
211
- y0 = vec_msums(bg0, pw_f0114_f0250, y0);
212
- y1 = vec_msums(bg1, pw_f0114_f0250, y1);
213
- y2 = vec_msums(bg2, pw_f0114_f0250, y2);
214
- y3 = vec_msums(bg3, pw_f0114_f0250, y3);
215
- /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from
216
- * each dword into a new 16-bit vector, which is the equivalent of
217
- * descaling the 32-bit results (right-shifting by 16 bits) and then
218
- * packing them.
219
- */
220
- yl = vec_perm((__vector unsigned short)y0, (__vector unsigned short)y1,
221
- shift_pack_index);
222
- yh = vec_perm((__vector unsigned short)y2, (__vector unsigned short)y3,
223
- shift_pack_index);
224
- y = vec_pack(yl, yh);
225
- vec_st(y, 0, outptr);
226
- }
227
- }
228
- }
@@ -1,159 +0,0 @@
1
- /*
2
- * AltiVec optimizations for libjpeg-turbo
3
- *
4
- * Copyright (C) 2015, D. R. Commander. All Rights Reserved.
5
- *
6
- * This software is provided 'as-is', without any express or implied
7
- * warranty. In no event will the authors be held liable for any damages
8
- * arising from the use of this software.
9
- *
10
- * Permission is granted to anyone to use this software for any purpose,
11
- * including commercial applications, and to alter it and redistribute it
12
- * freely, subject to the following restrictions:
13
- *
14
- * 1. The origin of this software must not be misrepresented; you must not
15
- * claim that you wrote the original software. If you use this software
16
- * in a product, an acknowledgment in the product documentation would be
17
- * appreciated but is not required.
18
- * 2. Altered source versions must be plainly marked as such, and must not be
19
- * misrepresented as being the original software.
20
- * 3. This notice may not be removed or altered from any source distribution.
21
- */
22
-
23
- /* CHROMA DOWNSAMPLING */
24
-
25
- #include "jsimd_altivec.h"
26
- #include "jcsample.h"
27
-
28
-
29
- void jsimd_h2v1_downsample_altivec(JDIMENSION image_width,
30
- int max_v_samp_factor,
31
- JDIMENSION v_samp_factor,
32
- JDIMENSION width_in_blocks,
33
- JSAMPARRAY input_data,
34
- JSAMPARRAY output_data)
35
- {
36
- int outrow, outcol;
37
- JDIMENSION output_cols = width_in_blocks * DCTSIZE;
38
- JSAMPROW inptr, outptr;
39
-
40
- __vector unsigned char this0, next0, out;
41
- __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;
42
-
43
- /* Constants */
44
- __vector unsigned short pw_bias = { __4X2(0, 1) },
45
- pw_one = { __8X(1) };
46
- __vector unsigned char even_odd_index =
47
- { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
48
- pb_zero = { __16X(0) };
49
-
50
- expand_right_edge(input_data, max_v_samp_factor, image_width,
51
- output_cols * 2);
52
-
53
- for (outrow = 0; outrow < v_samp_factor; outrow++) {
54
- outptr = output_data[outrow];
55
- inptr = input_data[outrow];
56
-
57
- for (outcol = output_cols; outcol > 0;
58
- outcol -= 16, inptr += 32, outptr += 16) {
59
-
60
- this0 = vec_ld(0, inptr);
61
- this0 = vec_perm(this0, this0, even_odd_index);
62
- this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
63
- this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
64
- outl = vec_add(this0e, this0o);
65
- outl = vec_add(outl, pw_bias);
66
- outl = vec_sr(outl, pw_one);
67
-
68
- if (outcol > 8) {
69
- next0 = vec_ld(16, inptr);
70
- next0 = vec_perm(next0, next0, even_odd_index);
71
- next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
72
- next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
73
- outh = vec_add(next0e, next0o);
74
- outh = vec_add(outh, pw_bias);
75
- outh = vec_sr(outh, pw_one);
76
- } else
77
- outh = vec_splat_u16(0);
78
-
79
- out = vec_pack(outl, outh);
80
- vec_st(out, 0, outptr);
81
- }
82
- }
83
- }
84
-
85
-
86
- void
87
- jsimd_h2v2_downsample_altivec(JDIMENSION image_width, int max_v_samp_factor,
88
- JDIMENSION v_samp_factor,
89
- JDIMENSION width_in_blocks,
90
- JSAMPARRAY input_data, JSAMPARRAY output_data)
91
- {
92
- int inrow, outrow, outcol;
93
- JDIMENSION output_cols = width_in_blocks * DCTSIZE;
94
- JSAMPROW inptr0, inptr1, outptr;
95
-
96
- __vector unsigned char this0, next0, this1, next1, out;
97
- __vector unsigned short this0e, this0o, next0e, next0o, this1e, this1o,
98
- next1e, next1o, out0l, out0h, out1l, out1h, outl, outh;
99
-
100
- /* Constants */
101
- __vector unsigned short pw_bias = { __4X2(1, 2) },
102
- pw_two = { __8X(2) };
103
- __vector unsigned char even_odd_index =
104
- { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 },
105
- pb_zero = { __16X(0) };
106
-
107
- expand_right_edge(input_data, max_v_samp_factor, image_width,
108
- output_cols * 2);
109
-
110
- for (inrow = 0, outrow = 0; outrow < v_samp_factor;
111
- inrow += 2, outrow++) {
112
-
113
- inptr0 = input_data[inrow];
114
- inptr1 = input_data[inrow + 1];
115
- outptr = output_data[outrow];
116
-
117
- for (outcol = output_cols; outcol > 0;
118
- outcol -= 16, inptr0 += 32, inptr1 += 32, outptr += 16) {
119
-
120
- this0 = vec_ld(0, inptr0);
121
- this0 = vec_perm(this0, this0, even_odd_index);
122
- this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
123
- this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
124
- out0l = vec_add(this0e, this0o);
125
-
126
- this1 = vec_ld(0, inptr1);
127
- this1 = vec_perm(this1, this1, even_odd_index);
128
- this1e = (__vector unsigned short)VEC_UNPACKHU(this1);
129
- this1o = (__vector unsigned short)VEC_UNPACKLU(this1);
130
- out1l = vec_add(this1e, this1o);
131
-
132
- outl = vec_add(out0l, out1l);
133
- outl = vec_add(outl, pw_bias);
134
- outl = vec_sr(outl, pw_two);
135
-
136
- if (outcol > 8) {
137
- next0 = vec_ld(16, inptr0);
138
- next0 = vec_perm(next0, next0, even_odd_index);
139
- next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
140
- next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
141
- out0h = vec_add(next0e, next0o);
142
-
143
- next1 = vec_ld(16, inptr1);
144
- next1 = vec_perm(next1, next1, even_odd_index);
145
- next1e = (__vector unsigned short)VEC_UNPACKHU(next1);
146
- next1o = (__vector unsigned short)VEC_UNPACKLU(next1);
147
- out1h = vec_add(next1e, next1o);
148
-
149
- outh = vec_add(out0h, out1h);
150
- outh = vec_add(outh, pw_bias);
151
- outh = vec_sr(outh, pw_two);
152
- } else
153
- outh = vec_splat_u16(0);
154
-
155
- out = vec_pack(outl, outh);
156
- vec_st(out, 0, outptr);
157
- }
158
- }
159
- }
@@ -1,276 +0,0 @@
1
- /*
2
- * AltiVec optimizations for libjpeg-turbo
3
- *
4
- * Copyright (C) 2015, D. R. Commander. All Rights Reserved.
5
- *
6
- * This software is provided 'as-is', without any express or implied
7
- * warranty. In no event will the authors be held liable for any damages
8
- * arising from the use of this software.
9
- *
10
- * Permission is granted to anyone to use this software for any purpose,
11
- * including commercial applications, and to alter it and redistribute it
12
- * freely, subject to the following restrictions:
13
- *
14
- * 1. The origin of this software must not be misrepresented; you must not
15
- * claim that you wrote the original software. If you use this software
16
- * in a product, an acknowledgment in the product documentation would be
17
- * appreciated but is not required.
18
- * 2. Altered source versions must be plainly marked as such, and must not be
19
- * misrepresented as being the original software.
20
- * 3. This notice may not be removed or altered from any source distribution.
21
- */
22
-
23
- /* This file is included by jdcolor-altivec.c */
24
-
25
-
26
- void jsimd_ycc_rgb_convert_altivec(JDIMENSION out_width, JSAMPIMAGE input_buf,
27
- JDIMENSION input_row, JSAMPARRAY output_buf,
28
- int num_rows)
29
- {
30
- JSAMPROW outptr, inptr0, inptr1, inptr2;
31
- int pitch = out_width * RGB_PIXELSIZE, num_cols;
32
- #if __BIG_ENDIAN__
33
- int offset;
34
- #endif
35
- unsigned char __attribute__((aligned(16))) tmpbuf[RGB_PIXELSIZE * 16];
36
-
37
- __vector unsigned char rgb0, rgb1, rgb2, rgbx0, rgbx1, rgbx2, rgbx3,
38
- y, cb, cr;
39
- #if __BIG_ENDIAN__
40
- __vector unsigned char edgel, edgeh, edges, out0, out1, out2, out3;
41
- #if RGB_PIXELSIZE == 4
42
- __vector unsigned char out4;
43
- #endif
44
- #endif
45
- #if RGB_PIXELSIZE == 4
46
- __vector unsigned char rgb3;
47
- #endif
48
- __vector short rg0, rg1, rg2, rg3, bx0, bx1, bx2, bx3, yl, yh, cbl, cbh,
49
- crl, crh, rl, rh, gl, gh, bl, bh, g0w, g1w, g2w, g3w;
50
- __vector int g0, g1, g2, g3;
51
-
52
- /* Constants
53
- * NOTE: The >> 1 is to compensate for the fact that vec_madds() returns 17
54
- * high-order bits, not 16.
55
- */
56
- __vector short pw_f0402 = { __8X(F_0_402 >> 1) },
57
- pw_mf0228 = { __8X(-F_0_228 >> 1) },
58
- pw_mf0344_f0285 = { __4X2(-F_0_344, F_0_285) },
59
- pw_one = { __8X(1) }, pw_255 = { __8X(255) },
60
- pw_cj = { __8X(CENTERJSAMPLE) };
61
- __vector int pd_onehalf = { __4X(ONE_HALF) };
62
- __vector unsigned char pb_zero = { __16X(0) },
63
- #if __BIG_ENDIAN__
64
- shift_pack_index =
65
- { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
66
- #else
67
- shift_pack_index =
68
- { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
69
- #endif
70
-
71
- while (--num_rows >= 0) {
72
- inptr0 = input_buf[0][input_row];
73
- inptr1 = input_buf[1][input_row];
74
- inptr2 = input_buf[2][input_row];
75
- input_row++;
76
- outptr = *output_buf++;
77
-
78
- for (num_cols = pitch; num_cols > 0;
79
- num_cols -= RGB_PIXELSIZE * 16, outptr += RGB_PIXELSIZE * 16,
80
- inptr0 += 16, inptr1 += 16, inptr2 += 16) {
81
-
82
- y = vec_ld(0, inptr0);
83
- /* NOTE: We have to use vec_merge*() here because vec_unpack*() doesn't
84
- * support unsigned vectors.
85
- */
86
- yl = (__vector signed short)VEC_UNPACKHU(y);
87
- yh = (__vector signed short)VEC_UNPACKLU(y);
88
-
89
- cb = vec_ld(0, inptr1);
90
- cbl = (__vector signed short)VEC_UNPACKHU(cb);
91
- cbh = (__vector signed short)VEC_UNPACKLU(cb);
92
- cbl = vec_sub(cbl, pw_cj);
93
- cbh = vec_sub(cbh, pw_cj);
94
-
95
- cr = vec_ld(0, inptr2);
96
- crl = (__vector signed short)VEC_UNPACKHU(cr);
97
- crh = (__vector signed short)VEC_UNPACKLU(cr);
98
- crl = vec_sub(crl, pw_cj);
99
- crh = vec_sub(crh, pw_cj);
100
-
101
- /* (Original)
102
- * R = Y + 1.40200 * Cr
103
- * G = Y - 0.34414 * Cb - 0.71414 * Cr
104
- * B = Y + 1.77200 * Cb
105
- *
106
- * (This implementation)
107
- * R = Y + 0.40200 * Cr + Cr
108
- * G = Y - 0.34414 * Cb + 0.28586 * Cr - Cr
109
- * B = Y - 0.22800 * Cb + Cb + Cb
110
- */
111
- bl = vec_add(cbl, cbl);
112
- bh = vec_add(cbh, cbh);
113
- bl = vec_madds(bl, pw_mf0228, pw_one);
114
- bh = vec_madds(bh, pw_mf0228, pw_one);
115
- bl = vec_sra(bl, (__vector unsigned short)pw_one);
116
- bh = vec_sra(bh, (__vector unsigned short)pw_one);
117
- bl = vec_add(bl, cbl);
118
- bh = vec_add(bh, cbh);
119
- bl = vec_add(bl, cbl);
120
- bh = vec_add(bh, cbh);
121
- bl = vec_add(bl, yl);
122
- bh = vec_add(bh, yh);
123
-
124
- rl = vec_add(crl, crl);
125
- rh = vec_add(crh, crh);
126
- rl = vec_madds(rl, pw_f0402, pw_one);
127
- rh = vec_madds(rh, pw_f0402, pw_one);
128
- rl = vec_sra(rl, (__vector unsigned short)pw_one);
129
- rh = vec_sra(rh, (__vector unsigned short)pw_one);
130
- rl = vec_add(rl, crl);
131
- rh = vec_add(rh, crh);
132
- rl = vec_add(rl, yl);
133
- rh = vec_add(rh, yh);
134
-
135
- g0w = vec_mergeh(cbl, crl);
136
- g1w = vec_mergel(cbl, crl);
137
- g0 = vec_msums(g0w, pw_mf0344_f0285, pd_onehalf);
138
- g1 = vec_msums(g1w, pw_mf0344_f0285, pd_onehalf);
139
- g2w = vec_mergeh(cbh, crh);
140
- g3w = vec_mergel(cbh, crh);
141
- g2 = vec_msums(g2w, pw_mf0344_f0285, pd_onehalf);
142
- g3 = vec_msums(g3w, pw_mf0344_f0285, pd_onehalf);
143
- /* Clever way to avoid 4 shifts + 2 packs. This packs the high word from
144
- * each dword into a new 16-bit vector, which is the equivalent of
145
- * descaling the 32-bit results (right-shifting by 16 bits) and then
146
- * packing them.
147
- */
148
- gl = vec_perm((__vector short)g0, (__vector short)g1, shift_pack_index);
149
- gh = vec_perm((__vector short)g2, (__vector short)g3, shift_pack_index);
150
- gl = vec_sub(gl, crl);
151
- gh = vec_sub(gh, crh);
152
- gl = vec_add(gl, yl);
153
- gh = vec_add(gh, yh);
154
-
155
- rg0 = vec_mergeh(rl, gl);
156
- bx0 = vec_mergeh(bl, pw_255);
157
- rg1 = vec_mergel(rl, gl);
158
- bx1 = vec_mergel(bl, pw_255);
159
- rg2 = vec_mergeh(rh, gh);
160
- bx2 = vec_mergeh(bh, pw_255);
161
- rg3 = vec_mergel(rh, gh);
162
- bx3 = vec_mergel(bh, pw_255);
163
-
164
- rgbx0 = vec_packsu(rg0, bx0);
165
- rgbx1 = vec_packsu(rg1, bx1);
166
- rgbx2 = vec_packsu(rg2, bx2);
167
- rgbx3 = vec_packsu(rg3, bx3);
168
-
169
- #if RGB_PIXELSIZE == 3
170
- /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3
171
- * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7
172
- * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb
173
- * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf
174
- *
175
- * rgb0 = R0 G0 B0 R1 G1 B1 R2 G2 B2 R3 G3 B3 R4 G4 B4 R5
176
- * rgb1 = G5 B5 R6 G6 B6 R7 G7 B7 R8 G8 B8 R9 G9 B9 Ra Ga
177
- * rgb2 = Ba Rb Gb Bb Rc Gc Bc Rd Gd Bd Re Ge Be Rf Gf Bf
178
- */
179
- rgb0 = vec_perm(rgbx0, rgbx1, (__vector unsigned char)RGB_INDEX0);
180
- rgb1 = vec_perm(rgbx1, rgbx2, (__vector unsigned char)RGB_INDEX1);
181
- rgb2 = vec_perm(rgbx2, rgbx3, (__vector unsigned char)RGB_INDEX2);
182
- #else
183
- /* rgbx0 = R0 G0 R1 G1 R2 G2 R3 G3 B0 X0 B1 X1 B2 X2 B3 X3
184
- * rgbx1 = R4 G4 R5 G5 R6 G6 R7 G7 B4 X4 B5 X5 B6 X6 B7 X7
185
- * rgbx2 = R8 G8 R9 G9 Ra Ga Rb Gb B8 X8 B9 X9 Ba Xa Bb Xb
186
- * rgbx3 = Rc Gc Rd Gd Re Ge Rf Gf Bc Xc Bd Xd Be Xe Bf Xf
187
- *
188
- * rgb0 = R0 G0 B0 X0 R1 G1 B1 X1 R2 G2 B2 X2 R3 G3 B3 X3
189
- * rgb1 = R4 G4 B4 X4 R5 G5 B5 X5 R6 G6 B6 X6 R7 G7 B7 X7
190
- * rgb2 = R8 G8 B8 X8 R9 G9 B9 X9 Ra Ga Ba Xa Rb Gb Bb Xb
191
- * rgb3 = Rc Gc Bc Xc Rd Gd Bd Xd Re Ge Be Xe Rf Gf Bf Xf
192
- */
193
- rgb0 = vec_perm(rgbx0, rgbx0, (__vector unsigned char)RGB_INDEX);
194
- rgb1 = vec_perm(rgbx1, rgbx1, (__vector unsigned char)RGB_INDEX);
195
- rgb2 = vec_perm(rgbx2, rgbx2, (__vector unsigned char)RGB_INDEX);
196
- rgb3 = vec_perm(rgbx3, rgbx3, (__vector unsigned char)RGB_INDEX);
197
- #endif
198
-
199
- #if __BIG_ENDIAN__
200
- offset = (size_t)outptr & 15;
201
- if (offset) {
202
- __vector unsigned char unaligned_shift_index;
203
- int bytes = num_cols + offset;
204
-
205
- if (bytes < (RGB_PIXELSIZE + 1) * 16 && (bytes & 15)) {
206
- /* Slow path to prevent buffer overwrite. Since there is no way to
207
- * write a partial AltiVec register, overwrite would occur on the
208
- * last chunk of the last image row if the right edge is not on a
209
- * 16-byte boundary. It could also occur on other rows if the bytes
210
- * per row is low enough. Since we can't determine whether we're on
211
- * the last image row, we have to assume every row is the last.
212
- */
213
- vec_st(rgb0, 0, tmpbuf);
214
- vec_st(rgb1, 16, tmpbuf);
215
- vec_st(rgb2, 32, tmpbuf);
216
- #if RGB_PIXELSIZE == 4
217
- vec_st(rgb3, 48, tmpbuf);
218
- #endif
219
- memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16));
220
- } else {
221
- /* Fast path */
222
- unaligned_shift_index = vec_lvsl(0, outptr);
223
- edgel = vec_ld(0, outptr);
224
- edgeh = vec_ld(min(num_cols - 1, RGB_PIXELSIZE * 16), outptr);
225
- edges = vec_perm(edgeh, edgel, unaligned_shift_index);
226
- unaligned_shift_index = vec_lvsr(0, outptr);
227
- out0 = vec_perm(edges, rgb0, unaligned_shift_index);
228
- out1 = vec_perm(rgb0, rgb1, unaligned_shift_index);
229
- out2 = vec_perm(rgb1, rgb2, unaligned_shift_index);
230
- #if RGB_PIXELSIZE == 4
231
- out3 = vec_perm(rgb2, rgb3, unaligned_shift_index);
232
- out4 = vec_perm(rgb3, edges, unaligned_shift_index);
233
- #else
234
- out3 = vec_perm(rgb2, edges, unaligned_shift_index);
235
- #endif
236
- vec_st(out0, 0, outptr);
237
- if (bytes > 16)
238
- vec_st(out1, 16, outptr);
239
- if (bytes > 32)
240
- vec_st(out2, 32, outptr);
241
- if (bytes > 48)
242
- vec_st(out3, 48, outptr);
243
- #if RGB_PIXELSIZE == 4
244
- if (bytes > 64)
245
- vec_st(out4, 64, outptr);
246
- #endif
247
- }
248
- } else {
249
- #endif /* __BIG_ENDIAN__ */
250
- if (num_cols < RGB_PIXELSIZE * 16 && (num_cols & 15)) {
251
- /* Slow path */
252
- VEC_ST(rgb0, 0, tmpbuf);
253
- VEC_ST(rgb1, 16, tmpbuf);
254
- VEC_ST(rgb2, 32, tmpbuf);
255
- #if RGB_PIXELSIZE == 4
256
- VEC_ST(rgb3, 48, tmpbuf);
257
- #endif
258
- memcpy(outptr, tmpbuf, min(num_cols, RGB_PIXELSIZE * 16));
259
- } else {
260
- /* Fast path */
261
- VEC_ST(rgb0, 0, outptr);
262
- if (num_cols > 16)
263
- VEC_ST(rgb1, 16, outptr);
264
- if (num_cols > 32)
265
- VEC_ST(rgb2, 32, outptr);
266
- #if RGB_PIXELSIZE == 4
267
- if (num_cols > 48)
268
- VEC_ST(rgb3, 48, outptr);
269
- #endif
270
- }
271
- #if __BIG_ENDIAN__
272
- }
273
- #endif
274
- }
275
- }
276
- }