entangledstate-isbn 1.4.0 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (290) hide show
  1. data/README +1 -1
  2. data/Rakefile +0 -18
  3. data/VERSION +1 -0
  4. data/isbn.gemspec +290 -7
  5. data/lib/isbn.rb +6 -6
  6. data/src/gocr-0.48/.cvsignore +6 -0
  7. data/src/gocr-0.48/AUTHORS +7 -0
  8. data/src/gocr-0.48/BUGS +55 -0
  9. data/src/gocr-0.48/CREDITS +17 -0
  10. data/src/gocr-0.48/HISTORY +243 -0
  11. data/src/gocr-0.48/INSTALL +83 -0
  12. data/src/gocr-0.48/Makefile +193 -0
  13. data/src/gocr-0.48/Makefile.in +193 -0
  14. data/src/gocr-0.48/README +165 -0
  15. data/src/gocr-0.48/READMEde.txt +80 -0
  16. data/src/gocr-0.48/REMARK.txt +18 -0
  17. data/src/gocr-0.48/REVIEW +538 -0
  18. data/src/gocr-0.48/TODO +65 -0
  19. data/src/gocr-0.48/bin/.cvsignore +2 -0
  20. data/src/gocr-0.48/bin/create_db +38 -0
  21. data/src/gocr-0.48/bin/gocr.tcl +527 -0
  22. data/src/gocr-0.48/bin/gocr_chk.sh +44 -0
  23. data/src/gocr-0.48/configure +4689 -0
  24. data/src/gocr-0.48/configure.in +71 -0
  25. data/src/gocr-0.48/doc/.#Makefile.1.6 +39 -0
  26. data/src/gocr-0.48/doc/.cvsignore +2 -0
  27. data/src/gocr-0.48/doc/Makefile +39 -0
  28. data/src/gocr-0.48/doc/Makefile.in +39 -0
  29. data/src/gocr-0.48/doc/example.dtd +53 -0
  30. data/src/gocr-0.48/doc/example.xml +21 -0
  31. data/src/gocr-0.48/doc/examples.txt +67 -0
  32. data/src/gocr-0.48/doc/gocr.html +578 -0
  33. data/src/gocr-0.48/doc/unicode.txt +57 -0
  34. data/src/gocr-0.48/examples/.#Makefile.1.22 +166 -0
  35. data/src/gocr-0.48/examples/4x6.png +0 -0
  36. data/src/gocr-0.48/examples/4x6.txt +2 -0
  37. data/src/gocr-0.48/examples/5x7.png +0 -0
  38. data/src/gocr-0.48/examples/5x7.png.txt +2 -0
  39. data/src/gocr-0.48/examples/5x8.png +0 -0
  40. data/src/gocr-0.48/examples/5x8.png.txt +2 -0
  41. data/src/gocr-0.48/examples/Makefile +166 -0
  42. data/src/gocr-0.48/examples/color.fig +20 -0
  43. data/src/gocr-0.48/examples/ex.fig +16 -0
  44. data/src/gocr-0.48/examples/font.tex +22 -0
  45. data/src/gocr-0.48/examples/font1.tex +46 -0
  46. data/src/gocr-0.48/examples/font2.fig +27 -0
  47. data/src/gocr-0.48/examples/font_nw.tex +24 -0
  48. data/src/gocr-0.48/examples/handwrt1.jpg +0 -0
  49. data/src/gocr-0.48/examples/handwrt1.txt +10 -0
  50. data/src/gocr-0.48/examples/inverse.fig +20 -0
  51. data/src/gocr-0.48/examples/matrix.jpg +0 -0
  52. data/src/gocr-0.48/examples/ocr-a-subset.png +0 -0
  53. data/src/gocr-0.48/examples/ocr-a-subset.png.txt +4 -0
  54. data/src/gocr-0.48/examples/ocr-a.png +0 -0
  55. data/src/gocr-0.48/examples/ocr-a.txt +6 -0
  56. data/src/gocr-0.48/examples/ocr-b.png +0 -0
  57. data/src/gocr-0.48/examples/ocr-b.png.txt +4 -0
  58. data/src/gocr-0.48/examples/polish.tex +28 -0
  59. data/src/gocr-0.48/examples/rotate45.fig +14 -0
  60. data/src/gocr-0.48/examples/score +36 -0
  61. data/src/gocr-0.48/examples/text.tex +28 -0
  62. data/src/gocr-0.48/gocr.spec +143 -0
  63. data/src/gocr-0.48/gpl.html +537 -0
  64. data/src/gocr-0.48/include/.cvsignore +2 -0
  65. data/src/gocr-0.48/include/config.h +36 -0
  66. data/src/gocr-0.48/include/config.h.in +36 -0
  67. data/src/gocr-0.48/include/version.h +2 -0
  68. data/src/gocr-0.48/install-sh +3 -0
  69. data/src/gocr-0.48/make.bat +57 -0
  70. data/src/gocr-0.48/man/.cvsignore +2 -0
  71. data/src/gocr-0.48/man/Makefile +29 -0
  72. data/src/gocr-0.48/man/Makefile.in +29 -0
  73. data/src/gocr-0.48/man/man1/gocr.1 +166 -0
  74. data/src/gocr-0.48/src/.cvsignore +4 -0
  75. data/src/gocr-0.48/src/Makefile +132 -0
  76. data/src/gocr-0.48/src/Makefile.in +132 -0
  77. data/src/gocr-0.48/src/amiga.h +31 -0
  78. data/src/gocr-0.48/src/barcode.c +846 -0
  79. data/src/gocr-0.48/src/barcode.c.orig +593 -0
  80. data/src/gocr-0.48/src/barcode.h +11 -0
  81. data/src/gocr-0.48/src/box.c +372 -0
  82. data/src/gocr-0.48/src/database.c +462 -0
  83. data/src/gocr-0.48/src/detect.c +943 -0
  84. data/src/gocr-0.48/src/gocr.c +373 -0
  85. data/src/gocr-0.48/src/gocr.h +288 -0
  86. data/src/gocr-0.48/src/jconv.c +168 -0
  87. data/src/gocr-0.48/src/job.c +84 -0
  88. data/src/gocr-0.48/src/lines.c +350 -0
  89. data/src/gocr-0.48/src/list.c +334 -0
  90. data/src/gocr-0.48/src/list.h +90 -0
  91. data/src/gocr-0.48/src/ocr0.c +6756 -0
  92. data/src/gocr-0.48/src/ocr0.h +63 -0
  93. data/src/gocr-0.48/src/ocr0n.c +1475 -0
  94. data/src/gocr-0.48/src/ocr1.c +85 -0
  95. data/src/gocr-0.48/src/ocr1.h +3 -0
  96. data/src/gocr-0.48/src/otsu.c +289 -0
  97. data/src/gocr-0.48/src/otsu.h +23 -0
  98. data/src/gocr-0.48/src/output.c +289 -0
  99. data/src/gocr-0.48/src/output.h +37 -0
  100. data/src/gocr-0.48/src/pcx.c +153 -0
  101. data/src/gocr-0.48/src/pcx.h +9 -0
  102. data/src/gocr-0.48/src/pgm2asc.c +2893 -0
  103. data/src/gocr-0.48/src/pgm2asc.h +105 -0
  104. data/src/gocr-0.48/src/pixel.c +537 -0
  105. data/src/gocr-0.48/src/pnm.c +533 -0
  106. data/src/gocr-0.48/src/pnm.h +35 -0
  107. data/src/gocr-0.48/src/progress.c +87 -0
  108. data/src/gocr-0.48/src/progress.h +42 -0
  109. data/src/gocr-0.48/src/remove.c +703 -0
  110. data/src/gocr-0.48/src/tga.c +87 -0
  111. data/src/gocr-0.48/src/tga.h +6 -0
  112. data/src/gocr-0.48/src/unicode.c +1314 -0
  113. data/src/gocr-0.48/src/unicode.h +1257 -0
  114. data/src/jpeg-7/Makefile.am +133 -0
  115. data/src/jpeg-7/Makefile.in +1089 -0
  116. data/src/jpeg-7/README +322 -0
  117. data/src/jpeg-7/aclocal.m4 +8990 -0
  118. data/src/jpeg-7/ansi2knr.1 +36 -0
  119. data/src/jpeg-7/ansi2knr.c +739 -0
  120. data/src/jpeg-7/cderror.h +132 -0
  121. data/src/jpeg-7/cdjpeg.c +181 -0
  122. data/src/jpeg-7/cdjpeg.h +187 -0
  123. data/src/jpeg-7/change.log +270 -0
  124. data/src/jpeg-7/cjpeg.1 +325 -0
  125. data/src/jpeg-7/cjpeg.c +616 -0
  126. data/src/jpeg-7/ckconfig.c +402 -0
  127. data/src/jpeg-7/coderules.txt +118 -0
  128. data/src/jpeg-7/config.guess +1561 -0
  129. data/src/jpeg-7/config.sub +1686 -0
  130. data/src/jpeg-7/configure +17139 -0
  131. data/src/jpeg-7/configure.ac +317 -0
  132. data/src/jpeg-7/depcomp +630 -0
  133. data/src/jpeg-7/djpeg.1 +251 -0
  134. data/src/jpeg-7/djpeg.c +617 -0
  135. data/src/jpeg-7/example.c +433 -0
  136. data/src/jpeg-7/filelist.txt +215 -0
  137. data/src/jpeg-7/install-sh +520 -0
  138. data/src/jpeg-7/install.txt +1097 -0
  139. data/src/jpeg-7/jaricom.c +148 -0
  140. data/src/jpeg-7/jcapimin.c +282 -0
  141. data/src/jpeg-7/jcapistd.c +161 -0
  142. data/src/jpeg-7/jcarith.c +921 -0
  143. data/src/jpeg-7/jccoefct.c +453 -0
  144. data/src/jpeg-7/jccolor.c +459 -0
  145. data/src/jpeg-7/jcdctmgr.c +482 -0
  146. data/src/jpeg-7/jchuff.c +1612 -0
  147. data/src/jpeg-7/jcinit.c +65 -0
  148. data/src/jpeg-7/jcmainct.c +293 -0
  149. data/src/jpeg-7/jcmarker.c +667 -0
  150. data/src/jpeg-7/jcmaster.c +770 -0
  151. data/src/jpeg-7/jcomapi.c +106 -0
  152. data/src/jpeg-7/jconfig.bcc +48 -0
  153. data/src/jpeg-7/jconfig.cfg +45 -0
  154. data/src/jpeg-7/jconfig.dj +38 -0
  155. data/src/jpeg-7/jconfig.mac +43 -0
  156. data/src/jpeg-7/jconfig.manx +43 -0
  157. data/src/jpeg-7/jconfig.mc6 +52 -0
  158. data/src/jpeg-7/jconfig.sas +43 -0
  159. data/src/jpeg-7/jconfig.st +42 -0
  160. data/src/jpeg-7/jconfig.txt +155 -0
  161. data/src/jpeg-7/jconfig.vc +45 -0
  162. data/src/jpeg-7/jconfig.vms +37 -0
  163. data/src/jpeg-7/jconfig.wat +38 -0
  164. data/src/jpeg-7/jcparam.c +632 -0
  165. data/src/jpeg-7/jcprepct.c +358 -0
  166. data/src/jpeg-7/jcsample.c +545 -0
  167. data/src/jpeg-7/jctrans.c +381 -0
  168. data/src/jpeg-7/jdapimin.c +396 -0
  169. data/src/jpeg-7/jdapistd.c +275 -0
  170. data/src/jpeg-7/jdarith.c +762 -0
  171. data/src/jpeg-7/jdatadst.c +151 -0
  172. data/src/jpeg-7/jdatasrc.c +212 -0
  173. data/src/jpeg-7/jdcoefct.c +736 -0
  174. data/src/jpeg-7/jdcolor.c +396 -0
  175. data/src/jpeg-7/jdct.h +393 -0
  176. data/src/jpeg-7/jddctmgr.c +382 -0
  177. data/src/jpeg-7/jdhuff.c +1309 -0
  178. data/src/jpeg-7/jdinput.c +384 -0
  179. data/src/jpeg-7/jdmainct.c +512 -0
  180. data/src/jpeg-7/jdmarker.c +1360 -0
  181. data/src/jpeg-7/jdmaster.c +663 -0
  182. data/src/jpeg-7/jdmerge.c +400 -0
  183. data/src/jpeg-7/jdpostct.c +290 -0
  184. data/src/jpeg-7/jdsample.c +361 -0
  185. data/src/jpeg-7/jdtrans.c +136 -0
  186. data/src/jpeg-7/jerror.c +252 -0
  187. data/src/jpeg-7/jerror.h +304 -0
  188. data/src/jpeg-7/jfdctflt.c +174 -0
  189. data/src/jpeg-7/jfdctfst.c +230 -0
  190. data/src/jpeg-7/jfdctint.c +4348 -0
  191. data/src/jpeg-7/jidctflt.c +242 -0
  192. data/src/jpeg-7/jidctfst.c +368 -0
  193. data/src/jpeg-7/jidctint.c +5137 -0
  194. data/src/jpeg-7/jinclude.h +91 -0
  195. data/src/jpeg-7/jmemansi.c +167 -0
  196. data/src/jpeg-7/jmemdos.c +638 -0
  197. data/src/jpeg-7/jmemdosa.asm +379 -0
  198. data/src/jpeg-7/jmemmac.c +289 -0
  199. data/src/jpeg-7/jmemmgr.c +1118 -0
  200. data/src/jpeg-7/jmemname.c +276 -0
  201. data/src/jpeg-7/jmemnobs.c +109 -0
  202. data/src/jpeg-7/jmemsys.h +198 -0
  203. data/src/jpeg-7/jmorecfg.h +369 -0
  204. data/src/jpeg-7/jpegint.h +395 -0
  205. data/src/jpeg-7/jpeglib.h +1135 -0
  206. data/src/jpeg-7/jpegtran.1 +272 -0
  207. data/src/jpeg-7/jpegtran.c +546 -0
  208. data/src/jpeg-7/jquant1.c +856 -0
  209. data/src/jpeg-7/jquant2.c +1310 -0
  210. data/src/jpeg-7/jutils.c +179 -0
  211. data/src/jpeg-7/jversion.h +14 -0
  212. data/src/jpeg-7/libjpeg.map +4 -0
  213. data/src/jpeg-7/libjpeg.txt +3067 -0
  214. data/src/jpeg-7/ltmain.sh +8406 -0
  215. data/src/jpeg-7/makcjpeg.st +36 -0
  216. data/src/jpeg-7/makdjpeg.st +36 -0
  217. data/src/jpeg-7/makeadsw.vc6 +77 -0
  218. data/src/jpeg-7/makeasln.vc9 +33 -0
  219. data/src/jpeg-7/makecdep.vc6 +82 -0
  220. data/src/jpeg-7/makecdsp.vc6 +130 -0
  221. data/src/jpeg-7/makecmak.vc6 +159 -0
  222. data/src/jpeg-7/makecvcp.vc9 +186 -0
  223. data/src/jpeg-7/makeddep.vc6 +82 -0
  224. data/src/jpeg-7/makeddsp.vc6 +130 -0
  225. data/src/jpeg-7/makedmak.vc6 +159 -0
  226. data/src/jpeg-7/makedvcp.vc9 +186 -0
  227. data/src/jpeg-7/makefile.ansi +220 -0
  228. data/src/jpeg-7/makefile.bcc +291 -0
  229. data/src/jpeg-7/makefile.dj +226 -0
  230. data/src/jpeg-7/makefile.manx +220 -0
  231. data/src/jpeg-7/makefile.mc6 +255 -0
  232. data/src/jpeg-7/makefile.mms +224 -0
  233. data/src/jpeg-7/makefile.sas +258 -0
  234. data/src/jpeg-7/makefile.unix +234 -0
  235. data/src/jpeg-7/makefile.vc +217 -0
  236. data/src/jpeg-7/makefile.vms +142 -0
  237. data/src/jpeg-7/makefile.wat +239 -0
  238. data/src/jpeg-7/makejdep.vc6 +423 -0
  239. data/src/jpeg-7/makejdsp.vc6 +285 -0
  240. data/src/jpeg-7/makejdsw.vc6 +29 -0
  241. data/src/jpeg-7/makejmak.vc6 +425 -0
  242. data/src/jpeg-7/makejsln.vc9 +17 -0
  243. data/src/jpeg-7/makejvcp.vc9 +328 -0
  244. data/src/jpeg-7/makeproj.mac +213 -0
  245. data/src/jpeg-7/makerdep.vc6 +6 -0
  246. data/src/jpeg-7/makerdsp.vc6 +78 -0
  247. data/src/jpeg-7/makermak.vc6 +110 -0
  248. data/src/jpeg-7/makervcp.vc9 +133 -0
  249. data/src/jpeg-7/maketdep.vc6 +43 -0
  250. data/src/jpeg-7/maketdsp.vc6 +122 -0
  251. data/src/jpeg-7/maketmak.vc6 +131 -0
  252. data/src/jpeg-7/maketvcp.vc9 +178 -0
  253. data/src/jpeg-7/makewdep.vc6 +6 -0
  254. data/src/jpeg-7/makewdsp.vc6 +78 -0
  255. data/src/jpeg-7/makewmak.vc6 +110 -0
  256. data/src/jpeg-7/makewvcp.vc9 +133 -0
  257. data/src/jpeg-7/makljpeg.st +68 -0
  258. data/src/jpeg-7/maktjpeg.st +30 -0
  259. data/src/jpeg-7/makvms.opt +4 -0
  260. data/src/jpeg-7/missing +376 -0
  261. data/src/jpeg-7/rdbmp.c +439 -0
  262. data/src/jpeg-7/rdcolmap.c +253 -0
  263. data/src/jpeg-7/rdgif.c +38 -0
  264. data/src/jpeg-7/rdjpgcom.1 +63 -0
  265. data/src/jpeg-7/rdjpgcom.c +515 -0
  266. data/src/jpeg-7/rdppm.c +459 -0
  267. data/src/jpeg-7/rdrle.c +387 -0
  268. data/src/jpeg-7/rdswitch.c +365 -0
  269. data/src/jpeg-7/rdtarga.c +500 -0
  270. data/src/jpeg-7/structure.txt +945 -0
  271. data/src/jpeg-7/testimg.bmp +0 -0
  272. data/src/jpeg-7/testimg.jpg +0 -0
  273. data/src/jpeg-7/testimg.ppm +4 -0
  274. data/src/jpeg-7/testimgp.jpg +0 -0
  275. data/src/jpeg-7/testorig.jpg +0 -0
  276. data/src/jpeg-7/testprog.jpg +0 -0
  277. data/src/jpeg-7/transupp.c +1533 -0
  278. data/src/jpeg-7/transupp.h +205 -0
  279. data/src/jpeg-7/usage.txt +605 -0
  280. data/src/jpeg-7/wizard.txt +211 -0
  281. data/src/jpeg-7/wrbmp.c +442 -0
  282. data/src/jpeg-7/wrgif.c +399 -0
  283. data/src/jpeg-7/wrjpgcom.1 +103 -0
  284. data/src/jpeg-7/wrjpgcom.c +583 -0
  285. data/src/jpeg-7/wrppm.c +269 -0
  286. data/src/jpeg-7/wrrle.c +305 -0
  287. data/src/jpeg-7/wrtarga.c +253 -0
  288. metadata +287 -6
  289. data/LICENSE +0 -20
  290. data/VERSION.yml +0 -4
@@ -0,0 +1,174 @@
1
+ /*
2
+ * jfdctflt.c
3
+ *
4
+ * Copyright (C) 1994-1996, Thomas G. Lane.
5
+ * Modified 2003-2009 by Guido Vollbeding.
6
+ * This file is part of the Independent JPEG Group's software.
7
+ * For conditions of distribution and use, see the accompanying README file.
8
+ *
9
+ * This file contains a floating-point implementation of the
10
+ * forward DCT (Discrete Cosine Transform).
11
+ *
12
+ * This implementation should be more accurate than either of the integer
13
+ * DCT implementations. However, it may not give the same results on all
14
+ * machines because of differences in roundoff behavior. Speed will depend
15
+ * on the hardware's floating point capacity.
16
+ *
17
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
18
+ * on each column. Direct algorithms are also available, but they are
19
+ * much more complex and seem not to be any faster when reduced to code.
20
+ *
21
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
22
+ * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
23
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
24
+ * JPEG textbook (see REFERENCES section in file README). The following code
25
+ * is based directly on figure 4-8 in P&M.
26
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
27
+ * possible to arrange the computation so that many of the multiplies are
28
+ * simple scalings of the final outputs. These multiplies can then be
29
+ * folded into the multiplications or divisions by the JPEG quantization
30
+ * table entries. The AA&N method leaves only 5 multiplies and 29 adds
31
+ * to be done in the DCT itself.
32
+ * The primary disadvantage of this method is that with a fixed-point
33
+ * implementation, accuracy is lost due to imprecise representation of the
34
+ * scaled quantization values. However, that problem does not arise if
35
+ * we use floating point arithmetic.
36
+ */
37
+
38
+ #define JPEG_INTERNALS
39
+ #include "jinclude.h"
40
+ #include "jpeglib.h"
41
+ #include "jdct.h" /* Private declarations for DCT subsystem */
42
+
43
+ #ifdef DCT_FLOAT_SUPPORTED
44
+
45
+
46
+ /*
47
+ * This module is specialized to the case DCTSIZE = 8.
48
+ */
49
+
50
+ #if DCTSIZE != 8
51
+ Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
52
+ #endif
53
+
54
+
55
+ /*
56
+ * Perform the forward DCT on one block of samples.
+ *
+ * Parameters:
+ *   data        - output workspace. Pass 1 writes DCTSIZE consecutive rows of
+ *                 DCTSIZE values; pass 2 reads and overwrites the same storage
+ *                 column by column, touching indices 0 .. DCTSIZE*DCTSIZE-1.
+ *   sample_data - array of sample rows; rows 0 .. DCTSIZE-1 are read.
+ *   start_col   - offset added to each row pointer; the 8 samples at
+ *                 start_col .. start_col+7 of every row are read.
+ *
+ * Returns nothing; the result is left in data[].
+ *
+ * Notes:
+ *   - The only offset applied is 8 * CENTERJSAMPLE, subtracted from element 0
+ *     of each row in pass 1 (the unsigned->signed conversion). Every other
+ *     output of pass 1 is built from sample differences.
+ *   - No final scaling step is performed here. Per the file header, the
+ *     AA&N method expects the remaining output scalings to be folded into
+ *     the quantization step.
+ *   - No bounds checks are made on sample_data or start_col.
57
+ */
58
+
59
+ GLOBAL(void)
60
+ jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)
61
+ {
62
+ FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
63
+ FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
64
+ FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
65
+ FAST_FLOAT *dataptr;
66
+ JSAMPROW elemptr;
67
+ int ctr;
68
+
69
+ /* Pass 1: process rows. */
70
+
71
+ dataptr = data;
72
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
73
+ elemptr = sample_data[ctr] + start_col; /* first of the 8 samples used from row ctr */
74
+
75
+ /* Load data into workspace */
76
+ tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]));
77
+ tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]));
78
+ tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]));
79
+ tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]));
80
+ tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]));
81
+ tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]));
82
+ tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]));
83
+ tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]));
84
+
85
+ /* Even part */
86
+
87
+ tmp10 = tmp0 + tmp3; /* phase 2 */
88
+ tmp13 = tmp0 - tmp3;
89
+ tmp11 = tmp1 + tmp2;
90
+ tmp12 = tmp1 - tmp2;
91
+
92
+ /* Apply unsigned->signed conversion */
93
+ dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
94
+ dataptr[4] = tmp10 - tmp11;
95
+
96
+ z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
97
+ dataptr[2] = tmp13 + z1; /* phase 5 */
98
+ dataptr[6] = tmp13 - z1;
99
+
100
+ /* Odd part */
101
+
102
+ tmp10 = tmp4 + tmp5; /* phase 2 */
103
+ tmp11 = tmp5 + tmp6;
104
+ tmp12 = tmp6 + tmp7;
105
+
106
+ /* The rotator is modified from fig 4-8 to avoid extra negations. */
107
+ z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
108
+ z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
109
+ z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
110
+ z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
111
+
112
+ z11 = tmp7 + z3; /* phase 5 */
113
+ z13 = tmp7 - z3;
114
+
115
+ dataptr[5] = z13 + z2; /* phase 6 */
116
+ dataptr[3] = z13 - z2;
117
+ dataptr[1] = z11 + z4;
118
+ dataptr[7] = z11 - z4;
119
+
120
+ dataptr += DCTSIZE; /* advance pointer to next row */
121
+ }
122
+
123
+ /* Pass 2: process columns. */
124
+
125
+ dataptr = data; /* rewind: pass 2 reads and overwrites the pass 1 results */
126
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* DCTSIZE iterations, one per column */
127
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
128
+ tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
129
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
130
+ tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
131
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
132
+ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
133
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
134
+ tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
135
+
136
+ /* Even part */
137
+
138
+ tmp10 = tmp0 + tmp3; /* phase 2 */
139
+ tmp13 = tmp0 - tmp3;
140
+ tmp11 = tmp1 + tmp2;
141
+ tmp12 = tmp1 - tmp2;
142
+
143
+ dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
144
+ dataptr[DCTSIZE*4] = tmp10 - tmp11;
145
+
146
+ z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */
147
+ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
148
+ dataptr[DCTSIZE*6] = tmp13 - z1;
149
+
150
+ /* Odd part */
151
+
152
+ tmp10 = tmp4 + tmp5; /* phase 2 */
153
+ tmp11 = tmp5 + tmp6;
154
+ tmp12 = tmp6 + tmp7;
155
+
156
+ /* The rotator is modified from fig 4-8 to avoid extra negations. */
157
+ z5 = (tmp10 - tmp12) * ((FAST_FLOAT) 0.382683433); /* c6 */
158
+ z2 = ((FAST_FLOAT) 0.541196100) * tmp10 + z5; /* c2-c6 */
159
+ z4 = ((FAST_FLOAT) 1.306562965) * tmp12 + z5; /* c2+c6 */
160
+ z3 = tmp11 * ((FAST_FLOAT) 0.707106781); /* c4 */
161
+
162
+ z11 = tmp7 + z3; /* phase 5 */
163
+ z13 = tmp7 - z3;
164
+
165
+ dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
166
+ dataptr[DCTSIZE*3] = z13 - z2;
167
+ dataptr[DCTSIZE*1] = z11 + z4;
168
+ dataptr[DCTSIZE*7] = z11 - z4;
169
+
170
+ dataptr++; /* advance pointer to next column */
171
+ }
172
+ }
173
+
174
+ #endif /* DCT_FLOAT_SUPPORTED */
@@ -0,0 +1,230 @@
1
+ /*
2
+ * jfdctfst.c
3
+ *
4
+ * Copyright (C) 1994-1996, Thomas G. Lane.
5
+ * Modified 2003-2009 by Guido Vollbeding.
6
+ * This file is part of the Independent JPEG Group's software.
7
+ * For conditions of distribution and use, see the accompanying README file.
8
+ *
9
+ * This file contains a fast, not so accurate integer implementation of the
10
+ * forward DCT (Discrete Cosine Transform).
11
+ *
12
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
13
+ * on each column. Direct algorithms are also available, but they are
14
+ * much more complex and seem not to be any faster when reduced to code.
15
+ *
16
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
17
+ * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
18
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
19
+ * JPEG textbook (see REFERENCES section in file README). The following code
20
+ * is based directly on figure 4-8 in P&M.
21
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
22
+ * possible to arrange the computation so that many of the multiplies are
23
+ * simple scalings of the final outputs. These multiplies can then be
24
+ * folded into the multiplications or divisions by the JPEG quantization
25
+ * table entries. The AA&N method leaves only 5 multiplies and 29 adds
26
+ * to be done in the DCT itself.
27
+ * The primary disadvantage of this method is that with fixed-point math,
28
+ * accuracy is lost due to imprecise representation of the scaled
29
+ * quantization values. The smaller the quantization table entry, the less
30
+ * precise the scaled value, so this implementation does worse with high-
31
+ * quality-setting files than with low-quality ones.
32
+ */
33
+
34
+ #define JPEG_INTERNALS
35
+ #include "jinclude.h"
36
+ #include "jpeglib.h"
37
+ #include "jdct.h" /* Private declarations for DCT subsystem */
38
+
39
+ #ifdef DCT_IFAST_SUPPORTED
40
+
41
+
42
+ /*
43
+ * This module is specialized to the case DCTSIZE = 8.
44
+ */
45
+
46
+ #if DCTSIZE != 8
47
+ Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
48
+ #endif
49
+
50
+
51
+ /* Scaling decisions are generally the same as in the LL&M algorithm;
52
+ * see jfdctint.c for more details. However, we choose to descale
53
+ * (right shift) multiplication products as soon as they are formed,
54
+ * rather than carrying additional fractional bits into subsequent additions.
55
+ * This compromises accuracy slightly, but it lets us save a few shifts.
56
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
57
+ * everywhere except in the multiplications proper; this saves a good deal
58
+ * of work on 16-bit-int machines.
59
+ *
60
+ * Again to save a few shifts, the intermediate results between pass 1 and
61
+ * pass 2 are not upscaled, but are represented only to integral precision.
62
+ *
63
+ * A final compromise is to represent the multiplicative constants to only
64
+ * 8 fractional bits, rather than 13. This saves some shifting work on some
65
+ * machines, and may also reduce the cost of multiplication (since there
66
+ * are fewer one-bits in the constants).
67
+ */
68
+
69
+ #define CONST_BITS 8
70
+
71
+
72
+ /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
73
+ * causing a lot of useless floating-point operations at run time.
74
+ * To get around this we use the following pre-calculated constants.
75
+ * If you change CONST_BITS you may want to add appropriate values.
76
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
77
+ */
78
+
79
+ #if CONST_BITS == 8
80
+ #define FIX_0_382683433 ((INT32) 98) /* FIX(0.382683433) */
81
+ #define FIX_0_541196100 ((INT32) 139) /* FIX(0.541196100) */
82
+ #define FIX_0_707106781 ((INT32) 181) /* FIX(0.707106781) */
83
+ #define FIX_1_306562965 ((INT32) 334) /* FIX(1.306562965) */
84
+ #else
85
+ #define FIX_0_382683433 FIX(0.382683433)
86
+ #define FIX_0_541196100 FIX(0.541196100)
87
+ #define FIX_0_707106781 FIX(0.707106781)
88
+ #define FIX_1_306562965 FIX(1.306562965)
89
+ #endif
90
+
91
+
92
+ /* We can gain a little more speed, with a further compromise in accuracy,
93
+ * by omitting the addition in a descaling shift. This yields an incorrectly
94
+ * rounded result half the time...
95
+ */
96
+
97
+ #ifndef USE_ACCURATE_ROUNDING
98
+ #undef DESCALE
99
+ #define DESCALE(x,n) RIGHT_SHIFT(x, n)
100
+ #endif
101
+
102
+
103
+ /* Multiply a DCTELEM variable by an INT32 constant, and immediately
104
+ * descale to yield a DCTELEM result.
+ *
+ * The product (var) * (const) is passed to DESCALE with a shift count of
+ * CONST_BITS, and the descaled value is then cast to DCTELEM. When
+ * USE_ACCURATE_ROUNDING is not defined, DESCALE has been redefined just
+ * above as a plain RIGHT_SHIFT, so the result is truncated rather than
+ * rounded. Both arguments are parenthesized and appear once in this
+ * macro's own expansion; RIGHT_SHIFT and the default DESCALE are defined
+ * outside this file.
105
+ */
106
+
107
+ #define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
108
+
109
+
110
+ /*
111
+ * Perform the forward DCT on one block of samples.
+ *
+ * Parameters:
+ *   data        - output workspace. Pass 1 writes DCTSIZE consecutive rows of
+ *                 DCTSIZE values; pass 2 reads and overwrites the same storage
+ *                 column by column, touching indices 0 .. DCTSIZE*DCTSIZE-1.
+ *   sample_data - array of sample rows; rows 0 .. DCTSIZE-1 are read.
+ *   start_col   - offset added to each row pointer; the 8 samples at
+ *                 start_col .. start_col+7 of every row are read.
+ *
+ * Returns nothing; the result is left in data[].
+ *
+ * Notes:
+ *   - All multiplications go through MULTIPLY(), which uses the FIX_*
+ *     constants (CONST_BITS fractional bits) and descales the product
+ *     immediately, so intermediate values stay at integral precision.
+ *   - The only offset applied is 8 * CENTERJSAMPLE, subtracted from element 0
+ *     of each row in pass 1 (the unsigned->signed conversion).
+ *   - No final scaling step is performed here. Per the file header, the
+ *     AA&N method expects the remaining output scalings to be folded into
+ *     the quantization step.
+ *   - No bounds checks are made on sample_data or start_col.
112
+ */
113
+
114
+ GLOBAL(void)
115
+ jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
116
+ {
117
+ DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
118
+ DCTELEM tmp10, tmp11, tmp12, tmp13;
119
+ DCTELEM z1, z2, z3, z4, z5, z11, z13;
120
+ DCTELEM *dataptr;
121
+ JSAMPROW elemptr;
122
+ int ctr;
123
+ SHIFT_TEMPS /* macro defined outside this file; presumably scratch declarations for RIGHT_SHIFT -- TODO confirm */
124
+
125
+ /* Pass 1: process rows. */
126
+
127
+ dataptr = data;
128
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
129
+ elemptr = sample_data[ctr] + start_col; /* first of the 8 samples used from row ctr */
130
+
131
+ /* Load data into workspace */
132
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
133
+ tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
134
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
135
+ tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
136
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
137
+ tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
138
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
139
+ tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
140
+
141
+ /* Even part */
142
+
143
+ tmp10 = tmp0 + tmp3; /* phase 2 */
144
+ tmp13 = tmp0 - tmp3;
145
+ tmp11 = tmp1 + tmp2;
146
+ tmp12 = tmp1 - tmp2;
147
+
148
+ /* Apply unsigned->signed conversion */
149
+ dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */
150
+ dataptr[4] = tmp10 - tmp11;
151
+
152
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
153
+ dataptr[2] = tmp13 + z1; /* phase 5 */
154
+ dataptr[6] = tmp13 - z1;
155
+
156
+ /* Odd part */
157
+
158
+ tmp10 = tmp4 + tmp5; /* phase 2 */
159
+ tmp11 = tmp5 + tmp6;
160
+ tmp12 = tmp6 + tmp7;
161
+
162
+ /* The rotator is modified from fig 4-8 to avoid extra negations. */
163
+ z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
164
+ z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
165
+ z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
166
+ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
167
+
168
+ z11 = tmp7 + z3; /* phase 5 */
169
+ z13 = tmp7 - z3;
170
+
171
+ dataptr[5] = z13 + z2; /* phase 6 */
172
+ dataptr[3] = z13 - z2;
173
+ dataptr[1] = z11 + z4;
174
+ dataptr[7] = z11 - z4;
175
+
176
+ dataptr += DCTSIZE; /* advance pointer to next row */
177
+ }
178
+
179
+ /* Pass 2: process columns. */
180
+
181
+ dataptr = data; /* rewind: pass 2 reads and overwrites the pass 1 results */
182
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { /* DCTSIZE iterations, one per column */
183
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
184
+ tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
185
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
186
+ tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
187
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
188
+ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
189
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
190
+ tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
191
+
192
+ /* Even part */
193
+
194
+ tmp10 = tmp0 + tmp3; /* phase 2 */
195
+ tmp13 = tmp0 - tmp3;
196
+ tmp11 = tmp1 + tmp2;
197
+ tmp12 = tmp1 - tmp2;
198
+
199
+ dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
200
+ dataptr[DCTSIZE*4] = tmp10 - tmp11;
201
+
202
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
203
+ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
204
+ dataptr[DCTSIZE*6] = tmp13 - z1;
205
+
206
+ /* Odd part */
207
+
208
+ tmp10 = tmp4 + tmp5; /* phase 2 */
209
+ tmp11 = tmp5 + tmp6;
210
+ tmp12 = tmp6 + tmp7;
211
+
212
+ /* The rotator is modified from fig 4-8 to avoid extra negations. */
213
+ z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
214
+ z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
215
+ z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
216
+ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
217
+
218
+ z11 = tmp7 + z3; /* phase 5 */
219
+ z13 = tmp7 - z3;
220
+
221
+ dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
222
+ dataptr[DCTSIZE*3] = z13 - z2;
223
+ dataptr[DCTSIZE*1] = z11 + z4;
224
+ dataptr[DCTSIZE*7] = z11 - z4;
225
+
226
+ dataptr++; /* advance pointer to next column */
227
+ }
228
+ }
229
+
230
+ #endif /* DCT_IFAST_SUPPORTED */
@@ -0,0 +1,4348 @@
1
+ /*
2
+ * jfdctint.c
3
+ *
4
+ * Copyright (C) 1991-1996, Thomas G. Lane.
5
+ * Modification developed 2003-2009 by Guido Vollbeding.
6
+ * This file is part of the Independent JPEG Group's software.
7
+ * For conditions of distribution and use, see the accompanying README file.
8
+ *
9
+ * This file contains a slow-but-accurate integer implementation of the
10
+ * forward DCT (Discrete Cosine Transform).
11
+ *
12
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
13
+ * on each column. Direct algorithms are also available, but they are
14
+ * much more complex and seem not to be any faster when reduced to code.
15
+ *
16
+ * This implementation is based on an algorithm described in
17
+ * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
18
+ * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
19
+ * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
20
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
21
+ * We use their alternate method with 12 multiplies and 32 adds.
22
+ * The advantage of this method is that no data path contains more than one
23
+ * multiplication; this allows a very simple and accurate implementation in
24
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
25
+ *
26
+ * We also provide FDCT routines with various input sample block sizes for
27
+ * direct resolution reduction or enlargement and for directly resolving the
28
+ * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
29
+ * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
30
+ *
31
+ * For N<8 we fill the remaining block coefficients with zero.
32
+ * For N>8 we apply a partial N-point FDCT on the input samples, computing
33
+ * just the lower 8 frequency coefficients and discarding the rest.
34
+ *
35
+ * We must scale the output coefficients of the N-point FDCT appropriately
36
+ * to the standard 8-point FDCT level by 8/N per 1-D pass. This scaling
37
+ * is folded into the constant multipliers (pass 2) and/or final/initial
38
+ * shifting.
39
+ *
40
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
41
+ * since there would be too many additional constants to pre-calculate.
42
+ */
43
+
44
+ #define JPEG_INTERNALS
45
+ #include "jinclude.h"
46
+ #include "jpeglib.h"
47
+ #include "jdct.h" /* Private declarations for DCT subsystem */
48
+
49
+ #ifdef DCT_ISLOW_SUPPORTED
50
+
51
+
52
+ /*
53
+ * This module is specialized to the case DCTSIZE = 8.
54
+ */
55
+
56
+ #if DCTSIZE != 8
57
+ Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
58
+ #endif
59
+
60
+
61
+ /*
62
+ * The details of this scaling are as follows:
63
+ *
64
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
65
+ * larger than the true DCT outputs. The final outputs are therefore
66
+ * a factor of N larger than desired; since N=8 this can be cured by
67
+ * a simple right shift at the end of the algorithm. The advantage of
68
+ * this arrangement is that we save two multiplications per 1-D DCT,
69
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
70
+ * In the IJG code, this factor of 8 is removed by the quantization step
71
+ * (in jcdctmgr.c), NOT in this module.
72
+ *
73
+ * We have to do addition and subtraction of the integer inputs, which
74
+ * is no problem, and multiplication by fractional constants, which is
75
+ * a problem to do in integer arithmetic. We multiply all the constants
76
+ * by CONST_SCALE and convert them to integer constants (thus retaining
77
+ * CONST_BITS bits of precision in the constants). After doing a
78
+ * multiplication we have to divide the product by CONST_SCALE, with proper
79
+ * rounding, to produce the correct output. This division can be done
80
+ * cheaply as a right shift of CONST_BITS bits. We postpone shifting
81
+ * as long as possible so that partial sums can be added together with
82
+ * full fractional precision.
83
+ *
84
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
85
+ * they are represented to better-than-integral precision. These outputs
86
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
87
+ * with the recommended scaling. (For 12-bit sample data, the intermediate
88
+ * array is INT32 anyway.)
89
+ *
90
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
91
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
92
+ * shows that the values given below are the most effective.
93
+ */
94
+
95
+ #if BITS_IN_JSAMPLE == 8 /* 8-bit samples: 8 + 13 + 2 = 23, inside the "<= 26" pass-2 overflow budget */
96
+ #define CONST_BITS 13 /* fractional bits kept in the fixed-point multiplier constants */
97
+ #define PASS1_BITS 2 /* extra fractional bits carried in the pass-1 (row) outputs */
98
+ #else /* any other sample width (the text above mentions 12-bit data: 12 + 13 + 1 = 26) */
99
+ #define CONST_BITS 13
100
+ #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
101
+ #endif
102
+
103
+ /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
104
+ * causing a lot of useless floating-point operations at run time.
105
+ * To get around this we use the following pre-calculated constants.
106
+ * If you change CONST_BITS you may want to add appropriate values.
107
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
108
+ */
109
+
110
+ #if CONST_BITS == 13 /* pre-computed table: each value is the constant times 2**13, rounded (e.g. 0.541196100 * 8192 ~= 4433) */
111
+ #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
112
+ #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
113
+ #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
114
+ #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
115
+ #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
116
+ #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
117
+ #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
118
+ #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
119
+ #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
120
+ #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
121
+ #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
122
+ #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
123
+ #else /* other CONST_BITS: no pre-computed table, fall back to the FIX() macro (declared outside this file) */
124
+ #define FIX_0_298631336 FIX(0.298631336)
125
+ #define FIX_0_390180644 FIX(0.390180644)
126
+ #define FIX_0_541196100 FIX(0.541196100)
127
+ #define FIX_0_765366865 FIX(0.765366865)
128
+ #define FIX_0_899976223 FIX(0.899976223)
129
+ #define FIX_1_175875602 FIX(1.175875602)
130
+ #define FIX_1_501321110 FIX(1.501321110)
131
+ #define FIX_1_847759065 FIX(1.847759065)
132
+ #define FIX_1_961570560 FIX(1.961570560)
133
+ #define FIX_2_053119869 FIX(2.053119869)
134
+ #define FIX_2_562915447 FIX(2.562915447)
135
+ #define FIX_3_072711026 FIX(3.072711026)
136
+ #endif
137
+
138
+
139
+ /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
140
+ * For 8-bit samples with the recommended scaling, all the variable
141
+ * and constant values involved are no more than 16 bits wide, so a
142
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
143
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
144
+ */
145
+
146
+ #if BITS_IN_JSAMPLE == 8 /* operands fit in 16 bits, so the 16x16->32 multiply helper can be used */
147
+ #define MULTIPLY(var,const) MULTIPLY16C16(var,const) /* MULTIPLY16C16 is declared outside this file */
148
+ #else /* wider samples: plain full-width multiplication */
149
+ #define MULTIPLY(var,const) ((var) * (const))
150
+ #endif
151
+
152
+
153
+ /*
154
+ * Perform the forward DCT on one block of samples.
155
+ */
156
+
157
+ GLOBAL(void)
158
+ jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
159
+ {
160
+ INT32 tmp0, tmp1, tmp2, tmp3;
161
+ INT32 tmp10, tmp11, tmp12, tmp13;
162
+ INT32 z1;
163
+ DCTELEM *dataptr;
164
+ JSAMPROW elemptr;
165
+ int ctr;
166
+ SHIFT_TEMPS
167
+
168
+ /* Pass 1: process rows. */
169
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
170
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
171
+
172
+ dataptr = data;
173
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
174
+ elemptr = sample_data[ctr] + start_col;
175
+
176
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
177
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
178
+ */
179
+
180
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
181
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
182
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
183
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
184
+
185
+ tmp10 = tmp0 + tmp3;
186
+ tmp12 = tmp0 - tmp3;
187
+ tmp11 = tmp1 + tmp2;
188
+ tmp13 = tmp1 - tmp2;
189
+
190
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
191
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
192
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
193
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
194
+
195
+ /* Apply unsigned->signed conversion */
196
+ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
197
+ dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
198
+
199
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
200
+ /* Add fudge factor here for final descale. */
201
+ z1 += ONE << (CONST_BITS-PASS1_BITS-1);
202
+ dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
203
+ CONST_BITS-PASS1_BITS);
204
+ dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
205
+ CONST_BITS-PASS1_BITS);
206
+
207
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
208
+ * cK represents sqrt(2) * cos(K*pi/16).
209
+ * i0..i3 in the paper are tmp0..tmp3 here.
210
+ */
211
+
212
+ tmp10 = tmp0 + tmp3;
213
+ tmp11 = tmp1 + tmp2;
214
+ tmp12 = tmp0 + tmp2;
215
+ tmp13 = tmp1 + tmp3;
216
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
217
+ /* Add fudge factor here for final descale. */
218
+ z1 += ONE << (CONST_BITS-PASS1_BITS-1);
219
+
220
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
221
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
222
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
223
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
224
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
225
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
226
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
227
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
228
+
229
+ tmp12 += z1;
230
+ tmp13 += z1;
231
+
232
+ dataptr[1] = (DCTELEM)
233
+ RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
234
+ dataptr[3] = (DCTELEM)
235
+ RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
236
+ dataptr[5] = (DCTELEM)
237
+ RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
238
+ dataptr[7] = (DCTELEM)
239
+ RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
240
+
241
+ dataptr += DCTSIZE; /* advance pointer to next row */
242
+ }
243
+
244
+ /* Pass 2: process columns.
245
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
246
+ * by an overall factor of 8.
247
+ */
248
+
249
+ dataptr = data;
250
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
251
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
252
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
253
+ */
254
+
255
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
256
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
257
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
258
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
259
+
260
+ /* Add fudge factor here for final descale. */
261
+ tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
262
+ tmp12 = tmp0 - tmp3;
263
+ tmp11 = tmp1 + tmp2;
264
+ tmp13 = tmp1 - tmp2;
265
+
266
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
267
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
268
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
269
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
270
+
271
+ dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
272
+ dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
273
+
274
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
275
+ /* Add fudge factor here for final descale. */
276
+ z1 += ONE << (CONST_BITS+PASS1_BITS-1);
277
+ dataptr[DCTSIZE*2] = (DCTELEM)
278
+ RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
279
+ dataptr[DCTSIZE*6] = (DCTELEM)
280
+ RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
281
+
282
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
283
+ * cK represents sqrt(2) * cos(K*pi/16).
284
+ * i0..i3 in the paper are tmp0..tmp3 here.
285
+ */
286
+
287
+ tmp10 = tmp0 + tmp3;
288
+ tmp11 = tmp1 + tmp2;
289
+ tmp12 = tmp0 + tmp2;
290
+ tmp13 = tmp1 + tmp3;
291
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
292
+ /* Add fudge factor here for final descale. */
293
+ z1 += ONE << (CONST_BITS+PASS1_BITS-1);
294
+
295
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
296
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
297
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
298
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
299
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
300
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
301
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
302
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
303
+
304
+ tmp12 += z1;
305
+ tmp13 += z1;
306
+
307
+ dataptr[DCTSIZE*1] = (DCTELEM)
308
+ RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
309
+ dataptr[DCTSIZE*3] = (DCTELEM)
310
+ RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
311
+ dataptr[DCTSIZE*5] = (DCTELEM)
312
+ RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
313
+ dataptr[DCTSIZE*7] = (DCTELEM)
314
+ RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
315
+
316
+ dataptr++; /* advance pointer to next column */
317
+ }
318
+ }
319
+
320
+ #ifdef DCT_SCALING_SUPPORTED
321
+
322
+
323
+ /*
324
+ * Perform the forward DCT on a 7x7 sample block.
325
+ */
326
+
327
+ GLOBAL(void)
328
+ jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
329
+ {
330
+ INT32 tmp0, tmp1, tmp2, tmp3;
331
+ INT32 tmp10, tmp11, tmp12;
332
+ INT32 z1, z2, z3;
333
+ DCTELEM *dataptr;
334
+ JSAMPROW elemptr;
335
+ int ctr;
336
+ SHIFT_TEMPS
337
+
338
+ /* Pre-zero output coefficient block. */
339
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
340
+
341
+ /* Pass 1: process rows. */
342
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
343
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
344
+ /* cK represents sqrt(2) * cos(K*pi/14). */
345
+
346
+ dataptr = data;
347
+ for (ctr = 0; ctr < 7; ctr++) {
348
+ elemptr = sample_data[ctr] + start_col;
349
+
350
+ /* Even part */
351
+
352
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
353
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
354
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
355
+ tmp3 = GETJSAMPLE(elemptr[3]);
356
+
357
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
358
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
359
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
360
+
361
+ z1 = tmp0 + tmp2;
362
+ /* Apply unsigned->signed conversion */
363
+ dataptr[0] = (DCTELEM)
364
+ ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
365
+ tmp3 += tmp3;
366
+ z1 -= tmp3;
367
+ z1 -= tmp3;
368
+ z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
369
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
370
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
371
+ dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
372
+ z1 -= z2;
373
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
374
+ dataptr[4] = (DCTELEM)
375
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
376
+ CONST_BITS-PASS1_BITS);
377
+ dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
378
+
379
+ /* Odd part */
380
+
381
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
382
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
383
+ tmp0 = tmp1 - tmp2;
384
+ tmp1 += tmp2;
385
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
386
+ tmp1 += tmp2;
387
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
388
+ tmp0 += tmp3;
389
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
390
+
391
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
392
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
393
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
394
+
395
+ dataptr += DCTSIZE; /* advance pointer to next row */
396
+ }
397
+
398
+ /* Pass 2: process columns.
399
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
400
+ * by an overall factor of 8.
401
+ * We must also scale the output by (8/7)**2 = 64/49, which we fold
402
+ * into the constant multipliers:
403
+ * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
404
+ */
405
+
406
+ dataptr = data;
407
+ for (ctr = 0; ctr < 7; ctr++) {
408
+ /* Even part */
409
+
410
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
411
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
412
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
413
+ tmp3 = dataptr[DCTSIZE*3];
414
+
415
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
416
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
417
+ tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
418
+
419
+ z1 = tmp0 + tmp2;
420
+ dataptr[DCTSIZE*0] = (DCTELEM)
421
+ DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
422
+ CONST_BITS+PASS1_BITS);
423
+ tmp3 += tmp3;
424
+ z1 -= tmp3;
425
+ z1 -= tmp3;
426
+ z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
427
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
428
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
429
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
430
+ z1 -= z2;
431
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
432
+ dataptr[DCTSIZE*4] = (DCTELEM)
433
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
434
+ CONST_BITS+PASS1_BITS);
435
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
436
+
437
+ /* Odd part */
438
+
439
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
440
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
441
+ tmp0 = tmp1 - tmp2;
442
+ tmp1 += tmp2;
443
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
444
+ tmp1 += tmp2;
445
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
446
+ tmp0 += tmp3;
447
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
448
+
449
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
450
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
451
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
452
+
453
+ dataptr++; /* advance pointer to next column */
454
+ }
455
+ }
456
+
457
+
458
+ /*
459
+ * Perform the forward DCT on a 6x6 sample block.
460
+ */
461
+
462
+ GLOBAL(void)
463
+ jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
464
+ {
465
+ INT32 tmp0, tmp1, tmp2;
466
+ INT32 tmp10, tmp11, tmp12;
467
+ DCTELEM *dataptr;
468
+ JSAMPROW elemptr;
469
+ int ctr;
470
+ SHIFT_TEMPS
471
+
472
+ /* Pre-zero output coefficient block. */
473
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
474
+
475
+ /* Pass 1: process rows. */
476
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
477
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
478
+ /* cK represents sqrt(2) * cos(K*pi/12). */
479
+
480
+ dataptr = data;
481
+ for (ctr = 0; ctr < 6; ctr++) {
482
+ elemptr = sample_data[ctr] + start_col;
483
+
484
+ /* Even part */
485
+
486
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
487
+ tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
488
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
489
+
490
+ tmp10 = tmp0 + tmp2;
491
+ tmp12 = tmp0 - tmp2;
492
+
493
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
494
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
495
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
496
+
497
+ /* Apply unsigned->signed conversion */
498
+ dataptr[0] = (DCTELEM)
499
+ ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
500
+ dataptr[2] = (DCTELEM)
501
+ DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
502
+ CONST_BITS-PASS1_BITS);
503
+ dataptr[4] = (DCTELEM)
504
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
505
+ CONST_BITS-PASS1_BITS);
506
+
507
+ /* Odd part */
508
+
509
+ tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
510
+ CONST_BITS-PASS1_BITS);
511
+
512
+ dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
513
+ dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
514
+ dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
515
+
516
+ dataptr += DCTSIZE; /* advance pointer to next row */
517
+ }
518
+
519
+ /* Pass 2: process columns.
520
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
521
+ * by an overall factor of 8.
522
+ * We must also scale the output by (8/6)**2 = 16/9, which we fold
523
+ * into the constant multipliers:
524
+ * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
525
+ */
526
+
527
+ dataptr = data;
528
+ for (ctr = 0; ctr < 6; ctr++) {
529
+ /* Even part */
530
+
531
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
532
+ tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
533
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
534
+
535
+ tmp10 = tmp0 + tmp2;
536
+ tmp12 = tmp0 - tmp2;
537
+
538
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
539
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
540
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
541
+
542
+ dataptr[DCTSIZE*0] = (DCTELEM)
543
+ DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
544
+ CONST_BITS+PASS1_BITS);
545
+ dataptr[DCTSIZE*2] = (DCTELEM)
546
+ DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
547
+ CONST_BITS+PASS1_BITS);
548
+ dataptr[DCTSIZE*4] = (DCTELEM)
549
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
550
+ CONST_BITS+PASS1_BITS);
551
+
552
+ /* Odd part */
553
+
554
+ tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
555
+
556
+ dataptr[DCTSIZE*1] = (DCTELEM)
557
+ DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
558
+ CONST_BITS+PASS1_BITS);
559
+ dataptr[DCTSIZE*3] = (DCTELEM)
560
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
561
+ CONST_BITS+PASS1_BITS);
562
+ dataptr[DCTSIZE*5] = (DCTELEM)
563
+ DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
564
+ CONST_BITS+PASS1_BITS);
565
+
566
+ dataptr++; /* advance pointer to next column */
567
+ }
568
+ }
569
+
570
+
571
+ /*
572
+ * Perform the forward DCT on a 5x5 sample block.
573
+ */
574
+
575
+ GLOBAL(void)
576
+ jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
577
+ {
578
+ INT32 tmp0, tmp1, tmp2;
579
+ INT32 tmp10, tmp11;
580
+ DCTELEM *dataptr;
581
+ JSAMPROW elemptr;
582
+ int ctr;
583
+ SHIFT_TEMPS
584
+
585
+ /* Pre-zero output coefficient block. */
586
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
587
+
588
+ /* Pass 1: process rows. */
589
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
590
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
591
+ /* We scale the results further by 2 as part of output adaption */
592
+ /* scaling for different DCT size. */
593
+ /* cK represents sqrt(2) * cos(K*pi/10). */
594
+
595
+ dataptr = data;
596
+ for (ctr = 0; ctr < 5; ctr++) {
597
+ elemptr = sample_data[ctr] + start_col;
598
+
599
+ /* Even part */
600
+
601
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
602
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
603
+ tmp2 = GETJSAMPLE(elemptr[2]);
604
+
605
+ tmp10 = tmp0 + tmp1;
606
+ tmp11 = tmp0 - tmp1;
607
+
608
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
609
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
610
+
611
+ /* Apply unsigned->signed conversion */
612
+ dataptr[0] = (DCTELEM)
613
+ ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
614
+ tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
615
+ tmp10 -= tmp2 << 2;
616
+ tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
617
+ dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
618
+ dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
619
+
620
+ /* Odd part */
621
+
622
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
623
+
624
+ dataptr[1] = (DCTELEM)
625
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
626
+ CONST_BITS-PASS1_BITS-1);
627
+ dataptr[3] = (DCTELEM)
628
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
629
+ CONST_BITS-PASS1_BITS-1);
630
+
631
+ dataptr += DCTSIZE; /* advance pointer to next row */
632
+ }
633
+
634
+ /* Pass 2: process columns.
635
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
636
+ * by an overall factor of 8.
637
+ * We must also scale the output by (8/5)**2 = 64/25, which we partially
638
+ * fold into the constant multipliers (other part was done in pass 1):
639
+ * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
640
+ */
641
+
642
+ dataptr = data;
643
+ for (ctr = 0; ctr < 5; ctr++) {
644
+ /* Even part */
645
+
646
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
647
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
648
+ tmp2 = dataptr[DCTSIZE*2];
649
+
650
+ tmp10 = tmp0 + tmp1;
651
+ tmp11 = tmp0 - tmp1;
652
+
653
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
654
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
655
+
656
+ dataptr[DCTSIZE*0] = (DCTELEM)
657
+ DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
658
+ CONST_BITS+PASS1_BITS);
659
+ tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
660
+ tmp10 -= tmp2 << 2;
661
+ tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
662
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
663
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
664
+
665
+ /* Odd part */
666
+
667
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
668
+
669
+ dataptr[DCTSIZE*1] = (DCTELEM)
670
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
671
+ CONST_BITS+PASS1_BITS);
672
+ dataptr[DCTSIZE*3] = (DCTELEM)
673
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
674
+ CONST_BITS+PASS1_BITS);
675
+
676
+ dataptr++; /* advance pointer to next column */
677
+ }
678
+ }
679
+
680
+
681
+ /*
682
+ * Perform the forward DCT on a 4x4 sample block.
683
+ */
684
+
685
+ GLOBAL(void)
686
+ jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
687
+ {
688
+ INT32 tmp0, tmp1;
689
+ INT32 tmp10, tmp11;
690
+ DCTELEM *dataptr;
691
+ JSAMPROW elemptr;
692
+ int ctr;
693
+ SHIFT_TEMPS
694
+
695
+ /* Pre-zero output coefficient block. */
696
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
697
+
698
+ /* Pass 1: process rows. */
699
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
700
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
701
+ /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
702
+ /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
703
+
704
+ dataptr = data;
705
+ for (ctr = 0; ctr < 4; ctr++) {
706
+ elemptr = sample_data[ctr] + start_col;
707
+
708
+ /* Even part */
709
+
710
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
711
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
712
+
713
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
714
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
715
+
716
+ /* Apply unsigned->signed conversion */
717
+ dataptr[0] = (DCTELEM)
718
+ ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
719
+ dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
720
+
721
+ /* Odd part */
722
+
723
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
724
+ /* Add fudge factor here for final descale. */
725
+ tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
726
+
727
+ dataptr[1] = (DCTELEM)
728
+ RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
729
+ CONST_BITS-PASS1_BITS-2);
730
+ dataptr[3] = (DCTELEM)
731
+ RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
732
+ CONST_BITS-PASS1_BITS-2);
733
+
734
+ dataptr += DCTSIZE; /* advance pointer to next row */
735
+ }
736
+
737
+ /* Pass 2: process columns.
738
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
739
+ * by an overall factor of 8.
740
+ */
741
+
742
+ dataptr = data;
743
+ for (ctr = 0; ctr < 4; ctr++) {
744
+ /* Even part */
745
+
746
+ /* Add fudge factor here for final descale. */
747
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
748
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
749
+
750
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
751
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
752
+
753
+ dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
754
+ dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
755
+
756
+ /* Odd part */
757
+
758
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
759
+ /* Add fudge factor here for final descale. */
760
+ tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
761
+
762
+ dataptr[DCTSIZE*1] = (DCTELEM)
763
+ RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
764
+ CONST_BITS+PASS1_BITS);
765
+ dataptr[DCTSIZE*3] = (DCTELEM)
766
+ RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
767
+ CONST_BITS+PASS1_BITS);
768
+
769
+ dataptr++; /* advance pointer to next column */
770
+ }
771
+ }
772
+
773
+
774
+ /*
775
+ * Perform the forward DCT on a 3x3 sample block.
776
+ */
777
+
778
+ GLOBAL(void)
779
+ jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
780
+ {
781
+ INT32 tmp0, tmp1, tmp2;
782
+ DCTELEM *dataptr;
783
+ JSAMPROW elemptr;
784
+ int ctr;
785
+ SHIFT_TEMPS
786
+
787
+ /* Pre-zero output coefficient block. */
788
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
789
+
790
+ /* Pass 1: process rows. */
791
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
792
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
793
+ /* We scale the results further by 2**2 as part of output adaption */
794
+ /* scaling for different DCT size. */
795
+ /* cK represents sqrt(2) * cos(K*pi/6). */
796
+
797
+ dataptr = data;
798
+ for (ctr = 0; ctr < 3; ctr++) {
799
+ elemptr = sample_data[ctr] + start_col;
800
+
801
+ /* Even part */
802
+
803
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
804
+ tmp1 = GETJSAMPLE(elemptr[1]);
805
+
806
+ tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
807
+
808
+ /* Apply unsigned->signed conversion */
809
+ dataptr[0] = (DCTELEM)
810
+ ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
811
+ dataptr[2] = (DCTELEM)
812
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
813
+ CONST_BITS-PASS1_BITS-2);
814
+
815
+ /* Odd part */
816
+
817
+ dataptr[1] = (DCTELEM)
818
+ DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
819
+ CONST_BITS-PASS1_BITS-2);
820
+
821
+ dataptr += DCTSIZE; /* advance pointer to next row */
822
+ }
823
+
824
+ /* Pass 2: process columns.
825
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
826
+ * by an overall factor of 8.
827
+ * We must also scale the output by (8/3)**2 = 64/9, which we partially
828
+ * fold into the constant multipliers (other part was done in pass 1):
829
+ * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
830
+ */
831
+
832
+ dataptr = data;
833
+ for (ctr = 0; ctr < 3; ctr++) {
834
+ /* Even part */
835
+
836
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
837
+ tmp1 = dataptr[DCTSIZE*1];
838
+
839
+ tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
840
+
841
+ dataptr[DCTSIZE*0] = (DCTELEM)
842
+ DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
843
+ CONST_BITS+PASS1_BITS);
844
+ dataptr[DCTSIZE*2] = (DCTELEM)
845
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
846
+ CONST_BITS+PASS1_BITS);
847
+
848
+ /* Odd part */
849
+
850
+ dataptr[DCTSIZE*1] = (DCTELEM)
851
+ DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
852
+ CONST_BITS+PASS1_BITS);
853
+
854
+ dataptr++; /* advance pointer to next column */
855
+ }
856
+ }
857
+
858
+
859
+ /*
860
+ * Perform the forward DCT on a 2x2 sample block.
861
+ */
862
+
863
+ GLOBAL(void)
864
+ jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
865
+ {
866
+ INT32 tmp0, tmp1, tmp2, tmp3;
867
+ JSAMPROW elemptr;
868
+
869
+ /* Pre-zero output coefficient block. */
870
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
871
+
872
+ /* Pass 1: process rows. */
873
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
874
+
875
+ /* Row 0 */
876
+ elemptr = sample_data[0] + start_col;
877
+
878
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
879
+ tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
880
+
881
+ /* Row 1 */
882
+ elemptr = sample_data[1] + start_col;
883
+
884
+ tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
885
+ tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
886
+
887
+ /* Pass 2: process columns.
888
+ * We leave the results scaled up by an overall factor of 8.
889
+ * We must also scale the output by (8/2)**2 = 2**4.
890
+ */
891
+
892
+ /* Column 0 */
893
+ /* Apply unsigned->signed conversion */
894
+ data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4);
895
+ data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4);
896
+
897
+ /* Column 1 */
898
+ data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4);
899
+ data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4);
900
+ }
901
+
902
+
903
+ /*
904
+ * Perform the forward DCT on a 1x1 sample block.
905
+ */
906
+
907
+ GLOBAL(void)
908
+ jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
909
+ {
910
+ /* Pre-zero output coefficient block. */
911
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
912
+
913
+ /* We leave the result scaled up by an overall factor of 8. */
914
+ /* We must also scale the output by (8/1)**2 = 2**6. */
915
+ /* Apply unsigned->signed conversion */
916
+ data[0] = (DCTELEM)
917
+ ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6);
918
+ }
919
+
920
+
921
+ /*
922
+ * Perform the forward DCT on a 9x9 sample block.
923
+ */
924
+
925
+ GLOBAL(void)
926
+ jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
927
+ {
928
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
929
+ INT32 tmp10, tmp11, tmp12, tmp13;
930
+ INT32 z1, z2;
931
+ DCTELEM workspace[8];
932
+ DCTELEM *dataptr;
933
+ DCTELEM *wsptr;
934
+ JSAMPROW elemptr;
935
+ int ctr;
936
+ SHIFT_TEMPS
937
+
938
+ /* Pass 1: process rows. */
939
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
940
+ /* we scale the results further by 2 as part of output adaption */
941
+ /* scaling for different DCT size. */
942
+ /* cK represents sqrt(2) * cos(K*pi/18). */
943
+
944
+ dataptr = data;
945
+ ctr = 0;
946
+ for (;;) {
947
+ elemptr = sample_data[ctr] + start_col;
948
+
949
+ /* Even part */
950
+
951
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
952
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
953
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
954
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
955
+ tmp4 = GETJSAMPLE(elemptr[4]);
956
+
957
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
958
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
959
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
960
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
961
+
962
+ z1 = tmp0 + tmp2 + tmp3;
963
+ z2 = tmp1 + tmp4;
964
+ /* Apply unsigned->signed conversion */
965
+ dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
966
+ dataptr[6] = (DCTELEM)
967
+ DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */
968
+ CONST_BITS-1);
969
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
970
+ z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
971
+ dataptr[2] = (DCTELEM)
972
+ DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */
973
+ + z1 + z2, CONST_BITS-1);
974
+ dataptr[4] = (DCTELEM)
975
+ DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */
976
+ + z1 - z2, CONST_BITS-1);
977
+
978
+ /* Odd part */
979
+
980
+ dataptr[3] = (DCTELEM)
981
+ DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
982
+ CONST_BITS-1);
983
+
984
+ tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */
985
+ tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
986
+ tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
987
+
988
+ dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
989
+
990
+ tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
991
+
992
+ dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
993
+ dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
994
+
995
+ ctr++;
996
+
997
+ if (ctr != DCTSIZE) {
998
+ if (ctr == 9)
999
+ break; /* Done. */
1000
+ dataptr += DCTSIZE; /* advance pointer to next row */
1001
+ } else
1002
+ dataptr = workspace; /* switch pointer to extended workspace */
1003
+ }
1004
+
1005
+ /* Pass 2: process columns.
1006
+ * We leave the results scaled up by an overall factor of 8.
1007
+ * We must also scale the output by (8/9)**2 = 64/81, which we partially
1008
+ * fold into the constant multipliers and final/initial shifting:
1009
+ * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
1010
+ */
1011
+
1012
+ dataptr = data;
1013
+ wsptr = workspace;
1014
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1015
+ /* Even part */
1016
+
1017
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
1018
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
1019
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
1020
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
1021
+ tmp4 = dataptr[DCTSIZE*4];
1022
+
1023
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
1024
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
1025
+ tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
1026
+ tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
1027
+
1028
+ z1 = tmp0 + tmp2 + tmp3;
1029
+ z2 = tmp1 + tmp4;
1030
+ dataptr[DCTSIZE*0] = (DCTELEM)
1031
+ DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */
1032
+ CONST_BITS+2);
1033
+ dataptr[DCTSIZE*6] = (DCTELEM)
1034
+ DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */
1035
+ CONST_BITS+2);
1036
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */
1037
+ z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
1038
+ dataptr[DCTSIZE*2] = (DCTELEM)
1039
+ DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */
1040
+ + z1 + z2, CONST_BITS+2);
1041
+ dataptr[DCTSIZE*4] = (DCTELEM)
1042
+ DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */
1043
+ + z1 - z2, CONST_BITS+2);
1044
+
1045
+ /* Odd part */
1046
+
1047
+ dataptr[DCTSIZE*3] = (DCTELEM)
1048
+ DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
1049
+ CONST_BITS+2);
1050
+
1051
+ tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */
1052
+ tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
1053
+ tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
1054
+
1055
+ dataptr[DCTSIZE*1] = (DCTELEM)
1056
+ DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
1057
+
1058
+ tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
1059
+
1060
+ dataptr[DCTSIZE*5] = (DCTELEM)
1061
+ DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
1062
+ dataptr[DCTSIZE*7] = (DCTELEM)
1063
+ DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
1064
+
1065
+ dataptr++; /* advance pointer to next column */
1066
+ wsptr++; /* advance pointer to next column */
1067
+ }
1068
+ }
1069
+
1070
+
1071
+ /*
1072
+ * Perform the forward DCT on a 10x10 sample block.
1073
+ */
1074
+
1075
+ GLOBAL(void)
1076
+ jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1077
+ {
1078
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1079
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1080
+ DCTELEM workspace[8*2];
1081
+ DCTELEM *dataptr;
1082
+ DCTELEM *wsptr;
1083
+ JSAMPROW elemptr;
1084
+ int ctr;
1085
+ SHIFT_TEMPS
1086
+
1087
+ /* Pass 1: process rows. */
1088
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
1089
+ /* we scale the results further by 2 as part of output adaption */
1090
+ /* scaling for different DCT size. */
1091
+ /* cK represents sqrt(2) * cos(K*pi/20). */
1092
+
1093
+ dataptr = data;
1094
+ ctr = 0;
1095
+ for (;;) {
1096
+ elemptr = sample_data[ctr] + start_col;
1097
+
1098
+ /* Even part */
1099
+
1100
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
1101
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
1102
+ tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
1103
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
1104
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
1105
+
1106
+ tmp10 = tmp0 + tmp4;
1107
+ tmp13 = tmp0 - tmp4;
1108
+ tmp11 = tmp1 + tmp3;
1109
+ tmp14 = tmp1 - tmp3;
1110
+
1111
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
1112
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
1113
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
1114
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
1115
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
1116
+
1117
+ /* Apply unsigned->signed conversion */
1118
+ dataptr[0] = (DCTELEM)
1119
+ ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
1120
+ tmp12 += tmp12;
1121
+ dataptr[4] = (DCTELEM)
1122
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
1123
+ MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
1124
+ CONST_BITS-1);
1125
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
1126
+ dataptr[2] = (DCTELEM)
1127
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
1128
+ CONST_BITS-1);
1129
+ dataptr[6] = (DCTELEM)
1130
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
1131
+ CONST_BITS-1);
1132
+
1133
+ /* Odd part */
1134
+
1135
+ tmp10 = tmp0 + tmp4;
1136
+ tmp11 = tmp1 - tmp3;
1137
+ dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
1138
+ tmp2 <<= CONST_BITS;
1139
+ dataptr[1] = (DCTELEM)
1140
+ DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
1141
+ MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
1142
+ MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
1143
+ MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
1144
+ CONST_BITS-1);
1145
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
1146
+ MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
1147
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
1148
+ (tmp11 << (CONST_BITS - 1)) - tmp2;
1149
+ dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
1150
+ dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
1151
+
1152
+ ctr++;
1153
+
1154
+ if (ctr != DCTSIZE) {
1155
+ if (ctr == 10)
1156
+ break; /* Done. */
1157
+ dataptr += DCTSIZE; /* advance pointer to next row */
1158
+ } else
1159
+ dataptr = workspace; /* switch pointer to extended workspace */
1160
+ }
1161
+
1162
+ /* Pass 2: process columns.
1163
+ * We leave the results scaled up by an overall factor of 8.
1164
+ * We must also scale the output by (8/10)**2 = 16/25, which we partially
1165
+ * fold into the constant multipliers and final/initial shifting:
1166
+ * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
1167
+ */
1168
+
1169
+ dataptr = data;
1170
+ wsptr = workspace;
1171
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1172
+ /* Even part */
1173
+
1174
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
1175
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
1176
+ tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
1177
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
1178
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
1179
+
1180
+ tmp10 = tmp0 + tmp4;
1181
+ tmp13 = tmp0 - tmp4;
1182
+ tmp11 = tmp1 + tmp3;
1183
+ tmp14 = tmp1 - tmp3;
1184
+
1185
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
1186
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
1187
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
1188
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
1189
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
1190
+
1191
+ dataptr[DCTSIZE*0] = (DCTELEM)
1192
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
1193
+ CONST_BITS+2);
1194
+ tmp12 += tmp12;
1195
+ dataptr[DCTSIZE*4] = (DCTELEM)
1196
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
1197
+ MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
1198
+ CONST_BITS+2);
1199
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
1200
+ dataptr[DCTSIZE*2] = (DCTELEM)
1201
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
1202
+ CONST_BITS+2);
1203
+ dataptr[DCTSIZE*6] = (DCTELEM)
1204
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
1205
+ CONST_BITS+2);
1206
+
1207
+ /* Odd part */
1208
+
1209
+ tmp10 = tmp0 + tmp4;
1210
+ tmp11 = tmp1 - tmp3;
1211
+ dataptr[DCTSIZE*5] = (DCTELEM)
1212
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
1213
+ CONST_BITS+2);
1214
+ tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
1215
+ dataptr[DCTSIZE*1] = (DCTELEM)
1216
+ DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
1217
+ MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
1218
+ MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
1219
+ MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
1220
+ CONST_BITS+2);
1221
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
1222
+ MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
1223
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
1224
+ MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
1225
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
1226
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
1227
+
1228
+ dataptr++; /* advance pointer to next column */
1229
+ wsptr++; /* advance pointer to next column */
1230
+ }
1231
+ }
1232
+
1233
+
1234
+ /*
1235
+ * Perform the forward DCT on an 11x11 sample block.
1236
+ */
1237
+
1238
+ GLOBAL(void)
1239
+ jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1240
+ {
1241
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1242
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1243
+ INT32 z1, z2, z3;
1244
+ DCTELEM workspace[8*3];
1245
+ DCTELEM *dataptr;
1246
+ DCTELEM *wsptr;
1247
+ JSAMPROW elemptr;
1248
+ int ctr;
1249
+ SHIFT_TEMPS
1250
+
1251
+ /* Pass 1: process rows. */
1252
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
1253
+ /* we scale the results further by 2 as part of output adaption */
1254
+ /* scaling for different DCT size. */
1255
+ /* cK represents sqrt(2) * cos(K*pi/22). */
1256
+
1257
+ dataptr = data;
1258
+ ctr = 0;
1259
+ for (;;) {
1260
+ elemptr = sample_data[ctr] + start_col;
1261
+
1262
+ /* Even part */
1263
+
1264
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
1265
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
1266
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
1267
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
1268
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
1269
+ tmp5 = GETJSAMPLE(elemptr[5]);
1270
+
1271
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
1272
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
1273
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
1274
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
1275
+ tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
1276
+
1277
+ /* Apply unsigned->signed conversion */
1278
+ dataptr[0] = (DCTELEM)
1279
+ ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
1280
+ tmp5 += tmp5;
1281
+ tmp0 -= tmp5;
1282
+ tmp1 -= tmp5;
1283
+ tmp2 -= tmp5;
1284
+ tmp3 -= tmp5;
1285
+ tmp4 -= tmp5;
1286
+ z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */
1287
+ MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */
1288
+ z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */
1289
+ z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */
1290
+ dataptr[2] = (DCTELEM)
1291
+ DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
1292
+ - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */
1293
+ CONST_BITS-1);
1294
+ dataptr[4] = (DCTELEM)
1295
+ DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
1296
+ - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */
1297
+ + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */
1298
+ CONST_BITS-1);
1299
+ dataptr[6] = (DCTELEM)
1300
+ DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
1301
+ - MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */
1302
+ CONST_BITS-1);
1303
+
1304
+ /* Odd part */
1305
+
1306
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */
1307
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */
1308
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */
1309
+ tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
1310
+ + MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */
1311
+ tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */
1312
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */
1313
+ tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
1314
+ - MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */
1315
+ tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */
1316
+ tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
1317
+ + MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */
1318
+ tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
1319
+ - MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */
1320
+
1321
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
1322
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
1323
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
1324
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
1325
+
1326
+ ctr++;
1327
+
1328
+ if (ctr != DCTSIZE) {
1329
+ if (ctr == 11)
1330
+ break; /* Done. */
1331
+ dataptr += DCTSIZE; /* advance pointer to next row */
1332
+ } else
1333
+ dataptr = workspace; /* switch pointer to extended workspace */
1334
+ }
1335
+
1336
+ /* Pass 2: process columns.
1337
+ * We leave the results scaled up by an overall factor of 8.
1338
+ * We must also scale the output by (8/11)**2 = 64/121, which we partially
1339
+ * fold into the constant multipliers and final/initial shifting:
1340
+ * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
1341
+ */
1342
+
1343
+ dataptr = data;
1344
+ wsptr = workspace;
1345
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1346
+ /* Even part */
1347
+
1348
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
1349
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
1350
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
1351
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
1352
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
1353
+ tmp5 = dataptr[DCTSIZE*5];
1354
+
1355
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
1356
+ tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
1357
+ tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
1358
+ tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
1359
+ tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
1360
+
1361
+ dataptr[DCTSIZE*0] = (DCTELEM)
1362
+ DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
1363
+ FIX(1.057851240)), /* 128/121 */
1364
+ CONST_BITS+2);
1365
+ tmp5 += tmp5;
1366
+ tmp0 -= tmp5;
1367
+ tmp1 -= tmp5;
1368
+ tmp2 -= tmp5;
1369
+ tmp3 -= tmp5;
1370
+ tmp4 -= tmp5;
1371
+ z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */
1372
+ MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */
1373
+ z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */
1374
+ z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */
1375
+ dataptr[DCTSIZE*2] = (DCTELEM)
1376
+ DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
1377
+ - MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */
1378
+ CONST_BITS+2);
1379
+ dataptr[DCTSIZE*4] = (DCTELEM)
1380
+ DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
1381
+ - MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */
1382
+ + MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */
1383
+ CONST_BITS+2);
1384
+ dataptr[DCTSIZE*6] = (DCTELEM)
1385
+ DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
1386
+ - MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */
1387
+ CONST_BITS+2);
1388
+
1389
+ /* Odd part */
1390
+
1391
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */
1392
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */
1393
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */
1394
+ tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
1395
+ + MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */
1396
+ tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */
1397
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */
1398
+ tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
1399
+ - MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */
1400
+ tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */
1401
+ tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
1402
+ + MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */
1403
+ tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
1404
+ - MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */
1405
+
1406
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
1407
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
1408
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
1409
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
1410
+
1411
+ dataptr++; /* advance pointer to next column */
1412
+ wsptr++; /* advance pointer to next column */
1413
+ }
1414
+ }
1415
+
1416
+
1417
+ /*
1418
+ * Perform the forward DCT on a 12x12 sample block.
1419
+ */
1420
+
1421
+ GLOBAL(void)
1422
+ jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1423
+ {
1424
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1425
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1426
+ DCTELEM workspace[8*4];
1427
+ DCTELEM *dataptr;
1428
+ DCTELEM *wsptr;
1429
+ JSAMPROW elemptr;
1430
+ int ctr;
1431
+ SHIFT_TEMPS
1432
+
1433
+ /* Pass 1: process rows. */
1434
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
1435
+ /* cK represents sqrt(2) * cos(K*pi/24). */
1436
+
1437
+ dataptr = data;
1438
+ ctr = 0;
1439
+ for (;;) {
1440
+ elemptr = sample_data[ctr] + start_col;
1441
+
1442
+ /* Even part */
1443
+
1444
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
1445
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
1446
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
1447
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
1448
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
1449
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
1450
+
1451
+ tmp10 = tmp0 + tmp5;
1452
+ tmp13 = tmp0 - tmp5;
1453
+ tmp11 = tmp1 + tmp4;
1454
+ tmp14 = tmp1 - tmp4;
1455
+ tmp12 = tmp2 + tmp3;
1456
+ tmp15 = tmp2 - tmp3;
1457
+
1458
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
1459
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
1460
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
1461
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
1462
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
1463
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
1464
+
1465
+ /* Apply unsigned->signed conversion */
1466
+ dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
1467
+ dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
1468
+ dataptr[4] = (DCTELEM)
1469
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
1470
+ CONST_BITS);
1471
+ dataptr[2] = (DCTELEM)
1472
+ DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
1473
+ CONST_BITS);
1474
+
1475
+ /* Odd part */
1476
+
1477
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
1478
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
1479
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
1480
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
1481
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
1482
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
1483
+ + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
1484
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
1485
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
1486
+ + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
1487
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
1488
+ - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
1489
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
1490
+ - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
1491
+
1492
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
1493
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
1494
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
1495
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
1496
+
1497
+ ctr++;
1498
+
1499
+ if (ctr != DCTSIZE) {
1500
+ if (ctr == 12)
1501
+ break; /* Done. */
1502
+ dataptr += DCTSIZE; /* advance pointer to next row */
1503
+ } else
1504
+ dataptr = workspace; /* switch pointer to extended workspace */
1505
+ }
1506
+
1507
+ /* Pass 2: process columns.
1508
+ * We leave the results scaled up by an overall factor of 8.
1509
+ * We must also scale the output by (8/12)**2 = 4/9, which we partially
1510
+ * fold into the constant multipliers and final shifting:
1511
+ * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
1512
+ */
1513
+
1514
+ dataptr = data;
1515
+ wsptr = workspace;
1516
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1517
+ /* Even part */
1518
+
1519
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
1520
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
1521
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
1522
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
1523
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
1524
+ tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
1525
+
1526
+ tmp10 = tmp0 + tmp5;
1527
+ tmp13 = tmp0 - tmp5;
1528
+ tmp11 = tmp1 + tmp4;
1529
+ tmp14 = tmp1 - tmp4;
1530
+ tmp12 = tmp2 + tmp3;
1531
+ tmp15 = tmp2 - tmp3;
1532
+
1533
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
1534
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
1535
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
1536
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
1537
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
1538
+ tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
1539
+
1540
+ dataptr[DCTSIZE*0] = (DCTELEM)
1541
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
1542
+ CONST_BITS+1);
1543
+ dataptr[DCTSIZE*6] = (DCTELEM)
1544
+ DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
1545
+ CONST_BITS+1);
1546
+ dataptr[DCTSIZE*4] = (DCTELEM)
1547
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
1548
+ CONST_BITS+1);
1549
+ dataptr[DCTSIZE*2] = (DCTELEM)
1550
+ DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
1551
+ MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
1552
+ CONST_BITS+1);
1553
+
1554
+ /* Odd part */
1555
+
1556
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
1557
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
1558
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
1559
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
1560
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
1561
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
1562
+ + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
1563
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
1564
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
1565
+ + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
1566
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
1567
+ - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
1568
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
1569
+ - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
1570
+
1571
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
1572
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
1573
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
1574
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
1575
+
1576
+ dataptr++; /* advance pointer to next column */
1577
+ wsptr++; /* advance pointer to next column */
1578
+ }
1579
+ }
1580
+
1581
+
1582
+ /*
1583
+ * Perform the forward DCT on a 13x13 sample block.
1584
+ */
1585
+
1586
+ GLOBAL(void)
1587
+ jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1588
+ {
1589
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1590
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1591
+ INT32 z1, z2;
1592
+ DCTELEM workspace[8*5];
1593
+ DCTELEM *dataptr;
1594
+ DCTELEM *wsptr;
1595
+ JSAMPROW elemptr;
1596
+ int ctr;
1597
+ SHIFT_TEMPS
1598
+
1599
+ /* Pass 1: process rows. */
1600
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
1601
+ /* cK represents sqrt(2) * cos(K*pi/26). */
1602
+
1603
+ dataptr = data;
1604
+ ctr = 0;
1605
+ for (;;) {
1606
+ elemptr = sample_data[ctr] + start_col;
1607
+
1608
+ /* Even part */
1609
+
1610
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
1611
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
1612
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
1613
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
1614
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
1615
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
1616
+ tmp6 = GETJSAMPLE(elemptr[6]);
1617
+
1618
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
1619
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
1620
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
1621
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
1622
+ tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
1623
+ tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
1624
+
1625
+ /* Apply unsigned->signed conversion */
1626
+ dataptr[0] = (DCTELEM)
1627
+ (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
1628
+ tmp6 += tmp6;
1629
+ tmp0 -= tmp6;
1630
+ tmp1 -= tmp6;
1631
+ tmp2 -= tmp6;
1632
+ tmp3 -= tmp6;
1633
+ tmp4 -= tmp6;
1634
+ tmp5 -= tmp6;
1635
+ dataptr[2] = (DCTELEM)
1636
+ DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */
1637
+ MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */
1638
+ MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */
1639
+ MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */
1640
+ MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */
1641
+ MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */
1642
+ CONST_BITS);
1643
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
1644
+ MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
1645
+ MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */
1646
+ z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
1647
+ MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
1648
+ MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */
1649
+
1650
+ dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
1651
+ dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
1652
+
1653
+ /* Odd part */
1654
+
1655
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */
1656
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */
1657
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */
1658
+ MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */
1659
+ tmp0 = tmp1 + tmp2 + tmp3 -
1660
+ MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */
1661
+ MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */
1662
+ tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */
1663
+ MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */
1664
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
1665
+ tmp1 += tmp4 + tmp5 +
1666
+ MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */
1667
+ MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */
1668
+ tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
1669
+ tmp2 += tmp4 + tmp6 -
1670
+ MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */
1671
+ MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */
1672
+ tmp3 += tmp5 + tmp6 +
1673
+ MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */
1674
+ MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */
1675
+
1676
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
1677
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
1678
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
1679
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
1680
+
1681
+ ctr++;
1682
+
1683
+ if (ctr != DCTSIZE) {
1684
+ if (ctr == 13)
1685
+ break; /* Done. */
1686
+ dataptr += DCTSIZE; /* advance pointer to next row */
1687
+ } else
1688
+ dataptr = workspace; /* switch pointer to extended workspace */
1689
+ }
1690
+
1691
+ /* Pass 2: process columns.
1692
+ * We leave the results scaled up by an overall factor of 8.
1693
+ * We must also scale the output by (8/13)**2 = 64/169, which we partially
1694
+ * fold into the constant multipliers and final shifting:
1695
+ * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
1696
+ */
1697
+
1698
+ dataptr = data;
1699
+ wsptr = workspace;
1700
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1701
+ /* Even part */
1702
+
1703
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
1704
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
1705
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
1706
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
1707
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
1708
+ tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
1709
+ tmp6 = dataptr[DCTSIZE*6];
1710
+
1711
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
1712
+ tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
1713
+ tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
1714
+ tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
1715
+ tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
1716
+ tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
1717
+
1718
+ dataptr[DCTSIZE*0] = (DCTELEM)
1719
+ DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
1720
+ FIX(0.757396450)), /* 128/169 */
1721
+ CONST_BITS+1);
1722
+ tmp6 += tmp6;
1723
+ tmp0 -= tmp6;
1724
+ tmp1 -= tmp6;
1725
+ tmp2 -= tmp6;
1726
+ tmp3 -= tmp6;
1727
+ tmp4 -= tmp6;
1728
+ tmp5 -= tmp6;
1729
+ dataptr[DCTSIZE*2] = (DCTELEM)
1730
+ DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */
1731
+ MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */
1732
+ MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */
1733
+ MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */
1734
+ MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */
1735
+ MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */
1736
+ CONST_BITS+1);
1737
+ z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
1738
+ MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
1739
+ MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */
1740
+ z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
1741
+ MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
1742
+ MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */
1743
+
1744
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
1745
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
1746
+
1747
+ /* Odd part */
1748
+
1749
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */
1750
+ tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */
1751
+ tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */
1752
+ MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */
1753
+ tmp0 = tmp1 + tmp2 + tmp3 -
1754
+ MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */
1755
+ MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */
1756
+ tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */
1757
+ MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */
1758
+ tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
1759
+ tmp1 += tmp4 + tmp5 +
1760
+ MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */
1761
+ MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */
1762
+ tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
1763
+ tmp2 += tmp4 + tmp6 -
1764
+ MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */
1765
+ MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */
1766
+ tmp3 += tmp5 + tmp6 +
1767
+ MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */
1768
+ MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */
1769
+
1770
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
1771
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
1772
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
1773
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
1774
+
1775
+ dataptr++; /* advance pointer to next column */
1776
+ wsptr++; /* advance pointer to next column */
1777
+ }
1778
+ }
1779
+
1780
+
1781
+ /*
1782
+ * Perform the forward DCT on a 14x14 sample block.
+ *
+ * Parameters:
+ *   data        - receives the output coefficients; rows are DCTSIZE
+ *                 elements apart and 8 values are written per row/column.
+ *   sample_data - array of sample rows; rows 0..13 are read.
+ *   start_col   - column offset of the block inside each sample row;
+ *                 samples start_col .. start_col+13 are read.
+ *
+ * Pass 1 turns every input row into 8 values.  The first DCTSIZE rows are
+ * stored directly in data, the remaining rows in the local workspace.
+ * Pass 2 then walks DCTSIZE columns, combining each data row with its
+ * mirrored partner row (input row k with input row 13-k) and writing the
+ * results back into data.
+ *
+ * NOTE(review): the 8*6 workspace size presumes DCTSIZE == 8 (DCTSIZE is
+ * defined outside this function) - confirm.
1783
+ */
1784
+
1785
+ GLOBAL(void)
1786
+ jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1787
+ {
1788
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1789
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1790
+ DCTELEM workspace[8*6]; /* pass-1 output of the input rows that follow the first DCTSIZE rows */
1791
+ DCTELEM *dataptr;
1792
+ DCTELEM *wsptr;
1793
+ JSAMPROW elemptr;
1794
+ int ctr;
1795
+ SHIFT_TEMPS
1796
+
1797
+ /* Pass 1: process rows. */
1798
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
1799
+ /* cK represents sqrt(2) * cos(K*pi/28). */
1800
+
1801
+ dataptr = data;
1802
+ ctr = 0;
1803
+ for (;;) {
1804
+ elemptr = sample_data[ctr] + start_col;
1805
+
1806
+ /* Even part */
1807
+ /* Fold the row around its centre: sample[N] + sample[13-N]; the N=3 sum is kept in tmp13. */
1808
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
1809
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
1810
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
1811
+ tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
1812
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
1813
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
1814
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
1815
+ /* Second folding stage: sums and differences of the pair sums. */
1816
+ tmp10 = tmp0 + tmp6;
1817
+ tmp14 = tmp0 - tmp6;
1818
+ tmp11 = tmp1 + tmp5;
1819
+ tmp15 = tmp1 - tmp5;
1820
+ tmp12 = tmp2 + tmp4;
1821
+ tmp16 = tmp2 - tmp4;
1822
+ /* tmp0..tmp6 are now reused for the pair differences that feed the odd outputs. */
1823
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
1824
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
1825
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
1826
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
1827
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
1828
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
1829
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
1830
+
1831
+ /* Apply unsigned->signed conversion */
1832
+ dataptr[0] = (DCTELEM)
1833
+ (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
1834
+ tmp13 += tmp13; /* doubled; subtracted from each term of output 4 below */
1835
+ dataptr[4] = (DCTELEM)
1836
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
1837
+ MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
1838
+ MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
1839
+ CONST_BITS);
1840
+
1841
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
1842
+
1843
+ dataptr[2] = (DCTELEM)
1844
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
1845
+ + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
1846
+ CONST_BITS);
1847
+ dataptr[6] = (DCTELEM)
1848
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
1849
+ - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
1850
+ CONST_BITS);
1851
+
1852
+ /* Odd part */
1853
+
1854
+ tmp10 = tmp1 + tmp2;
1855
+ tmp11 = tmp5 - tmp4;
1856
+ dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
1857
+ tmp3 *= ((INT32) 1 << CONST_BITS); /* same value as tmp3 << CONST_BITS, but tmp3 may be negative and left-shifting a negative value is undefined in ISO C */
1858
+ tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
1859
+ tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
1860
+ tmp10 += tmp11 - tmp3;
1861
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
1862
+ MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
1863
+ dataptr[5] = (DCTELEM)
1864
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
1865
+ + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
1866
+ CONST_BITS);
1867
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
1868
+ MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
1869
+ dataptr[3] = (DCTELEM)
1870
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
1871
+ - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
1872
+ CONST_BITS);
1873
+ dataptr[1] = (DCTELEM)
1874
+ DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
1875
+ MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */
1876
+ CONST_BITS);
1877
+
1878
+ ctr++;
1879
+ /* After row DCTSIZE-1 the output moves from data to workspace; the loop ends after row 13. */
1880
+ if (ctr != DCTSIZE) {
1881
+ if (ctr == 14)
1882
+ break; /* Done. */
1883
+ dataptr += DCTSIZE; /* advance pointer to next row */
1884
+ } else
1885
+ dataptr = workspace; /* switch pointer to extended workspace */
1886
+ }
1887
+
1888
+ /* Pass 2: process columns.
1889
+ * We leave the results scaled up by an overall factor of 8.
1890
+ * We must also scale the output by (8/14)**2 = 16/49, which we partially
1891
+ * fold into the constant multipliers and final shifting:
1892
+ * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
1893
+ */
1894
+
1895
+ dataptr = data;
1896
+ wsptr = workspace;
1897
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1898
+ /* Even part */
1899
+ /* Fold the column: data row k pairs with workspace row 5-k; data rows 6 and 7 pair with each other. */
1900
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
1901
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
1902
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
1903
+ tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
1904
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
1905
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
1906
+ tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
1907
+
1908
+ tmp10 = tmp0 + tmp6;
1909
+ tmp14 = tmp0 - tmp6;
1910
+ tmp11 = tmp1 + tmp5;
1911
+ tmp15 = tmp1 - tmp5;
1912
+ tmp12 = tmp2 + tmp4;
1913
+ tmp16 = tmp2 - tmp4;
1914
+
1915
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
1916
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
1917
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
1918
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
1919
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
1920
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
1921
+ tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
1922
+
1923
+ dataptr[DCTSIZE*0] = (DCTELEM)
1924
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
1925
+ FIX(0.653061224)), /* 32/49 */
1926
+ CONST_BITS+1);
1927
+ tmp13 += tmp13;
1928
+ dataptr[DCTSIZE*4] = (DCTELEM)
1929
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
1930
+ MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
1931
+ MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
1932
+ CONST_BITS+1);
1933
+
1934
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
1935
+
1936
+ dataptr[DCTSIZE*2] = (DCTELEM)
1937
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
1938
+ + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
1939
+ CONST_BITS+1);
1940
+ dataptr[DCTSIZE*6] = (DCTELEM)
1941
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
1942
+ - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
1943
+ CONST_BITS+1);
1944
+
1945
+ /* Odd part */
1946
+
1947
+ tmp10 = tmp1 + tmp2;
1948
+ tmp11 = tmp5 - tmp4;
1949
+ dataptr[DCTSIZE*7] = (DCTELEM)
1950
+ DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
1951
+ FIX(0.653061224)), /* 32/49 */
1952
+ CONST_BITS+1);
1953
+ tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
1954
+ tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
1955
+ tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
1956
+ tmp10 += tmp11 - tmp3;
1957
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
1958
+ MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
1959
+ dataptr[DCTSIZE*5] = (DCTELEM)
1960
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
1961
+ + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
1962
+ CONST_BITS+1);
1963
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
1964
+ MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
1965
+ dataptr[DCTSIZE*3] = (DCTELEM)
1966
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
1967
+ - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
1968
+ CONST_BITS+1);
1969
+ dataptr[DCTSIZE*1] = (DCTELEM)
1970
+ DESCALE(tmp11 + tmp12 + tmp3
1971
+ - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
1972
+ - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
1973
+ CONST_BITS+1);
1974
+
1975
+ dataptr++; /* advance pointer to next column */
1976
+ wsptr++; /* advance pointer to next column */
1977
+ }
1978
+ }
1979
+
1980
+
1981
+ /*
1982
+ * Perform the forward DCT on a 15x15 sample block.
+ *
+ * Parameters:
+ *   data        - receives the output coefficients; rows are DCTSIZE
+ *                 elements apart and 8 values are written per row/column.
+ *   sample_data - array of sample rows; rows 0..14 are read.
+ *   start_col   - column offset of the block inside each sample row;
+ *                 samples start_col .. start_col+14 are read.
+ *
+ * Pass 1 turns every input row into 8 values.  The first DCTSIZE rows are
+ * stored directly in data, the remaining rows in the local workspace.
+ * Pass 2 then walks DCTSIZE columns, combining each data row with its
+ * mirrored partner row (input row k with input row 14-k, the centre row 7
+ * standing alone) and writing the results back into data.
+ *
+ * NOTE(review): the 8*7 workspace size presumes DCTSIZE == 8 (DCTSIZE is
+ * defined outside this function) - confirm.
1983
+ */
1984
+
1985
+ GLOBAL(void)
1986
+ jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1987
+ {
1988
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
1989
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1990
+ INT32 z1, z2, z3;
1991
+ DCTELEM workspace[8*7]; /* pass-1 output of the input rows that follow the first DCTSIZE rows */
1992
+ DCTELEM *dataptr;
1993
+ DCTELEM *wsptr;
1994
+ JSAMPROW elemptr;
1995
+ int ctr;
1996
+ SHIFT_TEMPS
1997
+
1998
+ /* Pass 1: process rows. */
1999
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
2000
+ /* cK represents sqrt(2) * cos(K*pi/30). */
2001
+
2002
+ dataptr = data;
2003
+ ctr = 0;
2004
+ for (;;) {
2005
+ elemptr = sample_data[ctr] + start_col;
2006
+
2007
+ /* Even part */
2008
+ /* Fold the row around its centre: tmpN = sample[N] + sample[14-N]; tmp7 is the unpaired centre sample. */
2009
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
2010
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
2011
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
2012
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
2013
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
2014
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
2015
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
2016
+ tmp7 = GETJSAMPLE(elemptr[7]);
2017
+ /* Pair differences, consumed by the odd part below. */
2018
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
2019
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
2020
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
2021
+ tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
2022
+ tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
2023
+ tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
2024
+ tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
2025
+
2026
+ z1 = tmp0 + tmp4 + tmp5;
2027
+ z2 = tmp1 + tmp3 + tmp6;
2028
+ z3 = tmp2 + tmp7;
2029
+ /* Apply unsigned->signed conversion */
2030
+ dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
2031
+ z3 += z3; /* doubled; subtracted from z1 and z2 for output 6 */
2032
+ dataptr[6] = (DCTELEM)
2033
+ DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
2034
+ MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
2035
+ CONST_BITS);
2036
+ tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; /* tmp2 is overwritten here; it is subtracted from each operand of z1 and z2 below */
2037
+ z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */
2038
+ MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */
2039
+ z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */
2040
+ MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */
2041
+ z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */
2042
+ MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */
2043
+ MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */
2044
+
2045
+ dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
2046
+ dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
2047
+
2048
+ /* Odd part */
2049
+ /* tmp0..tmp4 are reused as accumulators for the odd outputs; tmp12 is rescaled in place. */
2050
+ tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2051
+ FIX(1.224744871)); /* c5 */
2052
+ tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
2053
+ MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */
2054
+ tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */
2055
+ tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */
2056
+ MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */
2057
+ MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */
2058
+ tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */
2059
+ MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */
2060
+ MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */
2061
+ tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */
2062
+ MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */
2063
+ MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */
2064
+
2065
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
2066
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
2067
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
2068
+ dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
2069
+
2070
+ ctr++;
2071
+ /* After row DCTSIZE-1 the output moves from data to workspace; the loop ends after row 14. */
2072
+ if (ctr != DCTSIZE) {
2073
+ if (ctr == 15)
2074
+ break; /* Done. */
2075
+ dataptr += DCTSIZE; /* advance pointer to next row */
2076
+ } else
2077
+ dataptr = workspace; /* switch pointer to extended workspace */
2078
+ }
2079
+
2080
+ /* Pass 2: process columns.
2081
+ * We leave the results scaled up by an overall factor of 8.
2082
+ * We must also scale the output by (8/15)**2 = 64/225, which we partially
2083
+ * fold into the constant multipliers and final shifting:
2084
+ * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
2085
+ */
2086
+
2087
+ dataptr = data;
2088
+ wsptr = workspace;
2089
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2090
+ /* Even part */
2091
+ /* Fold the column: data row k pairs with workspace row 6-k; data row 7 is the unpaired centre row. */
2092
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
2093
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
2094
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
2095
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
2096
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
2097
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
2098
+ tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
2099
+ tmp7 = dataptr[DCTSIZE*7];
2100
+
2101
+ tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
2102
+ tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
2103
+ tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
2104
+ tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
2105
+ tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
2106
+ tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
2107
+ tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
2108
+
2109
+ z1 = tmp0 + tmp4 + tmp5;
2110
+ z2 = tmp1 + tmp3 + tmp6;
2111
+ z3 = tmp2 + tmp7;
2112
+ dataptr[DCTSIZE*0] = (DCTELEM)
2113
+ DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
2114
+ CONST_BITS+2);
2115
+ z3 += z3;
2116
+ dataptr[DCTSIZE*6] = (DCTELEM)
2117
+ DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
2118
+ MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */
2119
+ CONST_BITS+2);
2120
+ tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; /* NOTE(review): tmp1 + tmp4 can be negative here, so this >> relies on the compiler's signed right shift - confirm acceptable */
2121
+ z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */
2122
+ MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */
2123
+ z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */
2124
+ MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */
2125
+ z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */
2126
+ MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */
2127
+ MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */
2128
+
2129
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
2130
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
2131
+
2132
+ /* Odd part */
2133
+
2134
+ tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2135
+ FIX(1.393487498)); /* c5 */
2136
+ tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
2137
+ MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */
2138
+ tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */
2139
+ tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */
2140
+ MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */
2141
+ MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */
2142
+ tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */
2143
+ MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */
2144
+ MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */
2145
+ tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */
2146
+ MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */
2147
+ MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */
2148
+
2149
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
2150
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
2151
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
2152
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
2153
+
2154
+ dataptr++; /* advance pointer to next column */
2155
+ wsptr++; /* advance pointer to next column */
2156
+ }
2157
+ }
2158
+
2159
+
2160
+ /*
2160
+ * Perform the forward DCT on a 16x16 sample block.
+ *
+ * Parameters:
+ *   data        - receives the output coefficients; rows are DCTSIZE
+ *                 elements apart and 8 values are written per row/column.
+ *   sample_data - array of sample rows; rows 0 .. 2*DCTSIZE-1 are read.
+ *   start_col   - column offset of the block inside each sample row;
+ *                 samples start_col .. start_col+15 are read.
+ *
+ * Pass 1 turns every input row into 8 values.  The first DCTSIZE rows are
+ * stored directly in data, the following DCTSIZE rows in the local
+ * workspace.  Pass 2 then walks DCTSIZE columns, combining data row k with
+ * workspace row 7-k, and writes the results back into data.
2161
+ */
2162
+
2163
+ GLOBAL(void)
2164
+ jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2165
+ {
2166
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2167
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2168
+ DCTELEM workspace[DCTSIZE2]; /* pass-1 output of the second group of DCTSIZE input rows */
2169
+ DCTELEM *dataptr;
2170
+ DCTELEM *wsptr;
2171
+ JSAMPROW elemptr;
2172
+ int ctr;
2173
+ SHIFT_TEMPS
2174
+
2175
+ /* Pass 1: process rows. */
2176
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2177
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
2178
+ /* cK represents sqrt(2) * cos(K*pi/32). */
2179
+
2180
+ dataptr = data;
2181
+ ctr = 0;
2182
+ for (;;) {
2183
+ elemptr = sample_data[ctr] + start_col;
2184
+
2185
+ /* Even part */
2186
+ /* Fold the row around its centre: tmpN = sample[N] + sample[15-N]. */
2187
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2188
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2189
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2190
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2191
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2192
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2193
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2194
+ tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2195
+ /* Second folding stage: sums and differences of the pair sums. */
2196
+ tmp10 = tmp0 + tmp7;
2197
+ tmp14 = tmp0 - tmp7;
2198
+ tmp11 = tmp1 + tmp6;
2199
+ tmp15 = tmp1 - tmp6;
2200
+ tmp12 = tmp2 + tmp5;
2201
+ tmp16 = tmp2 - tmp5;
2202
+ tmp13 = tmp3 + tmp4;
2203
+ tmp17 = tmp3 - tmp4;
2204
+ /* tmp0..tmp7 are now reused for the pair differences that feed the odd outputs. */
2205
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2206
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2207
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2208
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2209
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2210
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2211
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2212
+ tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2213
+ /* The DC sum below can be negative, so it is scaled by multiplication: left-shifting a negative value is undefined in ISO C. */
2214
+ /* Apply unsigned->signed conversion */
2215
+ dataptr[0] = (DCTELEM)
2216
+ ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) * ((INT32) 1 << PASS1_BITS));
2217
+ dataptr[4] = (DCTELEM)
2218
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2219
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2220
+ CONST_BITS-PASS1_BITS);
2221
+
2222
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2223
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2224
+
2225
+ dataptr[2] = (DCTELEM)
2226
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2227
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2228
+ CONST_BITS-PASS1_BITS);
2229
+ dataptr[6] = (DCTELEM)
2230
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2231
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2232
+ CONST_BITS-PASS1_BITS);
2233
+
2234
+ /* Odd part */
2235
+
2236
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2237
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2238
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2239
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2240
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2241
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2242
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2243
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2244
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2245
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2246
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2247
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2248
+ tmp10 = tmp11 + tmp12 + tmp13 -
2249
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2250
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2251
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2252
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2253
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2254
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2255
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2256
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2257
+
2258
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2259
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2260
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2261
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2262
+
2263
+ ctr++;
2264
+ /* After row DCTSIZE-1 the output moves from data to workspace; the loop ends after row 2*DCTSIZE-1. */
2265
+ if (ctr != DCTSIZE) {
2266
+ if (ctr == DCTSIZE * 2)
2267
+ break; /* Done. */
2268
+ dataptr += DCTSIZE; /* advance pointer to next row */
2269
+ } else
2270
+ dataptr = workspace; /* switch pointer to extended workspace */
2271
+ }
2272
+
2273
+ /* Pass 2: process columns.
2274
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
2275
+ * by an overall factor of 8.
2276
+ * We must also scale the output by (8/16)**2 = 1/2**2.
2277
+ */
2278
+
2279
+ dataptr = data;
2280
+ wsptr = workspace;
2281
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2282
+ /* Even part */
2283
+ /* Fold the column: data row k pairs with workspace row 7-k. */
2284
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
2285
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
2286
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
2287
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
2288
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
2289
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
2290
+ tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
2291
+ tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
2292
+
2293
+ tmp10 = tmp0 + tmp7;
2294
+ tmp14 = tmp0 - tmp7;
2295
+ tmp11 = tmp1 + tmp6;
2296
+ tmp15 = tmp1 - tmp6;
2297
+ tmp12 = tmp2 + tmp5;
2298
+ tmp16 = tmp2 - tmp5;
2299
+ tmp13 = tmp3 + tmp4;
2300
+ tmp17 = tmp3 - tmp4;
2301
+
2302
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
2303
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
2304
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
2305
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
2306
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
2307
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
2308
+ tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
2309
+ tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
2310
+
2311
+ dataptr[DCTSIZE*0] = (DCTELEM)
2312
+ DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
2313
+ dataptr[DCTSIZE*4] = (DCTELEM)
2314
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2315
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2316
+ CONST_BITS+PASS1_BITS+2);
2317
+
2318
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2319
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2320
+
2321
+ dataptr[DCTSIZE*2] = (DCTELEM)
2322
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2323
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2324
+ CONST_BITS+PASS1_BITS+2);
2325
+ dataptr[DCTSIZE*6] = (DCTELEM)
2326
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2327
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2328
+ CONST_BITS+PASS1_BITS+2);
2329
+
2330
+ /* Odd part */
2331
+
2332
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2333
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2334
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2335
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2336
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2337
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2338
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2339
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2340
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2341
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2342
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2343
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2344
+ tmp10 = tmp11 + tmp12 + tmp13 -
2345
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2346
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2347
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2348
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2349
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2350
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2351
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2352
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2353
+
2354
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
2355
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
2356
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
2357
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
2358
+
2359
+ dataptr++; /* advance pointer to next column */
2360
+ wsptr++; /* advance pointer to next column */
2361
+ }
2362
+ }
2364
+
2365
+
2366
+ /*
2367
+ * Perform the forward DCT on a 16x8 sample block.
2368
+ *
2369
+ * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
2370
+ */
2371
+
2372
+ GLOBAL(void)
2373
+ jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2374
+ {
2375
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2376
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2377
+ INT32 z1;
2378
+ DCTELEM *dataptr;
2379
+ JSAMPROW elemptr;
2380
+ int ctr;
2381
+ SHIFT_TEMPS
2382
+
2383
+ /* Pass 1: process rows. */
2384
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2385
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
2386
+ /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
2387
+
2388
+ dataptr = data;
2389
+ ctr = 0;
2390
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
2391
+ elemptr = sample_data[ctr] + start_col;
2392
+
2393
+ /* Even part */
2394
+
2395
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2396
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2397
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2398
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2399
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2400
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2401
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2402
+ tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2403
+
2404
+ tmp10 = tmp0 + tmp7;
2405
+ tmp14 = tmp0 - tmp7;
2406
+ tmp11 = tmp1 + tmp6;
2407
+ tmp15 = tmp1 - tmp6;
2408
+ tmp12 = tmp2 + tmp5;
2409
+ tmp16 = tmp2 - tmp5;
2410
+ tmp13 = tmp3 + tmp4;
2411
+ tmp17 = tmp3 - tmp4;
2412
+
2413
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2414
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2415
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2416
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2417
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2418
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2419
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2420
+ tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2421
+
2422
+ /* Apply unsigned->signed conversion */
2423
+ dataptr[0] = (DCTELEM)
2424
+ ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2425
+ dataptr[4] = (DCTELEM)
2426
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2427
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2428
+ CONST_BITS-PASS1_BITS);
2429
+
2430
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2431
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2432
+
2433
+ dataptr[2] = (DCTELEM)
2434
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2435
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2436
+ CONST_BITS-PASS1_BITS);
2437
+ dataptr[6] = (DCTELEM)
2438
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2439
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2440
+ CONST_BITS-PASS1_BITS);
2441
+
2442
+ /* Odd part */
2443
+
2444
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2445
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2446
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2447
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2448
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2449
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2450
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2451
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2452
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2453
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2454
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2455
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2456
+ tmp10 = tmp11 + tmp12 + tmp13 -
2457
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2458
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2459
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2460
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2461
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2462
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2463
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2464
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2465
+
2466
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2467
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2468
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2469
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2470
+
2471
+ dataptr += DCTSIZE; /* advance pointer to next row */
2472
+ }
2473
+
2474
+ /* Pass 2: process columns.
2475
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
2476
+ * by an overall factor of 8.
2477
+ * We must also scale the output by 8/16 = 1/2.
2478
+ */
2479
+
2480
+ dataptr = data;
2481
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2482
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
2483
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
2484
+ */
2485
+
2486
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2487
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2488
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2489
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2490
+
2491
+ tmp10 = tmp0 + tmp3;
2492
+ tmp12 = tmp0 - tmp3;
2493
+ tmp11 = tmp1 + tmp2;
2494
+ tmp13 = tmp1 - tmp2;
2495
+
2496
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2497
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2498
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2499
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2500
+
2501
+ dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
2502
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
2503
+
2504
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
2505
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
2506
+ CONST_BITS+PASS1_BITS+1);
2507
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
2508
+ CONST_BITS+PASS1_BITS+1);
2509
+
2510
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2511
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2512
+ * i0..i3 in the paper are tmp0..tmp3 here.
2513
+ */
2514
+
2515
+ tmp10 = tmp0 + tmp3;
2516
+ tmp11 = tmp1 + tmp2;
2517
+ tmp12 = tmp0 + tmp2;
2518
+ tmp13 = tmp1 + tmp3;
2519
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
2520
+
2521
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
2522
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
2523
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
2524
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
2525
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
2526
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
2527
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
2528
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
2529
+
2530
+ tmp12 += z1;
2531
+ tmp13 += z1;
2532
+
2533
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
2534
+ CONST_BITS+PASS1_BITS+1);
2535
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
2536
+ CONST_BITS+PASS1_BITS+1);
2537
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
2538
+ CONST_BITS+PASS1_BITS+1);
2539
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
2540
+ CONST_BITS+PASS1_BITS+1);
2541
+
2542
+ dataptr++; /* advance pointer to next column */
2543
+ }
2544
+ }
2545
+
2546
+
2547
+ /*
2548
+ * Perform the forward DCT on a 14x7 sample block.
2549
+ *
2550
+ * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
2551
+ */
2552
+
2553
+ GLOBAL(void)
2554
+ jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2555
+ {
2556
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
2557
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2558
+ INT32 z1, z2, z3;
2559
+ DCTELEM *dataptr;
2560
+ JSAMPROW elemptr;
2561
+ int ctr;
2562
+ SHIFT_TEMPS
2563
+
2564
+ /* Zero bottom row of output coefficient block. */
2565
+ MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
2566
+
2567
+ /* Pass 1: process rows. */
2568
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2569
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
2570
+ /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
2571
+
2572
+ dataptr = data;
2573
+ for (ctr = 0; ctr < 7; ctr++) {
2574
+ elemptr = sample_data[ctr] + start_col;
2575
+
2576
+ /* Even part */
2577
+
2578
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
2579
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
2580
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
2581
+ tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
2582
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
2583
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
2584
+ tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
2585
+
2586
+ tmp10 = tmp0 + tmp6;
2587
+ tmp14 = tmp0 - tmp6;
2588
+ tmp11 = tmp1 + tmp5;
2589
+ tmp15 = tmp1 - tmp5;
2590
+ tmp12 = tmp2 + tmp4;
2591
+ tmp16 = tmp2 - tmp4;
2592
+
2593
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
2594
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
2595
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
2596
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
2597
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
2598
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
2599
+ tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
2600
+
2601
+ /* Apply unsigned->signed conversion */
2602
+ dataptr[0] = (DCTELEM)
2603
+ ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
2604
+ tmp13 += tmp13;
2605
+ dataptr[4] = (DCTELEM)
2606
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
2607
+ MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
2608
+ MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
2609
+ CONST_BITS-PASS1_BITS);
2610
+
2611
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
2612
+
2613
+ dataptr[2] = (DCTELEM)
2614
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
2615
+ + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
2616
+ CONST_BITS-PASS1_BITS);
2617
+ dataptr[6] = (DCTELEM)
2618
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
2619
+ - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
2620
+ CONST_BITS-PASS1_BITS);
2621
+
2622
+ /* Odd part */
2623
+
2624
+ tmp10 = tmp1 + tmp2;
2625
+ tmp11 = tmp5 - tmp4;
2626
+ dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
2627
+ tmp3 <<= CONST_BITS;
2628
+ tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
2629
+ tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
2630
+ tmp10 += tmp11 - tmp3;
2631
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
2632
+ MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
2633
+ dataptr[5] = (DCTELEM)
2634
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
2635
+ + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
2636
+ CONST_BITS-PASS1_BITS);
2637
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
2638
+ MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
2639
+ dataptr[3] = (DCTELEM)
2640
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
2641
+ - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
2642
+ CONST_BITS-PASS1_BITS);
2643
+ dataptr[1] = (DCTELEM)
2644
+ DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
2645
+ MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */
2646
+ CONST_BITS-PASS1_BITS);
2647
+
2648
+ dataptr += DCTSIZE; /* advance pointer to next row */
2649
+ }
2650
+
2651
+ /* Pass 2: process columns.
2652
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
2653
+ * by an overall factor of 8.
2654
+ * We must also scale the output by (8/14)*(8/7) = 32/49, which we
2655
+ * partially fold into the constant multipliers and final shifting:
2656
+ * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
2657
+ */
2658
+
2659
+ dataptr = data;
2660
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2661
+ /* Even part */
2662
+
2663
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
2664
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
2665
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
2666
+ tmp3 = dataptr[DCTSIZE*3];
2667
+
2668
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
2669
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
2670
+ tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
2671
+
2672
+ z1 = tmp0 + tmp2;
2673
+ dataptr[DCTSIZE*0] = (DCTELEM)
2674
+ DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
2675
+ CONST_BITS+PASS1_BITS+1);
2676
+ tmp3 += tmp3;
2677
+ z1 -= tmp3;
2678
+ z1 -= tmp3;
2679
+ z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
2680
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
2681
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
2682
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
2683
+ z1 -= z2;
2684
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
2685
+ dataptr[DCTSIZE*4] = (DCTELEM)
2686
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
2687
+ CONST_BITS+PASS1_BITS+1);
2688
+ dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
2689
+
2690
+ /* Odd part */
2691
+
2692
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
2693
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
2694
+ tmp0 = tmp1 - tmp2;
2695
+ tmp1 += tmp2;
2696
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
2697
+ tmp1 += tmp2;
2698
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
2699
+ tmp0 += tmp3;
2700
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
2701
+
2702
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
2703
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
2704
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
2705
+
2706
+ dataptr++; /* advance pointer to next column */
2707
+ }
2708
+ }
2709
+
2710
+
2711
+ /*
2712
+ * Perform the forward DCT on a 12x6 sample block.
2713
+ *
2714
+ * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
2715
+ */
2716
+
2717
+ GLOBAL(void)
2718
+ jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2719
+ {
2720
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
2721
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2722
+ DCTELEM *dataptr;
2723
+ JSAMPROW elemptr;
2724
+ int ctr;
2725
+ SHIFT_TEMPS
2726
+
2727
+ /* Zero 2 bottom rows of output coefficient block. */
2728
+ MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
2729
+
2730
+ /* Pass 1: process rows. */
2731
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2732
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
2733
+ /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
2734
+
2735
+ dataptr = data;
2736
+ for (ctr = 0; ctr < 6; ctr++) {
2737
+ elemptr = sample_data[ctr] + start_col;
2738
+
2739
+ /* Even part */
2740
+
2741
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
2742
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
2743
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
2744
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
2745
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
2746
+ tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
2747
+
2748
+ tmp10 = tmp0 + tmp5;
2749
+ tmp13 = tmp0 - tmp5;
2750
+ tmp11 = tmp1 + tmp4;
2751
+ tmp14 = tmp1 - tmp4;
2752
+ tmp12 = tmp2 + tmp3;
2753
+ tmp15 = tmp2 - tmp3;
2754
+
2755
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
2756
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
2757
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
2758
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
2759
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
2760
+ tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
2761
+
2762
+ /* Apply unsigned->signed conversion */
2763
+ dataptr[0] = (DCTELEM)
2764
+ ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
2765
+ dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
2766
+ dataptr[4] = (DCTELEM)
2767
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
2768
+ CONST_BITS-PASS1_BITS);
2769
+ dataptr[2] = (DCTELEM)
2770
+ DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
2771
+ CONST_BITS-PASS1_BITS);
2772
+
2773
+ /* Odd part */
2774
+
2775
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
2776
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
2777
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
2778
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
2779
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
2780
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
2781
+ + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
2782
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
2783
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
2784
+ + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
2785
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
2786
+ - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
2787
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
2788
+ - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
2789
+
2790
+ dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2791
+ dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2792
+ dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2793
+ dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2794
+
2795
+ dataptr += DCTSIZE; /* advance pointer to next row */
2796
+ }
2797
+
2798
+ /* Pass 2: process columns.
2799
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
2800
+ * by an overall factor of 8.
2801
+ * We must also scale the output by (8/12)*(8/6) = 8/9, which we
2802
+ * partially fold into the constant multipliers and final shifting:
2803
+ * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
2804
+ */
2805
+
2806
+ dataptr = data;
2807
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2808
+ /* Even part */
2809
+
2810
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
2811
+ tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
2812
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
2813
+
2814
+ tmp10 = tmp0 + tmp2;
2815
+ tmp12 = tmp0 - tmp2;
2816
+
2817
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
2818
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
2819
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
2820
+
2821
+ dataptr[DCTSIZE*0] = (DCTELEM)
2822
+ DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
2823
+ CONST_BITS+PASS1_BITS+1);
2824
+ dataptr[DCTSIZE*2] = (DCTELEM)
2825
+ DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
2826
+ CONST_BITS+PASS1_BITS+1);
2827
+ dataptr[DCTSIZE*4] = (DCTELEM)
2828
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
2829
+ CONST_BITS+PASS1_BITS+1);
2830
+
2831
+ /* Odd part */
2832
+
2833
+ tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
2834
+
2835
+ dataptr[DCTSIZE*1] = (DCTELEM)
2836
+ DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
2837
+ CONST_BITS+PASS1_BITS+1);
2838
+ dataptr[DCTSIZE*3] = (DCTELEM)
2839
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
2840
+ CONST_BITS+PASS1_BITS+1);
2841
+ dataptr[DCTSIZE*5] = (DCTELEM)
2842
+ DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
2843
+ CONST_BITS+PASS1_BITS+1);
2844
+
2845
+ dataptr++; /* advance pointer to next column */
2846
+ }
2847
+ }
2848
+
2849
+
2850
+ /*
2851
+ * Perform the forward DCT on a 10x5 sample block.
2852
+ *
2853
+ * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
2854
+ */
2855
+
2856
+ GLOBAL(void)
2857
+ jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2858
+ {
2859
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
2860
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
2861
+ DCTELEM *dataptr;
2862
+ JSAMPROW elemptr;
2863
+ int ctr;
2864
+ SHIFT_TEMPS
2865
+
2866
+ /* Zero 3 bottom rows of output coefficient block. */
2867
+ MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
2868
+
2869
+ /* Pass 1: process rows. */
2870
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
2871
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
2872
+ /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
2873
+
2874
+ dataptr = data;
2875
+ for (ctr = 0; ctr < 5; ctr++) {
2876
+ elemptr = sample_data[ctr] + start_col;
2877
+
2878
+ /* Even part */
2879
+
2880
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
2881
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
2882
+ tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
2883
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
2884
+ tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
2885
+
2886
+ tmp10 = tmp0 + tmp4;
2887
+ tmp13 = tmp0 - tmp4;
2888
+ tmp11 = tmp1 + tmp3;
2889
+ tmp14 = tmp1 - tmp3;
2890
+
2891
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
2892
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
2893
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
2894
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
2895
+ tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
2896
+
2897
+ /* Apply unsigned->signed conversion */
2898
+ dataptr[0] = (DCTELEM)
2899
+ ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
2900
+ tmp12 += tmp12;
2901
+ dataptr[4] = (DCTELEM)
2902
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
2903
+ MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
2904
+ CONST_BITS-PASS1_BITS);
2905
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
2906
+ dataptr[2] = (DCTELEM)
2907
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
2908
+ CONST_BITS-PASS1_BITS);
2909
+ dataptr[6] = (DCTELEM)
2910
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
2911
+ CONST_BITS-PASS1_BITS);
2912
+
2913
+ /* Odd part */
2914
+
2915
+ tmp10 = tmp0 + tmp4;
2916
+ tmp11 = tmp1 - tmp3;
2917
+ dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
2918
+ tmp2 <<= CONST_BITS;
2919
+ dataptr[1] = (DCTELEM)
2920
+ DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
2921
+ MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
2922
+ MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
2923
+ MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
2924
+ CONST_BITS-PASS1_BITS);
2925
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
2926
+ MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
2927
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
2928
+ (tmp11 << (CONST_BITS - 1)) - tmp2;
2929
+ dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
2930
+ dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
2931
+
2932
+ dataptr += DCTSIZE; /* advance pointer to next row */
2933
+ }
2934
+
2935
+ /* Pass 2: process columns.
2936
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
2937
+ * by an overall factor of 8.
2938
+ * We must also scale the output by (8/10)*(8/5) = 32/25, which we
2939
+ * fold into the constant multipliers:
2940
+ * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
2941
+ */
2942
+
2943
+ dataptr = data;
2944
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2945
+ /* Even part */
2946
+
2947
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
2948
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
2949
+ tmp2 = dataptr[DCTSIZE*2];
2950
+
2951
+ tmp10 = tmp0 + tmp1;
2952
+ tmp11 = tmp0 - tmp1;
2953
+
2954
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
2955
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
2956
+
2957
+ dataptr[DCTSIZE*0] = (DCTELEM)
2958
+ DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
2959
+ CONST_BITS+PASS1_BITS);
2960
+ tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
2961
+ tmp10 -= tmp2 << 2;
2962
+ tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
2963
+ dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
2964
+ dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
2965
+
2966
+ /* Odd part */
2967
+
2968
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
2969
+
2970
+ dataptr[DCTSIZE*1] = (DCTELEM)
2971
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
2972
+ CONST_BITS+PASS1_BITS);
2973
+ dataptr[DCTSIZE*3] = (DCTELEM)
2974
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
2975
+ CONST_BITS+PASS1_BITS);
2976
+
2977
+ dataptr++; /* advance pointer to next column */
2978
+ }
2979
+ }
2980
+
2981
+
2982
+ /*
2983
+ * Perform the forward DCT on an 8x4 sample block.
2984
+ *
2985
+ * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
2986
+ */
2987
+
2988
+ GLOBAL(void)
2989
+ jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2990
+ {
2991
+ INT32 tmp0, tmp1, tmp2, tmp3;
2992
+ INT32 tmp10, tmp11, tmp12, tmp13;
2993
+ INT32 z1;
2994
+ DCTELEM *dataptr;
2995
+ JSAMPROW elemptr;
2996
+ int ctr;
2997
+ SHIFT_TEMPS
2998
+
2999
+ /* Zero 4 bottom rows of output coefficient block. */
3000
+ MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
3001
+
3002
+ /* Pass 1: process rows. */
3003
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3004
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3005
+ /* We must also scale the output by 8/4 = 2, which we add here. */
3006
+
3007
+ dataptr = data;
3008
+ for (ctr = 0; ctr < 4; ctr++) {
3009
+ elemptr = sample_data[ctr] + start_col;
3010
+
3011
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
3012
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
3013
+ */
3014
+
3015
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3016
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3017
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3018
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3019
+
3020
+ tmp10 = tmp0 + tmp3;
3021
+ tmp12 = tmp0 - tmp3;
3022
+ tmp11 = tmp1 + tmp2;
3023
+ tmp13 = tmp1 - tmp2;
3024
+
3025
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3026
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3027
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3028
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3029
+
3030
+ /* Apply unsigned->signed conversion */
3031
+ dataptr[0] = (DCTELEM)
3032
+ ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
3033
+ dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
3034
+
3035
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
3036
+ /* Add fudge factor here for final descale. */
3037
+ z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3038
+ dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
3039
+ CONST_BITS-PASS1_BITS-1);
3040
+ dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
3041
+ CONST_BITS-PASS1_BITS-1);
3042
+
3043
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3044
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3045
+ * i0..i3 in the paper are tmp0..tmp3 here.
3046
+ */
3047
+
3048
+ tmp10 = tmp0 + tmp3;
3049
+ tmp11 = tmp1 + tmp2;
3050
+ tmp12 = tmp0 + tmp2;
3051
+ tmp13 = tmp1 + tmp3;
3052
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3053
+ /* Add fudge factor here for final descale. */
3054
+ z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3055
+
3056
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3057
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3058
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3059
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3060
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
3061
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
3062
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
3063
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3064
+
3065
+ tmp12 += z1;
3066
+ tmp13 += z1;
3067
+
3068
+ dataptr[1] = (DCTELEM)
3069
+ RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
3070
+ dataptr[3] = (DCTELEM)
3071
+ RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
3072
+ dataptr[5] = (DCTELEM)
3073
+ RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
3074
+ dataptr[7] = (DCTELEM)
3075
+ RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
3076
+
3077
+ dataptr += DCTSIZE; /* advance pointer to next row */
3078
+ }
3079
+
3080
+ /* Pass 2: process columns.
3081
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3082
+ * by an overall factor of 8.
3083
+ * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3084
+ */
3085
+
3086
+ dataptr = data;
3087
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3088
+ /* Even part */
3089
+
3090
+ /* Add fudge factor here for final descale. */
3091
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
3092
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
3093
+
3094
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
3095
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
3096
+
3097
+ dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3098
+ dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3099
+
3100
+ /* Odd part */
3101
+
3102
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3103
+ /* Add fudge factor here for final descale. */
3104
+ tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
3105
+
3106
+ dataptr[DCTSIZE*1] = (DCTELEM)
3107
+ RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3108
+ CONST_BITS+PASS1_BITS);
3109
+ dataptr[DCTSIZE*3] = (DCTELEM)
3110
+ RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3111
+ CONST_BITS+PASS1_BITS);
3112
+
3113
+ dataptr++; /* advance pointer to next column */
3114
+ }
3115
+ }
3116
+
3117
+
3118
+ /*
3119
+ * Perform the forward DCT on a 6x3 sample block.
3120
+ *
3121
+ * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
3122
+ */
3123
+
3124
+ GLOBAL(void)
3125
+ jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3126
+ {
3127
+ INT32 tmp0, tmp1, tmp2;
3128
+ INT32 tmp10, tmp11, tmp12;
3129
+ DCTELEM *dataptr;
3130
+ JSAMPROW elemptr;
3131
+ int ctr;
3132
+ SHIFT_TEMPS
3133
+
3134
+ /* Pre-zero output coefficient block. */
3135
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3136
+
3137
+ /* Pass 1: process rows. */
3138
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3139
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3140
+ /* We scale the results further by 2 as part of output adaption */
3141
+ /* scaling for different DCT size. */
3142
+ /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
3143
+
3144
+ dataptr = data;
3145
+ for (ctr = 0; ctr < 3; ctr++) {
3146
+ elemptr = sample_data[ctr] + start_col;
3147
+
3148
+ /* Even part */
3149
+
3150
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3151
+ tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3152
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3153
+
3154
+ tmp10 = tmp0 + tmp2;
3155
+ tmp12 = tmp0 - tmp2;
3156
+
3157
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3158
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3159
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3160
+
3161
+ /* Apply unsigned->signed conversion */
3162
+ dataptr[0] = (DCTELEM)
3163
+ ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
3164
+ dataptr[2] = (DCTELEM)
3165
+ DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3166
+ CONST_BITS-PASS1_BITS-1);
3167
+ dataptr[4] = (DCTELEM)
3168
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3169
+ CONST_BITS-PASS1_BITS-1);
3170
+
3171
+ /* Odd part */
3172
+
3173
+ tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3174
+ CONST_BITS-PASS1_BITS-1);
3175
+
3176
+ dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
3177
+ dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
3178
+ dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
3179
+
3180
+ dataptr += DCTSIZE; /* advance pointer to next row */
3181
+ }
3182
+
3183
+ /* Pass 2: process columns.
3184
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3185
+ * by an overall factor of 8.
3186
+ * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
3187
+ * fold into the constant multipliers (other part was done in pass 1):
3188
+ * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
3189
+ */
3190
+
3191
+ dataptr = data;
3192
+ for (ctr = 0; ctr < 6; ctr++) {
3193
+ /* Even part */
3194
+
3195
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
3196
+ tmp1 = dataptr[DCTSIZE*1];
3197
+
3198
+ tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
3199
+
3200
+ dataptr[DCTSIZE*0] = (DCTELEM)
3201
+ DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
3202
+ CONST_BITS+PASS1_BITS);
3203
+ dataptr[DCTSIZE*2] = (DCTELEM)
3204
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
3205
+ CONST_BITS+PASS1_BITS);
3206
+
3207
+ /* Odd part */
3208
+
3209
+ dataptr[DCTSIZE*1] = (DCTELEM)
3210
+ DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
3211
+ CONST_BITS+PASS1_BITS);
3212
+
3213
+ dataptr++; /* advance pointer to next column */
3214
+ }
3215
+ }
3216
+
3217
+
3218
+ /*
3219
+ * Perform the forward DCT on a 4x2 sample block.
3220
+ *
3221
+ * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
+ *
+ * Input: four samples from each of sample_data[0] and sample_data[1],
+ * starting at column start_col.
+ * Output: data is cleared (DCTSIZE2 elements) and then the first four
+ * entries of its first two rows (row stride DCTSIZE) receive the
+ * coefficients; all other entries remain zero.
3222
+ */
3223
+
3224
+ GLOBAL(void)
3225
+ jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3226
+ {
3227
+ INT32 tmp0, tmp1;
3228
+ INT32 tmp10, tmp11;
3229
+ DCTELEM *dataptr;
3230
+ JSAMPROW elemptr;
3231
+ int ctr;
3232
+ SHIFT_TEMPS
3233
+
3234
+ /* Pre-zero output coefficient block. */
3235
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3236
+
3237
+ /* Pass 1: process rows. */
3238
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3239
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3240
+ /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
3241
+ /* 4-point FDCT kernel, */
3242
+ /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
3243
+
3244
+ dataptr = data;
3245
+ for (ctr = 0; ctr < 2; ctr++) {
3246
+ elemptr = sample_data[ctr] + start_col;
3247
+
3248
+ /* Even part */
3249
+
3250
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3251
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3252
+
3253
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3254
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3255
+
3256
+ /* Apply unsigned->signed conversion */
+ /* Scale by multiplication instead of "<<": both values can be negative, */
+ /* and left-shifting a negative signed value is undefined behavior in C. */
3257
+ dataptr[0] = (DCTELEM)
3258
+ ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) * (ONE << (PASS1_BITS+3)));
3259
+ dataptr[2] = (DCTELEM) ((tmp0 - tmp1) * (ONE << (PASS1_BITS+3)));
3260
+
3261
+ /* Odd part */
3262
+
3263
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3264
+ /* Add fudge factor here for final descale. */
3265
+ tmp0 += ONE << (CONST_BITS-PASS1_BITS-4);
3266
+
3267
+ dataptr[1] = (DCTELEM)
3268
+ RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3269
+ CONST_BITS-PASS1_BITS-3);
3270
+ dataptr[3] = (DCTELEM)
3271
+ RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3272
+ CONST_BITS-PASS1_BITS-3);
3273
+
3274
+ dataptr += DCTSIZE; /* advance pointer to next row */
3275
+ }
3276
+
3277
+ /* Pass 2: process columns.
3278
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3279
+ * by an overall factor of 8.
3280
+ */
3281
+
3282
+ dataptr = data;
3283
+ for (ctr = 0; ctr < 4; ctr++) {
3284
+ /* Even part */
3285
+
3286
+ /* Add fudge factor here for final descale. */
3287
+ tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1));
3288
+ tmp1 = dataptr[DCTSIZE*1];
3289
+
3290
+ dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3291
+
3292
+ /* Odd part */
3293
+
3294
+ dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3295
+
3296
+ dataptr++; /* advance pointer to next column */
3297
+ }
3298
+ }
3299
+
3300
+
3301
+ /*
3302
+ * Perform the forward DCT on a 2x1 sample block.
3303
+ *
3304
+ * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
+ *
+ * Input: two samples from sample_data[0], starting at column start_col.
+ * Output: data is cleared (DCTSIZE2 elements) and then data[0] and data[1]
+ * receive the coefficients; all other entries remain zero.
3305
+ */
3306
+
3307
+ GLOBAL(void)
3308
+ jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3309
+ {
3310
+ INT32 tmp0, tmp1;
3311
+ JSAMPROW elemptr;
3312
+
3313
+ /* Pre-zero output coefficient block. */
3314
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3315
+
3316
+ elemptr = sample_data[0] + start_col;
3317
+
3318
+ tmp0 = GETJSAMPLE(elemptr[0]);
3319
+ tmp1 = GETJSAMPLE(elemptr[1]);
3320
+
3321
+ /* We leave the results scaled up by an overall factor of 8.
3322
+ * We must also scale the output by (8/2)*(8/1) = 2**5.
+ * The scaling is done by multiplication instead of "<<": both values
+ * can be negative, and left-shifting a negative signed value is
+ * undefined behavior in C.
3323
+ */
3324
+
3325
+ /* Even part */
3326
+ /* Apply unsigned->signed conversion */
3327
+ data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) * (ONE << 5));
3328
+
3329
+ /* Odd part */
3330
+ data[1] = (DCTELEM) ((tmp0 - tmp1) * (ONE << 5));
3331
+ }
3332
+
3333
+
3334
+ /*
3335
+ * Perform the forward DCT on an 8x16 sample block.
3336
+ *
3337
+ * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
+ *
+ * Input: eight samples from each of sample_data[0] .. sample_data[15],
+ * starting at column start_col.
+ * Pass 1 stores the results for the first DCTSIZE rows in data and for
+ * the remaining rows in a local workspace; pass 2 combines both column by
+ * column and writes rows 0..7 of every column of data, so the output
+ * block is not pre-zeroed here.
3338
+ */
3339
+
3340
+ GLOBAL(void)
3341
+ jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3342
+ {
3343
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3344
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
3345
+ INT32 z1;
3346
+ DCTELEM workspace[DCTSIZE2];
3347
+ DCTELEM *dataptr;
3348
+ DCTELEM *wsptr;
3349
+ JSAMPROW elemptr;
3350
+ int ctr;
3351
+ SHIFT_TEMPS
3352
+
3353
+ /* Pass 1: process rows. */
3354
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3355
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3356
+
3357
+ dataptr = data;
3358
+ ctr = 0;
3359
+ for (;;) {
3360
+ elemptr = sample_data[ctr] + start_col;
3361
+
3362
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
3363
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
3364
+ */
3365
+
3366
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3367
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3368
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3369
+ tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3370
+
3371
+ tmp10 = tmp0 + tmp3;
3372
+ tmp12 = tmp0 - tmp3;
3373
+ tmp11 = tmp1 + tmp2;
3374
+ tmp13 = tmp1 - tmp2;
3375
+
3376
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3377
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3378
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3379
+ tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3380
+
3381
+ /* Apply unsigned->signed conversion */
+ /* Scale by multiplication instead of "<<": both values can be negative, */
+ /* and left-shifting a negative signed value is undefined behavior in C. */
3382
+ dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) * (ONE << PASS1_BITS));
3383
+ dataptr[4] = (DCTELEM) ((tmp10 - tmp11) * (ONE << PASS1_BITS));
3384
+
3385
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
3386
+ dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
3387
+ CONST_BITS-PASS1_BITS);
3388
+ dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
3389
+ CONST_BITS-PASS1_BITS);
3390
+
3391
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3392
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3393
+ * i0..i3 in the paper are tmp0..tmp3 here.
3394
+ */
3395
+
3396
+ tmp10 = tmp0 + tmp3;
3397
+ tmp11 = tmp1 + tmp2;
3398
+ tmp12 = tmp0 + tmp2;
3399
+ tmp13 = tmp1 + tmp3;
3400
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3401
+
3402
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3403
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3404
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3405
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3406
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
3407
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
3408
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
3409
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3410
+
3411
+ tmp12 += z1;
3412
+ tmp13 += z1;
3413
+
3414
+ dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
3415
+ dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
3416
+ dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
3417
+ dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3418
+
3419
+ ctr++;
3420
+
3421
+ if (ctr != DCTSIZE) {
3422
+ if (ctr == DCTSIZE * 2)
3423
+ break; /* Done. */
3424
+ dataptr += DCTSIZE; /* advance pointer to next row */
3425
+ } else
3426
+ dataptr = workspace; /* switch pointer to extended workspace */
3427
+ }
3428
+
3429
+ /* Pass 2: process columns.
3430
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3431
+ * by an overall factor of 8.
3432
+ * We must also scale the output by 8/16 = 1/2.
3433
+ * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3434
+ */
3435
+
3436
+ dataptr = data;
3437
+ wsptr = workspace;
3438
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3439
+ /* Even part */
3440
+
3441
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
3442
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
3443
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
3444
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
3445
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
3446
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
3447
+ tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
3448
+ tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
3449
+
3450
+ tmp10 = tmp0 + tmp7;
3451
+ tmp14 = tmp0 - tmp7;
3452
+ tmp11 = tmp1 + tmp6;
3453
+ tmp15 = tmp1 - tmp6;
3454
+ tmp12 = tmp2 + tmp5;
3455
+ tmp16 = tmp2 - tmp5;
3456
+ tmp13 = tmp3 + tmp4;
3457
+ tmp17 = tmp3 - tmp4;
3458
+
3459
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
3460
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
3461
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
3462
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
3463
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
3464
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
3465
+ tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
3466
+ tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
3467
+
3468
+ dataptr[DCTSIZE*0] = (DCTELEM)
3469
+ DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
3470
+ dataptr[DCTSIZE*4] = (DCTELEM)
3471
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
3472
+ MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
3473
+ CONST_BITS+PASS1_BITS+1);
3474
+
3475
+ tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
3476
+ MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
3477
+
3478
+ dataptr[DCTSIZE*2] = (DCTELEM)
3479
+ DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
3480
+ + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
3481
+ CONST_BITS+PASS1_BITS+1);
3482
+ dataptr[DCTSIZE*6] = (DCTELEM)
3483
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
3484
+ - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
3485
+ CONST_BITS+PASS1_BITS+1);
3486
+
3487
+ /* Odd part */
3488
+
3489
+ tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
3490
+ MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
3491
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
3492
+ MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
3493
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
3494
+ MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
3495
+ tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
3496
+ MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
3497
+ tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
3498
+ MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
3499
+ tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
3500
+ MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
3501
+ tmp10 = tmp11 + tmp12 + tmp13 -
3502
+ MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
3503
+ MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
3504
+ tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
3505
+ - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
3506
+ tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
3507
+ + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
3508
+ tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
3509
+ + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
3510
+
3511
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
3512
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
3513
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
3514
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
3515
+
3516
+ dataptr++; /* advance pointer to next column */
3517
+ wsptr++; /* advance pointer to next column */
3518
+ }
3519
+ }
3520
+
3521
+
3522
+ /*
3523
+ * Perform the forward DCT on a 7x14 sample block.
3524
+ *
3525
+ * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
+ *
+ * Input: seven samples from each of sample_data[0] .. sample_data[13],
+ * starting at column start_col.
+ * Pass 1 stores the results for the first DCTSIZE rows in data and for
+ * the remaining six rows in a local workspace; pass 2 combines both for
+ * each of the seven columns and writes rows 0..7 of that column in data.
+ * data is cleared first, so entries that are never written remain zero.
3526
+ */
3527
+
3528
+ GLOBAL(void)
3529
+ jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3530
+ {
3531
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
3532
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3533
+ INT32 z1, z2, z3;
3534
+ DCTELEM workspace[8*6];
3535
+ DCTELEM *dataptr;
3536
+ DCTELEM *wsptr;
3537
+ JSAMPROW elemptr;
3538
+ int ctr;
3539
+ SHIFT_TEMPS
3540
+
3541
+ /* Pre-zero output coefficient block. */
3542
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3543
+
3544
+ /* Pass 1: process rows. */
3545
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3546
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3547
+ /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
3548
+
3549
+ dataptr = data;
3550
+ ctr = 0;
3551
+ for (;;) {
3552
+ elemptr = sample_data[ctr] + start_col;
3553
+
3554
+ /* Even part */
3555
+
3556
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
3557
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
3558
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
3559
+ tmp3 = GETJSAMPLE(elemptr[3]);
3560
+
3561
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
3562
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
3563
+ tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
3564
+
3565
+ z1 = tmp0 + tmp2;
3566
+ /* Apply unsigned->signed conversion */
+ /* Scale by multiplication instead of "<<": the value can be negative, */
+ /* and left-shifting a negative signed value is undefined behavior in C. */
3567
+ dataptr[0] = (DCTELEM)
3568
+ ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) * (ONE << PASS1_BITS));
3569
+ tmp3 += tmp3;
3570
+ z1 -= tmp3;
3571
+ z1 -= tmp3;
3572
+ z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
3573
+ z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
3574
+ z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
3575
+ dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
3576
+ z1 -= z2;
3577
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
3578
+ dataptr[4] = (DCTELEM)
3579
+ DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
3580
+ CONST_BITS-PASS1_BITS);
3581
+ dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
3582
+
3583
+ /* Odd part */
3584
+
3585
+ tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3586
+ tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3587
+ tmp0 = tmp1 - tmp2;
3588
+ tmp1 += tmp2;
3589
+ tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
3590
+ tmp1 += tmp2;
3591
+ tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
3592
+ tmp0 += tmp3;
3593
+ tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
3594
+
3595
+ dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
3596
+ dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
3597
+ dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
3598
+
3599
+ ctr++;
3600
+
3601
+ if (ctr != DCTSIZE) {
3602
+ if (ctr == 14)
3603
+ break; /* Done. */
3604
+ dataptr += DCTSIZE; /* advance pointer to next row */
3605
+ } else
3606
+ dataptr = workspace; /* switch pointer to extended workspace */
3607
+ }
3608
+
3609
+ /* Pass 2: process columns.
3610
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3611
+ * by an overall factor of 8.
3612
+ * We must also scale the output by (8/7)*(8/14) = 32/49, which we
3613
+ * fold into the constant multipliers:
3614
+ * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
3615
+ */
3616
+
3617
+ dataptr = data;
3618
+ wsptr = workspace;
3619
+ for (ctr = 0; ctr < 7; ctr++) {
3620
+ /* Even part */
3621
+
3622
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
3623
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
3624
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
3625
+ tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
3626
+ tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
3627
+ tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
3628
+ tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
3629
+
3630
+ tmp10 = tmp0 + tmp6;
3631
+ tmp14 = tmp0 - tmp6;
3632
+ tmp11 = tmp1 + tmp5;
3633
+ tmp15 = tmp1 - tmp5;
3634
+ tmp12 = tmp2 + tmp4;
3635
+ tmp16 = tmp2 - tmp4;
3636
+
3637
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
3638
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
3639
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
3640
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
3641
+ tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
3642
+ tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
3643
+ tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
3644
+
3645
+ dataptr[DCTSIZE*0] = (DCTELEM)
3646
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
3647
+ FIX(0.653061224)), /* 32/49 */
3648
+ CONST_BITS+PASS1_BITS);
3649
+ tmp13 += tmp13;
3650
+ dataptr[DCTSIZE*4] = (DCTELEM)
3651
+ DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
3652
+ MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
3653
+ MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
3654
+ CONST_BITS+PASS1_BITS);
3655
+
3656
+ tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
3657
+
3658
+ dataptr[DCTSIZE*2] = (DCTELEM)
3659
+ DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
3660
+ + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
3661
+ CONST_BITS+PASS1_BITS);
3662
+ dataptr[DCTSIZE*6] = (DCTELEM)
3663
+ DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
3664
+ - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
3665
+ CONST_BITS+PASS1_BITS);
3666
+
3667
+ /* Odd part */
3668
+
3669
+ tmp10 = tmp1 + tmp2;
3670
+ tmp11 = tmp5 - tmp4;
3671
+ dataptr[DCTSIZE*7] = (DCTELEM)
3672
+ DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
3673
+ FIX(0.653061224)), /* 32/49 */
3674
+ CONST_BITS+PASS1_BITS);
3675
+ tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
3676
+ tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
3677
+ tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
3678
+ tmp10 += tmp11 - tmp3;
3679
+ tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
3680
+ MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
3681
+ dataptr[DCTSIZE*5] = (DCTELEM)
3682
+ DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
3683
+ + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
3684
+ CONST_BITS+PASS1_BITS);
3685
+ tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
3686
+ MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
3687
+ dataptr[DCTSIZE*3] = (DCTELEM)
3688
+ DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
3689
+ - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
3690
+ CONST_BITS+PASS1_BITS);
3691
+ dataptr[DCTSIZE*1] = (DCTELEM)
3692
+ DESCALE(tmp11 + tmp12 + tmp3
3693
+ - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
3694
+ - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
3695
+ CONST_BITS+PASS1_BITS);
3696
+
3697
+ dataptr++; /* advance pointer to next column */
3698
+ wsptr++; /* advance pointer to next column */
3699
+ }
3700
+ }
3701
+
3702
+
3703
+ /*
3704
+ * Perform the forward DCT on a 6x12 sample block.
3705
+ *
3706
+ * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
+ *
+ * Input: six samples from each of sample_data[0] .. sample_data[11],
+ * starting at column start_col.
+ * Pass 1 stores the results for the first DCTSIZE rows in data and for
+ * the remaining four rows in a local workspace; pass 2 combines both for
+ * each of the six columns and writes rows 0..7 of that column in data.
+ * data is cleared first, so entries that are never written remain zero.
3707
+ */
3708
+
3709
+ GLOBAL(void)
3710
+ jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3711
+ {
3712
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
3713
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3714
+ DCTELEM workspace[8*4];
3715
+ DCTELEM *dataptr;
3716
+ DCTELEM *wsptr;
3717
+ JSAMPROW elemptr;
3718
+ int ctr;
3719
+ SHIFT_TEMPS
3720
+
3721
+ /* Pre-zero output coefficient block. */
3722
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3723
+
3724
+ /* Pass 1: process rows. */
3725
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3726
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3727
+ /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
3728
+
3729
+ dataptr = data;
3730
+ ctr = 0;
3731
+ for (;;) {
3732
+ elemptr = sample_data[ctr] + start_col;
3733
+
3734
+ /* Even part */
3735
+
3736
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3737
+ tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3738
+ tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3739
+
3740
+ tmp10 = tmp0 + tmp2;
3741
+ tmp12 = tmp0 - tmp2;
3742
+
3743
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3744
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3745
+ tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3746
+
3747
+ /* Apply unsigned->signed conversion */
+ /* Scale by multiplication instead of "<<": the value can be negative, */
+ /* and left-shifting a negative signed value is undefined behavior in C. */
3748
+ dataptr[0] = (DCTELEM)
3749
+ ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) * (ONE << PASS1_BITS));
3750
+ dataptr[2] = (DCTELEM)
3751
+ DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3752
+ CONST_BITS-PASS1_BITS);
3753
+ dataptr[4] = (DCTELEM)
3754
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3755
+ CONST_BITS-PASS1_BITS);
3756
+
3757
+ /* Odd part */
3758
+
3759
+ tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3760
+ CONST_BITS-PASS1_BITS);
3761
+
+ /* The sample differences below can be negative, so they are scaled by */
+ /* multiplication as well rather than by a left shift. */
3762
+ dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) * (ONE << PASS1_BITS)));
3763
+ dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) * (ONE << PASS1_BITS));
3764
+ dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) * (ONE << PASS1_BITS)));
3765
+
3766
+ ctr++;
3767
+
3768
+ if (ctr != DCTSIZE) {
3769
+ if (ctr == 12)
3770
+ break; /* Done. */
3771
+ dataptr += DCTSIZE; /* advance pointer to next row */
3772
+ } else
3773
+ dataptr = workspace; /* switch pointer to extended workspace */
3774
+ }
3775
+
3776
+ /* Pass 2: process columns.
3777
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3778
+ * by an overall factor of 8.
3779
+ * We must also scale the output by (8/6)*(8/12) = 8/9, which we
3780
+ * fold into the constant multipliers:
3781
+ * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
3782
+ */
3783
+
3784
+ dataptr = data;
3785
+ wsptr = workspace;
3786
+ for (ctr = 0; ctr < 6; ctr++) {
3787
+ /* Even part */
3788
+
3789
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
3790
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
3791
+ tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
3792
+ tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
3793
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
3794
+ tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
3795
+
3796
+ tmp10 = tmp0 + tmp5;
3797
+ tmp13 = tmp0 - tmp5;
3798
+ tmp11 = tmp1 + tmp4;
3799
+ tmp14 = tmp1 - tmp4;
3800
+ tmp12 = tmp2 + tmp3;
3801
+ tmp15 = tmp2 - tmp3;
3802
+
3803
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
3804
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
3805
+ tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
3806
+ tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
3807
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
3808
+ tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
3809
+
3810
+ dataptr[DCTSIZE*0] = (DCTELEM)
3811
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
3812
+ CONST_BITS+PASS1_BITS);
3813
+ dataptr[DCTSIZE*6] = (DCTELEM)
3814
+ DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
3815
+ CONST_BITS+PASS1_BITS);
3816
+ dataptr[DCTSIZE*4] = (DCTELEM)
3817
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
3818
+ CONST_BITS+PASS1_BITS);
3819
+ dataptr[DCTSIZE*2] = (DCTELEM)
3820
+ DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
3821
+ MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
3822
+ CONST_BITS+PASS1_BITS);
3823
+
3824
+ /* Odd part */
3825
+
3826
+ tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
3827
+ tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
3828
+ tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
3829
+ tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
3830
+ tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
3831
+ tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
3832
+ + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
3833
+ tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
3834
+ tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
3835
+ + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
3836
+ tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
3837
+ - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
3838
+ tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
3839
+ - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
3840
+
3841
+ dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
3842
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
3843
+ dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
3844
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
3845
+
3846
+ dataptr++; /* advance pointer to next column */
3847
+ wsptr++; /* advance pointer to next column */
3848
+ }
3849
+ }
3850
+
3851
+
3852
+ /*
3853
+ * Perform the forward DCT on a 5x10 sample block.
3854
+ *
3855
+ * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
+ *
+ * Input: five samples from each of sample_data[0] .. sample_data[9],
+ * starting at column start_col.
+ * Pass 1 stores the results for the first DCTSIZE rows in data and for
+ * the remaining two rows in a local workspace; pass 2 combines both for
+ * each of the five columns and writes rows 0..7 of that column in data.
+ * data is cleared first, so entries that are never written remain zero.
3856
+ */
3857
+
3858
+ GLOBAL(void)
3859
+ jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3860
+ {
3861
+ INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
3862
+ INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3863
+ DCTELEM workspace[8*2];
3864
+ DCTELEM *dataptr;
3865
+ DCTELEM *wsptr;
3866
+ JSAMPROW elemptr;
3867
+ int ctr;
3868
+ SHIFT_TEMPS
3869
+
3870
+ /* Pre-zero output coefficient block. */
3871
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3872
+
3873
+ /* Pass 1: process rows. */
3874
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
3875
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
3876
+ /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
3877
+
3878
+ dataptr = data;
3879
+ ctr = 0;
3880
+ for (;;) {
3881
+ elemptr = sample_data[ctr] + start_col;
3882
+
3883
+ /* Even part */
3884
+
3885
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
3886
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
3887
+ tmp2 = GETJSAMPLE(elemptr[2]);
3888
+
3889
+ tmp10 = tmp0 + tmp1;
3890
+ tmp11 = tmp0 - tmp1;
3891
+
3892
+ tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
3893
+ tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
3894
+
3895
+ /* Apply unsigned->signed conversion */
+ /* Scale by multiplication instead of "<<": the value can be negative, */
+ /* and left-shifting a negative signed value is undefined behavior in C. */
3896
+ dataptr[0] = (DCTELEM)
3897
+ ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) * (ONE << PASS1_BITS));
3898
+ tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
3899
+ tmp10 -= tmp2 << 2;
3900
+ tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
3901
+ dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
3902
+ dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
3903
+
3904
+ /* Odd part */
3905
+
3906
+ tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
3907
+
3908
+ dataptr[1] = (DCTELEM)
3909
+ DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
3910
+ CONST_BITS-PASS1_BITS);
3911
+ dataptr[3] = (DCTELEM)
3912
+ DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
3913
+ CONST_BITS-PASS1_BITS);
3914
+
3915
+ ctr++;
3916
+
3917
+ if (ctr != DCTSIZE) {
3918
+ if (ctr == 10)
3919
+ break; /* Done. */
3920
+ dataptr += DCTSIZE; /* advance pointer to next row */
3921
+ } else
3922
+ dataptr = workspace; /* switch pointer to extended workspace */
3923
+ }
3924
+
3925
+ /* Pass 2: process columns.
3926
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
3927
+ * by an overall factor of 8.
3928
+ * We must also scale the output by (8/5)*(8/10) = 32/25, which we
3929
+ * fold into the constant multipliers:
3930
+ * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
3931
+ */
3932
+
3933
+ dataptr = data;
3934
+ wsptr = workspace;
3935
+ for (ctr = 0; ctr < 5; ctr++) {
3936
+ /* Even part */
3937
+
3938
+ tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
3939
+ tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
3940
+ tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
3941
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
3942
+ tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
3943
+
3944
+ tmp10 = tmp0 + tmp4;
3945
+ tmp13 = tmp0 - tmp4;
3946
+ tmp11 = tmp1 + tmp3;
3947
+ tmp14 = tmp1 - tmp3;
3948
+
3949
+ tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
3950
+ tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
3951
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
3952
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
3953
+ tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
3954
+
3955
+ dataptr[DCTSIZE*0] = (DCTELEM)
3956
+ DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
3957
+ CONST_BITS+PASS1_BITS);
3958
+ tmp12 += tmp12;
3959
+ dataptr[DCTSIZE*4] = (DCTELEM)
3960
+ DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
3961
+ MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
3962
+ CONST_BITS+PASS1_BITS);
3963
+ tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
3964
+ dataptr[DCTSIZE*2] = (DCTELEM)
3965
+ DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
3966
+ CONST_BITS+PASS1_BITS);
3967
+ dataptr[DCTSIZE*6] = (DCTELEM)
3968
+ DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
3969
+ CONST_BITS+PASS1_BITS);
3970
+
3971
+ /* Odd part */
3972
+
3973
+ tmp10 = tmp0 + tmp4;
3974
+ tmp11 = tmp1 - tmp3;
3975
+ dataptr[DCTSIZE*5] = (DCTELEM)
3976
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
3977
+ CONST_BITS+PASS1_BITS);
3978
+ tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
3979
+ dataptr[DCTSIZE*1] = (DCTELEM)
3980
+ DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
3981
+ MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
3982
+ MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
3983
+ MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
3984
+ CONST_BITS+PASS1_BITS);
3985
+ tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
3986
+ MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
3987
+ tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
3988
+ MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
3989
+ dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
3990
+ dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
3991
+
3992
+ dataptr++; /* advance pointer to next column */
3993
+ wsptr++; /* advance pointer to next column */
3994
+ }
3995
+ }
3996
+
3997
+
3998
+ /*
3999
+ * Perform the forward DCT on a 4x8 sample block.
4000
+ *
4001
+ * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
4002
+ *
+ * Input: 4 samples are read from each of the rows sample_data[0..DCTSIZE-1],
+ * beginning at column start_col.
+ * Output: data is cleared for DCTSIZE2 elements; afterwards only the first
+ * 4 entries of each DCTSIZE-strided row are written, so all remaining
+ * coefficients stay zero.
+ */
4003
+
4004
+ GLOBAL(void)
4005
+ jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4006
+ {
4007
+ INT32 tmp0, tmp1, tmp2, tmp3;
4008
+ INT32 tmp10, tmp11, tmp12, tmp13;
4009
+ INT32 z1;
4010
+ DCTELEM *dataptr;
4011
+ JSAMPROW elemptr;
4012
+ int ctr;
4013
+ SHIFT_TEMPS
4014
+
4015
+ /* Pre-zero output coefficient block (the passes below only fill a 4-wide region). */
4016
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4017
+
4018
+ /* Pass 1: process rows (one 4-point transform per input row). */
4019
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
4020
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
4021
+ /* We must also scale the output by 8/4 = 2, which we add here */
+ /* (the "+1" in the left shifts and the "-1" in the right-shift counts). */
4022
+ /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
4023
+
4024
+ dataptr = data;
4025
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
4026
+ elemptr = sample_data[ctr] + start_col;
4027
+
4028
+ /* Even part: tmp0/tmp1 are sums, tmp10/tmp11 differences of mirrored samples */
4029
+
4030
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
4031
+ tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
4032
+
4033
+ tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
4034
+ tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
4035
+
4036
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once per
+ * sample (4 samples) in the DC term only; the offset cancels in every
+ * other output because those are built from differences.
+ * NOTE(review): the shifted operands can be negative (assuming INT32 is
+ * signed); left-shifting a negative value is undefined in ISO C --
+ * confirm the supported compilers behave arithmetically here.
+ */
4037
+ dataptr[0] = (DCTELEM)
4038
+ ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
4039
+ dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
4040
+
4041
+ /* Odd part: tmp0 is reused as the shared rotation term */
4042
+
4043
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4044
+ /* Add fudge factor here for final descale: half of 2**(CONST_BITS-PASS1_BITS-1), the divisor of the two shifts below. */
4045
+ tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
4046
+
4047
+ dataptr[1] = (DCTELEM)
4048
+ RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4049
+ CONST_BITS-PASS1_BITS-1);
4050
+ dataptr[3] = (DCTELEM)
4051
+ RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4052
+ CONST_BITS-PASS1_BITS-1);
4053
+
4054
+ dataptr += DCTSIZE; /* advance pointer to next row */
4055
+ }
4056
+
4057
+ /* Pass 2: process columns.
4058
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
4059
+ * by an overall factor of 8.
4060
+ * Only the 4 columns written by pass 1 are transformed (in place).
+ */
4061
+
4062
+ dataptr = data;
4063
+ for (ctr = 0; ctr < 4; ctr++) {
4064
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
4065
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
4066
+ */
4067
+
4068
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
4069
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
4070
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
4071
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
4072
+
4073
+ /* Add fudge factor here for final descale: half of 2**PASS1_BITS; tmp10 feeds both rows 0 and 4 below. */
4074
+ tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
4075
+ tmp12 = tmp0 - tmp3;
4076
+ tmp11 = tmp1 + tmp2;
4077
+ tmp13 = tmp1 - tmp2;
4078
+
4079
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
4080
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
4081
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
4082
+ tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
4083
+
4084
+ dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
4085
+ dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
4086
+
4087
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
4088
+ /* Add fudge factor here for final descale: half of 2**(CONST_BITS+PASS1_BITS); z1 feeds rows 2 and 6. */
4089
+ z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4090
+ dataptr[DCTSIZE*2] = (DCTELEM)
4091
+ RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
4092
+ dataptr[DCTSIZE*6] = (DCTELEM)
4093
+ RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
4094
+
4095
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
4096
+ * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4097
+ * i0..i3 in the paper are tmp0..tmp3 here.
4098
+ */
4099
+
4100
+ tmp10 = tmp0 + tmp3;
4101
+ tmp11 = tmp1 + tmp2;
4102
+ tmp12 = tmp0 + tmp2;
4103
+ tmp13 = tmp1 + tmp3;
4104
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
4105
+ /* Add fudge factor here for final descale: z1 is folded into tmp12 and tmp13 below, so each odd output receives it exactly once. */
4106
+ z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4107
+
4108
+ tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
4109
+ tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
4110
+ tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
4111
+ tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
4112
+ tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */
4113
+ tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */
4114
+ tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */
4115
+ tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
4116
+
4117
+ tmp12 += z1;
4118
+ tmp13 += z1;
4119
+
4120
+ dataptr[DCTSIZE*1] = (DCTELEM)
4121
+ RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
4122
+ dataptr[DCTSIZE*3] = (DCTELEM)
4123
+ RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
4124
+ dataptr[DCTSIZE*5] = (DCTELEM)
4125
+ RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
4126
+ dataptr[DCTSIZE*7] = (DCTELEM)
4127
+ RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
4128
+
4129
+ dataptr++; /* advance pointer to next column */
4130
+ }
4131
+ }
4132
+
4133
+
4134
+ /*
4135
+ * Perform the forward DCT on a 3x6 sample block.
4136
+ *
4137
+ * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
4138
+ *
+ * Input: 3 samples are read from each of the rows sample_data[0..5],
+ * beginning at column start_col.
+ * Output: data is cleared for DCTSIZE2 elements; afterwards only the first
+ * 3 entries of the first 6 DCTSIZE-strided rows are written, so all
+ * remaining coefficients stay zero.
+ */
4139
+
4140
+ GLOBAL(void)
4141
+ jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4142
+ {
4143
+ INT32 tmp0, tmp1, tmp2;
4144
+ INT32 tmp10, tmp11, tmp12;
4145
+ DCTELEM *dataptr;
4146
+ JSAMPROW elemptr;
4147
+ int ctr;
4148
+ SHIFT_TEMPS
4149
+
4150
+ /* Pre-zero output coefficient block (the passes below only fill a 3x6 region). */
4151
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4152
+
4153
+ /* Pass 1: process rows (one 3-point transform per input row). */
4154
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
4155
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
4156
+ /* We scale the results further by 2 as part of output adaptation */
4157
+ /* scaling for different DCT size (the "+1"/"-1" in the shift counts). */
4158
+ /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
4159
+
4160
+ dataptr = data;
4161
+ for (ctr = 0; ctr < 6; ctr++) {
4162
+ elemptr = sample_data[ctr] + start_col;
4163
+
4164
+ /* Even part: tmp0 is the sum of the outer samples, tmp1 the middle sample, tmp2 the outer difference */
4165
+
4166
+ tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
4167
+ tmp1 = GETJSAMPLE(elemptr[1]);
4168
+
4169
+ tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
4170
+
4171
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once per
+ * sample (3 samples) in the DC term only; the offset cancels in the
+ * other two outputs.
+ * NOTE(review): the shifted operand can be negative (assuming INT32 is
+ * signed); left-shifting a negative value is undefined in ISO C --
+ * confirm the supported compilers behave arithmetically here.
+ */
4172
+ dataptr[0] = (DCTELEM)
4173
+ ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
4174
+ dataptr[2] = (DCTELEM)
4175
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
4176
+ CONST_BITS-PASS1_BITS-1);
4177
+
4178
+ /* Odd part: a single output built from the outer-sample difference */
4179
+
4180
+ dataptr[1] = (DCTELEM)
4181
+ DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
4182
+ CONST_BITS-PASS1_BITS-1);
4183
+
4184
+ dataptr += DCTSIZE; /* advance pointer to next row */
4185
+ }
4186
+
4187
+ /* Pass 2: process columns.
4188
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
4189
+ * by an overall factor of 8.
4190
+ * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
4191
+ * fold into the constant multipliers (other part was done in pass 1):
4192
+ * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
4193
+ * Only the 3 columns written by pass 1 are transformed (in place).
+ */
4194
+
4195
+ dataptr = data;
4196
+ for (ctr = 0; ctr < 3; ctr++) {
4197
+ /* Even part: sums of mirrored rows (0+5, 1+4, 2+3) */
4198
+
4199
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
4200
+ tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
4201
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
4202
+
4203
+ tmp10 = tmp0 + tmp2;
4204
+ tmp12 = tmp0 - tmp2;
4205
+
4206
+ tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
4207
+ tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
4208
+ tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
4209
+
4210
+ dataptr[DCTSIZE*0] = (DCTELEM)
4211
+ DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
4212
+ CONST_BITS+PASS1_BITS);
4213
+ dataptr[DCTSIZE*2] = (DCTELEM)
4214
+ DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
4215
+ CONST_BITS+PASS1_BITS);
4216
+ dataptr[DCTSIZE*4] = (DCTELEM)
4217
+ DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
4218
+ CONST_BITS+PASS1_BITS);
4219
+
4220
+ /* Odd part: built from the mirrored-row differences tmp0..tmp2; tmp10 is reused as the shared c5 term */
4221
+
4222
+ tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
4223
+
4224
+ dataptr[DCTSIZE*1] = (DCTELEM)
4225
+ DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
4226
+ CONST_BITS+PASS1_BITS);
4227
+ dataptr[DCTSIZE*3] = (DCTELEM)
4228
+ DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
4229
+ CONST_BITS+PASS1_BITS);
4230
+ dataptr[DCTSIZE*5] = (DCTELEM)
4231
+ DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
4232
+ CONST_BITS+PASS1_BITS);
4233
+
4234
+ dataptr++; /* advance pointer to next column */
4235
+ }
4236
+ }
4237
+
4238
+
4239
+ /*
4240
+ * Perform the forward DCT on a 2x4 sample block.
4241
+ *
4242
+ * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
4243
+ *
+ * Input: 2 samples are read from each of the rows sample_data[0..3],
+ * beginning at column start_col.
+ * Output: data is cleared for DCTSIZE2 elements; afterwards only the first
+ * 2 entries of the first 4 DCTSIZE-strided rows are written, so all
+ * remaining coefficients stay zero.
+ */
4244
+
4245
+ GLOBAL(void)
4246
+ jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4247
+ {
4248
+ INT32 tmp0, tmp1;
4249
+ INT32 tmp10, tmp11;
4250
+ DCTELEM *dataptr;
4251
+ JSAMPROW elemptr;
4252
+ int ctr;
4253
+ SHIFT_TEMPS
4254
+
4255
+ /* Pre-zero output coefficient block (the passes below only fill a 2x4 region). */
4256
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4257
+
4258
+ /* Pass 1: process rows (a 2-point transform is just sum and difference). */
4259
+ /* Note results are scaled up by sqrt(8) compared to a true DCT. */
4260
+ /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
4261
+
4262
+ dataptr = data;
4263
+ for (ctr = 0; ctr < 4; ctr++) {
4264
+ elemptr = sample_data[ctr] + start_col;
4265
+
4266
+ /* Even part: fetch the two samples of this row */
4267
+
4268
+ tmp0 = GETJSAMPLE(elemptr[0]);
4269
+ tmp1 = GETJSAMPLE(elemptr[1]);
4270
+
4271
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once per
+ * sample (2 samples) in the sum only; the offset cancels in the difference.
+ * NOTE(review): both shifted operands can be negative (assuming INT32 is
+ * signed); left-shifting a negative value is undefined in ISO C --
+ * confirm the supported compilers behave arithmetically here.
+ */
4272
+ dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
4273
+
4274
+ /* Odd part: plain difference, scaled by the same 2**3 */
4275
+
4276
+ dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
4277
+
4278
+ dataptr += DCTSIZE; /* advance pointer to next row */
4279
+ }
4280
+
4281
+ /* Pass 2: process columns.
4282
+ * We leave the results scaled up by an overall factor of 8.
4283
+ * 4-point FDCT kernel,
4284
+ * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4285
+ * Only the 2 columns written by pass 1 are transformed (in place).
+ */
4286
+
4287
+ dataptr = data;
4288
+ for (ctr = 0; ctr < 2; ctr++) {
4289
+ /* Even part: tmp0/tmp1 are sums, tmp10/tmp11 differences of mirrored rows; no descale is applied to rows 0 and 2 */
4290
+
4291
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
4292
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
4293
+
4294
+ tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
4295
+ tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
4296
+
4297
+ dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
4298
+ dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
4299
+
4300
+ /* Odd part: tmp0 is reused as the shared rotation term */
4301
+
4302
+ tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4303
+ /* Add fudge factor here for final descale: half of 2**CONST_BITS, the divisor of the two shifts below. */
4304
+ tmp0 += ONE << (CONST_BITS-1);
4305
+
4306
+ dataptr[DCTSIZE*1] = (DCTELEM)
4307
+ RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4308
+ CONST_BITS);
4309
+ dataptr[DCTSIZE*3] = (DCTELEM)
4310
+ RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4311
+ CONST_BITS);
4312
+
4313
+ dataptr++; /* advance pointer to next column */
4314
+ }
4315
+ }
4316
+
4317
+
4318
+ /*
4319
+ * Perform the forward DCT on a 1x2 sample block.
4320
+ *
4321
+ * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
4322
+ *
+ * Input: one sample from each of sample_data[0] and sample_data[1],
+ * taken at column start_col.
+ * Output: data is cleared for DCTSIZE2 elements; afterwards only
+ * data[DCTSIZE*0] and data[DCTSIZE*1] are written.
+ */
4323
+
4324
+ GLOBAL(void)
4325
+ jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4326
+ {
4327
+ INT32 tmp0, tmp1;
4328
+
4329
+ /* Pre-zero output coefficient block (only two entries are filled below). */
4330
+ MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4331
+
4332
+ tmp0 = GETJSAMPLE(sample_data[0][start_col]);
4333
+ tmp1 = GETJSAMPLE(sample_data[1][start_col]);
4334
+
4335
+ /* We leave the results scaled up by an overall factor of 8.
4336
+ * We must also scale the output by (8/1)*(8/2) = 2**5.
4337
+ * NOTE(review): both shifted operands below can be negative (assuming
+ * INT32 is signed); left-shifting a negative value is undefined in ISO C --
+ * confirm the supported compilers behave arithmetically here.
+ */
4338
+
4339
+ /* Even part */
4340
+ /* Apply unsigned->signed conversion: CENTERJSAMPLE is subtracted once per sample (2 samples). */
4341
+ data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
4342
+
4343
+ /* Odd part: the CENTERJSAMPLE offset cancels in the difference */
4344
+ data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
4345
+ }
4346
+
4347
+ #endif /* DCT_SCALING_SUPPORTED */
4348
+ #endif /* DCT_ISLOW_SUPPORTED */