epeg 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (504) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/MANIFEST +5 -0
  4. data/TODO +1 -0
  5. data/epeg/.dockerignore +4 -0
  6. data/epeg/.gitignore +5 -0
  7. data/epeg/CMakeLists.txt +30 -0
  8. data/epeg/Dockerfile +23 -0
  9. data/epeg/Epeg.h +90 -0
  10. data/epeg/README.md +42 -0
  11. data/epeg/epeg_main.c +1642 -0
  12. data/epeg/epeg_private.h +85 -0
  13. data/epeg/example/.gitignore +1 -0
  14. data/epeg/example/CMakeLists.txt +20 -0
  15. data/epeg/example/example.jpg +0 -0
  16. data/epeg/example/rotatetest.c +29 -0
  17. data/epeg/example/scaletest.c +48 -0
  18. data/epeg/vendor/libjpeg-turbo-2.0.4/BUILDING.md +828 -0
  19. data/epeg/vendor/libjpeg-turbo-2.0.4/CMakeLists.txt +1420 -0
  20. data/epeg/vendor/libjpeg-turbo-2.0.4/ChangeLog.md +1494 -0
  21. data/epeg/vendor/libjpeg-turbo-2.0.4/LICENSE.md +132 -0
  22. data/epeg/vendor/libjpeg-turbo-2.0.4/README.ijg +277 -0
  23. data/epeg/vendor/libjpeg-turbo-2.0.4/README.md +356 -0
  24. data/epeg/vendor/libjpeg-turbo-2.0.4/cderror.h +137 -0
  25. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.c +145 -0
  26. data/epeg/vendor/libjpeg-turbo-2.0.4/cdjpeg.h +157 -0
  27. data/epeg/vendor/libjpeg-turbo-2.0.4/change.log +315 -0
  28. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.1 +354 -0
  29. data/epeg/vendor/libjpeg-turbo-2.0.4/cjpeg.c +695 -0
  30. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/BuildPackages.cmake +182 -0
  31. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/GNUInstallDirs.cmake +416 -0
  32. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/cmake_uninstall.cmake.in +24 -0
  33. data/epeg/vendor/libjpeg-turbo-2.0.4/cmakescripts/testclean.cmake +41 -0
  34. data/epeg/vendor/libjpeg-turbo-2.0.4/cmyk.h +61 -0
  35. data/epeg/vendor/libjpeg-turbo-2.0.4/coderules.txt +78 -0
  36. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.1 +296 -0
  37. data/epeg/vendor/libjpeg-turbo-2.0.4/djpeg.c +822 -0
  38. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/annotated.html +104 -0
  39. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bc_s.png +0 -0
  40. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/bdwn.png +0 -0
  41. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/classes.html +106 -0
  42. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/closed.png +0 -0
  43. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen-extra.css +3 -0
  44. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.css +1184 -0
  45. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/doxygen.png +0 -0
  46. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/dynsections.js +97 -0
  47. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2blank.png +0 -0
  48. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2cl.png +0 -0
  49. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2doc.png +0 -0
  50. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderclosed.png +0 -0
  51. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2folderopen.png +0 -0
  52. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2lastnode.png +0 -0
  53. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2link.png +0 -0
  54. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mlastnode.png +0 -0
  55. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mnode.png +0 -0
  56. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2mo.png +0 -0
  57. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2node.png +0 -0
  58. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2ns.png +0 -0
  59. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2plastnode.png +0 -0
  60. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2pnode.png +0 -0
  61. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2splitbar.png +0 -0
  62. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/ftv2vertline.png +0 -0
  63. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions.html +134 -0
  64. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/functions_vars.html +134 -0
  65. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/group___turbo_j_p_e_g.html +2775 -0
  66. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/index.html +90 -0
  67. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/jquery.js +8 -0
  68. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/modules.html +95 -0
  69. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_f.png +0 -0
  70. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_g.png +0 -0
  71. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/nav_h.png +0 -0
  72. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/open.png +0 -0
  73. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.html +26 -0
  74. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_63.js +4 -0
  75. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.html +26 -0
  76. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_64.js +5 -0
  77. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.html +26 -0
  78. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_68.js +4 -0
  79. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.html +26 -0
  80. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6e.js +4 -0
  81. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.html +26 -0
  82. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_6f.js +5 -0
  83. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.html +26 -0
  84. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_72.js +4 -0
  85. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.html +26 -0
  86. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_74.js +102 -0
  87. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.html +26 -0
  88. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_77.js +4 -0
  89. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.html +26 -0
  90. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_78.js +4 -0
  91. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.html +26 -0
  92. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/all_79.js +4 -0
  93. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.html +26 -0
  94. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/classes_74.js +6 -0
  95. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/close.png +0 -0
  96. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.html +26 -0
  97. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enums_74.js +8 -0
  98. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.html +26 -0
  99. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/enumvalues_74.js +37 -0
  100. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.html +26 -0
  101. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/functions_74.js +31 -0
  102. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.html +26 -0
  103. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/groups_74.js +4 -0
  104. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/mag_sel.png +0 -0
  105. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/nomatches.html +12 -0
  106. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.css +271 -0
  107. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search.js +809 -0
  108. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_l.png +0 -0
  109. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_m.png +0 -0
  110. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/search_r.png +0 -0
  111. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.html +26 -0
  112. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/typedefs_74.js +5 -0
  113. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.html +26 -0
  114. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_63.js +4 -0
  115. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.html +26 -0
  116. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_64.js +5 -0
  117. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.html +26 -0
  118. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_68.js +4 -0
  119. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.html +26 -0
  120. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6e.js +4 -0
  121. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.html +26 -0
  122. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_6f.js +5 -0
  123. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.html +26 -0
  124. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_72.js +4 -0
  125. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.html +26 -0
  126. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_74.js +10 -0
  127. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.html +26 -0
  128. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_77.js +4 -0
  129. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.html +26 -0
  130. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_78.js +4 -0
  131. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.html +26 -0
  132. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/search/variables_79.js +4 -0
  133. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjregion.html +186 -0
  134. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjscalingfactor.html +148 -0
  135. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/structtjtransform.html +212 -0
  136. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_off.png +0 -0
  137. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/sync_on.png +0 -0
  138. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_a.png +0 -0
  139. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_b.png +0 -0
  140. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_h.png +0 -0
  141. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tab_s.png +0 -0
  142. data/epeg/vendor/libjpeg-turbo-2.0.4/doc/html/tabs.css +60 -0
  143. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen-extra.css +3 -0
  144. data/epeg/vendor/libjpeg-turbo-2.0.4/doxygen.config +16 -0
  145. data/epeg/vendor/libjpeg-turbo-2.0.4/example.txt +464 -0
  146. data/epeg/vendor/libjpeg-turbo-2.0.4/jaricom.c +157 -0
  147. data/epeg/vendor/libjpeg-turbo-2.0.4/java/CMakeLists.txt +88 -0
  148. data/epeg/vendor/libjpeg-turbo-2.0.4/java/MANIFEST.MF +2 -0
  149. data/epeg/vendor/libjpeg-turbo-2.0.4/java/README +52 -0
  150. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJBench.java +1021 -0
  151. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJExample.java +405 -0
  152. data/epeg/vendor/libjpeg-turbo-2.0.4/java/TJUnitTest.java +960 -0
  153. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-frame.html +24 -0
  154. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/allclasses-noframe.html +24 -0
  155. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/constant-values.html +532 -0
  156. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/deprecated-list.html +252 -0
  157. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/help-doc.html +210 -0
  158. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index-all.html +1029 -0
  159. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/index.html +71 -0
  160. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJ.html +1356 -0
  161. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCompressor.html +926 -0
  162. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJCustomFilter.html +241 -0
  163. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJDecompressor.html +1255 -0
  164. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJException.html +340 -0
  165. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJScalingFactor.html +343 -0
  166. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransform.html +751 -0
  167. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/TJTransformer.html +421 -0
  168. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/YUVImage.html +765 -0
  169. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-frame.html +31 -0
  170. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-summary.html +202 -0
  171. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/org/libjpegturbo/turbojpeg/package-tree.html +160 -0
  172. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/overview-tree.html +164 -0
  173. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/package-list +1 -0
  174. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/background.gif +0 -0
  175. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/tab.gif +0 -0
  176. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar.gif +0 -0
  177. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/resources/titlebar_end.gif +0 -0
  178. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/script.js +30 -0
  179. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/serialized-form.html +176 -0
  180. data/epeg/vendor/libjpeg-turbo-2.0.4/java/doc/stylesheet.css +474 -0
  181. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJ.java +584 -0
  182. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCompressor.java +677 -0
  183. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJCustomFilter.java +76 -0
  184. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJDecompressor.java +931 -0
  185. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJException.java +78 -0
  186. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-unix.java.in +59 -0
  187. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJLoader-win.java.in +35 -0
  188. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJScalingFactor.java +115 -0
  189. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransform.java +227 -0
  190. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/TJTransformer.java +163 -0
  191. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org/libjpegturbo/turbojpeg/YUVImage.java +445 -0
  192. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJ.h +129 -0
  193. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJCompressor.h +101 -0
  194. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJDecompressor.h +101 -0
  195. data/epeg/vendor/libjpeg-turbo-2.0.4/java/org_libjpegturbo_turbojpeg_TJTransformer.h +29 -0
  196. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapimin.c +295 -0
  197. data/epeg/vendor/libjpeg-turbo-2.0.4/jcapistd.c +162 -0
  198. data/epeg/vendor/libjpeg-turbo-2.0.4/jcarith.c +932 -0
  199. data/epeg/vendor/libjpeg-turbo-2.0.4/jccoefct.c +449 -0
  200. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolext.c +144 -0
  201. data/epeg/vendor/libjpeg-turbo-2.0.4/jccolor.c +710 -0
  202. data/epeg/vendor/libjpeg-turbo-2.0.4/jcdctmgr.c +721 -0
  203. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.c +1096 -0
  204. data/epeg/vendor/libjpeg-turbo-2.0.4/jchuff.h +42 -0
  205. data/epeg/vendor/libjpeg-turbo-2.0.4/jcicc.c +105 -0
  206. data/epeg/vendor/libjpeg-turbo-2.0.4/jcinit.c +77 -0
  207. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmainct.c +162 -0
  208. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmarker.c +664 -0
  209. data/epeg/vendor/libjpeg-turbo-2.0.4/jcmaster.c +640 -0
  210. data/epeg/vendor/libjpeg-turbo-2.0.4/jcomapi.c +109 -0
  211. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.h.in +73 -0
  212. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfig.txt +143 -0
  213. data/epeg/vendor/libjpeg-turbo-2.0.4/jconfigint.h.in +31 -0
  214. data/epeg/vendor/libjpeg-turbo-2.0.4/jcparam.c +541 -0
  215. data/epeg/vendor/libjpeg-turbo-2.0.4/jcphuff.c +1105 -0
  216. data/epeg/vendor/libjpeg-turbo-2.0.4/jcprepct.c +351 -0
  217. data/epeg/vendor/libjpeg-turbo-2.0.4/jcsample.c +539 -0
  218. data/epeg/vendor/libjpeg-turbo-2.0.4/jcstest.c +126 -0
  219. data/epeg/vendor/libjpeg-turbo-2.0.4/jctrans.c +400 -0
  220. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapimin.c +407 -0
  221. data/epeg/vendor/libjpeg-turbo-2.0.4/jdapistd.c +639 -0
  222. data/epeg/vendor/libjpeg-turbo-2.0.4/jdarith.c +773 -0
  223. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst-tj.c +203 -0
  224. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatadst.c +293 -0
  225. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc-tj.c +194 -0
  226. data/epeg/vendor/libjpeg-turbo-2.0.4/jdatasrc.c +295 -0
  227. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.c +692 -0
  228. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcoefct.h +82 -0
  229. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcol565.c +384 -0
  230. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolext.c +143 -0
  231. data/epeg/vendor/libjpeg-turbo-2.0.4/jdcolor.c +883 -0
  232. data/epeg/vendor/libjpeg-turbo-2.0.4/jdct.h +208 -0
  233. data/epeg/vendor/libjpeg-turbo-2.0.4/jddctmgr.c +352 -0
  234. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.c +831 -0
  235. data/epeg/vendor/libjpeg-turbo-2.0.4/jdhuff.h +238 -0
  236. data/epeg/vendor/libjpeg-turbo-2.0.4/jdicc.c +171 -0
  237. data/epeg/vendor/libjpeg-turbo-2.0.4/jdinput.c +408 -0
  238. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.c +460 -0
  239. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmainct.h +71 -0
  240. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmarker.c +1377 -0
  241. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.c +737 -0
  242. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmaster.h +28 -0
  243. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmerge.c +617 -0
  244. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrg565.c +354 -0
  245. data/epeg/vendor/libjpeg-turbo-2.0.4/jdmrgext.c +184 -0
  246. data/epeg/vendor/libjpeg-turbo-2.0.4/jdphuff.c +687 -0
  247. data/epeg/vendor/libjpeg-turbo-2.0.4/jdpostct.c +294 -0
  248. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.c +518 -0
  249. data/epeg/vendor/libjpeg-turbo-2.0.4/jdsample.h +50 -0
  250. data/epeg/vendor/libjpeg-turbo-2.0.4/jdtrans.c +155 -0
  251. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.c +251 -0
  252. data/epeg/vendor/libjpeg-turbo-2.0.4/jerror.h +316 -0
  253. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctflt.c +169 -0
  254. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctfst.c +227 -0
  255. data/epeg/vendor/libjpeg-turbo-2.0.4/jfdctint.c +288 -0
  256. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctflt.c +240 -0
  257. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctfst.c +371 -0
  258. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctint.c +2627 -0
  259. data/epeg/vendor/libjpeg-turbo-2.0.4/jidctred.c +409 -0
  260. data/epeg/vendor/libjpeg-turbo-2.0.4/jinclude.h +88 -0
  261. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemmgr.c +1179 -0
  262. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemnobs.c +115 -0
  263. data/epeg/vendor/libjpeg-turbo-2.0.4/jmemsys.h +178 -0
  264. data/epeg/vendor/libjpeg-turbo-2.0.4/jmorecfg.h +421 -0
  265. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeg_nbits_table.h +4098 -0
  266. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegcomp.h +31 -0
  267. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegint.h +368 -0
  268. data/epeg/vendor/libjpeg-turbo-2.0.4/jpeglib.h +1132 -0
  269. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.1 +295 -0
  270. data/epeg/vendor/libjpeg-turbo-2.0.4/jpegtran.c +601 -0
  271. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant1.c +859 -0
  272. data/epeg/vendor/libjpeg-turbo-2.0.4/jquant2.c +1285 -0
  273. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd.h +117 -0
  274. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimd_none.c +418 -0
  275. data/epeg/vendor/libjpeg-turbo-2.0.4/jsimddct.h +70 -0
  276. data/epeg/vendor/libjpeg-turbo-2.0.4/jstdhuff.c +143 -0
  277. data/epeg/vendor/libjpeg-turbo-2.0.4/jutils.c +133 -0
  278. data/epeg/vendor/libjpeg-turbo-2.0.4/jversion.h +52 -0
  279. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.map.in +11 -0
  280. data/epeg/vendor/libjpeg-turbo-2.0.4/libjpeg.txt +3144 -0
  281. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/CMakeLists.txt +1 -0
  282. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.c +275 -0
  283. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5.h +57 -0
  284. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5cmp.c +59 -0
  285. data/epeg/vendor/libjpeg-turbo-2.0.4/md5/md5hl.c +125 -0
  286. data/epeg/vendor/libjpeg-turbo-2.0.4/rdbmp.c +689 -0
  287. data/epeg/vendor/libjpeg-turbo-2.0.4/rdcolmap.c +254 -0
  288. data/epeg/vendor/libjpeg-turbo-2.0.4/rdgif.c +39 -0
  289. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.1 +63 -0
  290. data/epeg/vendor/libjpeg-turbo-2.0.4/rdjpgcom.c +510 -0
  291. data/epeg/vendor/libjpeg-turbo-2.0.4/rdppm.c +766 -0
  292. data/epeg/vendor/libjpeg-turbo-2.0.4/rdrle.c +389 -0
  293. data/epeg/vendor/libjpeg-turbo-2.0.4/rdswitch.c +424 -0
  294. data/epeg/vendor/libjpeg-turbo-2.0.4/rdtarga.c +509 -0
  295. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Distribution.xml.in +24 -0
  296. data/epeg/vendor/libjpeg-turbo-2.0.4/release/License.rtf +20 -0
  297. data/epeg/vendor/libjpeg-turbo-2.0.4/release/ReadMe.txt +5 -0
  298. data/epeg/vendor/libjpeg-turbo-2.0.4/release/Welcome.rtf +17 -0
  299. data/epeg/vendor/libjpeg-turbo-2.0.4/release/deb-control.in +31 -0
  300. data/epeg/vendor/libjpeg-turbo-2.0.4/release/installer.nsi.in +191 -0
  301. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libjpeg.pc.in +10 -0
  302. data/epeg/vendor/libjpeg-turbo-2.0.4/release/libturbojpeg.pc.in +10 -0
  303. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makecygwinpkg.in +66 -0
  304. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makedpkg.in +115 -0
  305. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makemacpkg.in +284 -0
  306. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makerpm.in +30 -0
  307. data/epeg/vendor/libjpeg-turbo-2.0.4/release/makesrpm.in +48 -0
  308. data/epeg/vendor/libjpeg-turbo-2.0.4/release/maketarball.in +51 -0
  309. data/epeg/vendor/libjpeg-turbo-2.0.4/release/rpm.spec.in +221 -0
  310. data/epeg/vendor/libjpeg-turbo-2.0.4/release/uninstall.in +113 -0
  311. data/epeg/vendor/libjpeg-turbo-2.0.4/sharedlib/CMakeLists.txt +99 -0
  312. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/CMakeLists.txt +385 -0
  313. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd.c +721 -0
  314. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm/jsimd_neon.S +2878 -0
  315. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd.c +798 -0
  316. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/arm64/jsimd_neon.S +3433 -0
  317. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/gas-preprocessor.in +1 -0
  318. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-avx2.asm +578 -0
  319. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-mmx.asm +476 -0
  320. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolext-sse2.asm +503 -0
  321. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-avx2.asm +121 -0
  322. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-mmx.asm +121 -0
  323. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jccolor-sse2.asm +120 -0
  324. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-avx2.asm +113 -0
  325. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-mmx.asm +113 -0
  326. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgray-sse2.asm +112 -0
  327. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-avx2.asm +457 -0
  328. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-mmx.asm +355 -0
  329. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcgryext-sse2.asm +382 -0
  330. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jchuff-sse2.asm +424 -0
  331. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcphuff-sse2.asm +660 -0
  332. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-avx2.asm +388 -0
  333. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-mmx.asm +324 -0
  334. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jcsample-sse2.asm +351 -0
  335. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-avx2.asm +515 -0
  336. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-mmx.asm +404 -0
  337. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolext-sse2.asm +458 -0
  338. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-avx2.asm +118 -0
  339. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-mmx.asm +117 -0
  340. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdcolor-sse2.asm +117 -0
  341. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-avx2.asm +136 -0
  342. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-mmx.asm +123 -0
  343. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmerge-sse2.asm +135 -0
  344. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-avx2.asm +575 -0
  345. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-mmx.asm +460 -0
  346. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdmrgext-sse2.asm +517 -0
  347. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-avx2.asm +760 -0
  348. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-mmx.asm +731 -0
  349. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jdsample-sse2.asm +724 -0
  350. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-3dn.asm +318 -0
  351. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctflt-sse.asm +369 -0
  352. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-mmx.asm +395 -0
  353. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctfst-sse2.asm +403 -0
  354. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-avx2.asm +331 -0
  355. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-mmx.asm +620 -0
  356. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jfdctint-sse2.asm +633 -0
  357. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-3dn.asm +451 -0
  358. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse.asm +571 -0
  359. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctflt-sse2.asm +497 -0
  360. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-mmx.asm +499 -0
  361. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctfst-sse2.asm +501 -0
  362. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-avx2.asm +453 -0
  363. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-mmx.asm +851 -0
  364. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctint-sse2.asm +858 -0
  365. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-mmx.asm +704 -0
  366. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jidctred-sse2.asm +592 -0
  367. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-3dn.asm +230 -0
  368. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-mmx.asm +276 -0
  369. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquant-sse.asm +208 -0
  370. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquantf-sse2.asm +168 -0
  371. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-avx2.asm +188 -0
  372. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jquanti-sse2.asm +201 -0
  373. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimd.c +1253 -0
  374. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/i386/jsimdcpu.asm +135 -0
  375. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/jsimd.h +1083 -0
  376. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolext-mmi.c +483 -0
  377. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jccolor-mmi.c +148 -0
  378. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample-mmi.c +100 -0
  379. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jcsample.h +28 -0
  380. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolext-mmi.c +424 -0
  381. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdcolor-mmi.c +139 -0
  382. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jdsample-mmi.c +245 -0
  383. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jfdctint-mmi.c +398 -0
  384. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jidctint-mmi.c +571 -0
  385. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jquanti-mmi.c +130 -0
  386. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd.c +610 -0
  387. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/jsimd_mmi.h +57 -0
  388. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/loongson/loongson-mmintrin.h +1324 -0
  389. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd.c +1123 -0
  390. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2.S +4479 -0
  391. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/mips/jsimd_dspr2_asm.h +292 -0
  392. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jcolsamp.inc +135 -0
  393. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jdct.inc +31 -0
  394. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jpeg_nbits_table.inc +4097 -0
  395. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc +93 -0
  396. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdcfg.inc.h +131 -0
  397. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/nasm/jsimdext.inc +479 -0
  398. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolext-altivec.c +269 -0
  399. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jccolor-altivec.c +116 -0
  400. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgray-altivec.c +111 -0
  401. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcgryext-altivec.c +228 -0
  402. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample-altivec.c +159 -0
  403. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jcsample.h +28 -0
  404. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolext-altivec.c +276 -0
  405. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdcolor-altivec.c +106 -0
  406. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmerge-altivec.c +130 -0
  407. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdmrgext-altivec.c +329 -0
  408. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jdsample-altivec.c +400 -0
  409. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctfst-altivec.c +154 -0
  410. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jfdctint-altivec.c +258 -0
  411. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctfst-altivec.c +255 -0
  412. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jidctint-altivec.c +357 -0
  413. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jquanti-altivec.c +250 -0
  414. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd.c +872 -0
  415. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/powerpc/jsimd_altivec.h +98 -0
  416. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-avx2.asm +558 -0
  417. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolext-sse2.asm +483 -0
  418. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-avx2.asm +121 -0
  419. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jccolor-sse2.asm +120 -0
  420. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-avx2.asm +113 -0
  421. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgray-sse2.asm +112 -0
  422. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-avx2.asm +437 -0
  423. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcgryext-sse2.asm +362 -0
  424. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jchuff-sse2.asm +346 -0
  425. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcphuff-sse2.asm +637 -0
  426. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-avx2.asm +366 -0
  427. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jcsample-sse2.asm +329 -0
  428. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-avx2.asm +495 -0
  429. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolext-sse2.asm +438 -0
  430. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-avx2.asm +118 -0
  431. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdcolor-sse2.asm +117 -0
  432. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-avx2.asm +136 -0
  433. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmerge-sse2.asm +135 -0
  434. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-avx2.asm +593 -0
  435. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdmrgext-sse2.asm +535 -0
  436. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-avx2.asm +695 -0
  437. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jdsample-sse2.asm +664 -0
  438. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctflt-sse.asm +355 -0
  439. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctfst-sse2.asm +389 -0
  440. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-avx2.asm +320 -0
  441. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jfdctint-sse2.asm +619 -0
  442. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctflt-sse2.asm +481 -0
  443. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctfst-sse2.asm +490 -0
  444. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-avx2.asm +417 -0
  445. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctint-sse2.asm +846 -0
  446. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jidctred-sse2.asm +573 -0
  447. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquantf-sse2.asm +154 -0
  448. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-avx2.asm +162 -0
  449. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jquanti-sse2.asm +187 -0
  450. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimd.c +1076 -0
  451. data/epeg/vendor/libjpeg-turbo-2.0.4/simd/x86_64/jsimdcpu.asm +86 -0
  452. data/epeg/vendor/libjpeg-turbo-2.0.4/structure.txt +904 -0
  453. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.bmp +0 -0
  454. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/nightshot_iso_100.txt +25 -0
  455. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test.scan +5 -0
  456. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc +0 -0
  457. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test1.icc.txt +20 -0
  458. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc +0 -0
  459. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/test2.icc.txt +20 -0
  460. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgari.jpg +0 -0
  461. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testimgint.jpg +0 -0
  462. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.jpg +0 -0
  463. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig.ppm +4 -0
  464. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/testorig12.jpg +0 -0
  465. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_5674_0098.bmp +0 -0
  466. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6434_0018a.bmp +0 -0
  467. data/epeg/vendor/libjpeg-turbo-2.0.4/testimages/vgl_6548_0026a.bmp +0 -0
  468. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbench.c +1031 -0
  469. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.in +256 -0
  470. data/epeg/vendor/libjpeg-turbo-2.0.4/tjbenchtest.java.in +215 -0
  471. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexample.c +396 -0
  472. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.in +149 -0
  473. data/epeg/vendor/libjpeg-turbo-2.0.4/tjexampletest.java.in +151 -0
  474. data/epeg/vendor/libjpeg-turbo-2.0.4/tjunittest.c +931 -0
  475. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.c +70 -0
  476. data/epeg/vendor/libjpeg-turbo-2.0.4/tjutil.h +47 -0
  477. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.c +1628 -0
  478. data/epeg/vendor/libjpeg-turbo-2.0.4/transupp.h +210 -0
  479. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-jni.c +1246 -0
  480. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile +65 -0
  481. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg-mapfile.jni +101 -0
  482. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.c +2152 -0
  483. data/epeg/vendor/libjpeg-turbo-2.0.4/turbojpeg.h +1744 -0
  484. data/epeg/vendor/libjpeg-turbo-2.0.4/usage.txt +635 -0
  485. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jconfig.h.in +34 -0
  486. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62-memsrcdst.def +108 -0
  487. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg62.def +106 -0
  488. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7-memsrcdst.def +110 -0
  489. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg7.def +108 -0
  490. data/epeg/vendor/libjpeg-turbo-2.0.4/win/jpeg8.def +111 -0
  491. data/epeg/vendor/libjpeg-turbo-2.0.4/wizard.txt +212 -0
  492. data/epeg/vendor/libjpeg-turbo-2.0.4/wrbmp.c +558 -0
  493. data/epeg/vendor/libjpeg-turbo-2.0.4/wrgif.c +413 -0
  494. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.1 +103 -0
  495. data/epeg/vendor/libjpeg-turbo-2.0.4/wrjpgcom.c +591 -0
  496. data/epeg/vendor/libjpeg-turbo-2.0.4/wrppm.c +365 -0
  497. data/epeg/vendor/libjpeg-turbo-2.0.4/wrrle.c +309 -0
  498. data/epeg/vendor/libjpeg-turbo-2.0.4/wrtarga.c +261 -0
  499. data/epeg.c +131 -0
  500. data/epeg.gemspec +18 -0
  501. data/extconf.rb +80 -0
  502. data/test.jpg +0 -0
  503. data/test.rb +42 -0
  504. metadata +546 -0
@@ -0,0 +1,2627 @@
1
+ /*
2
+ * jidctint.c
3
+ *
4
+ * This file was part of the Independent JPEG Group's software:
5
+ * Copyright (C) 1991-1998, Thomas G. Lane.
6
+ * Modification developed 2002-2009 by Guido Vollbeding.
7
+ * libjpeg-turbo Modifications:
8
+ * Copyright (C) 2015, D. R. Commander.
9
+ * For conditions of distribution and use, see the accompanying README.ijg
10
+ * file.
11
+ *
12
+ * This file contains a slow-but-accurate integer implementation of the
13
+ * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
14
+ * must also perform dequantization of the input coefficients.
15
+ *
16
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
17
+ * on each row (or vice versa, but it's more convenient to emit a row at
18
+ * a time). Direct algorithms are also available, but they are much more
19
+ * complex and seem not to be any faster when reduced to code.
20
+ *
21
+ * This implementation is based on an algorithm described in
22
+ * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
23
+ * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
24
+ * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
25
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
26
+ * We use their alternate method with 12 multiplies and 32 adds.
27
+ * The advantage of this method is that no data path contains more than one
28
+ * multiplication; this allows a very simple and accurate implementation in
29
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
30
+ *
31
+ * We also provide IDCT routines with various output sample block sizes for
32
+ * direct resolution reduction or enlargement without additional resampling:
33
+ * NxN (N=1...16) pixels for one 8x8 input DCT block.
34
+ *
35
+ * For N<8 we simply take the corresponding low-frequency coefficients of
36
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
37
+ * to yield the downscaled outputs.
38
+ * This can be seen as direct low-pass downsampling from the DCT domain
39
+ * point of view rather than the usual spatial domain point of view,
40
+ * yielding significant computational savings and results at least
41
+ * as good as common bilinear (averaging) spatial downsampling.
42
+ *
43
+ * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
44
+ * lower frequencies and higher frequencies assumed to be zero.
45
+ * It turns out that the computational effort is similar to the 8x8 IDCT
46
+ * regarding the output size.
47
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
48
+ *
49
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
50
+ * since there would be too many additional constants to pre-calculate.
51
+ */
52
+
53
+ #define JPEG_INTERNALS
54
+ #include "jinclude.h"
55
+ #include "jpeglib.h"
56
+ #include "jdct.h" /* Private declarations for DCT subsystem */
57
+
58
+ #ifdef DCT_ISLOW_SUPPORTED
59
+
60
+
61
+ /*
62
+ * This module is specialized to the case DCTSIZE = 8.
63
+ */
64
+
65
+ #if DCTSIZE != 8
66
+ Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
67
+ #endif
68
+
69
+
70
+ /*
71
+ * The poop on this scaling stuff is as follows:
72
+ *
73
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
74
+ * larger than the true IDCT outputs. The final outputs are therefore
75
+ * a factor of N larger than desired; since N=8 this can be cured by
76
+ * a simple right shift at the end of the algorithm. The advantage of
77
+ * this arrangement is that we save two multiplications per 1-D IDCT,
78
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
79
+ *
80
+ * We have to do addition and subtraction of the integer inputs, which
81
+ * is no problem, and multiplication by fractional constants, which is
82
+ * a problem to do in integer arithmetic. We multiply all the constants
83
+ * by CONST_SCALE and convert them to integer constants (thus retaining
84
+ * CONST_BITS bits of precision in the constants). After doing a
85
+ * multiplication we have to divide the product by CONST_SCALE, with proper
86
+ * rounding, to produce the correct output. This division can be done
87
+ * cheaply as a right shift of CONST_BITS bits. We postpone shifting
88
+ * as long as possible so that partial sums can be added together with
89
+ * full fractional precision.
90
+ *
91
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
92
+ * they are represented to better-than-integral precision. These outputs
93
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
94
+ * with the recommended scaling. (To scale up 12-bit sample data further, an
95
+ * intermediate JLONG array would be needed.)
96
+ *
97
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
98
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
99
+ * shows that the values given below are the most effective.
100
+ */
101
+
102
+ #if BITS_IN_JSAMPLE == 8
103
+ #define CONST_BITS 13
104
+ #define PASS1_BITS 2
105
+ #else
106
+ #define CONST_BITS 13
107
+ #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
108
+ #endif
109
+
110
+ /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
111
+ * causing a lot of useless floating-point operations at run time.
112
+ * To get around this we use the following pre-calculated constants.
113
+ * If you change CONST_BITS you may want to add appropriate values.
114
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
115
+ */
116
+
117
+ #if CONST_BITS == 13
118
+ #define FIX_0_298631336 ((JLONG)2446) /* FIX(0.298631336) */
119
+ #define FIX_0_390180644 ((JLONG)3196) /* FIX(0.390180644) */
120
+ #define FIX_0_541196100 ((JLONG)4433) /* FIX(0.541196100) */
121
+ #define FIX_0_765366865 ((JLONG)6270) /* FIX(0.765366865) */
122
+ #define FIX_0_899976223 ((JLONG)7373) /* FIX(0.899976223) */
123
+ #define FIX_1_175875602 ((JLONG)9633) /* FIX(1.175875602) */
124
+ #define FIX_1_501321110 ((JLONG)12299) /* FIX(1.501321110) */
125
+ #define FIX_1_847759065 ((JLONG)15137) /* FIX(1.847759065) */
126
+ #define FIX_1_961570560 ((JLONG)16069) /* FIX(1.961570560) */
127
+ #define FIX_2_053119869 ((JLONG)16819) /* FIX(2.053119869) */
128
+ #define FIX_2_562915447 ((JLONG)20995) /* FIX(2.562915447) */
129
+ #define FIX_3_072711026 ((JLONG)25172) /* FIX(3.072711026) */
130
+ #else
131
+ #define FIX_0_298631336 FIX(0.298631336)
132
+ #define FIX_0_390180644 FIX(0.390180644)
133
+ #define FIX_0_541196100 FIX(0.541196100)
134
+ #define FIX_0_765366865 FIX(0.765366865)
135
+ #define FIX_0_899976223 FIX(0.899976223)
136
+ #define FIX_1_175875602 FIX(1.175875602)
137
+ #define FIX_1_501321110 FIX(1.501321110)
138
+ #define FIX_1_847759065 FIX(1.847759065)
139
+ #define FIX_1_961570560 FIX(1.961570560)
140
+ #define FIX_2_053119869 FIX(2.053119869)
141
+ #define FIX_2_562915447 FIX(2.562915447)
142
+ #define FIX_3_072711026 FIX(3.072711026)
143
+ #endif
144
+
145
+
146
+ /* Multiply an JLONG variable by an JLONG constant to yield an JLONG result.
147
+ * For 8-bit samples with the recommended scaling, all the variable
148
+ * and constant values involved are no more than 16 bits wide, so a
149
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
150
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
151
+ */
152
+
153
+ #if BITS_IN_JSAMPLE == 8
154
+ #define MULTIPLY(var, const) MULTIPLY16C16(var, const)
155
+ #else
156
+ #define MULTIPLY(var, const) ((var) * (const))
157
+ #endif
158
+
159
+
160
+ /* Dequantize a coefficient by multiplying it by the multiplier-table
161
+ * entry; produce an int result. In this module, both inputs and result
162
+ * are 16 bits or less, so either int or short multiply will work.
163
+ */
164
+
165
+ #define DEQUANTIZE(coef, quantval) (((ISLOW_MULT_TYPE)(coef)) * (quantval))
166
+
167
+
168
+ /*
169
+ * Perform dequantization and inverse DCT on one block of coefficients.
170
+ */
171
+
172
+ GLOBAL(void)
173
+ jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
174
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
175
+ JDIMENSION output_col)
176
+ {
177
+ JLONG tmp0, tmp1, tmp2, tmp3;
178
+ JLONG tmp10, tmp11, tmp12, tmp13;
179
+ JLONG z1, z2, z3, z4, z5;
180
+ JCOEFPTR inptr;
181
+ ISLOW_MULT_TYPE *quantptr;
182
+ int *wsptr;
183
+ JSAMPROW outptr;
184
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
185
+ int ctr;
186
+ int workspace[DCTSIZE2]; /* buffers data between passes */
187
+ SHIFT_TEMPS
188
+
189
+ /* Pass 1: process columns from input, store into work array. */
190
+ /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
191
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
192
+
193
+ inptr = coef_block;
194
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
195
+ wsptr = workspace;
196
+ for (ctr = DCTSIZE; ctr > 0; ctr--) {
197
+ /* Due to quantization, we will usually find that many of the input
198
+ * coefficients are zero, especially the AC terms. We can exploit this
199
+ * by short-circuiting the IDCT calculation for any column in which all
200
+ * the AC terms are zero. In that case each output is equal to the
201
+ * DC coefficient (with scale factor as needed).
202
+ * With typical images and quantization tables, half or more of the
203
+ * column DCT calculations can be simplified this way.
204
+ */
205
+
206
+ if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
207
+ inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
208
+ inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
209
+ inptr[DCTSIZE * 7] == 0) {
210
+ /* AC terms all zero */
211
+ int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0],
212
+ quantptr[DCTSIZE * 0]), PASS1_BITS);
213
+
214
+ wsptr[DCTSIZE * 0] = dcval;
215
+ wsptr[DCTSIZE * 1] = dcval;
216
+ wsptr[DCTSIZE * 2] = dcval;
217
+ wsptr[DCTSIZE * 3] = dcval;
218
+ wsptr[DCTSIZE * 4] = dcval;
219
+ wsptr[DCTSIZE * 5] = dcval;
220
+ wsptr[DCTSIZE * 6] = dcval;
221
+ wsptr[DCTSIZE * 7] = dcval;
222
+
223
+ inptr++; /* advance pointers to next column */
224
+ quantptr++;
225
+ wsptr++;
226
+ continue;
227
+ }
228
+
229
+ /* Even part: reverse the even part of the forward DCT. */
230
+ /* The rotator is sqrt(2)*c(-6). */
231
+
232
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
233
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
234
+
235
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
236
+ tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
237
+ tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
238
+
239
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
240
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
241
+
242
+ tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS);
243
+ tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS);
244
+
245
+ tmp10 = tmp0 + tmp3;
246
+ tmp13 = tmp0 - tmp3;
247
+ tmp11 = tmp1 + tmp2;
248
+ tmp12 = tmp1 - tmp2;
249
+
250
+ /* Odd part per figure 8; the matrix is unitary and hence its
251
+ * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
252
+ */
253
+
254
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
255
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
256
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
257
+ tmp3 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
258
+
259
+ z1 = tmp0 + tmp3;
260
+ z2 = tmp1 + tmp2;
261
+ z3 = tmp0 + tmp2;
262
+ z4 = tmp1 + tmp3;
263
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
264
+
265
+ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
266
+ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
267
+ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
268
+ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
269
+ z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */
270
+ z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
271
+ z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
272
+ z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */
273
+
274
+ z3 += z5;
275
+ z4 += z5;
276
+
277
+ tmp0 += z1 + z3;
278
+ tmp1 += z2 + z4;
279
+ tmp2 += z2 + z3;
280
+ tmp3 += z1 + z4;
281
+
282
+ /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
283
+
284
+ wsptr[DCTSIZE * 0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS - PASS1_BITS);
285
+ wsptr[DCTSIZE * 7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS - PASS1_BITS);
286
+ wsptr[DCTSIZE * 1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS - PASS1_BITS);
287
+ wsptr[DCTSIZE * 6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS - PASS1_BITS);
288
+ wsptr[DCTSIZE * 2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS - PASS1_BITS);
289
+ wsptr[DCTSIZE * 5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS - PASS1_BITS);
290
+ wsptr[DCTSIZE * 3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS - PASS1_BITS);
291
+ wsptr[DCTSIZE * 4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS - PASS1_BITS);
292
+
293
+ inptr++; /* advance pointers to next column */
294
+ quantptr++;
295
+ wsptr++;
296
+ }
297
+
298
+ /* Pass 2: process rows from work array, store into output array. */
299
+ /* Note that we must descale the results by a factor of 8 == 2**3, */
300
+ /* and also undo the PASS1_BITS scaling. */
301
+
302
+ wsptr = workspace;
303
+ for (ctr = 0; ctr < DCTSIZE; ctr++) {
304
+ outptr = output_buf[ctr] + output_col;
305
+ /* Rows of zeroes can be exploited in the same way as we did with columns.
306
+ * However, the column calculation has created many nonzero AC terms, so
307
+ * the simplification applies less often (typically 5% to 10% of the time).
308
+ * On machines with very fast multiplication, it's possible that the
309
+ * test takes more time than it's worth. In that case this section
310
+ * may be commented out.
311
+ */
312
+
313
+ #ifndef NO_ZERO_ROW_TEST
314
+ if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
315
+ wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
316
+ /* AC terms all zero */
317
+ JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
318
+ PASS1_BITS + 3) & RANGE_MASK];
319
+
320
+ outptr[0] = dcval;
321
+ outptr[1] = dcval;
322
+ outptr[2] = dcval;
323
+ outptr[3] = dcval;
324
+ outptr[4] = dcval;
325
+ outptr[5] = dcval;
326
+ outptr[6] = dcval;
327
+ outptr[7] = dcval;
328
+
329
+ wsptr += DCTSIZE; /* advance pointer to next row */
330
+ continue;
331
+ }
332
+ #endif
333
+
334
+ /* Even part: reverse the even part of the forward DCT. */
335
+ /* The rotator is sqrt(2)*c(-6). */
336
+
337
+ z2 = (JLONG)wsptr[2];
338
+ z3 = (JLONG)wsptr[6];
339
+
340
+ z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
341
+ tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
342
+ tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
343
+
344
+ tmp0 = LEFT_SHIFT((JLONG)wsptr[0] + (JLONG)wsptr[4], CONST_BITS);
345
+ tmp1 = LEFT_SHIFT((JLONG)wsptr[0] - (JLONG)wsptr[4], CONST_BITS);
346
+
347
+ tmp10 = tmp0 + tmp3;
348
+ tmp13 = tmp0 - tmp3;
349
+ tmp11 = tmp1 + tmp2;
350
+ tmp12 = tmp1 - tmp2;
351
+
352
+ /* Odd part per figure 8; the matrix is unitary and hence its
353
+ * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
354
+ */
355
+
356
+ tmp0 = (JLONG)wsptr[7];
357
+ tmp1 = (JLONG)wsptr[5];
358
+ tmp2 = (JLONG)wsptr[3];
359
+ tmp3 = (JLONG)wsptr[1];
360
+
361
+ z1 = tmp0 + tmp3;
362
+ z2 = tmp1 + tmp2;
363
+ z3 = tmp0 + tmp2;
364
+ z4 = tmp1 + tmp3;
365
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
366
+
367
+ tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
368
+ tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
369
+ tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
370
+ tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
371
+ z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */
372
+ z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
373
+ z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
374
+ z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */
375
+
376
+ z3 += z5;
377
+ z4 += z5;
378
+
379
+ tmp0 += z1 + z3;
380
+ tmp1 += z2 + z4;
381
+ tmp2 += z2 + z3;
382
+ tmp3 += z1 + z4;
383
+
384
+ /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
385
+
386
+ outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp3,
387
+ CONST_BITS + PASS1_BITS + 3) &
388
+ RANGE_MASK];
389
+ outptr[7] = range_limit[(int)DESCALE(tmp10 - tmp3,
390
+ CONST_BITS + PASS1_BITS + 3) &
391
+ RANGE_MASK];
392
+ outptr[1] = range_limit[(int)DESCALE(tmp11 + tmp2,
393
+ CONST_BITS + PASS1_BITS + 3) &
394
+ RANGE_MASK];
395
+ outptr[6] = range_limit[(int)DESCALE(tmp11 - tmp2,
396
+ CONST_BITS + PASS1_BITS + 3) &
397
+ RANGE_MASK];
398
+ outptr[2] = range_limit[(int)DESCALE(tmp12 + tmp1,
399
+ CONST_BITS + PASS1_BITS + 3) &
400
+ RANGE_MASK];
401
+ outptr[5] = range_limit[(int)DESCALE(tmp12 - tmp1,
402
+ CONST_BITS + PASS1_BITS + 3) &
403
+ RANGE_MASK];
404
+ outptr[3] = range_limit[(int)DESCALE(tmp13 + tmp0,
405
+ CONST_BITS + PASS1_BITS + 3) &
406
+ RANGE_MASK];
407
+ outptr[4] = range_limit[(int)DESCALE(tmp13 - tmp0,
408
+ CONST_BITS + PASS1_BITS + 3) &
409
+ RANGE_MASK];
410
+
411
+ wsptr += DCTSIZE; /* advance pointer to next row */
412
+ }
413
+ }
414
+
415
+ #ifdef IDCT_SCALING_SUPPORTED
416
+
417
+
418
+ /*
419
+ * Perform dequantization and inverse DCT on one block of coefficients,
420
+ * producing a 7x7 output block.
421
+ *
422
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
423
+ * cK represents sqrt(2) * cos(K*pi/14).
424
+ */
425
+
426
+ GLOBAL(void)
427
+ jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
428
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
429
+ JDIMENSION output_col)
430
+ {
431
+ JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
432
+ JLONG z1, z2, z3;
433
+ JCOEFPTR inptr;
434
+ ISLOW_MULT_TYPE *quantptr;
435
+ int *wsptr;
436
+ JSAMPROW outptr;
437
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
438
+ int ctr;
439
+ int workspace[7 * 7]; /* buffers data between passes */
440
+ SHIFT_TEMPS
441
+
442
+ /* Pass 1: process columns from input, store into work array. */
443
+
444
+ inptr = coef_block;
445
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
446
+ wsptr = workspace;
447
+ for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
448
+ /* Even part */
449
+
450
+ tmp13 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
451
+ tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
452
+ /* Add fudge factor here for final descale. */
453
+ tmp13 += ONE << (CONST_BITS - PASS1_BITS - 1);
454
+
455
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
456
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
457
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
458
+
459
+ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
460
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
461
+ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
462
+ tmp0 = z1 + z3;
463
+ z2 -= tmp0;
464
+ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
465
+ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
466
+ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
467
+ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
468
+
469
+ /* Odd part */
470
+
471
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
472
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
473
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
474
+
475
+ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
476
+ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
477
+ tmp0 = tmp1 - tmp2;
478
+ tmp1 += tmp2;
479
+ tmp2 = MULTIPLY(z2 + z3, -FIX(1.378756276)); /* -c1 */
480
+ tmp1 += tmp2;
481
+ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
482
+ tmp0 += z2;
483
+ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
484
+
485
+ /* Final output stage */
486
+
487
+ wsptr[7 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
488
+ wsptr[7 * 6] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
489
+ wsptr[7 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS);
490
+ wsptr[7 * 5] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS);
491
+ wsptr[7 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS);
492
+ wsptr[7 * 4] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS);
493
+ wsptr[7 * 3] = (int)RIGHT_SHIFT(tmp13, CONST_BITS - PASS1_BITS);
494
+ }
495
+
496
+ /* Pass 2: process 7 rows from work array, store into output array. */
497
+
498
+ wsptr = workspace;
499
+ for (ctr = 0; ctr < 7; ctr++) {
500
+ outptr = output_buf[ctr] + output_col;
501
+
502
+ /* Even part */
503
+
504
+ /* Add fudge factor here for final descale. */
505
+ tmp13 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
506
+ tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
507
+
508
+ z1 = (JLONG)wsptr[2];
509
+ z2 = (JLONG)wsptr[4];
510
+ z3 = (JLONG)wsptr[6];
511
+
512
+ tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
513
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
514
+ tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
515
+ tmp0 = z1 + z3;
516
+ z2 -= tmp0;
517
+ tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
518
+ tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
519
+ tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
520
+ tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
521
+
522
+ /* Odd part */
523
+
524
+ z1 = (JLONG)wsptr[1];
525
+ z2 = (JLONG)wsptr[3];
526
+ z3 = (JLONG)wsptr[5];
527
+
528
+ tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
529
+ tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
530
+ tmp0 = tmp1 - tmp2;
531
+ tmp1 += tmp2;
532
+ tmp2 = MULTIPLY(z2 + z3, -FIX(1.378756276)); /* -c1 */
533
+ tmp1 += tmp2;
534
+ z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
535
+ tmp0 += z2;
536
+ tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
537
+
538
+ /* Final output stage */
539
+
540
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
541
+ CONST_BITS + PASS1_BITS + 3) &
542
+ RANGE_MASK];
543
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
544
+ CONST_BITS + PASS1_BITS + 3) &
545
+ RANGE_MASK];
546
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
547
+ CONST_BITS + PASS1_BITS + 3) &
548
+ RANGE_MASK];
549
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
550
+ CONST_BITS + PASS1_BITS + 3) &
551
+ RANGE_MASK];
552
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2,
553
+ CONST_BITS + PASS1_BITS + 3) &
554
+ RANGE_MASK];
555
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2,
556
+ CONST_BITS + PASS1_BITS + 3) &
557
+ RANGE_MASK];
558
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp13,
559
+ CONST_BITS + PASS1_BITS + 3) &
560
+ RANGE_MASK];
561
+
562
+ wsptr += 7; /* advance pointer to next row */
563
+ }
564
+ }
565
+
566
+
567
+ /*
568
+ * Perform dequantization and inverse DCT on one block of coefficients,
569
+ * producing a reduced-size 6x6 output block.
570
+ *
571
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
572
+ * cK represents sqrt(2) * cos(K*pi/12).
573
+ */
574
+
575
+ GLOBAL(void)
576
+ jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
577
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
578
+ JDIMENSION output_col)
579
+ {
580
+ JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
581
+ JLONG z1, z2, z3;
582
+ JCOEFPTR inptr;
583
+ ISLOW_MULT_TYPE *quantptr;
584
+ int *wsptr;
585
+ JSAMPROW outptr;
586
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
587
+ int ctr;
588
+ int workspace[6 * 6]; /* buffers data between passes */
589
+ SHIFT_TEMPS
590
+
591
+ /* Pass 1: process columns from input, store into work array. */
592
+
593
+ inptr = coef_block;
594
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
595
+ wsptr = workspace;
596
+ for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
597
+ /* Even part */
598
+
599
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
600
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
601
+ /* Add fudge factor here for final descale. */
602
+ tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
603
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
604
+ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
605
+ tmp1 = tmp0 + tmp10;
606
+ tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS - PASS1_BITS);
607
+ tmp10 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
608
+ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
609
+ tmp10 = tmp1 + tmp0;
610
+ tmp12 = tmp1 - tmp0;
611
+
612
+ /* Odd part */
613
+
614
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
615
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
616
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
617
+ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
618
+ tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
619
+ tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
620
+ tmp1 = LEFT_SHIFT(z1 - z2 - z3, PASS1_BITS);
621
+
622
+ /* Final output stage */
623
+
624
+ wsptr[6 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
625
+ wsptr[6 * 5] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
626
+ wsptr[6 * 1] = (int)(tmp11 + tmp1);
627
+ wsptr[6 * 4] = (int)(tmp11 - tmp1);
628
+ wsptr[6 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS);
629
+ wsptr[6 * 3] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS);
630
+ }
631
+
632
+ /* Pass 2: process 6 rows from work array, store into output array. */
633
+
634
+ wsptr = workspace;
635
+ for (ctr = 0; ctr < 6; ctr++) {
636
+ outptr = output_buf[ctr] + output_col;
637
+
638
+ /* Even part */
639
+
640
+ /* Add fudge factor here for final descale. */
641
+ tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
642
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
643
+ tmp2 = (JLONG)wsptr[4];
644
+ tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
645
+ tmp1 = tmp0 + tmp10;
646
+ tmp11 = tmp0 - tmp10 - tmp10;
647
+ tmp10 = (JLONG)wsptr[2];
648
+ tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
649
+ tmp10 = tmp1 + tmp0;
650
+ tmp12 = tmp1 - tmp0;
651
+
652
+ /* Odd part */
653
+
654
+ z1 = (JLONG)wsptr[1];
655
+ z2 = (JLONG)wsptr[3];
656
+ z3 = (JLONG)wsptr[5];
657
+ tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
658
+ tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
659
+ tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
660
+ tmp1 = LEFT_SHIFT(z1 - z2 - z3, CONST_BITS);
661
+
662
+ /* Final output stage */
663
+
664
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
665
+ CONST_BITS + PASS1_BITS + 3) &
666
+ RANGE_MASK];
667
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
668
+ CONST_BITS + PASS1_BITS + 3) &
669
+ RANGE_MASK];
670
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
671
+ CONST_BITS + PASS1_BITS + 3) &
672
+ RANGE_MASK];
673
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
674
+ CONST_BITS + PASS1_BITS + 3) &
675
+ RANGE_MASK];
676
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2,
677
+ CONST_BITS + PASS1_BITS + 3) &
678
+ RANGE_MASK];
679
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2,
680
+ CONST_BITS + PASS1_BITS + 3) &
681
+ RANGE_MASK];
682
+
683
+ wsptr += 6; /* advance pointer to next row */
684
+ }
685
+ }
686
+
687
+
688
+ /*
689
+ * Perform dequantization and inverse DCT on one block of coefficients,
690
+ * producing a reduced-size 5x5 output block.
691
+ *
692
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
693
+ * cK represents sqrt(2) * cos(K*pi/10).
694
+ */
695
+
696
+ GLOBAL(void)
697
+ jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
698
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
699
+ JDIMENSION output_col)
700
+ {
701
+ JLONG tmp0, tmp1, tmp10, tmp11, tmp12;
702
+ JLONG z1, z2, z3;
703
+ JCOEFPTR inptr;
704
+ ISLOW_MULT_TYPE *quantptr;
705
+ int *wsptr;
706
+ JSAMPROW outptr;
707
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
708
+ int ctr;
709
+ int workspace[5 * 5]; /* buffers data between passes */
710
+ SHIFT_TEMPS
711
+
712
+ /* Pass 1: process columns from input, store into work array. */
713
+
714
+ inptr = coef_block;
715
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
716
+ wsptr = workspace;
717
+ for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
718
+ /* Even part */
719
+
720
+ tmp12 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
721
+ tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
722
+ /* Add fudge factor here for final descale. */
723
+ tmp12 += ONE << (CONST_BITS - PASS1_BITS - 1);
724
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
725
+ tmp1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
726
+ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
727
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
728
+ z3 = tmp12 + z2;
729
+ tmp10 = z3 + z1;
730
+ tmp11 = z3 - z1;
731
+ tmp12 -= LEFT_SHIFT(z2, 2);
732
+
733
+ /* Odd part */
734
+
735
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
736
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
737
+
738
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
739
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
740
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
741
+
742
+ /* Final output stage */
743
+
744
+ wsptr[5 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
745
+ wsptr[5 * 4] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
746
+ wsptr[5 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS);
747
+ wsptr[5 * 3] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS);
748
+ wsptr[5 * 2] = (int)RIGHT_SHIFT(tmp12, CONST_BITS - PASS1_BITS);
749
+ }
750
+
751
+ /* Pass 2: process 5 rows from work array, store into output array. */
752
+
753
+ wsptr = workspace;
754
+ for (ctr = 0; ctr < 5; ctr++) {
755
+ outptr = output_buf[ctr] + output_col;
756
+
757
+ /* Even part */
758
+
759
+ /* Add fudge factor here for final descale. */
760
+ tmp12 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
761
+ tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
762
+ tmp0 = (JLONG)wsptr[2];
763
+ tmp1 = (JLONG)wsptr[4];
764
+ z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
765
+ z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
766
+ z3 = tmp12 + z2;
767
+ tmp10 = z3 + z1;
768
+ tmp11 = z3 - z1;
769
+ tmp12 -= LEFT_SHIFT(z2, 2);
770
+
771
+ /* Odd part */
772
+
773
+ z2 = (JLONG)wsptr[1];
774
+ z3 = (JLONG)wsptr[3];
775
+
776
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
777
+ tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
778
+ tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
779
+
780
+ /* Final output stage */
781
+
782
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
783
+ CONST_BITS + PASS1_BITS + 3) &
784
+ RANGE_MASK];
785
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
786
+ CONST_BITS + PASS1_BITS + 3) &
787
+ RANGE_MASK];
788
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
789
+ CONST_BITS + PASS1_BITS + 3) &
790
+ RANGE_MASK];
791
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
792
+ CONST_BITS + PASS1_BITS + 3) &
793
+ RANGE_MASK];
794
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12,
795
+ CONST_BITS + PASS1_BITS + 3) &
796
+ RANGE_MASK];
797
+
798
+ wsptr += 5; /* advance pointer to next row */
799
+ }
800
+ }
801
+
802
+
803
+ /*
804
+ * Perform dequantization and inverse DCT on one block of coefficients,
805
+ * producing a reduced-size 3x3 output block.
806
+ *
807
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
808
+ * cK represents sqrt(2) * cos(K*pi/6).
809
+ */
810
+
811
+ GLOBAL(void)
812
+ jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
813
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
814
+ JDIMENSION output_col)
815
+ {
816
+ JLONG tmp0, tmp2, tmp10, tmp12;
817
+ JCOEFPTR inptr;
818
+ ISLOW_MULT_TYPE *quantptr;
819
+ int *wsptr;
820
+ JSAMPROW outptr;
821
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
822
+ int ctr;
823
+ int workspace[3 * 3]; /* buffers data between passes */
824
+ SHIFT_TEMPS
825
+
826
+ /* Pass 1: process columns from input, store into work array. */
827
+
828
+ inptr = coef_block;
829
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
830
+ wsptr = workspace;
831
+ for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
832
+ /* Even part */
833
+
834
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
835
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
836
+ /* Add fudge factor here for final descale. */
837
+ tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
838
+ tmp2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
839
+ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
840
+ tmp10 = tmp0 + tmp12;
841
+ tmp2 = tmp0 - tmp12 - tmp12;
842
+
843
+ /* Odd part */
844
+
845
+ tmp12 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
846
+ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
847
+
848
+ /* Final output stage */
849
+
850
+ wsptr[3 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
851
+ wsptr[3 * 2] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
852
+ wsptr[3 * 1] = (int)RIGHT_SHIFT(tmp2, CONST_BITS - PASS1_BITS);
853
+ }
854
+
855
+ /* Pass 2: process 3 rows from work array, store into output array. */
856
+
857
+ wsptr = workspace;
858
+ for (ctr = 0; ctr < 3; ctr++) {
859
+ outptr = output_buf[ctr] + output_col;
860
+
861
+ /* Even part */
862
+
863
+ /* Add fudge factor here for final descale. */
864
+ tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
865
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
866
+ tmp2 = (JLONG)wsptr[2];
867
+ tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
868
+ tmp10 = tmp0 + tmp12;
869
+ tmp2 = tmp0 - tmp12 - tmp12;
870
+
871
+ /* Odd part */
872
+
873
+ tmp12 = (JLONG)wsptr[1];
874
+ tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
875
+
876
+ /* Final output stage */
877
+
878
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
879
+ CONST_BITS + PASS1_BITS + 3) &
880
+ RANGE_MASK];
881
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
882
+ CONST_BITS + PASS1_BITS + 3) &
883
+ RANGE_MASK];
884
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp2,
885
+ CONST_BITS + PASS1_BITS + 3) &
886
+ RANGE_MASK];
887
+
888
+ wsptr += 3; /* advance pointer to next row */
889
+ }
890
+ }
891
+
892
+
893
+ /*
894
+ * Perform dequantization and inverse DCT on one block of coefficients,
895
+ * producing a 9x9 output block.
896
+ *
897
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
898
+ * cK represents sqrt(2) * cos(K*pi/18).
899
+ */
900
+
901
+ GLOBAL(void)
902
+ jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
903
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
904
+ JDIMENSION output_col)
905
+ {
906
+ JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
907
+ JLONG z1, z2, z3, z4;
908
+ JCOEFPTR inptr;
909
+ ISLOW_MULT_TYPE *quantptr;
910
+ int *wsptr;
911
+ JSAMPROW outptr;
912
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
913
+ int ctr;
914
+ int workspace[8 * 9]; /* buffers data between passes */
915
+ SHIFT_TEMPS
916
+
917
+ /* Pass 1: process columns from input, store into work array. */
918
+
919
+ inptr = coef_block;
920
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
921
+ wsptr = workspace;
922
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
923
+ /* Even part */
924
+
925
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
926
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
927
+ /* Add fudge factor here for final descale. */
928
+ tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
929
+
930
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
931
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
932
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
933
+
934
+ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
935
+ tmp1 = tmp0 + tmp3;
936
+ tmp2 = tmp0 - tmp3 - tmp3;
937
+
938
+ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
939
+ tmp11 = tmp2 + tmp0;
940
+ tmp14 = tmp2 - tmp0 - tmp0;
941
+
942
+ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
943
+ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
944
+ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
945
+
946
+ tmp10 = tmp1 + tmp0 - tmp3;
947
+ tmp12 = tmp1 - tmp0 + tmp2;
948
+ tmp13 = tmp1 - tmp2 + tmp3;
949
+
950
+ /* Odd part */
951
+
952
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
953
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
954
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
955
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
956
+
957
+ z2 = MULTIPLY(z2, -FIX(1.224744871)); /* -c3 */
958
+
959
+ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
960
+ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
961
+ tmp0 = tmp2 + tmp3 - z2;
962
+ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
963
+ tmp2 += z2 - tmp1;
964
+ tmp3 += z2 + tmp1;
965
+ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
966
+
967
+ /* Final output stage */
968
+
969
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
970
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
971
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS);
972
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS);
973
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS);
974
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS);
975
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS - PASS1_BITS);
976
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS - PASS1_BITS);
977
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp14, CONST_BITS - PASS1_BITS);
978
+ }
979
+
980
+ /* Pass 2: process 9 rows from work array, store into output array. */
981
+
982
+ wsptr = workspace;
983
+ for (ctr = 0; ctr < 9; ctr++) {
984
+ outptr = output_buf[ctr] + output_col;
985
+
986
+ /* Even part */
987
+
988
+ /* Add fudge factor here for final descale. */
989
+ tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
990
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
991
+
992
+ z1 = (JLONG)wsptr[2];
993
+ z2 = (JLONG)wsptr[4];
994
+ z3 = (JLONG)wsptr[6];
995
+
996
+ tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
997
+ tmp1 = tmp0 + tmp3;
998
+ tmp2 = tmp0 - tmp3 - tmp3;
999
+
1000
+ tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1001
+ tmp11 = tmp2 + tmp0;
1002
+ tmp14 = tmp2 - tmp0 - tmp0;
1003
+
1004
+ tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1005
+ tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1006
+ tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1007
+
1008
+ tmp10 = tmp1 + tmp0 - tmp3;
1009
+ tmp12 = tmp1 - tmp0 + tmp2;
1010
+ tmp13 = tmp1 - tmp2 + tmp3;
1011
+
1012
+ /* Odd part */
1013
+
1014
+ z1 = (JLONG)wsptr[1];
1015
+ z2 = (JLONG)wsptr[3];
1016
+ z3 = (JLONG)wsptr[5];
1017
+ z4 = (JLONG)wsptr[7];
1018
+
1019
+ z2 = MULTIPLY(z2, -FIX(1.224744871)); /* -c3 */
1020
+
1021
+ tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1022
+ tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1023
+ tmp0 = tmp2 + tmp3 - z2;
1024
+ tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1025
+ tmp2 += z2 - tmp1;
1026
+ tmp3 += z2 + tmp1;
1027
+ tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1028
+
1029
+ /* Final output stage */
1030
+
1031
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
1032
+ CONST_BITS + PASS1_BITS + 3) &
1033
+ RANGE_MASK];
1034
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
1035
+ CONST_BITS + PASS1_BITS + 3) &
1036
+ RANGE_MASK];
1037
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
1038
+ CONST_BITS + PASS1_BITS + 3) &
1039
+ RANGE_MASK];
1040
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
1041
+ CONST_BITS + PASS1_BITS + 3) &
1042
+ RANGE_MASK];
1043
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2,
1044
+ CONST_BITS + PASS1_BITS + 3) &
1045
+ RANGE_MASK];
1046
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2,
1047
+ CONST_BITS + PASS1_BITS + 3) &
1048
+ RANGE_MASK];
1049
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp13 + tmp3,
1050
+ CONST_BITS + PASS1_BITS + 3) &
1051
+ RANGE_MASK];
1052
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp13 - tmp3,
1053
+ CONST_BITS + PASS1_BITS + 3) &
1054
+ RANGE_MASK];
1055
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp14,
1056
+ CONST_BITS + PASS1_BITS + 3) &
1057
+ RANGE_MASK];
1058
+
1059
+ wsptr += 8; /* advance pointer to next row */
1060
+ }
1061
+ }
1062
+
1063
+
1064
+ /*
1065
+ * Perform dequantization and inverse DCT on one block of coefficients,
1066
+ * producing a 10x10 output block.
1067
+ *
1068
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
1069
+ * cK represents sqrt(2) * cos(K*pi/20).
1070
+ */
1071
+
1072
+ GLOBAL(void)
1073
+ jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1074
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
1075
+ JDIMENSION output_col)
1076
+ {
1077
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
1078
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24;
1079
+ JLONG z1, z2, z3, z4, z5;
1080
+ JCOEFPTR inptr;
1081
+ ISLOW_MULT_TYPE *quantptr;
1082
+ int *wsptr;
1083
+ JSAMPROW outptr;
1084
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1085
+ int ctr;
1086
+ int workspace[8 * 10]; /* buffers data between passes */
1087
+ SHIFT_TEMPS
1088
+
1089
+ /* Pass 1: process columns from input, store into work array. */
1090
+
1091
+ inptr = coef_block;
1092
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
1093
+ wsptr = workspace;
1094
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1095
+ /* Even part */
1096
+
1097
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
1098
+ z3 = LEFT_SHIFT(z3, CONST_BITS);
1099
+ /* Add fudge factor here for final descale. */
1100
+ z3 += ONE << (CONST_BITS - PASS1_BITS - 1);
1101
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
1102
+ z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1103
+ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1104
+ tmp10 = z3 + z1;
1105
+ tmp11 = z3 - z2;
1106
+
1107
+ tmp22 = RIGHT_SHIFT(z3 - LEFT_SHIFT(z1 - z2, 1),
1108
+ CONST_BITS - PASS1_BITS); /* c0 = (c4-c8)*2 */
1109
+
1110
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
1111
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
1112
+
1113
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1114
+ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1115
+ tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1116
+
1117
+ tmp20 = tmp10 + tmp12;
1118
+ tmp24 = tmp10 - tmp12;
1119
+ tmp21 = tmp11 + tmp13;
1120
+ tmp23 = tmp11 - tmp13;
1121
+
1122
+ /* Odd part */
1123
+
1124
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
1125
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
1126
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
1127
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
1128
+
1129
+ tmp11 = z2 + z4;
1130
+ tmp13 = z2 - z4;
1131
+
1132
+ tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1133
+ z5 = LEFT_SHIFT(z3, CONST_BITS);
1134
+
1135
+ z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1136
+ z4 = z5 + tmp12;
1137
+
1138
+ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1139
+ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1140
+
1141
+ z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1142
+ z4 = z5 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1);
1143
+
1144
+ tmp12 = LEFT_SHIFT(z1 - tmp13 - z3, PASS1_BITS);
1145
+
1146
+ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1147
+ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1148
+
1149
+ /* Final output stage */
1150
+
1151
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
1152
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
1153
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
1154
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
1155
+ wsptr[8 * 2] = (int)(tmp22 + tmp12);
1156
+ wsptr[8 * 7] = (int)(tmp22 - tmp12);
1157
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
1158
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
1159
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
1160
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
1161
+ }
1162
+
1163
+ /* Pass 2: process 10 rows from work array, store into output array. */
1164
+
1165
+ wsptr = workspace;
1166
+ for (ctr = 0; ctr < 10; ctr++) {
1167
+ outptr = output_buf[ctr] + output_col;
1168
+
1169
+ /* Even part */
1170
+
1171
+ /* Add fudge factor here for final descale. */
1172
+ z3 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
1173
+ z3 = LEFT_SHIFT(z3, CONST_BITS);
1174
+ z4 = (JLONG)wsptr[4];
1175
+ z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1176
+ z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1177
+ tmp10 = z3 + z1;
1178
+ tmp11 = z3 - z2;
1179
+
1180
+ tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1); /* c0 = (c4-c8)*2 */
1181
+
1182
+ z2 = (JLONG)wsptr[2];
1183
+ z3 = (JLONG)wsptr[6];
1184
+
1185
+ z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1186
+ tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1187
+ tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1188
+
1189
+ tmp20 = tmp10 + tmp12;
1190
+ tmp24 = tmp10 - tmp12;
1191
+ tmp21 = tmp11 + tmp13;
1192
+ tmp23 = tmp11 - tmp13;
1193
+
1194
+ /* Odd part */
1195
+
1196
+ z1 = (JLONG)wsptr[1];
1197
+ z2 = (JLONG)wsptr[3];
1198
+ z3 = (JLONG)wsptr[5];
1199
+ z3 = LEFT_SHIFT(z3, CONST_BITS);
1200
+ z4 = (JLONG)wsptr[7];
1201
+
1202
+ tmp11 = z2 + z4;
1203
+ tmp13 = z2 - z4;
1204
+
1205
+ tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1206
+
1207
+ z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1208
+ z4 = z3 + tmp12;
1209
+
1210
+ tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1211
+ tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1212
+
1213
+ z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1214
+ z4 = z3 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1);
1215
+
1216
+ tmp12 = LEFT_SHIFT(z1 - tmp13, CONST_BITS) - z3;
1217
+
1218
+ tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1219
+ tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1220
+
1221
+ /* Final output stage */
1222
+
1223
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
1224
+ CONST_BITS + PASS1_BITS + 3) &
1225
+ RANGE_MASK];
1226
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
1227
+ CONST_BITS + PASS1_BITS + 3) &
1228
+ RANGE_MASK];
1229
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
1230
+ CONST_BITS + PASS1_BITS + 3) &
1231
+ RANGE_MASK];
1232
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
1233
+ CONST_BITS + PASS1_BITS + 3) &
1234
+ RANGE_MASK];
1235
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
1236
+ CONST_BITS + PASS1_BITS + 3) &
1237
+ RANGE_MASK];
1238
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
1239
+ CONST_BITS + PASS1_BITS + 3) &
1240
+ RANGE_MASK];
1241
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
1242
+ CONST_BITS + PASS1_BITS + 3) &
1243
+ RANGE_MASK];
1244
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
1245
+ CONST_BITS + PASS1_BITS + 3) &
1246
+ RANGE_MASK];
1247
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
1248
+ CONST_BITS + PASS1_BITS + 3) &
1249
+ RANGE_MASK];
1250
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
1251
+ CONST_BITS + PASS1_BITS + 3) &
1252
+ RANGE_MASK];
1253
+
1254
+ wsptr += 8; /* advance pointer to next row */
1255
+ }
1256
+ }
1257
+
1258
+
1259
+ /*
1260
+ * Perform dequantization and inverse DCT on one block of coefficients,
1261
+ * producing a 11x11 output block.
1262
+ *
1263
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
1264
+ * cK represents sqrt(2) * cos(K*pi/22).
1265
+ */
1266
+
1267
+ GLOBAL(void)
1268
+ jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1269
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
1270
+ JDIMENSION output_col)
1271
+ {
1272
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
1273
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1274
+ JLONG z1, z2, z3, z4;
1275
+ JCOEFPTR inptr;
1276
+ ISLOW_MULT_TYPE *quantptr;
1277
+ int *wsptr;
1278
+ JSAMPROW outptr;
1279
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1280
+ int ctr;
1281
+ int workspace[8 * 11]; /* buffers data between passes */
1282
+ SHIFT_TEMPS
1283
+
1284
+ /* Pass 1: process columns from input, store into work array. */
1285
+
1286
+ inptr = coef_block;
1287
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
1288
+ wsptr = workspace;
1289
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1290
+ /* Even part */
1291
+
1292
+ tmp10 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
1293
+ tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
1294
+ /* Add fudge factor here for final descale. */
1295
+ tmp10 += ONE << (CONST_BITS - PASS1_BITS - 1);
1296
+
1297
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
1298
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
1299
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
1300
+
1301
+ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1302
+ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1303
+ z4 = z1 + z3;
1304
+ tmp24 = MULTIPLY(z4, -FIX(1.155664402)); /* -(c2-c10) */
1305
+ z4 -= z2;
1306
+ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1307
+ tmp21 = tmp20 + tmp23 + tmp25 -
1308
+ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1309
+ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1310
+ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1311
+ tmp24 += tmp25;
1312
+ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1313
+ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1314
+ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1315
+ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1316
+
1317
+ /* Odd part */
1318
+
1319
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
1320
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
1321
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
1322
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
1323
+
1324
+ tmp11 = z1 + z2;
1325
+ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1326
+ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1327
+ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1328
+ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1329
+ tmp10 = tmp11 + tmp12 + tmp13 -
1330
+ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1331
+ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1332
+ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1333
+ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1334
+ z1 = MULTIPLY(z2 + z4, -FIX(1.798248910)); /* -(c1+c9) */
1335
+ tmp11 += z1;
1336
+ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1337
+ tmp14 += MULTIPLY(z2, -FIX(1.467221301)) + /* -(c5+c9) */
1338
+ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1339
+ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1340
+
1341
+ /* Final output stage */
1342
+
1343
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
1344
+ wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
1345
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
1346
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
1347
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
1348
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
1349
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
1350
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
1351
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
1352
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
1353
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25, CONST_BITS - PASS1_BITS);
1354
+ }
1355
+
1356
+ /* Pass 2: process 11 rows from work array, store into output array. */
1357
+
1358
+ wsptr = workspace;
1359
+ for (ctr = 0; ctr < 11; ctr++) {
1360
+ outptr = output_buf[ctr] + output_col;
1361
+
1362
+ /* Even part */
1363
+
1364
+ /* Add fudge factor here for final descale. */
1365
+ tmp10 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
1366
+ tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
1367
+
1368
+ z1 = (JLONG)wsptr[2];
1369
+ z2 = (JLONG)wsptr[4];
1370
+ z3 = (JLONG)wsptr[6];
1371
+
1372
+ tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1373
+ tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1374
+ z4 = z1 + z3;
1375
+ tmp24 = MULTIPLY(z4, -FIX(1.155664402)); /* -(c2-c10) */
1376
+ z4 -= z2;
1377
+ tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1378
+ tmp21 = tmp20 + tmp23 + tmp25 -
1379
+ MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1380
+ tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1381
+ tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1382
+ tmp24 += tmp25;
1383
+ tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1384
+ tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1385
+ MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1386
+ tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1387
+
1388
+ /* Odd part */
1389
+
1390
+ z1 = (JLONG)wsptr[1];
1391
+ z2 = (JLONG)wsptr[3];
1392
+ z3 = (JLONG)wsptr[5];
1393
+ z4 = (JLONG)wsptr[7];
1394
+
1395
+ tmp11 = z1 + z2;
1396
+ tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1397
+ tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1398
+ tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1399
+ tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1400
+ tmp10 = tmp11 + tmp12 + tmp13 -
1401
+ MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1402
+ z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1403
+ tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1404
+ tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1405
+ z1 = MULTIPLY(z2 + z4, -FIX(1.798248910)); /* -(c1+c9) */
1406
+ tmp11 += z1;
1407
+ tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1408
+ tmp14 += MULTIPLY(z2, -FIX(1.467221301)) + /* -(c5+c9) */
1409
+ MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1410
+ MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1411
+
1412
+ /* Final output stage */
1413
+
1414
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
1415
+ CONST_BITS + PASS1_BITS + 3) &
1416
+ RANGE_MASK];
1417
+ outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
1418
+ CONST_BITS + PASS1_BITS + 3) &
1419
+ RANGE_MASK];
1420
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
1421
+ CONST_BITS + PASS1_BITS + 3) &
1422
+ RANGE_MASK];
1423
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
1424
+ CONST_BITS + PASS1_BITS + 3) &
1425
+ RANGE_MASK];
1426
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
1427
+ CONST_BITS + PASS1_BITS + 3) &
1428
+ RANGE_MASK];
1429
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
1430
+ CONST_BITS + PASS1_BITS + 3) &
1431
+ RANGE_MASK];
1432
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
1433
+ CONST_BITS + PASS1_BITS + 3) &
1434
+ RANGE_MASK];
1435
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
1436
+ CONST_BITS + PASS1_BITS + 3) &
1437
+ RANGE_MASK];
1438
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
1439
+ CONST_BITS + PASS1_BITS + 3) &
1440
+ RANGE_MASK];
1441
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
1442
+ CONST_BITS + PASS1_BITS + 3) &
1443
+ RANGE_MASK];
1444
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25,
1445
+ CONST_BITS + PASS1_BITS + 3) &
1446
+ RANGE_MASK];
1447
+
1448
+ wsptr += 8; /* advance pointer to next row */
1449
+ }
1450
+ }
1451
+
1452
+
1453
+ /*
1454
+ * Perform dequantization and inverse DCT on one block of coefficients,
1455
+ * producing a 12x12 output block.
1456
+ *
1457
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
1458
+ * cK represents sqrt(2) * cos(K*pi/24).
1459
+ */
1460
+
1461
+ GLOBAL(void)
1462
+ jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1463
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
1464
+ JDIMENSION output_col)
1465
+ {
1466
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1467
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1468
+ JLONG z1, z2, z3, z4;
1469
+ JCOEFPTR inptr;
1470
+ ISLOW_MULT_TYPE *quantptr;
1471
+ int *wsptr;
1472
+ JSAMPROW outptr;
1473
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1474
+ int ctr;
1475
+ int workspace[8 * 12]; /* buffers data between passes */
1476
+ SHIFT_TEMPS
1477
+
1478
+ /* Pass 1: process columns from input, store into work array. */
1479
+
1480
+ inptr = coef_block;
1481
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
1482
+ wsptr = workspace;
1483
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1484
+ /* Even part */
1485
+
1486
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
1487
+ z3 = LEFT_SHIFT(z3, CONST_BITS);
1488
+ /* Add fudge factor here for final descale. */
1489
+ z3 += ONE << (CONST_BITS - PASS1_BITS - 1);
1490
+
1491
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
1492
+ z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1493
+
1494
+ tmp10 = z3 + z4;
1495
+ tmp11 = z3 - z4;
1496
+
1497
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
1498
+ z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1499
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
1500
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
1501
+ z2 = LEFT_SHIFT(z2, CONST_BITS);
1502
+
1503
+ tmp12 = z1 - z2;
1504
+
1505
+ tmp21 = z3 + tmp12;
1506
+ tmp24 = z3 - tmp12;
1507
+
1508
+ tmp12 = z4 + z2;
1509
+
1510
+ tmp20 = tmp10 + tmp12;
1511
+ tmp25 = tmp10 - tmp12;
1512
+
1513
+ tmp12 = z4 - z1 - z2;
1514
+
1515
+ tmp22 = tmp11 + tmp12;
1516
+ tmp23 = tmp11 - tmp12;
1517
+
1518
+ /* Odd part */
1519
+
1520
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
1521
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
1522
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
1523
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
1524
+
1525
+ tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1526
+ tmp14 = MULTIPLY(z2, -FIX_0_541196100); /* -c9 */
1527
+
1528
+ tmp10 = z1 + z3;
1529
+ tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1530
+ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1531
+ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1532
+ tmp13 = MULTIPLY(z3 + z4, -FIX(1.045510580)); /* -(c7+c11) */
1533
+ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1534
+ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1535
+ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1536
+ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1537
+
1538
+ z1 -= z4;
1539
+ z2 -= z3;
1540
+ z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1541
+ tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1542
+ tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1543
+
1544
+ /* Final output stage */
1545
+
1546
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
1547
+ wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
1548
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
1549
+ wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
1550
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
1551
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
1552
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
1553
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
1554
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
1555
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
1556
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
1557
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
1558
+ }
1559
+
1560
+ /* Pass 2: process 12 rows from work array, store into output array. */
1561
+
1562
+ wsptr = workspace;
1563
+ for (ctr = 0; ctr < 12; ctr++) {
1564
+ outptr = output_buf[ctr] + output_col;
1565
+
1566
+ /* Even part */
1567
+
1568
+ /* Add fudge factor here for final descale. */
1569
+ z3 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
1570
+ z3 = LEFT_SHIFT(z3, CONST_BITS);
1571
+
1572
+ z4 = (JLONG)wsptr[4];
1573
+ z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1574
+
1575
+ tmp10 = z3 + z4;
1576
+ tmp11 = z3 - z4;
1577
+
1578
+ z1 = (JLONG)wsptr[2];
1579
+ z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1580
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
1581
+ z2 = (JLONG)wsptr[6];
1582
+ z2 = LEFT_SHIFT(z2, CONST_BITS);
1583
+
1584
+ tmp12 = z1 - z2;
1585
+
1586
+ tmp21 = z3 + tmp12;
1587
+ tmp24 = z3 - tmp12;
1588
+
1589
+ tmp12 = z4 + z2;
1590
+
1591
+ tmp20 = tmp10 + tmp12;
1592
+ tmp25 = tmp10 - tmp12;
1593
+
1594
+ tmp12 = z4 - z1 - z2;
1595
+
1596
+ tmp22 = tmp11 + tmp12;
1597
+ tmp23 = tmp11 - tmp12;
1598
+
1599
+ /* Odd part */
1600
+
1601
+ z1 = (JLONG)wsptr[1];
1602
+ z2 = (JLONG)wsptr[3];
1603
+ z3 = (JLONG)wsptr[5];
1604
+ z4 = (JLONG)wsptr[7];
1605
+
1606
+ tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1607
+ tmp14 = MULTIPLY(z2, -FIX_0_541196100); /* -c9 */
1608
+
1609
+ tmp10 = z1 + z3;
1610
+ tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1611
+ tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1612
+ tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1613
+ tmp13 = MULTIPLY(z3 + z4, -FIX(1.045510580)); /* -(c7+c11) */
1614
+ tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1615
+ tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1616
+ tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1617
+ MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1618
+
1619
+ z1 -= z4;
1620
+ z2 -= z3;
1621
+ z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1622
+ tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1623
+ tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1624
+
1625
+ /* Final output stage */
1626
+
1627
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
1628
+ CONST_BITS + PASS1_BITS + 3) &
1629
+ RANGE_MASK];
1630
+ outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
1631
+ CONST_BITS + PASS1_BITS + 3) &
1632
+ RANGE_MASK];
1633
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
1634
+ CONST_BITS + PASS1_BITS + 3) &
1635
+ RANGE_MASK];
1636
+ outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
1637
+ CONST_BITS + PASS1_BITS + 3) &
1638
+ RANGE_MASK];
1639
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
1640
+ CONST_BITS + PASS1_BITS + 3) &
1641
+ RANGE_MASK];
1642
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
1643
+ CONST_BITS + PASS1_BITS + 3) &
1644
+ RANGE_MASK];
1645
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
1646
+ CONST_BITS + PASS1_BITS + 3) &
1647
+ RANGE_MASK];
1648
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
1649
+ CONST_BITS + PASS1_BITS + 3) &
1650
+ RANGE_MASK];
1651
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
1652
+ CONST_BITS + PASS1_BITS + 3) &
1653
+ RANGE_MASK];
1654
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
1655
+ CONST_BITS + PASS1_BITS + 3) &
1656
+ RANGE_MASK];
1657
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
1658
+ CONST_BITS + PASS1_BITS + 3) &
1659
+ RANGE_MASK];
1660
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
1661
+ CONST_BITS + PASS1_BITS + 3) &
1662
+ RANGE_MASK];
1663
+
1664
+ wsptr += 8; /* advance pointer to next row */
1665
+ }
1666
+ }
1667
+
1668
+
1669
+ /*
1670
+ * Perform dequantization and inverse DCT on one block of coefficients,
1671
+ * producing a 13x13 output block.
1672
+ *
1673
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
1674
+ * cK represents sqrt(2) * cos(K*pi/26).
1675
+ */
1676
+
1677
+ GLOBAL(void)
1678
+ jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1679
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
1680
+ JDIMENSION output_col)
1681
+ {
1682
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1683
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1684
+ JLONG z1, z2, z3, z4;
1685
+ JCOEFPTR inptr;
1686
+ ISLOW_MULT_TYPE *quantptr;
1687
+ int *wsptr;
1688
+ JSAMPROW outptr;
1689
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1690
+ int ctr;
1691
+ int workspace[8 * 13]; /* buffers data between passes */
1692
+ SHIFT_TEMPS
1693
+
1694
+ /* Pass 1: process columns from input, store into work array. */
1695
+
1696
+ inptr = coef_block;
1697
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
1698
+ wsptr = workspace;
1699
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1700
+ /* Even part */
1701
+
1702
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
1703
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
1704
+ /* Add fudge factor here for final descale. */
1705
+ z1 += ONE << (CONST_BITS - PASS1_BITS - 1);
1706
+
1707
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
1708
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
1709
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
1710
+
1711
+ tmp10 = z3 + z4;
1712
+ tmp11 = z3 - z4;
1713
+
1714
+ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1715
+ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1716
+
1717
+ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1718
+ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1719
+
1720
+ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1721
+ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1722
+
1723
+ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1724
+ tmp25 = MULTIPLY(z2, -FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1725
+
1726
+ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1727
+ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1728
+
1729
+ tmp23 = MULTIPLY(z2, -FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1730
+ tmp24 = MULTIPLY(z2, -FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1731
+
1732
+ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1733
+
1734
+ /* Odd part */
1735
+
1736
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
1737
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
1738
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
1739
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
1740
+
1741
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1742
+ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1743
+ tmp15 = z1 + z4;
1744
+ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1745
+ tmp10 = tmp11 + tmp12 + tmp13 -
1746
+ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1747
+ tmp14 = MULTIPLY(z2 + z3, -FIX(0.338443458)); /* -c11 */
1748
+ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1749
+ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1750
+ tmp14 = MULTIPLY(z2 + z4, -FIX(1.163874945)); /* -c5 */
1751
+ tmp11 += tmp14;
1752
+ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1753
+ tmp14 = MULTIPLY(z3 + z4, -FIX(0.657217813)); /* -c9 */
1754
+ tmp12 += tmp14;
1755
+ tmp13 += tmp14;
1756
+ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1757
+ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1758
+ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1759
+ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1760
+ tmp14 += z1;
1761
+ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1762
+ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1763
+
1764
+ /* Final output stage */
1765
+
1766
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
1767
+ wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
1768
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
1769
+ wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
1770
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
1771
+ wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
1772
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
1773
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
1774
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
1775
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
1776
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
1777
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
1778
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26, CONST_BITS - PASS1_BITS);
1779
+ }
1780
+
1781
+ /* Pass 2: process 13 rows from work array, store into output array. */
1782
+
1783
+ wsptr = workspace;
1784
+ for (ctr = 0; ctr < 13; ctr++) {
1785
+ outptr = output_buf[ctr] + output_col;
1786
+
1787
+ /* Even part */
1788
+
1789
+ /* Add fudge factor here for final descale. */
1790
+ z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
1791
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
1792
+
1793
+ z2 = (JLONG)wsptr[2];
1794
+ z3 = (JLONG)wsptr[4];
1795
+ z4 = (JLONG)wsptr[6];
1796
+
1797
+ tmp10 = z3 + z4;
1798
+ tmp11 = z3 - z4;
1799
+
1800
+ tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1801
+ tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1802
+
1803
+ tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1804
+ tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1805
+
1806
+ tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1807
+ tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1808
+
1809
+ tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1810
+ tmp25 = MULTIPLY(z2, -FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1811
+
1812
+ tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1813
+ tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1814
+
1815
+ tmp23 = MULTIPLY(z2, -FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1816
+ tmp24 = MULTIPLY(z2, -FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1817
+
1818
+ tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1819
+
1820
+ /* Odd part */
1821
+
1822
+ z1 = (JLONG)wsptr[1];
1823
+ z2 = (JLONG)wsptr[3];
1824
+ z3 = (JLONG)wsptr[5];
1825
+ z4 = (JLONG)wsptr[7];
1826
+
1827
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1828
+ tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1829
+ tmp15 = z1 + z4;
1830
+ tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1831
+ tmp10 = tmp11 + tmp12 + tmp13 -
1832
+ MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1833
+ tmp14 = MULTIPLY(z2 + z3, -FIX(0.338443458)); /* -c11 */
1834
+ tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1835
+ tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1836
+ tmp14 = MULTIPLY(z2 + z4, -FIX(1.163874945)); /* -c5 */
1837
+ tmp11 += tmp14;
1838
+ tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1839
+ tmp14 = MULTIPLY(z3 + z4, -FIX(0.657217813)); /* -c9 */
1840
+ tmp12 += tmp14;
1841
+ tmp13 += tmp14;
1842
+ tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1843
+ tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1844
+ MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1845
+ z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1846
+ tmp14 += z1;
1847
+ tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1848
+ MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1849
+
1850
+ /* Final output stage */
1851
+
1852
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
1853
+ CONST_BITS + PASS1_BITS + 3) &
1854
+ RANGE_MASK];
1855
+ outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
1856
+ CONST_BITS + PASS1_BITS + 3) &
1857
+ RANGE_MASK];
1858
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
1859
+ CONST_BITS + PASS1_BITS + 3) &
1860
+ RANGE_MASK];
1861
+ outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
1862
+ CONST_BITS + PASS1_BITS + 3) &
1863
+ RANGE_MASK];
1864
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
1865
+ CONST_BITS + PASS1_BITS + 3) &
1866
+ RANGE_MASK];
1867
+ outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
1868
+ CONST_BITS + PASS1_BITS + 3) &
1869
+ RANGE_MASK];
1870
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
1871
+ CONST_BITS + PASS1_BITS + 3) &
1872
+ RANGE_MASK];
1873
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
1874
+ CONST_BITS + PASS1_BITS + 3) &
1875
+ RANGE_MASK];
1876
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
1877
+ CONST_BITS + PASS1_BITS + 3) &
1878
+ RANGE_MASK];
1879
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
1880
+ CONST_BITS + PASS1_BITS + 3) &
1881
+ RANGE_MASK];
1882
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
1883
+ CONST_BITS + PASS1_BITS + 3) &
1884
+ RANGE_MASK];
1885
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
1886
+ CONST_BITS + PASS1_BITS + 3) &
1887
+ RANGE_MASK];
1888
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26,
1889
+ CONST_BITS + PASS1_BITS + 3) &
1890
+ RANGE_MASK];
1891
+
1892
+ wsptr += 8; /* advance pointer to next row */
1893
+ }
1894
+ }
1895
+
1896
+
1897
+ /*
1898
+ * Perform dequantization and inverse DCT on one block of coefficients,
1899
+ * producing a 14x14 output block.
1900
+ *
1901
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
1902
+ * cK represents sqrt(2) * cos(K*pi/28).
1903
+ */
1904
+
1905
+ GLOBAL(void)
1906
+ jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1907
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
1908
+ JDIMENSION output_col)
1909
+ {
1910
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1911
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1912
+ JLONG z1, z2, z3, z4;
1913
+ JCOEFPTR inptr;
1914
+ ISLOW_MULT_TYPE *quantptr;
1915
+ int *wsptr;
1916
+ JSAMPROW outptr;
1917
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1918
+ int ctr;
1919
+ int workspace[8 * 14]; /* buffers data between passes */
1920
+ SHIFT_TEMPS
1921
+
1922
+ /* Pass 1: process columns from input, store into work array. */
1923
+
1924
+ inptr = coef_block;
1925
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
1926
+ wsptr = workspace;
1927
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1928
+ /* Even part */
1929
+
1930
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
1931
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
1932
+ /* Add fudge factor here for final descale. */
1933
+ z1 += ONE << (CONST_BITS - PASS1_BITS - 1);
1934
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
1935
+ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
1936
+ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
1937
+ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
1938
+
1939
+ tmp10 = z1 + z2;
1940
+ tmp11 = z1 + z3;
1941
+ tmp12 = z1 - z4;
1942
+
1943
+ tmp23 = RIGHT_SHIFT(z1 - LEFT_SHIFT(z2 + z3 - z4, 1),
1944
+ CONST_BITS - PASS1_BITS); /* c0 = (c4+c12-c8)*2 */
1945
+
1946
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
1947
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
1948
+
1949
+ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
1950
+
1951
+ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
1952
+ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
1953
+ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
1954
+ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
1955
+
1956
+ tmp20 = tmp10 + tmp13;
1957
+ tmp26 = tmp10 - tmp13;
1958
+ tmp21 = tmp11 + tmp14;
1959
+ tmp25 = tmp11 - tmp14;
1960
+ tmp22 = tmp12 + tmp15;
1961
+ tmp24 = tmp12 - tmp15;
1962
+
1963
+ /* Odd part */
1964
+
1965
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
1966
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
1967
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
1968
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
1969
+ tmp13 = LEFT_SHIFT(z4, CONST_BITS);
1970
+
1971
+ tmp14 = z1 + z3;
1972
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
1973
+ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
1974
+ tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
1975
+ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
1976
+ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
1977
+ z1 -= z2;
1978
+ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
1979
+ tmp16 += tmp15;
1980
+ z1 += z4;
1981
+ z4 = MULTIPLY(z2 + z3, -FIX(0.158341681)) - tmp13; /* -c13 */
1982
+ tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
1983
+ tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
1984
+ z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
1985
+ tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
1986
+ tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
1987
+
1988
+ tmp13 = LEFT_SHIFT(z1 - z3, PASS1_BITS);
1989
+
1990
+ /* Final output stage */
1991
+
1992
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
1993
+ wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
1994
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
1995
+ wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
1996
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
1997
+ wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
1998
+ wsptr[8 * 3] = (int)(tmp23 + tmp13);
1999
+ wsptr[8 * 10] = (int)(tmp23 - tmp13);
2000
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
2001
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
2002
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
2003
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
2004
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS - PASS1_BITS);
2005
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS - PASS1_BITS);
2006
+ }
2007
+
2008
+ /* Pass 2: process 14 rows from work array, store into output array. */
2009
+
2010
+ wsptr = workspace;
2011
+ for (ctr = 0; ctr < 14; ctr++) {
2012
+ outptr = output_buf[ctr] + output_col;
2013
+
2014
+ /* Even part */
2015
+
2016
+ /* Add fudge factor here for final descale. */
2017
+ z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
2018
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
2019
+ z4 = (JLONG)wsptr[4];
2020
+ z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2021
+ z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2022
+ z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2023
+
2024
+ tmp10 = z1 + z2;
2025
+ tmp11 = z1 + z3;
2026
+ tmp12 = z1 - z4;
2027
+
2028
+ tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1); /* c0 = (c4+c12-c8)*2 */
2029
+
2030
+ z1 = (JLONG)wsptr[2];
2031
+ z2 = (JLONG)wsptr[6];
2032
+
2033
+ z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2034
+
2035
+ tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2036
+ tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2037
+ tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2038
+ MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2039
+
2040
+ tmp20 = tmp10 + tmp13;
2041
+ tmp26 = tmp10 - tmp13;
2042
+ tmp21 = tmp11 + tmp14;
2043
+ tmp25 = tmp11 - tmp14;
2044
+ tmp22 = tmp12 + tmp15;
2045
+ tmp24 = tmp12 - tmp15;
2046
+
2047
+ /* Odd part */
2048
+
2049
+ z1 = (JLONG)wsptr[1];
2050
+ z2 = (JLONG)wsptr[3];
2051
+ z3 = (JLONG)wsptr[5];
2052
+ z4 = (JLONG)wsptr[7];
2053
+ z4 = LEFT_SHIFT(z4, CONST_BITS);
2054
+
2055
+ tmp14 = z1 + z3;
2056
+ tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2057
+ tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2058
+ tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2059
+ tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2060
+ tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2061
+ z1 -= z2;
2062
+ tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2063
+ tmp16 += tmp15;
2064
+ tmp13 = MULTIPLY(z2 + z3, -FIX(0.158341681)) - z4; /* -c13 */
2065
+ tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2066
+ tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2067
+ tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2068
+ tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2069
+ tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2070
+
2071
+ tmp13 = LEFT_SHIFT(z1 - z3, CONST_BITS) + z4;
2072
+
2073
+ /* Final output stage */
2074
+
2075
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
2076
+ CONST_BITS + PASS1_BITS + 3) &
2077
+ RANGE_MASK];
2078
+ outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
2079
+ CONST_BITS + PASS1_BITS + 3) &
2080
+ RANGE_MASK];
2081
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
2082
+ CONST_BITS + PASS1_BITS + 3) &
2083
+ RANGE_MASK];
2084
+ outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
2085
+ CONST_BITS + PASS1_BITS + 3) &
2086
+ RANGE_MASK];
2087
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
2088
+ CONST_BITS + PASS1_BITS + 3) &
2089
+ RANGE_MASK];
2090
+ outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
2091
+ CONST_BITS + PASS1_BITS + 3) &
2092
+ RANGE_MASK];
2093
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
2094
+ CONST_BITS + PASS1_BITS + 3) &
2095
+ RANGE_MASK];
2096
+ outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
2097
+ CONST_BITS + PASS1_BITS + 3) &
2098
+ RANGE_MASK];
2099
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
2100
+ CONST_BITS + PASS1_BITS + 3) &
2101
+ RANGE_MASK];
2102
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
2103
+ CONST_BITS + PASS1_BITS + 3) &
2104
+ RANGE_MASK];
2105
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
2106
+ CONST_BITS + PASS1_BITS + 3) &
2107
+ RANGE_MASK];
2108
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
2109
+ CONST_BITS + PASS1_BITS + 3) &
2110
+ RANGE_MASK];
2111
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp16,
2112
+ CONST_BITS + PASS1_BITS + 3) &
2113
+ RANGE_MASK];
2114
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp16,
2115
+ CONST_BITS + PASS1_BITS + 3) &
2116
+ RANGE_MASK];
2117
+
2118
+ wsptr += 8; /* advance pointer to next row */
2119
+ }
2120
+ }
2121
+
2122
+
2123
+ /*
2124
+ * Perform dequantization and inverse DCT on one block of coefficients,
2125
+ * producing a 15x15 output block.
2126
+ *
2127
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
2128
+ * cK represents sqrt(2) * cos(K*pi/30).
2129
+ */
2130
+
2131
+ GLOBAL(void)
2132
+ jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
2133
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
2134
+ JDIMENSION output_col)
2135
+ {
2136
+ JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2137
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2138
+ JLONG z1, z2, z3, z4;
2139
+ JCOEFPTR inptr;
2140
+ ISLOW_MULT_TYPE *quantptr;
2141
+ int *wsptr;
2142
+ JSAMPROW outptr;
2143
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2144
+ int ctr;
2145
+ int workspace[8 * 15]; /* buffers data between passes */
2146
+ SHIFT_TEMPS
2147
+
2148
+ /* Pass 1: process columns from input, store into work array. */
2149
+
2150
+ inptr = coef_block;
2151
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
2152
+ wsptr = workspace;
2153
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2154
+ /* Even part */
2155
+
2156
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
2157
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
2158
+ /* Add fudge factor here for final descale. */
2159
+ z1 += ONE << (CONST_BITS - PASS1_BITS - 1);
2160
+
2161
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
2162
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
2163
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
2164
+
2165
+ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2166
+ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2167
+
2168
+ tmp12 = z1 - tmp10;
2169
+ tmp13 = z1 + tmp11;
2170
+ z1 -= LEFT_SHIFT(tmp11 - tmp10, 1); /* c0 = (c6-c12)*2 */
2171
+
2172
+ z4 = z2 - z3;
2173
+ z3 += z2;
2174
+ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2175
+ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2176
+ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2177
+
2178
+ tmp20 = tmp13 + tmp10 + tmp11;
2179
+ tmp23 = tmp12 - tmp10 + tmp11 + z2;
2180
+
2181
+ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2182
+ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2183
+
2184
+ tmp25 = tmp13 - tmp10 - tmp11;
2185
+ tmp26 = tmp12 + tmp10 - tmp11 - z2;
2186
+
2187
+ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2188
+ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2189
+
2190
+ tmp21 = tmp12 + tmp10 + tmp11;
2191
+ tmp24 = tmp13 - tmp10 + tmp11;
2192
+ tmp11 += tmp11;
2193
+ tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2194
+ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2195
+
2196
+ /* Odd part */
2197
+
2198
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
2199
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
2200
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
2201
+ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2202
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
2203
+
2204
+ tmp13 = z2 - z4;
2205
+ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2206
+ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2207
+ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2208
+
2209
+ tmp13 = MULTIPLY(z2, -FIX(0.831253876)); /* -c9 */
2210
+ tmp15 = MULTIPLY(z2, -FIX(1.344997024)); /* -c3 */
2211
+ z2 = z1 - z4;
2212
+ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2213
+
2214
+ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2215
+ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2216
+ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2217
+ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2218
+ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2219
+ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2220
+
2221
+ /* Final output stage */
2222
+
2223
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
2224
+ wsptr[8 * 14] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
2225
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
2226
+ wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
2227
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
2228
+ wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
2229
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
2230
+ wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
2231
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
2232
+ wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
2233
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
2234
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
2235
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS - PASS1_BITS);
2236
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS - PASS1_BITS);
2237
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp27, CONST_BITS - PASS1_BITS);
2238
+ }
2239
+
2240
+ /* Pass 2: process 15 rows from work array, store into output array. */
2241
+
2242
+ wsptr = workspace;
2243
+ for (ctr = 0; ctr < 15; ctr++) {
2244
+ outptr = output_buf[ctr] + output_col;
2245
+
2246
+ /* Even part */
2247
+
2248
+ /* Add fudge factor here for final descale. */
2249
+ z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
2250
+ z1 = LEFT_SHIFT(z1, CONST_BITS);
2251
+
2252
+ z2 = (JLONG)wsptr[2];
2253
+ z3 = (JLONG)wsptr[4];
2254
+ z4 = (JLONG)wsptr[6];
2255
+
2256
+ tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2257
+ tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2258
+
2259
+ tmp12 = z1 - tmp10;
2260
+ tmp13 = z1 + tmp11;
2261
+ z1 -= LEFT_SHIFT(tmp11 - tmp10, 1); /* c0 = (c6-c12)*2 */
2262
+
2263
+ z4 = z2 - z3;
2264
+ z3 += z2;
2265
+ tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2266
+ tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2267
+ z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2268
+
2269
+ tmp20 = tmp13 + tmp10 + tmp11;
2270
+ tmp23 = tmp12 - tmp10 + tmp11 + z2;
2271
+
2272
+ tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2273
+ tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2274
+
2275
+ tmp25 = tmp13 - tmp10 - tmp11;
2276
+ tmp26 = tmp12 + tmp10 - tmp11 - z2;
2277
+
2278
+ tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2279
+ tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2280
+
2281
+ tmp21 = tmp12 + tmp10 + tmp11;
2282
+ tmp24 = tmp13 - tmp10 + tmp11;
2283
+ tmp11 += tmp11;
2284
+ tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2285
+ tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2286
+
2287
+ /* Odd part */
2288
+
2289
+ z1 = (JLONG)wsptr[1];
2290
+ z2 = (JLONG)wsptr[3];
2291
+ z4 = (JLONG)wsptr[5];
2292
+ z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2293
+ z4 = (JLONG)wsptr[7];
2294
+
2295
+ tmp13 = z2 - z4;
2296
+ tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2297
+ tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2298
+ tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2299
+
2300
+ tmp13 = MULTIPLY(z2, -FIX(0.831253876)); /* -c9 */
2301
+ tmp15 = MULTIPLY(z2, -FIX(1.344997024)); /* -c3 */
2302
+ z2 = z1 - z4;
2303
+ tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2304
+
2305
+ tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2306
+ tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2307
+ tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2308
+ z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2309
+ tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2310
+ tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2311
+
2312
+ /* Final output stage */
2313
+
2314
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
2315
+ CONST_BITS + PASS1_BITS + 3) &
2316
+ RANGE_MASK];
2317
+ outptr[14] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
2318
+ CONST_BITS + PASS1_BITS + 3) &
2319
+ RANGE_MASK];
2320
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
2321
+ CONST_BITS + PASS1_BITS + 3) &
2322
+ RANGE_MASK];
2323
+ outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
2324
+ CONST_BITS + PASS1_BITS + 3) &
2325
+ RANGE_MASK];
2326
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
2327
+ CONST_BITS + PASS1_BITS + 3) &
2328
+ RANGE_MASK];
2329
+ outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
2330
+ CONST_BITS + PASS1_BITS + 3) &
2331
+ RANGE_MASK];
2332
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
2333
+ CONST_BITS + PASS1_BITS + 3) &
2334
+ RANGE_MASK];
2335
+ outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
2336
+ CONST_BITS + PASS1_BITS + 3) &
2337
+ RANGE_MASK];
2338
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
2339
+ CONST_BITS + PASS1_BITS + 3) &
2340
+ RANGE_MASK];
2341
+ outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
2342
+ CONST_BITS + PASS1_BITS + 3) &
2343
+ RANGE_MASK];
2344
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
2345
+ CONST_BITS + PASS1_BITS + 3) &
2346
+ RANGE_MASK];
2347
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
2348
+ CONST_BITS + PASS1_BITS + 3) &
2349
+ RANGE_MASK];
2350
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp16,
2351
+ CONST_BITS + PASS1_BITS + 3) &
2352
+ RANGE_MASK];
2353
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp16,
2354
+ CONST_BITS + PASS1_BITS + 3) &
2355
+ RANGE_MASK];
2356
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp27,
2357
+ CONST_BITS + PASS1_BITS + 3) &
2358
+ RANGE_MASK];
2359
+
2360
+ wsptr += 8; /* advance pointer to next row */
2361
+ }
2362
+ }
2363
+
2364
+
2365
+ /*
2366
+ * Perform dequantization and inverse DCT on one block of coefficients,
2367
+ * producing a 16x16 output block.
2368
+ *
2369
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
2370
+ * cK represents sqrt(2) * cos(K*pi/32).
2371
+ */
2372
+
2373
+ GLOBAL(void)
2374
+ jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
2375
+ JCOEFPTR coef_block, JSAMPARRAY output_buf,
2376
+ JDIMENSION output_col)
2377
+ {
2378
+ JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2379
+ JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2380
+ JLONG z1, z2, z3, z4;
2381
+ JCOEFPTR inptr;
2382
+ ISLOW_MULT_TYPE *quantptr;
2383
+ int *wsptr;
2384
+ JSAMPROW outptr;
2385
+ JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2386
+ int ctr;
2387
+ int workspace[8 * 16]; /* buffers data between passes */
2388
+ SHIFT_TEMPS
2389
+
2390
+ /* Pass 1: process columns from input, store into work array. */
2391
+
2392
+ inptr = coef_block;
2393
+ quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
2394
+ wsptr = workspace;
2395
+ for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2396
+ /* Even part */
2397
+
2398
+ tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
2399
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
2400
+ /* Add fudge factor here for final descale. */
2401
+ tmp0 += 1 << (CONST_BITS - PASS1_BITS - 1);
2402
+
2403
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
2404
+ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2405
+ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2406
+
2407
+ tmp10 = tmp0 + tmp1;
2408
+ tmp11 = tmp0 - tmp1;
2409
+ tmp12 = tmp0 + tmp2;
2410
+ tmp13 = tmp0 - tmp2;
2411
+
2412
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
2413
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
2414
+ z3 = z1 - z2;
2415
+ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2416
+ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2417
+
2418
+ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2419
+ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2420
+ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2421
+ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2422
+
2423
+ tmp20 = tmp10 + tmp0;
2424
+ tmp27 = tmp10 - tmp0;
2425
+ tmp21 = tmp12 + tmp1;
2426
+ tmp26 = tmp12 - tmp1;
2427
+ tmp22 = tmp13 + tmp2;
2428
+ tmp25 = tmp13 - tmp2;
2429
+ tmp23 = tmp11 + tmp3;
2430
+ tmp24 = tmp11 - tmp3;
2431
+
2432
+ /* Odd part */
2433
+
2434
+ z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
2435
+ z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
2436
+ z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
2437
+ z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
2438
+
2439
+ tmp11 = z1 + z3;
2440
+
2441
+ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2442
+ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2443
+ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2444
+ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2445
+ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2446
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2447
+ tmp0 = tmp1 + tmp2 + tmp3 -
2448
+ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2449
+ tmp13 = tmp10 + tmp11 + tmp12 -
2450
+ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2451
+ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2452
+ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2453
+ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2454
+ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2455
+ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2456
+ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2457
+ z2 += z4;
2458
+ z1 = MULTIPLY(z2, -FIX(0.666655658)); /* -c11 */
2459
+ tmp1 += z1;
2460
+ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2461
+ z2 = MULTIPLY(z2, -FIX(1.247225013)); /* -c5 */
2462
+ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2463
+ tmp12 += z2;
2464
+ z2 = MULTIPLY(z3 + z4, -FIX(1.353318001)); /* -c3 */
2465
+ tmp2 += z2;
2466
+ tmp3 += z2;
2467
+ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2468
+ tmp10 += z2;
2469
+ tmp11 += z2;
2470
+
2471
+ /* Final output stage */
2472
+
2473
+ wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS - PASS1_BITS);
2474
+ wsptr[8 * 15] = (int)RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS - PASS1_BITS);
2475
+ wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS - PASS1_BITS);
2476
+ wsptr[8 * 14] = (int)RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS - PASS1_BITS);
2477
+ wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS - PASS1_BITS);
2478
+ wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS - PASS1_BITS);
2479
+ wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS - PASS1_BITS);
2480
+ wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS - PASS1_BITS);
2481
+ wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS - PASS1_BITS);
2482
+ wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS - PASS1_BITS);
2483
+ wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS - PASS1_BITS);
2484
+ wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS - PASS1_BITS);
2485
+ wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS - PASS1_BITS);
2486
+ wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS - PASS1_BITS);
2487
+ wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS - PASS1_BITS);
2488
+ wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS - PASS1_BITS);
2489
+ }
2490
+
2491
+ /* Pass 2: process 16 rows from work array, store into output array. */
2492
+
2493
+ wsptr = workspace;
2494
+ for (ctr = 0; ctr < 16; ctr++) {
2495
+ outptr = output_buf[ctr] + output_col;
2496
+
2497
+ /* Even part */
2498
+
2499
+ /* Add fudge factor here for final descale. */
2500
+ tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
2501
+ tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
2502
+
2503
+ z1 = (JLONG)wsptr[4];
2504
+ tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2505
+ tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2506
+
2507
+ tmp10 = tmp0 + tmp1;
2508
+ tmp11 = tmp0 - tmp1;
2509
+ tmp12 = tmp0 + tmp2;
2510
+ tmp13 = tmp0 - tmp2;
2511
+
2512
+ z1 = (JLONG)wsptr[2];
2513
+ z2 = (JLONG)wsptr[6];
2514
+ z3 = z1 - z2;
2515
+ z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2516
+ z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2517
+
2518
+ tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2519
+ tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2520
+ tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2521
+ tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2522
+
2523
+ tmp20 = tmp10 + tmp0;
2524
+ tmp27 = tmp10 - tmp0;
2525
+ tmp21 = tmp12 + tmp1;
2526
+ tmp26 = tmp12 - tmp1;
2527
+ tmp22 = tmp13 + tmp2;
2528
+ tmp25 = tmp13 - tmp2;
2529
+ tmp23 = tmp11 + tmp3;
2530
+ tmp24 = tmp11 - tmp3;
2531
+
2532
+ /* Odd part */
2533
+
2534
+ z1 = (JLONG)wsptr[1];
2535
+ z2 = (JLONG)wsptr[3];
2536
+ z3 = (JLONG)wsptr[5];
2537
+ z4 = (JLONG)wsptr[7];
2538
+
2539
+ tmp11 = z1 + z3;
2540
+
2541
+ tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2542
+ tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2543
+ tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2544
+ tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2545
+ tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2546
+ tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2547
+ tmp0 = tmp1 + tmp2 + tmp3 -
2548
+ MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2549
+ tmp13 = tmp10 + tmp11 + tmp12 -
2550
+ MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2551
+ z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2552
+ tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2553
+ tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2554
+ z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2555
+ tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2556
+ tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2557
+ z2 += z4;
2558
+ z1 = MULTIPLY(z2, -FIX(0.666655658)); /* -c11 */
2559
+ tmp1 += z1;
2560
+ tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2561
+ z2 = MULTIPLY(z2, -FIX(1.247225013)); /* -c5 */
2562
+ tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2563
+ tmp12 += z2;
2564
+ z2 = MULTIPLY(z3 + z4, -FIX(1.353318001)); /* -c3 */
2565
+ tmp2 += z2;
2566
+ tmp3 += z2;
2567
+ z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2568
+ tmp10 += z2;
2569
+ tmp11 += z2;
2570
+
2571
+ /* Final output stage */
2572
+
2573
+ outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp0,
2574
+ CONST_BITS + PASS1_BITS + 3) &
2575
+ RANGE_MASK];
2576
+ outptr[15] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp0,
2577
+ CONST_BITS + PASS1_BITS + 3) &
2578
+ RANGE_MASK];
2579
+ outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp1,
2580
+ CONST_BITS + PASS1_BITS + 3) &
2581
+ RANGE_MASK];
2582
+ outptr[14] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp1,
2583
+ CONST_BITS + PASS1_BITS + 3) &
2584
+ RANGE_MASK];
2585
+ outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp2,
2586
+ CONST_BITS + PASS1_BITS + 3) &
2587
+ RANGE_MASK];
2588
+ outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp2,
2589
+ CONST_BITS + PASS1_BITS + 3) &
2590
+ RANGE_MASK];
2591
+ outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp3,
2592
+ CONST_BITS + PASS1_BITS + 3) &
2593
+ RANGE_MASK];
2594
+ outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp3,
2595
+ CONST_BITS + PASS1_BITS + 3) &
2596
+ RANGE_MASK];
2597
+ outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp10,
2598
+ CONST_BITS + PASS1_BITS + 3) &
2599
+ RANGE_MASK];
2600
+ outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp10,
2601
+ CONST_BITS + PASS1_BITS + 3) &
2602
+ RANGE_MASK];
2603
+ outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp11,
2604
+ CONST_BITS + PASS1_BITS + 3) &
2605
+ RANGE_MASK];
2606
+ outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp11,
2607
+ CONST_BITS + PASS1_BITS + 3) &
2608
+ RANGE_MASK];
2609
+ outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp12,
2610
+ CONST_BITS + PASS1_BITS + 3) &
2611
+ RANGE_MASK];
2612
+ outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp12,
2613
+ CONST_BITS + PASS1_BITS + 3) &
2614
+ RANGE_MASK];
2615
+ outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp27 + tmp13,
2616
+ CONST_BITS + PASS1_BITS + 3) &
2617
+ RANGE_MASK];
2618
+ outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp27 - tmp13,
2619
+ CONST_BITS + PASS1_BITS + 3) &
2620
+ RANGE_MASK];
2621
+
2622
+ wsptr += 8; /* advance pointer to next row */
2623
+ }
2624
+ }
2625
+
2626
+ #endif /* IDCT_SCALING_SUPPORTED */
2627
+ #endif /* DCT_ISLOW_SUPPORTED */