mlpack 4.6.2__cp313-cp313-win_amd64.whl → 4.7.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415)
  1. mlpack/__init__.py +4 -4
  2. mlpack/adaboost_classify.cp313-win_amd64.pyd +0 -0
  3. mlpack/adaboost_probabilities.cp313-win_amd64.pyd +0 -0
  4. mlpack/adaboost_train.cp313-win_amd64.pyd +0 -0
  5. mlpack/approx_kfn.cp313-win_amd64.pyd +0 -0
  6. mlpack/arma_numpy.cp313-win_amd64.pyd +0 -0
  7. mlpack/bayesian_linear_regression.cp313-win_amd64.pyd +0 -0
  8. mlpack/cf.cp313-win_amd64.pyd +0 -0
  9. mlpack/dbscan.cp313-win_amd64.pyd +0 -0
  10. mlpack/decision_tree.cp313-win_amd64.pyd +0 -0
  11. mlpack/det.cp313-win_amd64.pyd +0 -0
  12. mlpack/emst.cp313-win_amd64.pyd +0 -0
  13. mlpack/fastmks.cp313-win_amd64.pyd +0 -0
  14. mlpack/gmm_generate.cp313-win_amd64.pyd +0 -0
  15. mlpack/gmm_probability.cp313-win_amd64.pyd +0 -0
  16. mlpack/gmm_train.cp313-win_amd64.pyd +0 -0
  17. mlpack/hmm_generate.cp313-win_amd64.pyd +0 -0
  18. mlpack/hmm_loglik.cp313-win_amd64.pyd +0 -0
  19. mlpack/hmm_train.cp313-win_amd64.pyd +0 -0
  20. mlpack/hmm_viterbi.cp313-win_amd64.pyd +0 -0
  21. mlpack/hoeffding_tree.cp313-win_amd64.pyd +0 -0
  22. mlpack/image_converter.cp313-win_amd64.pyd +0 -0
  23. mlpack/include/mlpack/base.hpp +1 -0
  24. mlpack/include/mlpack/core/arma_extend/find_nan.hpp +63 -0
  25. mlpack/include/mlpack/core/cereal/low_precision.hpp +48 -0
  26. mlpack/include/mlpack/core/cv/cv_base.hpp +11 -11
  27. mlpack/include/mlpack/core/cv/cv_base_impl.hpp +7 -7
  28. mlpack/include/mlpack/core/cv/k_fold_cv.hpp +4 -4
  29. mlpack/include/mlpack/core/cv/k_fold_cv_impl.hpp +4 -4
  30. mlpack/include/mlpack/core/cv/meta_info_extractor.hpp +10 -10
  31. mlpack/include/mlpack/core/cv/metrics/f1_impl.hpp +1 -1
  32. mlpack/include/mlpack/core/cv/metrics/facilities.hpp +2 -1
  33. mlpack/include/mlpack/core/cv/metrics/precision_impl.hpp +1 -1
  34. mlpack/include/mlpack/core/cv/metrics/r2_score_impl.hpp +1 -1
  35. mlpack/include/mlpack/core/cv/metrics/silhouette_score_impl.hpp +1 -1
  36. mlpack/include/mlpack/core/cv/simple_cv.hpp +4 -4
  37. mlpack/include/mlpack/core/cv/simple_cv_impl.hpp +2 -2
  38. mlpack/include/mlpack/core/data/binarize.hpp +0 -2
  39. mlpack/include/mlpack/core/data/check_categorical_param.hpp +0 -2
  40. mlpack/include/mlpack/core/data/combine_options.hpp +151 -0
  41. mlpack/include/mlpack/core/data/confusion_matrix.hpp +0 -2
  42. mlpack/include/mlpack/core/data/confusion_matrix_impl.hpp +0 -2
  43. mlpack/include/mlpack/core/data/data.hpp +6 -4
  44. mlpack/include/mlpack/core/data/data_options.hpp +341 -18
  45. mlpack/include/mlpack/core/data/dataset_mapper.hpp +3 -5
  46. mlpack/include/mlpack/core/data/dataset_mapper_impl.hpp +0 -2
  47. mlpack/include/mlpack/core/data/detect_file_type.hpp +34 -5
  48. mlpack/include/mlpack/core/data/detect_file_type_impl.hpp +185 -11
  49. mlpack/include/mlpack/core/data/extension.hpp +2 -4
  50. mlpack/include/mlpack/core/data/font8x8_basic.h +152 -0
  51. mlpack/include/mlpack/core/data/has_serialize.hpp +0 -2
  52. mlpack/include/mlpack/core/data/image_bounding_box.hpp +36 -0
  53. mlpack/include/mlpack/core/data/image_bounding_box_impl.hpp +155 -0
  54. mlpack/include/mlpack/core/data/image_layout.hpp +63 -0
  55. mlpack/include/mlpack/core/data/image_layout_impl.hpp +75 -0
  56. mlpack/include/mlpack/core/data/image_letterbox.hpp +116 -0
  57. mlpack/include/mlpack/core/data/image_options.hpp +257 -0
  58. mlpack/include/mlpack/core/data/image_resize_crop.hpp +113 -48
  59. mlpack/include/mlpack/core/data/imputation_methods/custom_imputation.hpp +16 -32
  60. mlpack/include/mlpack/core/data/imputation_methods/listwise_deletion.hpp +19 -29
  61. mlpack/include/mlpack/core/data/imputation_methods/mean_imputation.hpp +113 -44
  62. mlpack/include/mlpack/core/data/imputation_methods/median_imputation.hpp +44 -43
  63. mlpack/include/mlpack/core/data/imputer.hpp +41 -49
  64. mlpack/include/mlpack/core/data/is_naninf.hpp +0 -2
  65. mlpack/include/mlpack/core/data/load.hpp +49 -233
  66. mlpack/include/mlpack/core/data/load_arff.hpp +0 -2
  67. mlpack/include/mlpack/core/data/load_arff_impl.hpp +2 -4
  68. mlpack/include/mlpack/core/data/load_categorical.hpp +1 -4
  69. mlpack/include/mlpack/core/data/load_categorical_impl.hpp +10 -26
  70. mlpack/include/mlpack/core/data/load_dense.hpp +279 -0
  71. mlpack/include/mlpack/core/data/load_deprecated.hpp +466 -0
  72. mlpack/include/mlpack/core/data/load_image.hpp +71 -43
  73. mlpack/include/mlpack/core/data/load_impl.hpp +95 -274
  74. mlpack/include/mlpack/core/data/load_model.hpp +62 -0
  75. mlpack/include/mlpack/core/data/load_numeric.hpp +124 -87
  76. mlpack/include/mlpack/core/data/load_sparse.hpp +91 -0
  77. mlpack/include/mlpack/core/data/map_policies/datatype.hpp +0 -2
  78. mlpack/include/mlpack/core/data/map_policies/increment_policy.hpp +0 -2
  79. mlpack/include/mlpack/core/data/map_policies/map_policies.hpp +0 -1
  80. mlpack/include/mlpack/core/data/matrix_options.hpp +152 -20
  81. mlpack/include/mlpack/core/data/normalize_labels.hpp +0 -2
  82. mlpack/include/mlpack/core/data/normalize_labels_impl.hpp +0 -2
  83. mlpack/include/mlpack/core/data/one_hot_encoding.hpp +2 -4
  84. mlpack/include/mlpack/core/data/one_hot_encoding_impl.hpp +3 -5
  85. mlpack/include/mlpack/core/data/save.hpp +26 -120
  86. mlpack/include/mlpack/core/data/save_dense.hpp +42 -0
  87. mlpack/include/mlpack/core/data/save_deprecated.hpp +308 -0
  88. mlpack/include/mlpack/core/data/save_image.hpp +82 -42
  89. mlpack/include/mlpack/core/data/save_impl.hpp +60 -245
  90. mlpack/include/mlpack/core/data/save_matrix.hpp +45 -0
  91. mlpack/include/mlpack/core/data/save_model.hpp +61 -0
  92. mlpack/include/mlpack/core/data/save_numeric.hpp +60 -0
  93. mlpack/include/mlpack/core/data/save_sparse.hpp +44 -0
  94. mlpack/include/mlpack/core/data/scaler_methods/max_abs_scaler.hpp +0 -2
  95. mlpack/include/mlpack/core/data/scaler_methods/mean_normalization.hpp +2 -4
  96. mlpack/include/mlpack/core/data/scaler_methods/min_max_scaler.hpp +0 -2
  97. mlpack/include/mlpack/core/data/scaler_methods/pca_whitening.hpp +1 -3
  98. mlpack/include/mlpack/core/data/scaler_methods/standard_scaler.hpp +2 -4
  99. mlpack/include/mlpack/core/data/scaler_methods/zca_whitening.hpp +0 -2
  100. mlpack/include/mlpack/core/data/split_data.hpp +6 -8
  101. mlpack/include/mlpack/core/data/string_algorithms.hpp +0 -2
  102. mlpack/include/mlpack/core/data/string_encoding.hpp +0 -2
  103. mlpack/include/mlpack/core/data/string_encoding_dictionary.hpp +0 -2
  104. mlpack/include/mlpack/core/data/string_encoding_impl.hpp +0 -2
  105. mlpack/include/mlpack/core/data/string_encoding_policies/bag_of_words_encoding_policy.hpp +0 -2
  106. mlpack/include/mlpack/core/data/string_encoding_policies/dictionary_encoding_policy.hpp +0 -2
  107. mlpack/include/mlpack/core/data/string_encoding_policies/policy_traits.hpp +0 -2
  108. mlpack/include/mlpack/core/data/string_encoding_policies/tf_idf_encoding_policy.hpp +0 -2
  109. mlpack/include/mlpack/core/data/text_options.hpp +91 -53
  110. mlpack/include/mlpack/core/data/tokenizers/char_extract.hpp +0 -2
  111. mlpack/include/mlpack/core/data/tokenizers/split_by_any_of.hpp +0 -2
  112. mlpack/include/mlpack/core/distributions/gamma_distribution_impl.hpp +4 -4
  113. mlpack/include/mlpack/core/distributions/laplace_distribution.hpp +9 -9
  114. mlpack/include/mlpack/core/distributions/laplace_distribution_impl.hpp +7 -7
  115. mlpack/include/mlpack/core/hpt/cv_function.hpp +2 -2
  116. mlpack/include/mlpack/core/hpt/cv_function_impl.hpp +2 -2
  117. mlpack/include/mlpack/core/hpt/hpt.hpp +4 -4
  118. mlpack/include/mlpack/core/hpt/hpt_impl.hpp +9 -9
  119. mlpack/include/mlpack/core/math/make_alias.hpp +7 -5
  120. mlpack/include/mlpack/core/math/random.hpp +19 -5
  121. mlpack/include/mlpack/core/math/shuffle_data.hpp +79 -245
  122. mlpack/include/mlpack/core/metrics/non_maximal_suppression_impl.hpp +9 -10
  123. mlpack/include/mlpack/core/stb/bundled/stb_image_resize2.h +291 -239
  124. mlpack/include/mlpack/core/tree/binary_space_tree/rp_tree_mean_split_impl.hpp +7 -7
  125. mlpack/include/mlpack/core/tree/cellbound.hpp +2 -2
  126. mlpack/include/mlpack/core/tree/cosine_tree/cosine_tree_impl.hpp +10 -10
  127. mlpack/include/mlpack/core/tree/octree/octree.hpp +10 -0
  128. mlpack/include/mlpack/core/tree/octree/octree_impl.hpp +14 -4
  129. mlpack/include/mlpack/core/util/arma_traits.hpp +25 -38
  130. mlpack/include/mlpack/core/util/coot_traits.hpp +97 -0
  131. mlpack/include/mlpack/core/util/forward.hpp +0 -2
  132. mlpack/include/mlpack/core/util/param.hpp +4 -4
  133. mlpack/include/mlpack/core/util/params_impl.hpp +2 -2
  134. mlpack/include/mlpack/core/util/using.hpp +29 -2
  135. mlpack/include/mlpack/core/util/version.hpp +5 -3
  136. mlpack/include/mlpack/core/util/version_impl.hpp +3 -6
  137. mlpack/include/mlpack/methods/adaboost/adaboost_classify_main.cpp +1 -1
  138. mlpack/include/mlpack/methods/adaboost/adaboost_main.cpp +3 -3
  139. mlpack/include/mlpack/methods/adaboost/adaboost_train_main.cpp +2 -2
  140. mlpack/include/mlpack/methods/ann/activation_functions/activation_functions.hpp +1 -0
  141. mlpack/include/mlpack/methods/ann/activation_functions/bipolar_sigmoid_function.hpp +6 -4
  142. mlpack/include/mlpack/methods/ann/activation_functions/elish_function.hpp +17 -12
  143. mlpack/include/mlpack/methods/ann/activation_functions/elliot_function.hpp +9 -7
  144. mlpack/include/mlpack/methods/ann/activation_functions/gaussian_function.hpp +7 -6
  145. mlpack/include/mlpack/methods/ann/activation_functions/gelu_exact_function.hpp +73 -0
  146. mlpack/include/mlpack/methods/ann/activation_functions/gelu_function.hpp +27 -16
  147. mlpack/include/mlpack/methods/ann/activation_functions/hard_sigmoid_function.hpp +8 -6
  148. mlpack/include/mlpack/methods/ann/activation_functions/hard_swish_function.hpp +6 -4
  149. mlpack/include/mlpack/methods/ann/activation_functions/hyper_sinh_function.hpp +13 -8
  150. mlpack/include/mlpack/methods/ann/activation_functions/identity_function.hpp +6 -4
  151. mlpack/include/mlpack/methods/ann/activation_functions/inverse_quadratic_function.hpp +8 -6
  152. mlpack/include/mlpack/methods/ann/activation_functions/lisht_function.hpp +7 -5
  153. mlpack/include/mlpack/methods/ann/activation_functions/logistic_function.hpp +14 -12
  154. mlpack/include/mlpack/methods/ann/activation_functions/mish_function.hpp +7 -5
  155. mlpack/include/mlpack/methods/ann/activation_functions/multi_quadratic_function.hpp +6 -4
  156. mlpack/include/mlpack/methods/ann/activation_functions/poisson1_function.hpp +4 -2
  157. mlpack/include/mlpack/methods/ann/activation_functions/quadratic_function.hpp +6 -4
  158. mlpack/include/mlpack/methods/ann/activation_functions/rectifier_function.hpp +10 -10
  159. mlpack/include/mlpack/methods/ann/activation_functions/silu_function.hpp +10 -8
  160. mlpack/include/mlpack/methods/ann/activation_functions/softplus_function.hpp +12 -9
  161. mlpack/include/mlpack/methods/ann/activation_functions/softsign_function.hpp +15 -23
  162. mlpack/include/mlpack/methods/ann/activation_functions/spline_function.hpp +9 -7
  163. mlpack/include/mlpack/methods/ann/activation_functions/swish_function.hpp +11 -9
  164. mlpack/include/mlpack/methods/ann/activation_functions/tanh_exponential_function.hpp +9 -7
  165. mlpack/include/mlpack/methods/ann/activation_functions/tanh_function.hpp +10 -7
  166. mlpack/include/mlpack/methods/ann/ann.hpp +3 -0
  167. mlpack/include/mlpack/methods/ann/convolution_rules/base_convolution.hpp +197 -0
  168. mlpack/include/mlpack/methods/ann/convolution_rules/convolution_rules.hpp +1 -2
  169. mlpack/include/mlpack/methods/ann/convolution_rules/im2col_convolution.hpp +215 -0
  170. mlpack/include/mlpack/methods/ann/convolution_rules/naive_convolution.hpp +109 -154
  171. mlpack/include/mlpack/methods/ann/dag_network.hpp +728 -0
  172. mlpack/include/mlpack/methods/ann/dag_network_impl.hpp +1640 -0
  173. mlpack/include/mlpack/methods/ann/dists/bernoulli_distribution_impl.hpp +1 -1
  174. mlpack/include/mlpack/methods/ann/dists/normal_distribution_impl.hpp +7 -2
  175. mlpack/include/mlpack/methods/ann/ffn.hpp +39 -3
  176. mlpack/include/mlpack/methods/ann/ffn_impl.hpp +14 -32
  177. mlpack/include/mlpack/methods/ann/init_rules/const_init.hpp +4 -4
  178. mlpack/include/mlpack/methods/ann/init_rules/gaussian_init.hpp +6 -2
  179. mlpack/include/mlpack/methods/ann/init_rules/he_init.hpp +4 -2
  180. mlpack/include/mlpack/methods/ann/init_rules/kathirvalavakumar_subavathi_init.hpp +3 -3
  181. mlpack/include/mlpack/methods/ann/init_rules/lecun_normal_init.hpp +4 -2
  182. mlpack/include/mlpack/methods/ann/init_rules/nguyen_widrow_init.hpp +2 -2
  183. mlpack/include/mlpack/methods/ann/init_rules/oivs_init.hpp +2 -2
  184. mlpack/include/mlpack/methods/ann/init_rules/orthogonal_init.hpp +2 -2
  185. mlpack/include/mlpack/methods/ann/init_rules/random_init.hpp +8 -4
  186. mlpack/include/mlpack/methods/ann/layer/adaptive_max_pooling.hpp +21 -23
  187. mlpack/include/mlpack/methods/ann/layer/adaptive_max_pooling_impl.hpp +15 -15
  188. mlpack/include/mlpack/methods/ann/layer/adaptive_mean_pooling.hpp +21 -23
  189. mlpack/include/mlpack/methods/ann/layer/adaptive_mean_pooling_impl.hpp +16 -16
  190. mlpack/include/mlpack/methods/ann/layer/add.hpp +18 -18
  191. mlpack/include/mlpack/methods/ann/layer/add_impl.hpp +13 -13
  192. mlpack/include/mlpack/methods/ann/layer/add_merge.hpp +19 -18
  193. mlpack/include/mlpack/methods/ann/layer/add_merge_impl.hpp +13 -13
  194. mlpack/include/mlpack/methods/ann/layer/alpha_dropout.hpp +17 -16
  195. mlpack/include/mlpack/methods/ann/layer/alpha_dropout_impl.hpp +14 -13
  196. mlpack/include/mlpack/methods/ann/layer/base_layer.hpp +28 -51
  197. mlpack/include/mlpack/methods/ann/layer/batch_norm.hpp +16 -18
  198. mlpack/include/mlpack/methods/ann/layer/batch_norm_impl.hpp +55 -54
  199. mlpack/include/mlpack/methods/ann/layer/c_relu.hpp +18 -20
  200. mlpack/include/mlpack/methods/ann/layer/c_relu_impl.hpp +20 -25
  201. mlpack/include/mlpack/methods/ann/layer/celu.hpp +14 -19
  202. mlpack/include/mlpack/methods/ann/layer/celu_impl.hpp +25 -34
  203. mlpack/include/mlpack/methods/ann/layer/concat.hpp +18 -18
  204. mlpack/include/mlpack/methods/ann/layer/concat_impl.hpp +13 -13
  205. mlpack/include/mlpack/methods/ann/layer/concatenate.hpp +18 -18
  206. mlpack/include/mlpack/methods/ann/layer/concatenate_impl.hpp +14 -14
  207. mlpack/include/mlpack/methods/ann/layer/convolution.hpp +42 -47
  208. mlpack/include/mlpack/methods/ann/layer/convolution_impl.hpp +170 -159
  209. mlpack/include/mlpack/methods/ann/layer/dropconnect.hpp +18 -20
  210. mlpack/include/mlpack/methods/ann/layer/dropconnect_impl.hpp +20 -20
  211. mlpack/include/mlpack/methods/ann/layer/dropout.hpp +17 -19
  212. mlpack/include/mlpack/methods/ann/layer/dropout_impl.hpp +14 -21
  213. mlpack/include/mlpack/methods/ann/layer/elu.hpp +23 -25
  214. mlpack/include/mlpack/methods/ann/layer/elu_impl.hpp +20 -27
  215. mlpack/include/mlpack/methods/ann/layer/embedding.hpp +160 -0
  216. mlpack/include/mlpack/methods/ann/layer/embedding_impl.hpp +189 -0
  217. mlpack/include/mlpack/methods/ann/layer/flexible_relu.hpp +17 -19
  218. mlpack/include/mlpack/methods/ann/layer/flexible_relu_impl.hpp +20 -20
  219. mlpack/include/mlpack/methods/ann/layer/ftswish.hpp +17 -18
  220. mlpack/include/mlpack/methods/ann/layer/ftswish_impl.hpp +17 -35
  221. mlpack/include/mlpack/methods/ann/layer/grouped_convolution.hpp +27 -33
  222. mlpack/include/mlpack/methods/ann/layer/grouped_convolution_impl.hpp +170 -163
  223. mlpack/include/mlpack/methods/ann/layer/gru.hpp +195 -0
  224. mlpack/include/mlpack/methods/ann/layer/gru_impl.hpp +325 -0
  225. mlpack/include/mlpack/methods/ann/layer/hard_tanh.hpp +13 -15
  226. mlpack/include/mlpack/methods/ann/layer/hard_tanh_impl.hpp +12 -12
  227. mlpack/include/mlpack/methods/ann/layer/identity.hpp +19 -20
  228. mlpack/include/mlpack/methods/ann/layer/identity_impl.hpp +12 -12
  229. mlpack/include/mlpack/methods/ann/layer/layer.hpp +37 -33
  230. mlpack/include/mlpack/methods/ann/layer/layer_norm.hpp +11 -13
  231. mlpack/include/mlpack/methods/ann/layer/layer_norm_impl.hpp +16 -16
  232. mlpack/include/mlpack/methods/ann/layer/layer_types.hpp +4 -1
  233. mlpack/include/mlpack/methods/ann/layer/leaky_relu.hpp +20 -23
  234. mlpack/include/mlpack/methods/ann/layer/leaky_relu_impl.hpp +12 -13
  235. mlpack/include/mlpack/methods/ann/layer/linear.hpp +16 -18
  236. mlpack/include/mlpack/methods/ann/layer/linear3d.hpp +18 -18
  237. mlpack/include/mlpack/methods/ann/layer/linear3d_impl.hpp +18 -18
  238. mlpack/include/mlpack/methods/ann/layer/linear_impl.hpp +15 -15
  239. mlpack/include/mlpack/methods/ann/layer/linear_no_bias.hpp +15 -17
  240. mlpack/include/mlpack/methods/ann/layer/linear_no_bias_impl.hpp +20 -20
  241. mlpack/include/mlpack/methods/ann/layer/linear_recurrent.hpp +25 -14
  242. mlpack/include/mlpack/methods/ann/layer/linear_recurrent_impl.hpp +60 -31
  243. mlpack/include/mlpack/methods/ann/layer/log_softmax.hpp +17 -36
  244. mlpack/include/mlpack/methods/ann/layer/log_softmax_impl.hpp +58 -74
  245. mlpack/include/mlpack/methods/ann/layer/lstm.hpp +26 -29
  246. mlpack/include/mlpack/methods/ann/layer/lstm_impl.hpp +128 -124
  247. mlpack/include/mlpack/methods/ann/layer/max_pooling.hpp +19 -19
  248. mlpack/include/mlpack/methods/ann/layer/max_pooling_impl.hpp +14 -14
  249. mlpack/include/mlpack/methods/ann/layer/mean_pooling.hpp +24 -24
  250. mlpack/include/mlpack/methods/ann/layer/mean_pooling_impl.hpp +16 -16
  251. mlpack/include/mlpack/methods/ann/layer/multi_layer.hpp +36 -6
  252. mlpack/include/mlpack/methods/ann/layer/multi_layer_impl.hpp +6 -2
  253. mlpack/include/mlpack/methods/ann/layer/multihead_attention.hpp +26 -22
  254. mlpack/include/mlpack/methods/ann/layer/multihead_attention_impl.hpp +161 -64
  255. mlpack/include/mlpack/methods/ann/layer/nearest_interpolation.hpp +28 -25
  256. mlpack/include/mlpack/methods/ann/layer/nearest_interpolation_impl.hpp +36 -37
  257. mlpack/include/mlpack/methods/ann/layer/noisylinear.hpp +39 -42
  258. mlpack/include/mlpack/methods/ann/layer/noisylinear_impl.hpp +18 -18
  259. mlpack/include/mlpack/methods/ann/layer/padding.hpp +21 -17
  260. mlpack/include/mlpack/methods/ann/layer/padding_impl.hpp +33 -19
  261. mlpack/include/mlpack/methods/ann/layer/parametric_relu.hpp +26 -28
  262. mlpack/include/mlpack/methods/ann/layer/parametric_relu_impl.hpp +18 -18
  263. mlpack/include/mlpack/methods/ann/layer/radial_basis_function.hpp +41 -28
  264. mlpack/include/mlpack/methods/ann/layer/radial_basis_function_impl.hpp +42 -17
  265. mlpack/include/mlpack/methods/ann/layer/recurrent_layer.hpp +13 -0
  266. mlpack/include/mlpack/methods/ann/layer/relu6.hpp +19 -21
  267. mlpack/include/mlpack/methods/ann/layer/relu6_impl.hpp +14 -14
  268. mlpack/include/mlpack/methods/ann/layer/repeat.hpp +24 -25
  269. mlpack/include/mlpack/methods/ann/layer/repeat_impl.hpp +10 -10
  270. mlpack/include/mlpack/methods/ann/layer/serialization.hpp +64 -54
  271. mlpack/include/mlpack/methods/ann/layer/softmax.hpp +20 -20
  272. mlpack/include/mlpack/methods/ann/layer/softmax_impl.hpp +10 -10
  273. mlpack/include/mlpack/methods/ann/layer/softmin.hpp +20 -23
  274. mlpack/include/mlpack/methods/ann/layer/softmin_impl.hpp +10 -10
  275. mlpack/include/mlpack/methods/ann/layer/sum_reduce.hpp +103 -0
  276. mlpack/include/mlpack/methods/ann/layer/sum_reduce_impl.hpp +143 -0
  277. mlpack/include/mlpack/methods/ann/loss_functions/cosine_embedding_loss_impl.hpp +3 -3
  278. mlpack/include/mlpack/methods/ann/loss_functions/mean_bias_error_impl.hpp +1 -1
  279. mlpack/include/mlpack/methods/ann/loss_functions/multilabel_softmargin_loss_impl.hpp +1 -1
  280. mlpack/include/mlpack/methods/ann/loss_functions/negative_log_likelihood.hpp +2 -2
  281. mlpack/include/mlpack/methods/ann/loss_functions/negative_log_likelihood_impl.hpp +29 -15
  282. mlpack/include/mlpack/methods/ann/loss_functions/poisson_nll_loss_impl.hpp +1 -1
  283. mlpack/include/mlpack/methods/ann/models/models.hpp +17 -0
  284. mlpack/include/mlpack/methods/ann/models/yolov3/yolov3_layer.hpp +151 -0
  285. mlpack/include/mlpack/methods/ann/models/yolov3/yolov3_layer_impl.hpp +265 -0
  286. mlpack/include/mlpack/methods/ann/models/yolov3/yolov3_tiny.hpp +187 -0
  287. mlpack/include/mlpack/methods/ann/models/yolov3/yolov3_tiny_impl.hpp +206 -0
  288. mlpack/include/mlpack/methods/ann/regularizer/orthogonal_regularizer_impl.hpp +5 -3
  289. mlpack/include/mlpack/methods/ann/rnn.hpp +136 -42
  290. mlpack/include/mlpack/methods/ann/rnn_impl.hpp +230 -38
  291. mlpack/include/mlpack/methods/approx_kfn/drusilla_select_impl.hpp +1 -1
  292. mlpack/include/mlpack/methods/bayesian_linear_regression/bayesian_linear_regression_main.cpp +1 -1
  293. mlpack/include/mlpack/methods/bias_svd/bias_svd_function_impl.hpp +1 -1
  294. mlpack/include/mlpack/methods/cf/cf_model.hpp +1 -1
  295. mlpack/include/mlpack/methods/decision_tree/decision_tree.hpp +6 -6
  296. mlpack/include/mlpack/methods/decision_tree/decision_tree_impl.hpp +12 -12
  297. mlpack/include/mlpack/methods/decision_tree/decision_tree_main.cpp +0 -1
  298. mlpack/include/mlpack/methods/decision_tree/decision_tree_regressor.hpp +6 -6
  299. mlpack/include/mlpack/methods/decision_tree/decision_tree_regressor_impl.hpp +12 -12
  300. mlpack/include/mlpack/methods/det/det_main.cpp +1 -1
  301. mlpack/include/mlpack/methods/hmm/hmm_train_main.cpp +4 -4
  302. mlpack/include/mlpack/methods/hmm/hmm_util_impl.hpp +2 -2
  303. mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree.hpp +6 -6
  304. mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree_impl.hpp +31 -31
  305. mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree_main.cpp +1 -2
  306. mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree_model.hpp +2 -2
  307. mlpack/include/mlpack/methods/hoeffding_trees/hoeffding_tree_model_impl.hpp +1 -1
  308. mlpack/include/mlpack/methods/kde/kde_rules_impl.hpp +6 -6
  309. mlpack/include/mlpack/methods/lars/lars_impl.hpp +3 -3
  310. mlpack/include/mlpack/methods/linear_svm/linear_svm_function_impl.hpp +4 -4
  311. mlpack/include/mlpack/methods/linear_svm/linear_svm_main.cpp +3 -3
  312. mlpack/include/mlpack/methods/lmnn/lmnn_main.cpp +1 -1
  313. mlpack/include/mlpack/methods/lsh/lsh_main.cpp +1 -1
  314. mlpack/include/mlpack/methods/matrix_completion/matrix_completion_impl.hpp +1 -1
  315. mlpack/include/mlpack/methods/naive_bayes/naive_bayes_classifier_impl.hpp +1 -1
  316. mlpack/include/mlpack/methods/naive_bayes/nbc_main.cpp +3 -3
  317. mlpack/include/mlpack/methods/nca/nca_main.cpp +1 -1
  318. mlpack/include/mlpack/methods/neighbor_search/kfn_main.cpp +8 -8
  319. mlpack/include/mlpack/methods/neighbor_search/knn_main.cpp +8 -8
  320. mlpack/include/mlpack/methods/neighbor_search/neighbor_search.hpp +154 -34
  321. mlpack/include/mlpack/methods/neighbor_search/neighbor_search_impl.hpp +190 -51
  322. mlpack/include/mlpack/methods/neighbor_search/neighbor_search_stat.hpp +10 -0
  323. mlpack/include/mlpack/methods/neighbor_search/ns_model.hpp +15 -15
  324. mlpack/include/mlpack/methods/neighbor_search/ns_model_impl.hpp +55 -46
  325. mlpack/include/mlpack/methods/neighbor_search/typedef.hpp +42 -2
  326. mlpack/include/mlpack/methods/pca/pca_impl.hpp +2 -2
  327. mlpack/include/mlpack/methods/perceptron/perceptron.hpp +2 -2
  328. mlpack/include/mlpack/methods/perceptron/perceptron_impl.hpp +1 -1
  329. mlpack/include/mlpack/methods/perceptron/perceptron_main.cpp +2 -2
  330. mlpack/include/mlpack/methods/preprocess/image_converter_main.cpp +2 -3
  331. mlpack/include/mlpack/methods/preprocess/preprocess_binarize_main.cpp +2 -2
  332. mlpack/include/mlpack/methods/preprocess/preprocess_describe_main.cpp +0 -1
  333. mlpack/include/mlpack/methods/preprocess/preprocess_imputer_main.cpp +50 -129
  334. mlpack/include/mlpack/methods/preprocess/preprocess_one_hot_encoding_main.cpp +6 -6
  335. mlpack/include/mlpack/methods/preprocess/preprocess_scale_main.cpp +2 -3
  336. mlpack/include/mlpack/methods/preprocess/preprocess_split_main.cpp +3 -4
  337. mlpack/include/mlpack/methods/preprocess/scaling_model.hpp +6 -8
  338. mlpack/include/mlpack/methods/preprocess/scaling_model_impl.hpp +18 -20
  339. mlpack/include/mlpack/methods/random_forest/random_forest.hpp +5 -5
  340. mlpack/include/mlpack/methods/random_forest/random_forest_impl.hpp +9 -9
  341. mlpack/include/mlpack/methods/range_search/range_search_main.cpp +1 -1
  342. mlpack/include/mlpack/methods/rann/krann_main.cpp +1 -1
  343. mlpack/include/mlpack/methods/regularized_svd/regularized_svd_function_impl.hpp +1 -1
  344. mlpack/include/mlpack/methods/reinforcement_learning/async_learning_impl.hpp +8 -8
  345. mlpack/include/mlpack/methods/reinforcement_learning/ddpg_impl.hpp +16 -16
  346. mlpack/include/mlpack/methods/reinforcement_learning/environment/acrobot.hpp +4 -4
  347. mlpack/include/mlpack/methods/reinforcement_learning/environment/cart_pole.hpp +3 -3
  348. mlpack/include/mlpack/methods/reinforcement_learning/environment/cont_double_pole_cart.hpp +6 -5
  349. mlpack/include/mlpack/methods/reinforcement_learning/environment/pendulum.hpp +6 -5
  350. mlpack/include/mlpack/methods/reinforcement_learning/policy/aggregated_policy.hpp +2 -2
  351. mlpack/include/mlpack/methods/reinforcement_learning/q_learning_impl.hpp +10 -10
  352. mlpack/include/mlpack/methods/reinforcement_learning/q_networks/categorical_dqn.hpp +21 -17
  353. mlpack/include/mlpack/methods/reinforcement_learning/q_networks/dueling_dqn.hpp +69 -77
  354. mlpack/include/mlpack/methods/reinforcement_learning/q_networks/simple_dqn.hpp +9 -9
  355. mlpack/include/mlpack/methods/reinforcement_learning/sac_impl.hpp +14 -14
  356. mlpack/include/mlpack/methods/reinforcement_learning/td3_impl.hpp +14 -14
  357. mlpack/include/mlpack/methods/softmax_regression/softmax_regression_function_impl.hpp +1 -1
  358. mlpack/include/mlpack/methods/svdplusplus/svdplusplus_function_impl.hpp +1 -1
  359. mlpack/include/mlpack/namespace_compat.hpp +1 -0
  360. mlpack/include/mlpack/prereqs.hpp +1 -0
  361. mlpack/kde.cp313-win_amd64.pyd +0 -0
  362. mlpack/kernel_pca.cp313-win_amd64.pyd +0 -0
  363. mlpack/kfn.cp313-win_amd64.pyd +0 -0
  364. mlpack/kmeans.cp313-win_amd64.pyd +0 -0
  365. mlpack/knn.cp313-win_amd64.pyd +0 -0
  366. mlpack/krann.cp313-win_amd64.pyd +0 -0
  367. mlpack/lars.cp313-win_amd64.pyd +0 -0
  368. mlpack/linear_regression_predict.cp313-win_amd64.pyd +0 -0
  369. mlpack/linear_regression_train.cp313-win_amd64.pyd +0 -0
  370. mlpack/linear_svm.cp313-win_amd64.pyd +0 -0
  371. mlpack/lmnn.cp313-win_amd64.pyd +0 -0
  372. mlpack/local_coordinate_coding.cp313-win_amd64.pyd +0 -0
  373. mlpack/logistic_regression.cp313-win_amd64.pyd +0 -0
  374. mlpack/lsh.cp313-win_amd64.pyd +0 -0
  375. mlpack/mean_shift.cp313-win_amd64.pyd +0 -0
  376. mlpack/nbc.cp313-win_amd64.pyd +0 -0
  377. mlpack/nca.cp313-win_amd64.pyd +0 -0
  378. mlpack/nmf.cp313-win_amd64.pyd +0 -0
  379. mlpack/pca.cp313-win_amd64.pyd +0 -0
  380. mlpack/perceptron.cp313-win_amd64.pyd +0 -0
  381. mlpack/preprocess_binarize.cp313-win_amd64.pyd +0 -0
  382. mlpack/preprocess_describe.cp313-win_amd64.pyd +0 -0
  383. mlpack/preprocess_one_hot_encoding.cp313-win_amd64.pyd +0 -0
  384. mlpack/preprocess_scale.cp313-win_amd64.pyd +0 -0
  385. mlpack/preprocess_split.cp313-win_amd64.pyd +0 -0
  386. mlpack/radical.cp313-win_amd64.pyd +0 -0
  387. mlpack/random_forest.cp313-win_amd64.pyd +0 -0
  388. mlpack/softmax_regression.cp313-win_amd64.pyd +0 -0
  389. mlpack/sparse_coding.cp313-win_amd64.pyd +0 -0
  390. mlpack-4.7.0.dist-info/DELVEWHEEL +2 -0
  391. {mlpack-4.6.2.dist-info → mlpack-4.7.0.dist-info}/METADATA +2 -2
  392. {mlpack-4.6.2.dist-info → mlpack-4.7.0.dist-info}/RECORD +396 -377
  393. {mlpack-4.6.2.dist-info → mlpack-4.7.0.dist-info}/WHEEL +1 -1
  394. mlpack/include/mlpack/core/data/format.hpp +0 -31
  395. mlpack/include/mlpack/core/data/image_info.hpp +0 -102
  396. mlpack/include/mlpack/core/data/image_info_impl.hpp +0 -84
  397. mlpack/include/mlpack/core/data/load_image_impl.hpp +0 -171
  398. mlpack/include/mlpack/core/data/load_model_impl.hpp +0 -115
  399. mlpack/include/mlpack/core/data/load_vec_impl.hpp +0 -154
  400. mlpack/include/mlpack/core/data/map_policies/missing_policy.hpp +0 -148
  401. mlpack/include/mlpack/core/data/save_image_impl.hpp +0 -170
  402. mlpack/include/mlpack/core/data/types.hpp +0 -61
  403. mlpack/include/mlpack/core/data/types_impl.hpp +0 -83
  404. mlpack/include/mlpack/core/data/utilities.hpp +0 -158
  405. mlpack/include/mlpack/core/util/gitversion.hpp +0 -1
  406. mlpack/include/mlpack/methods/ann/convolution_rules/fft_convolution.hpp +0 -213
  407. mlpack/include/mlpack/methods/ann/convolution_rules/svd_convolution.hpp +0 -201
  408. mlpack/include/mlpack/methods/ann/layer/not_adapted/gru.hpp +0 -226
  409. mlpack/include/mlpack/methods/ann/layer/not_adapted/gru_impl.hpp +0 -367
  410. mlpack/include/mlpack/methods/ann/layer/not_adapted/lookup.hpp +0 -139
  411. mlpack/include/mlpack/methods/ann/layer/not_adapted/lookup_impl.hpp +0 -98
  412. mlpack-4.6.2.dist-info/DELVEWHEEL +0 -2
  413. {mlpack-4.6.2.dist-info → mlpack-4.7.0.dist-info}/top_level.txt +0 -0
  414. /mlpack.libs/{libopenblas-9e6d070f769e6580e8c55c0cf83b80a5.dll → libopenblas-c7f521b507686ddc25bee7538a80c374.dll} +0 -0
  415. /mlpack.libs/{msvcp140-50208655e42969b9a5ab8a4e0186bbb9.dll → msvcp140-a4c2229bdc2a2a630acdc095b4d86008.dll} +0 -0
@@ -1,4 +1,4 @@
1
- /* stb_image_resize2 - v2.12 - public domain image resizing
1
+ /* stb_image_resize2 - v2.17 - public domain image resizing
2
2
 
3
3
  by Jeff Roberts (v2) and Jorge L Rodriguez
4
4
  http://github.com/nothings/stb
@@ -254,7 +254,7 @@
254
254
  using the stbir_set_filter_callbacks function.
255
255
 
256
256
  PROGRESS
257
- For interactive use with slow resize operations, you can use the the
257
+ For interactive use with slow resize operations, you can use the
258
258
  scanline callbacks in the extended API. It would have to be a *very* large
259
259
  image resample to need progress though - we're very fast.
260
260
 
@@ -307,6 +307,8 @@
307
307
  some pixel reconversion, but probably dwarfed by things falling out
308
308
  of cache. Probably also something possible with alternating between
309
309
  scattering and gathering at high resize scales?
310
+ * Should we have a multiple MIPs at the same time function (could keep
311
+ more memory in cache during multiple resizes)?
310
312
  * Rewrite the coefficient generator to do many at once.
311
313
  * AVX-512 vertical kernels - worried about downclocking here.
312
314
  * Convert the reincludes to macros when we know they aren't changing.
@@ -327,6 +329,20 @@
327
329
  Nathan Reed: warning fixes for 1.0
328
330
 
329
331
  REVISIONS
332
+ 2.17 (2025-10-25) silly format bug in easy-to-use APIs.
333
+ 2.16 (2025-10-21) fixed the easy-to-use APIs to allow inverted bitmaps (negative
334
+ strides), fix vertical filter kernel callback, fix threaded
335
+ gather buffer priming (and assert).
336
+ (thanks adipose, TainZerL, and Harrison Green)
337
+ 2.15 (2025-07-17) fixed an assert in debug mode when using floats with input
338
+ callbacks, work around GCC warning when adding to null ptr
339
+ (thanks Johannes Spohr and Pyry Kovanen).
340
+ 2.14 (2025-05-09) fixed a bug using downsampling gather horizontal first, and
341
+ scatter with vertical first.
342
+ 2.13 (2025-02-27) fixed a bug when using input callbacks, turned off simd for
343
+ tiny-c, fixed some variables that should have been static,
344
+ fixes a bug when calculating temp memory with resizes that
345
+ exceed 2GB of temp memory (very large resizes).
330
346
  2.12 (2024-10-18) fix incorrect use of user_data with STBIR_FREE
331
347
  2.11 (2024-09-08) fix harmless asan warnings in 2-channel and 3-channel mode
332
348
  with AVX-2, fix some weird scaling edge conditions with
@@ -382,62 +398,6 @@ typedef uint32_t stbir_uint32;
382
398
  typedef uint64_t stbir_uint64;
383
399
  #endif
384
400
 
385
- #ifdef _M_IX86_FP
386
- #if ( _M_IX86_FP >= 1 )
387
- #ifndef STBIR_SSE
388
- #define STBIR_SSE
389
- #endif
390
- #endif
391
- #endif
392
-
393
- #if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || defined(STBIR_SSE) || defined(STBIR_SSE2)
394
- #ifndef STBIR_SSE2
395
- #define STBIR_SSE2
396
- #endif
397
- #if defined(__AVX__) || defined(STBIR_AVX2)
398
- #ifndef STBIR_AVX
399
- #ifndef STBIR_NO_AVX
400
- #define STBIR_AVX
401
- #endif
402
- #endif
403
- #endif
404
- #if defined(__AVX2__) || defined(STBIR_AVX2)
405
- #ifndef STBIR_NO_AVX2
406
- #ifndef STBIR_AVX2
407
- #define STBIR_AVX2
408
- #endif
409
- #if defined( _MSC_VER ) && !defined(__clang__)
410
- #ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can autoselect it here on microsoft - clang needs -m16c
411
- #define STBIR_FP16C
412
- #endif
413
- #endif
414
- #endif
415
- #endif
416
- #ifdef __F16C__
417
- #ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for clang and gcc)
418
- #define STBIR_FP16C
419
- #endif
420
- #endif
421
- #endif
422
-
423
- #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__)
424
- #ifndef STBIR_NEON
425
- #define STBIR_NEON
426
- #endif
427
- #endif
428
-
429
- #if defined(_M_ARM) || defined(__arm__)
430
- #ifdef STBIR_USE_FMA
431
- #undef STBIR_USE_FMA // no FMA for 32-bit arm on MSVC
432
- #endif
433
- #endif
434
-
435
- #if defined(__wasm__) && defined(__wasm_simd128__)
436
- #ifndef STBIR_WASM
437
- #define STBIR_WASM
438
- #endif
439
- #endif
440
-
441
401
  #ifndef STBIRDEF
442
402
  #ifdef STB_IMAGE_RESIZE_STATIC
443
403
  #define STBIRDEF static
@@ -1036,7 +996,7 @@ typedef struct
1036
996
  char no_cache_straddle[64];
1037
997
  } stbir__per_split_info;
1038
998
 
1039
- typedef void stbir__decode_pixels_func( float * decode, int width_times_channels, void const * input );
999
+ typedef float * stbir__decode_pixels_func( float * decode, int width_times_channels, void const * input );
1040
1000
  typedef void stbir__alpha_weight_func( float * decode_buffer, int width_times_channels );
1041
1001
  typedef void stbir__horizontal_gather_channels_func( float * output_buffer, unsigned int output_sub_size, float const * decode_buffer,
1042
1002
  stbir__contributors const * horizontal_contributors, float const * horizontal_coefficients, int coefficient_width );
@@ -1099,8 +1059,8 @@ struct stbir__info
1099
1059
 
1100
1060
  #define stbir__max_uint8_as_float 255.0f
1101
1061
  #define stbir__max_uint16_as_float 65535.0f
1102
- #define stbir__max_uint8_as_float_inverted (1.0f/255.0f)
1103
- #define stbir__max_uint16_as_float_inverted (1.0f/65535.0f)
1062
+ #define stbir__max_uint8_as_float_inverted 3.9215689e-03f // (1.0f/255.0f)
1063
+ #define stbir__max_uint16_as_float_inverted 1.5259022e-05f // (1.0f/65535.0f)
1104
1064
  #define stbir__small_float ((float)1 / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20) / (1 << 20))
1105
1065
 
1106
1066
  // min/max friendly
@@ -1205,6 +1165,69 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
1205
1165
  #define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split?
1206
1166
  #endif
1207
1167
 
1168
+ #define STBIR_INPUT_CALLBACK_PADDING 3
1169
+
1170
+ #ifdef _M_IX86_FP
1171
+ #if ( _M_IX86_FP >= 1 )
1172
+ #ifndef STBIR_SSE
1173
+ #define STBIR_SSE
1174
+ #endif
1175
+ #endif
1176
+ #endif
1177
+
1178
+ #ifdef __TINYC__
1179
+ // tiny c has no intrinsics yet - this can become a version check if they add them
1180
+ #define STBIR_NO_SIMD
1181
+ #endif
1182
+
1183
+ #if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(_M_AMD64) || defined(__SSE2__) || defined(STBIR_SSE) || defined(STBIR_SSE2)
1184
+ #ifndef STBIR_SSE2
1185
+ #define STBIR_SSE2
1186
+ #endif
1187
+ #if defined(__AVX__) || defined(STBIR_AVX2)
1188
+ #ifndef STBIR_AVX
1189
+ #ifndef STBIR_NO_AVX
1190
+ #define STBIR_AVX
1191
+ #endif
1192
+ #endif
1193
+ #endif
1194
+ #if defined(__AVX2__) || defined(STBIR_AVX2)
1195
+ #ifndef STBIR_NO_AVX2
1196
+ #ifndef STBIR_AVX2
1197
+ #define STBIR_AVX2
1198
+ #endif
1199
+ #if defined( _MSC_VER ) && !defined(__clang__)
1200
+ #ifndef STBIR_FP16C // FP16C instructions are on all AVX2 cpus, so we can autoselect it here on microsoft - clang needs -m16c
1201
+ #define STBIR_FP16C
1202
+ #endif
1203
+ #endif
1204
+ #endif
1205
+ #endif
1206
+ #ifdef __F16C__
1207
+ #ifndef STBIR_FP16C // turn on FP16C instructions if the define is set (for clang and gcc)
1208
+ #define STBIR_FP16C
1209
+ #endif
1210
+ #endif
1211
+ #endif
1212
+
1213
+ #if defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || ((__ARM_NEON_FP & 4) != 0) || defined(__ARM_NEON__)
1214
+ #ifndef STBIR_NEON
1215
+ #define STBIR_NEON
1216
+ #endif
1217
+ #endif
1218
+
1219
+ #if defined(_M_ARM) || defined(__arm__)
1220
+ #ifdef STBIR_USE_FMA
1221
+ #undef STBIR_USE_FMA // no FMA for 32-bit arm on MSVC
1222
+ #endif
1223
+ #endif
1224
+
1225
+ #if defined(__wasm__) && defined(__wasm_simd128__)
1226
+ #ifndef STBIR_WASM
1227
+ #define STBIR_WASM
1228
+ #endif
1229
+ #endif
1230
+
1208
1231
  // restrict pointers for the output pointers, other loop and unroll control
1209
1232
  #if defined( _MSC_VER ) && !defined(__clang__)
1210
1233
  #define STBIR_STREAMOUT_PTR( star ) star __restrict
@@ -1451,8 +1474,8 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
1451
1474
  #include <smmintrin.h>
1452
1475
  #define stbir__simdf_pack_to_8words(out,reg0,reg1) out = _mm_packus_epi32(_mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg0,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())), _mm_cvttps_epi32(_mm_max_ps(_mm_min_ps(reg1,STBIR__CONSTF(STBIR_max_uint16_as_float)),_mm_setzero_ps())))
1453
1476
  #else
1454
- STBIRDEF STBIR__SIMDI_CONST(stbir__s32_32768, 32768);
1455
- STBIRDEF STBIR__SIMDI_CONST(stbir__s16_32768, ((32768<<16)|32768));
1477
+ static STBIR__SIMDI_CONST(stbir__s32_32768, 32768);
1478
+ static STBIR__SIMDI_CONST(stbir__s16_32768, ((32768<<16)|32768));
1456
1479
 
1457
1480
  #define stbir__simdf_pack_to_8words(out,reg0,reg1) \
1458
1481
  { \
@@ -2816,6 +2839,7 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte
2816
2839
  char STBIR_SIMD_STREAMOUT_PTR( * ) s_end = ((char*) src) + bytes;
2817
2840
  ptrdiff_t ofs_to_dest = (char*)dest - (char*)src;
2818
2841
 
2842
+ #ifndef STBIR_MEMCPY_NOUNALIGNED
2819
2843
  if ( ofs_to_dest >= 8 ) // is the overlap more than 8 away?
2820
2844
  {
2821
2845
  char STBIR_SIMD_STREAMOUT_PTR( * ) s_end8 = ((char*) src) + (bytes&~7);
@@ -2830,6 +2854,7 @@ static void stbir_overlapping_memcpy( void * dest, void const * src, size_t byte
2830
2854
  if ( sd == s_end )
2831
2855
  return;
2832
2856
  }
2857
+ #endif
2833
2858
 
2834
2859
  STBIR_NO_UNROLL_LOOP_START
2835
2860
  do
@@ -3217,10 +3242,9 @@ static void stbir__get_extents( stbir__sampler * samp, stbir__extents * scanline
3217
3242
  newspan->n0 = -left_margin;
3218
3243
  newspan->n1 = ( max_left - min_left ) - left_margin;
3219
3244
  scanline_extents->edge_sizes[0] = 0; // don't need to copy the left margin, since we are directly decoding into the margin
3220
- return;
3221
3245
  }
3222
-
3223
3246
  // if we can't merge the min_left range, add it as a second range
3247
+ else
3224
3248
  if ( ( right_margin ) && ( min_right != 0x7fffffff ) )
3225
3249
  {
3226
3250
  stbir__span * newspan = scanline_extents->spans + 1;
@@ -3235,7 +3259,14 @@ static void stbir__get_extents( stbir__sampler * samp, stbir__extents * scanline
3235
3259
  newspan->n0 = scanline_extents->spans[1].n1 + 1;
3236
3260
  newspan->n1 = scanline_extents->spans[1].n1 + 1 + ( max_right - min_right );
3237
3261
  scanline_extents->edge_sizes[1] = 0; // don't need to copy the right margin, since we are directly decoding into the margin
3238
- return;
3262
+ }
3263
+
3264
+ // sort the spans into write output order
3265
+ if ( ( scanline_extents->spans[1].n1 > scanline_extents->spans[1].n0 ) && ( scanline_extents->spans[0].n0 > scanline_extents->spans[1].n0 ) )
3266
+ {
3267
+ stbir__span tspan = scanline_extents->spans[0];
3268
+ scanline_extents->spans[0] = scanline_extents->spans[1];
3269
+ scanline_extents->spans[1] = tspan;
3239
3270
  }
3240
3271
  }
3241
3272
 
@@ -4560,7 +4591,8 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
4560
4591
  int row = stbir__edge_wrap(edge_vertical, n, stbir_info->vertical.scale_info.input_full_size);
4561
4592
  const void* input_plane_data = ( (char *) stbir_info->input_data ) + (size_t)row * (size_t) stbir_info->input_stride_bytes;
4562
4593
  stbir__span const * spans = stbir_info->scanline_extents.spans;
4563
- float* full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels;
4594
+ float * full_decode_buffer = output_buffer - stbir_info->scanline_extents.conservative.n0 * effective_channels;
4595
+ float * last_decoded = 0;
4564
4596
 
4565
4597
  // if we are on edge_zero, and we get in here with an out of bounds n, then the calculate filters has failed
4566
4598
  STBIR_ASSERT( !(edge_vertical == STBIR_EDGE_ZERO && (n < 0 || n >= stbir_info->vertical.scale_info.input_full_size)) );
@@ -4588,12 +4620,12 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
4588
4620
  if ( stbir_info->in_pixels_cb )
4589
4621
  {
4590
4622
  // call the callback with a temp buffer (that they can choose to use or not). the temp is just right aligned memory in the decode_buffer itself
4591
- input_data = stbir_info->in_pixels_cb( ( (char*) end_decode ) - ( width * input_sample_in_bytes ), input_plane_data, width, spans->pixel_offset_for_input, row, stbir_info->user_data );
4623
+ input_data = stbir_info->in_pixels_cb( ( (char*) end_decode ) - ( width * input_sample_in_bytes ) + ( ( stbir_info->input_type != STBIR_TYPE_FLOAT ) ? ( sizeof(float)*STBIR_INPUT_CALLBACK_PADDING ) : 0 ), input_plane_data, width, spans->pixel_offset_for_input, row, stbir_info->user_data );
4592
4624
  }
4593
4625
 
4594
4626
  STBIR_PROFILE_START( decode );
4595
4627
  // convert the pixels into the float decode_buffer, (we index from end_decode, so that when channels<effective_channels, we are right justified in the buffer)
4596
- stbir_info->decode_pixels( (float*)end_decode - width_times_channels, width_times_channels, input_data );
4628
+ last_decoded = stbir_info->decode_pixels( (float*)end_decode - width_times_channels, width_times_channels, input_data );
4597
4629
  STBIR_PROFILE_END( decode );
4598
4630
 
4599
4631
  if (stbir_info->alpha_weight)
@@ -4628,9 +4660,19 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
4628
4660
  float * marg = full_decode_buffer + x * effective_channels;
4629
4661
  float const * src = full_decode_buffer + stbir__edge_wrap(edge_horizontal, x, input_full_size) * effective_channels;
4630
4662
  STBIR_MEMCPY( marg, src, margin * effective_channels * sizeof(float) );
4663
+ if ( e == 1 ) last_decoded = marg + margin * effective_channels;
4631
4664
  }
4632
4665
  }
4633
4666
  }
4667
+
4668
+ // some of the horizontal gathers read one float off the edge (which is masked out), but we force a zero here to make sure no NaNs leak in
4669
+ // (we can't pre-zero it, because the input callback can use that area as padding)
4670
+ last_decoded[0] = 0.0f;
4671
+
4672
+ // we clear this extra float, because the final output pixel filter kernel might have used one less coeff than the max filter width
4673
+ // when this happens, we do read that pixel from the input, so it too could be NaN, so just zero an extra one.
4674
+ // this fits because each scanline is padded by three floats (STBIR_INPUT_CALLBACK_PADDING)
4675
+ last_decoded[1] = 0.0f;
4634
4676
  }
4635
4677
 
4636
4678
 
@@ -6209,6 +6251,8 @@ static void stbir__resample_vertical_gather(stbir__info const * stbir_info, stbi
6209
6251
  if ( vertical_first )
6210
6252
  {
6211
6253
  // Now resample the gathered vertical data in the horizontal axis into the encode buffer
6254
+ decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3
6255
+ decode_buffer[ width_times_channels+1 ] = 0.0f;
6212
6256
  stbir__resample_horizontal_gather(stbir_info, encode_buffer, decode_buffer STBIR_ONLY_PROFILE_SET_SPLIT_INFO );
6213
6257
  }
6214
6258
 
@@ -6380,6 +6424,8 @@ static void stbir__vertical_scatter_loop( stbir__info const * stbir_info, stbir_
6380
6424
  void * scanline_scatter_buffer;
6381
6425
  void * scanline_scatter_buffer_end;
6382
6426
  int on_first_input_y, last_input_y;
6427
+ int width = (stbir_info->vertical_first) ? ( stbir_info->scanline_extents.conservative.n1-stbir_info->scanline_extents.conservative.n0+1 ) : stbir_info->horizontal.scale_info.output_sub_size;
6428
+ int width_times_channels = stbir_info->effective_channels * width;
6383
6429
 
6384
6430
  STBIR_ASSERT( !stbir_info->vertical.is_gather );
6385
6431
 
@@ -6414,7 +6460,12 @@ static void stbir__vertical_scatter_loop( stbir__info const * stbir_info, stbir_
6414
6460
 
6415
6461
  // mark all the buffers as empty to start
6416
6462
  for( y = 0 ; y < stbir_info->ring_buffer_num_entries ; y++ )
6417
- stbir__get_ring_buffer_entry( stbir_info, split_info, y )[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter
6463
+ {
6464
+ float * decode_buffer = stbir__get_ring_buffer_entry( stbir_info, split_info, y );
6465
+ decode_buffer[ width_times_channels ] = 0.0f; // clear two over for horizontals with a remnant of 3
6466
+ decode_buffer[ width_times_channels+1 ] = 0.0f;
6467
+ decode_buffer[0] = STBIR__FLOAT_EMPTY_MARKER; // only used on scatter
6468
+ }
6418
6469
 
6419
6470
  // do the loop in input space
6420
6471
  on_first_input_y = 1; last_input_y = start_input_y;
@@ -6562,7 +6613,7 @@ static void stbir__set_sampler(stbir__sampler * samp, stbir_filter filter, stbir
6562
6613
  samp->num_contributors = stbir__get_contributors(samp, samp->is_gather);
6563
6614
 
6564
6615
  samp->contributors_size = samp->num_contributors * sizeof(stbir__contributors);
6565
- samp->coefficients_size = samp->num_contributors * samp->coefficient_width * sizeof(float) + sizeof(float); // extra sizeof(float) is padding
6616
+ samp->coefficients_size = samp->num_contributors * samp->coefficient_width * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra sizeof(float) is padding
6566
6617
 
6567
6618
  samp->gather_prescatter_contributors = 0;
6568
6619
  samp->gather_prescatter_coefficients = 0;
@@ -6667,7 +6718,7 @@ static void stbir__get_conservative_extents( stbir__sampler * samp, stbir__contr
6667
6718
  }
6668
6719
  }
6669
6720
 
6670
- static void stbir__get_split_info( stbir__per_split_info* split_info, int splits, int output_height, int vertical_pixel_margin, int input_full_height )
6721
+ static void stbir__get_split_info( stbir__per_split_info* split_info, int splits, int output_height, int vertical_pixel_margin, int input_full_height, int is_gather, stbir__contributors * contribs )
6671
6722
  {
6672
6723
  int i, cur;
6673
6724
  int left = output_height;
@@ -6676,9 +6727,58 @@ static void stbir__get_split_info( stbir__per_split_info* split_info, int splits
6676
6727
  for( i = 0 ; i < splits ; i++ )
6677
6728
  {
6678
6729
  int each;
6730
+
6679
6731
  split_info[i].start_output_y = cur;
6680
6732
  each = left / ( splits - i );
6681
6733
  split_info[i].end_output_y = cur + each;
6734
+
6735
+ // ok, when we are gathering, we need to make sure we are starting on a y offset that doesn't have
6736
+ // a "special" set of coefficients. Basically, with exactly the right filter at exactly the right
6737
+ // resize at exactly the right phase, some of the coefficients can be zero. When they are zero, we
6738
+ // don't process them at all. But this leads to a tricky thing with the thread splits, where we
6739
+ // might have a set of two coeffs like this for example: (4,4) and (3,6). The 4,4 means there was
6740
+ // just one single coeff because things worked out perfectly (normally, they all have 4 coeffs
6741
+ // like the range 3,6). The problem is that if we start right on the (4,4) on a brand new thread,
6742
+ // then when we get to (3,6), we don't have the "3" sample in memory (because we didn't load
6743
+ // it on the initial (4,4) range because it didn't have a 3 (we only add new samples that are
6744
+ // larger than our existing samples - it's just how the eviction works). So, our solution here
6745
+ // is pretty simple, if we start right on a range that has samples that start earlier, then we
6746
+ // simply bump up our previous thread split range to include it, and then start this thread's
6747
+ // range with the smaller sample. It just moves one scanline from one thread split to another,
6748
+ // so that we end with the unusual one, instead of start with it. To do this, we check 2-4
6749
+ // samples at each thread split start and then occasionally move them.
6750
+
6751
+ if ( ( is_gather ) && ( i ) )
6752
+ {
6753
+ stbir__contributors * small_contribs;
6754
+ int j, smallest, stop, start_n0;
6755
+ stbir__contributors * split_contribs = contribs + cur;
6756
+
6757
+ // scan for a max of 3x the filter width or until the next thread split
6758
+ stop = vertical_pixel_margin * 3;
6759
+ if ( each < stop )
6760
+ stop = each;
6761
+
6762
+ // loops a few times before early out
6763
+ smallest = 0;
6764
+ small_contribs = split_contribs;
6765
+ start_n0 = small_contribs->n0;
6766
+ for( j = 1 ; j <= stop ; j++ )
6767
+ {
6768
+ ++split_contribs;
6769
+ if ( split_contribs->n0 > start_n0 )
6770
+ break;
6771
+ if ( split_contribs->n0 < small_contribs->n0 )
6772
+ {
6773
+ small_contribs = split_contribs;
6774
+ smallest = j;
6775
+ }
6776
+ }
6777
+
6778
+ split_info[i-1].end_output_y += smallest;
6779
+ split_info[i].start_output_y += smallest;
6780
+ }
6781
+
6682
6782
  cur += each;
6683
6783
  left -= each;
6684
6784
 
@@ -6927,7 +7027,8 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
6927
7027
  void * alloced = 0;
6928
7028
  size_t alloced_total = 0;
6929
7029
  int vertical_first;
6930
- int decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size, alloc_ring_buffer_num_entries;
7030
+ size_t decode_buffer_size, ring_buffer_length_bytes, ring_buffer_size, vertical_buffer_size;
7031
+ int alloc_ring_buffer_num_entries;
6931
7032
 
6932
7033
  int alpha_weighting_type = 0; // 0=none, 1=simple, 2=fancy
6933
7034
  int conservative_split_output_size = stbir__get_max_split( splits, vertical->scale_info.output_sub_size );
@@ -6972,14 +7073,16 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
6972
7073
  vertical_first = stbir__should_do_vertical_first( stbir__compute_weights[ (int)stbir_channel_count_index[ effective_channels ] ], horizontal->filter_pixel_width, horizontal->scale_info.scale, horizontal->scale_info.output_sub_size, vertical->filter_pixel_width, vertical->scale_info.scale, vertical->scale_info.output_sub_size, vertical->is_gather, STBIR__V_FIRST_INFO_POINTER );
6973
7074
 
6974
7075
  // sometimes read one float off in some of the unrolled loops (with a weight of zero coeff, so it doesn't have an effect)
6975
- decode_buffer_size = ( conservative->n1 - conservative->n0 + 1 ) * effective_channels * sizeof(float) + sizeof(float); // extra float for padding
7076
+ // we use a few extra floats instead of just 1, so that input callback buffer can overlap with the decode buffer without
7077
+ // the conversion routines overwriting the callback input data.
7078
+ decode_buffer_size = ( conservative->n1 - conservative->n0 + 1 ) * effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for input callback stagger
6976
7079
 
6977
7080
  #if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8)
6978
7081
  if ( effective_channels == 3 )
6979
7082
  decode_buffer_size += sizeof(float); // avx in 3 channel mode needs one float at the start of the buffer (only with separate allocations)
6980
7083
  #endif
6981
7084
 
6982
- ring_buffer_length_bytes = horizontal->scale_info.output_sub_size * effective_channels * sizeof(float) + sizeof(float); // extra float for padding
7085
+ ring_buffer_length_bytes = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float)*STBIR_INPUT_CALLBACK_PADDING; // extra floats for padding
6983
7086
 
6984
7087
  // if we do vertical first, the ring buffer holds a whole decoded line
6985
7088
  if ( vertical_first )
@@ -6994,13 +7097,13 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
6994
7097
  if ( ( !vertical->is_gather ) && ( alloc_ring_buffer_num_entries > conservative_split_output_size ) )
6995
7098
  alloc_ring_buffer_num_entries = conservative_split_output_size;
6996
7099
 
6997
- ring_buffer_size = alloc_ring_buffer_num_entries * ring_buffer_length_bytes;
7100
+ ring_buffer_size = (size_t)alloc_ring_buffer_num_entries * (size_t)ring_buffer_length_bytes;
6998
7101
 
6999
7102
  // The vertical buffer is used differently, depending on whether we are scattering
7000
7103
  // the vertical scanlines, or gathering them.
7001
7104
  // If scattering, it's used as the temp buffer to accumulate each output.
7002
7105
  // If gathering, it's just the output buffer.
7003
- vertical_buffer_size = horizontal->scale_info.output_sub_size * effective_channels * sizeof(float) + sizeof(float); // extra float for padding
7106
+ vertical_buffer_size = (size_t)horizontal->scale_info.output_sub_size * (size_t)effective_channels * sizeof(float) + sizeof(float); // extra float for padding
7004
7107
 
7005
7108
  // we make two passes through this loop, 1st to add everything up, 2nd to allocate and init
7006
7109
  for(;;)
@@ -7013,7 +7116,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
7013
7116
  #ifdef STBIR__SEPARATE_ALLOCATIONS
7014
7117
  #define STBIR__NEXT_PTR( ptr, size, ntype ) if ( alloced ) { void * p = STBIR_MALLOC( size, user_data); if ( p == 0 ) { stbir__free_internal_mem( info ); return 0; } (ptr) = (ntype*)p; }
7015
7118
  #else
7016
- #define STBIR__NEXT_PTR( ptr, size, ntype ) advance_mem = (void*) ( ( ((size_t)advance_mem) + 15 ) & ~15 ); if ( alloced ) ptr = (ntype*)advance_mem; advance_mem = ((char*)advance_mem) + (size);
7119
+ #define STBIR__NEXT_PTR( ptr, size, ntype ) advance_mem = (void*) ( ( ((size_t)advance_mem) + 15 ) & ~15 ); if ( alloced ) ptr = (ntype*)advance_mem; advance_mem = (char*)(((size_t)advance_mem) + (size));
7017
7120
  #endif
7018
7121
 
7019
7122
  STBIR__NEXT_PTR( info, sizeof( stbir__info ), stbir__info );
@@ -7036,9 +7139,9 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
7036
7139
 
7037
7140
  info->offset_x = new_x;
7038
7141
  info->offset_y = new_y;
7039
- info->alloc_ring_buffer_num_entries = alloc_ring_buffer_num_entries;
7142
+ info->alloc_ring_buffer_num_entries = (int)alloc_ring_buffer_num_entries;
7040
7143
  info->ring_buffer_num_entries = 0;
7041
- info->ring_buffer_length_bytes = ring_buffer_length_bytes;
7144
+ info->ring_buffer_length_bytes = (int)ring_buffer_length_bytes;
7042
7145
  info->splits = splits;
7043
7146
  info->vertical_first = vertical_first;
7044
7147
 
@@ -7119,14 +7222,14 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
7119
7222
  // alloc memory for to-be-pivoted coeffs (if necessary)
7120
7223
  if ( vertical->is_gather == 0 )
7121
7224
  {
7122
- int both;
7123
- int temp_mem_amt;
7225
+ size_t both;
7226
+ size_t temp_mem_amt;
7124
7227
 
7125
7228
  // when in vertical scatter mode, we first build the coefficients in gather mode, and then pivot after,
7126
7229
  // that means we need two buffers, so we try to use the decode buffer and ring buffer for this. if that
7127
7230
  // is too small, we just allocate extra memory to use as this temp.
7128
7231
 
7129
- both = vertical->gather_prescatter_contributors_size + vertical->gather_prescatter_coefficients_size;
7232
+ both = (size_t)vertical->gather_prescatter_contributors_size + (size_t)vertical->gather_prescatter_coefficients_size;
7130
7233
 
7131
7234
  #ifdef STBIR__SEPARATE_ALLOCATIONS
7132
7235
  temp_mem_amt = decode_buffer_size;
@@ -7136,7 +7239,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
7136
7239
  --temp_mem_amt; // avx in 3 channel mode needs one float at the start of the buffer
7137
7240
  #endif
7138
7241
  #else
7139
- temp_mem_amt = ( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * splits;
7242
+ temp_mem_amt = (size_t)( decode_buffer_size + ring_buffer_size + vertical_buffer_size ) * (size_t)splits;
7140
7243
  #endif
7141
7244
  if ( temp_mem_amt >= both )
7142
7245
  {
@@ -7222,7 +7325,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
7222
7325
  }
7223
7326
 
7224
7327
  // setup the vertical split ranges
7225
- stbir__get_split_info( info->split_info, info->splits, info->vertical.scale_info.output_sub_size, info->vertical.filter_pixel_margin, info->vertical.scale_info.input_full_size );
7328
+ stbir__get_split_info( info->split_info, info->splits, info->vertical.scale_info.output_sub_size, info->vertical.filter_pixel_margin, info->vertical.scale_info.input_full_size, info->vertical.is_gather, info->vertical.contributors );
7226
7329
 
7227
7330
  // now we know precisely how many entries we need
7228
7331
  info->ring_buffer_num_entries = info->vertical.extent_info.widest;
@@ -7231,39 +7334,7 @@ static stbir__info * stbir__alloc_internal_mem_and_build_samplers( stbir__sample
7231
7334
  if ( ( !info->vertical.is_gather ) && ( info->ring_buffer_num_entries > conservative_split_output_size ) )
7232
7335
  info->ring_buffer_num_entries = conservative_split_output_size;
7233
7336
  STBIR_ASSERT( info->ring_buffer_num_entries <= info->alloc_ring_buffer_num_entries );
7234
-
7235
- // a few of the horizontal gather functions read past the end of the decode (but mask it out),
7236
- // so put in normal values so no snans or denormals accidentally sneak in (also, in the ring
7237
- // buffer for vertical first)
7238
- for( i = 0 ; i < splits ; i++ )
7239
- {
7240
- int t, ofs, start;
7241
-
7242
- ofs = decode_buffer_size / 4;
7243
-
7244
- #if defined( STBIR__SEPARATE_ALLOCATIONS ) && defined(STBIR_SIMD8)
7245
- if ( effective_channels == 3 )
7246
- --ofs; // avx in 3 channel mode needs one float at the start of the buffer, so we snap back for clearing
7247
- #endif
7248
-
7249
- start = ofs - 4;
7250
- if ( start < 0 ) start = 0;
7251
-
7252
- for( t = start ; t < ofs; t++ )
7253
- info->split_info[i].decode_buffer[ t ] = 9999.0f;
7254
-
7255
- if ( vertical_first )
7256
- {
7257
- int j;
7258
- for( j = 0; j < info->ring_buffer_num_entries ; j++ )
7259
- {
7260
- for( t = start ; t < ofs; t++ )
7261
- stbir__get_ring_buffer_entry( info, info->split_info + i, j )[ t ] = 9999.0f;
7262
- }
7263
- }
7264
- }
7265
7337
  }
7266
-
7267
7338
  #undef STBIR__NEXT_PTR
7268
7339
 
7269
7340
 
@@ -7818,7 +7889,7 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits )
7818
7889
 
7819
7890
  stbir__set_sampler(&horizontal, resize->horizontal_filter, resize->horizontal_filter_kernel, resize->horizontal_filter_support, resize->horizontal_edge, &horizontal.scale_info, 1, resize->user_data );
7820
7891
  stbir__get_conservative_extents( &horizontal, &conservative, resize->user_data );
7821
- stbir__set_sampler(&vertical, resize->vertical_filter, resize->horizontal_filter_kernel, resize->vertical_filter_support, resize->vertical_edge, &vertical.scale_info, 0, resize->user_data );
7892
+ stbir__set_sampler(&vertical, resize->vertical_filter, resize->vertical_filter_kernel, resize->vertical_filter_support, resize->vertical_edge, &vertical.scale_info, 0, resize->user_data );
7822
7893
 
7823
7894
  if ( ( vertical.scale_info.output_sub_size / splits ) < STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ) // each split should be a minimum of 4 scanlines (handwavey choice)
7824
7895
  {
@@ -7943,93 +8014,99 @@ STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start
7943
8014
  return stbir__perform_resize( resize->samplers, split_start, split_count );
7944
8015
  }
7945
8016
 
7946
- static int stbir__check_output_stuff( void ** ret_ptr, int * ret_pitch, void * output_pixels, int type_size, int output_w, int output_h, int output_stride_in_bytes, stbir_internal_pixel_layout pixel_layout )
8017
+
8018
+ static void * stbir_quick_resize_helper( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
8019
+ void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
8020
+ stbir_pixel_layout pixel_layout, stbir_datatype data_type, stbir_edge edge, stbir_filter filter )
7947
8021
  {
7948
- size_t size;
7949
- int pitch;
7950
- void * ptr;
8022
+ STBIR_RESIZE resize;
8023
+ int scanline_output_in_bytes;
8024
+ int positive_output_stride_in_bytes;
8025
+ void * start_ptr;
8026
+ void * free_ptr;
7951
8027
 
7952
- pitch = output_w * type_size * stbir__pixel_channels[ pixel_layout ];
7953
- if ( pitch == 0 )
8028
+ scanline_output_in_bytes = output_w * stbir__type_size[ data_type ] * stbir__pixel_channels[ stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ];
8029
+ if ( scanline_output_in_bytes == 0 )
7954
8030
  return 0;
7955
8031
 
8032
+ // if zero stride, use scanline output
7956
8033
  if ( output_stride_in_bytes == 0 )
7957
- output_stride_in_bytes = pitch;
8034
+ output_stride_in_bytes = scanline_output_in_bytes;
7958
8035
 
7959
- if ( output_stride_in_bytes < pitch )
7960
- return 0;
8036
+ // abs value for inverted images (negative pitches)
8037
+ positive_output_stride_in_bytes = output_stride_in_bytes;
8038
+ if ( positive_output_stride_in_bytes < 0 )
8039
+ positive_output_stride_in_bytes = -positive_output_stride_in_bytes;
7961
8040
 
7962
- size = (size_t)output_stride_in_bytes * (size_t)output_h;
7963
- if ( size == 0 )
8041
+ // is the requested stride smaller than the scanline output? if so, just fail
8042
+ if ( positive_output_stride_in_bytes < scanline_output_in_bytes )
7964
8043
  return 0;
7965
8044
 
7966
- *ret_ptr = 0;
7967
- *ret_pitch = output_stride_in_bytes;
8045
+ start_ptr = output_pixels;
8046
+ free_ptr = 0; // no free pointer, since they passed buffer to use
7968
8047
 
8048
+ // did they pass a zero for the dest? if so, allocate the buffer
7969
8049
  if ( output_pixels == 0 )
7970
8050
  {
7971
- ptr = STBIR_MALLOC( size, 0 );
7972
- if ( ptr == 0 )
8051
+ size_t size;
8052
+ char * ptr;
8053
+
8054
+ size = (size_t)positive_output_stride_in_bytes * (size_t)output_h;
8055
+ if ( size == 0 )
7973
8056
  return 0;
7974
8057
 
7975
- *ret_ptr = ptr;
7976
- *ret_pitch = pitch;
7977
- }
7978
-
7979
- return 1;
7980
- }
7981
-
8058
+ ptr = (char*) STBIR_MALLOC( size, 0 );
8059
+ if ( ptr == 0 )
8060
+ return 0;
7982
8061
 
7983
- STBIRDEF unsigned char * stbir_resize_uint8_linear( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
7984
- unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
7985
- stbir_pixel_layout pixel_layout )
7986
- {
7987
- STBIR_RESIZE resize;
7988
- unsigned char * optr;
7989
- int opitch;
8062
+ free_ptr = ptr;
7990
8063
 
7991
- if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, sizeof( unsigned char ), output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) )
7992
- return 0;
8064
+ // point at the last scanline, if they requested a flipped image
8065
+ if ( output_stride_in_bytes < 0 )
8066
+ start_ptr = ptr + ( (size_t)positive_output_stride_in_bytes * (size_t)( output_h - 1 ) );
8067
+ else
8068
+ start_ptr = ptr;
8069
+ }
7993
8070
 
8071
+ // ok, now do the resize
7994
8072
  stbir_resize_init( &resize,
7995
8073
  input_pixels, input_w, input_h, input_stride_in_bytes,
7996
- (optr) ? optr : output_pixels, output_w, output_h, opitch,
7997
- pixel_layout, STBIR_TYPE_UINT8 );
8074
+ start_ptr, output_w, output_h, output_stride_in_bytes,
8075
+ pixel_layout, data_type );
8076
+
8077
+ resize.horizontal_edge = edge;
8078
+ resize.vertical_edge = edge;
8079
+ resize.horizontal_filter = filter;
8080
+ resize.vertical_filter = filter;
7998
8081
 
7999
8082
  if ( !stbir_resize_extended( &resize ) )
8000
8083
  {
8001
- if ( optr )
8002
- STBIR_FREE( optr, 0 );
8084
+ if ( free_ptr )
8085
+ STBIR_FREE( free_ptr, 0 );
8003
8086
  return 0;
8004
8087
  }
8005
8088
 
8006
- return (optr) ? optr : output_pixels;
8089
+ return (free_ptr) ? free_ptr : start_ptr;
8090
+ }
8091
+
8092
+
8093
+
8094
+ STBIRDEF unsigned char * stbir_resize_uint8_linear( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
8095
+ unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
8096
+ stbir_pixel_layout pixel_layout )
8097
+ {
8098
+ return (unsigned char *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
8099
+ output_pixels, output_w, output_h, output_stride_in_bytes,
8100
+ pixel_layout, STBIR_TYPE_UINT8, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT );
8007
8101
  }
8008
8102
 
8009
8103
  STBIRDEF unsigned char * stbir_resize_uint8_srgb( const unsigned char *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
8010
8104
  unsigned char *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
8011
8105
  stbir_pixel_layout pixel_layout )
8012
8106
  {
8013
- STBIR_RESIZE resize;
8014
- unsigned char * optr;
8015
- int opitch;
8016
-
8017
- if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, sizeof( unsigned char ), output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) )
8018
- return 0;
8019
-
8020
- stbir_resize_init( &resize,
8021
- input_pixels, input_w, input_h, input_stride_in_bytes,
8022
- (optr) ? optr : output_pixels, output_w, output_h, opitch,
8023
- pixel_layout, STBIR_TYPE_UINT8_SRGB );
8024
-
8025
- if ( !stbir_resize_extended( &resize ) )
8026
- {
8027
- if ( optr )
8028
- STBIR_FREE( optr, 0 );
8029
- return 0;
8030
- }
8031
-
8032
- return (optr) ? optr : output_pixels;
8107
+ return (unsigned char *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
8108
+ output_pixels, output_w, output_h, output_stride_in_bytes,
8109
+ pixel_layout, STBIR_TYPE_UINT8_SRGB, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT );
8033
8110
  }
8034
8111
 
8035
8112
 
@@ -8037,59 +8114,20 @@ STBIRDEF float * stbir_resize_float_linear( const float *input_pixels , int inpu
8037
8114
  float *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
8038
8115
  stbir_pixel_layout pixel_layout )
8039
8116
  {
8040
- STBIR_RESIZE resize;
8041
- float * optr;
8042
- int opitch;
8043
-
8044
- if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, sizeof( float ), output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) )
8045
- return 0;
8046
-
8047
- stbir_resize_init( &resize,
8048
- input_pixels, input_w, input_h, input_stride_in_bytes,
8049
- (optr) ? optr : output_pixels, output_w, output_h, opitch,
8050
- pixel_layout, STBIR_TYPE_FLOAT );
8051
-
8052
- if ( !stbir_resize_extended( &resize ) )
8053
- {
8054
- if ( optr )
8055
- STBIR_FREE( optr, 0 );
8056
- return 0;
8057
- }
8058
-
8059
- return (optr) ? optr : output_pixels;
8117
+ return (float *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
8118
+ output_pixels, output_w, output_h, output_stride_in_bytes,
8119
+ pixel_layout, STBIR_TYPE_FLOAT, STBIR_EDGE_CLAMP, STBIR_FILTER_DEFAULT );
8060
8120
  }
8061
8121
 
8062
8122
 
8063
8123
  STBIRDEF void * stbir_resize( const void *input_pixels , int input_w , int input_h, int input_stride_in_bytes,
8064
8124
  void *output_pixels, int output_w, int output_h, int output_stride_in_bytes,
8065
- stbir_pixel_layout pixel_layout, stbir_datatype data_type,
8066
- stbir_edge edge, stbir_filter filter )
8125
+ stbir_pixel_layout pixel_layout, stbir_datatype data_type,
8126
+ stbir_edge edge, stbir_filter filter )
8067
8127
  {
8068
- STBIR_RESIZE resize;
8069
- float * optr;
8070
- int opitch;
8071
-
8072
- if ( !stbir__check_output_stuff( (void**)&optr, &opitch, output_pixels, stbir__type_size[data_type], output_w, output_h, output_stride_in_bytes, stbir__pixel_layout_convert_public_to_internal[ pixel_layout ] ) )
8073
- return 0;
8074
-
8075
- stbir_resize_init( &resize,
8076
- input_pixels, input_w, input_h, input_stride_in_bytes,
8077
- (optr) ? optr : output_pixels, output_w, output_h, output_stride_in_bytes,
8078
- pixel_layout, data_type );
8079
-
8080
- resize.horizontal_edge = edge;
8081
- resize.vertical_edge = edge;
8082
- resize.horizontal_filter = filter;
8083
- resize.vertical_filter = filter;
8084
-
8085
- if ( !stbir_resize_extended( &resize ) )
8086
- {
8087
- if ( optr )
8088
- STBIR_FREE( optr, 0 );
8089
- return 0;
8090
- }
8091
-
8092
- return (optr) ? optr : output_pixels;
8128
+ return (void *) stbir_quick_resize_helper( input_pixels , input_w , input_h, input_stride_in_bytes,
8129
+ output_pixels, output_w, output_h, output_stride_in_bytes,
8130
+ pixel_layout, data_type, edge, filter );
8093
8131
  }
8094
8132
 
8095
8133
  #ifdef STBIR_PROFILE
@@ -8226,7 +8264,7 @@ STBIRDEF void stbir_resize_extended_profile_info( STBIR_PROFILE_INFO * info, STB
8226
8264
  #define stbir__encode_simdfX_unflip stbir__encode_simdf4_unflip
8227
8265
  #endif
8228
8266
 
8229
- static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * decodep, int width_times_channels, void const * inputp )
8267
+ static float * STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * decodep, int width_times_channels, void const * inputp )
8230
8268
  {
8231
8269
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
8232
8270
  float * decode_end = (float*) decode + width_times_channels;
@@ -8286,7 +8324,7 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco
8286
8324
  decode = decode_end; // backup and do last couple
8287
8325
  input = end_input_m16;
8288
8326
  }
8289
- return;
8327
+ return decode_end + 16;
8290
8328
  }
8291
8329
  #endif
8292
8330
 
@@ -8324,6 +8362,8 @@ static void STBIR__CODER_NAME( stbir__decode_uint8_linear_scaled )( float * deco
8324
8362
  input += stbir__coder_min_num;
8325
8363
  }
8326
8364
  #endif
8365
+
8366
+ return decode_end;
8327
8367
  }
8328
8368
 
8329
8369
  static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outputp, int width_times_channels, float const * encode )
@@ -8443,7 +8483,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear_scaled )( void * outpu
8443
8483
  #endif
8444
8484
  }
8445
8485
 
8446
- static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int width_times_channels, void const * inputp )
8486
+ static float * STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int width_times_channels, void const * inputp )
8447
8487
  {
8448
8488
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
8449
8489
  float * decode_end = (float*) decode + width_times_channels;
@@ -8497,7 +8537,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int
8497
8537
  decode = decode_end; // backup and do last couple
8498
8538
  input = end_input_m16;
8499
8539
  }
8500
- return;
8540
+ return decode_end + 16;
8501
8541
  }
8502
8542
  #endif
8503
8543
 
@@ -8535,6 +8575,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_linear)( float * decodep, int
8535
8575
  input += stbir__coder_min_num;
8536
8576
  }
8537
8577
  #endif
8578
+ return decode_end;
8538
8579
  }
8539
8580
 
8540
8581
  static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int width_times_channels, float const * encode )
@@ -8636,10 +8677,10 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_linear )( void * outputp, int
8636
8677
  #endif
8637
8678
  }
8638
8679
 
8639
- static void STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int width_times_channels, void const * inputp )
8680
+ static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int width_times_channels, void const * inputp )
8640
8681
  {
8641
8682
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
8642
- float const * decode_end = (float*) decode + width_times_channels;
8683
+ float * decode_end = (float*) decode + width_times_channels;
8643
8684
  unsigned char const * input = (unsigned char const *)inputp;
8644
8685
 
8645
8686
  // try to do blocks of 4 when you can
@@ -8674,6 +8715,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb)( float * decodep, int wi
8674
8715
  input += stbir__coder_min_num;
8675
8716
  }
8676
8717
  #endif
8718
+ return decode_end;
8677
8719
  }
8678
8720
 
8679
8721
  #define stbir__min_max_shift20( i, f ) \
@@ -8826,11 +8868,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb )( void * outputp, int w
8826
8868
 
8827
8869
  #if ( stbir__coder_min_num == 4 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
8828
8870
 
8829
- static void STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
8871
+ static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
8830
8872
  {
8831
8873
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
8832
- float const * decode_end = (float*) decode + width_times_channels;
8874
+ float * decode_end = (float*) decode + width_times_channels;
8833
8875
  unsigned char const * input = (unsigned char const *)inputp;
8876
+
8834
8877
  do {
8835
8878
  decode[0] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order0] ];
8836
8879
  decode[1] = stbir__srgb_uchar_to_linear_float[ input[stbir__decode_order1] ];
@@ -8839,6 +8882,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb4_linearalpha)( float * de
8839
8882
  input += 4;
8840
8883
  decode += 4;
8841
8884
  } while( decode < decode_end );
8885
+ return decode_end;
8842
8886
  }
8843
8887
 
8844
8888
 
@@ -8911,11 +8955,12 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb4_linearalpha )( void * o
8911
8955
 
8912
8956
  #if ( stbir__coder_min_num == 2 ) || ( ( stbir__coder_min_num == 1 ) && ( !defined(stbir__decode_swizzle) ) )
8913
8957
 
8914
- static void STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
8958
+ static float * STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * decodep, int width_times_channels, void const * inputp )
8915
8959
  {
8916
8960
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
8917
- float const * decode_end = (float*) decode + width_times_channels;
8961
+ float * decode_end = (float*) decode + width_times_channels;
8918
8962
  unsigned char const * input = (unsigned char const *)inputp;
8963
+
8919
8964
  decode += 4;
8920
8965
  while( decode <= decode_end )
8921
8966
  {
@@ -8932,6 +8977,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint8_srgb2_linearalpha)( float * de
8932
8977
  decode[0] = stbir__srgb_uchar_to_linear_float[ stbir__decode_order0 ];
8933
8978
  decode[1] = ( (float) input[stbir__decode_order1] ) * stbir__max_uint8_as_float_inverted;
8934
8979
  }
8980
+ return decode_end;
8935
8981
  }
8936
8982
 
8937
8983
  static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * outputp, int width_times_channels, float const * encode )
@@ -8997,7 +9043,7 @@ static void STBIR__CODER_NAME( stbir__encode_uint8_srgb2_linearalpha )( void * o
8997
9043
 
8998
9044
  #endif
8999
9045
 
9000
- static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decodep, int width_times_channels, void const * inputp )
9046
+ static float * STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decodep, int width_times_channels, void const * inputp )
9001
9047
  {
9002
9048
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
9003
9049
  float * decode_end = (float*) decode + width_times_channels;
@@ -9045,7 +9091,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod
9045
9091
  decode = decode_end; // backup and do last couple
9046
9092
  input = end_input_m8;
9047
9093
  }
9048
- return;
9094
+ return decode_end + 8;
9049
9095
  }
9050
9096
  #endif
9051
9097
 
@@ -9083,6 +9129,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear_scaled)( float * decod
9083
9129
  input += stbir__coder_min_num;
9084
9130
  }
9085
9131
  #endif
9132
+ return decode_end;
9086
9133
  }
9087
9134
 
9088
9135
 
@@ -9202,7 +9249,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear_scaled)( void * output
9202
9249
  #endif
9203
9250
  }
9204
9251
 
9205
- static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int width_times_channels, void const * inputp )
9252
+ static float * STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int width_times_channels, void const * inputp )
9206
9253
  {
9207
9254
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
9208
9255
  float * decode_end = (float*) decode + width_times_channels;
@@ -9247,7 +9294,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int
9247
9294
  decode = decode_end; // backup and do last couple
9248
9295
  input = end_input_m8;
9249
9296
  }
9250
- return;
9297
+ return decode_end + 8;
9251
9298
  }
9252
9299
  #endif
9253
9300
 
@@ -9285,6 +9332,7 @@ static void STBIR__CODER_NAME(stbir__decode_uint16_linear)( float * decodep, int
9285
9332
  input += stbir__coder_min_num;
9286
9333
  }
9287
9334
  #endif
9335
+ return decode_end;
9288
9336
  }
9289
9337
 
9290
9338
  static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int width_times_channels, float const * encode )
@@ -9385,7 +9433,7 @@ static void STBIR__CODER_NAME(stbir__encode_uint16_linear)( void * outputp, int
9385
9433
  #endif
9386
9434
  }
9387
9435
 
9388
- static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, int width_times_channels, void const * inputp )
9436
+ static float * STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep, int width_times_channels, void const * inputp )
9389
9437
  {
9390
9438
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
9391
9439
  float * decode_end = (float*) decode + width_times_channels;
@@ -9431,7 +9479,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep,
9431
9479
  decode = decode_end; // backup and do last couple
9432
9480
  input = end_input_m8;
9433
9481
  }
9434
- return;
9482
+ return decode_end + 8;
9435
9483
  }
9436
9484
  #endif
9437
9485
 
@@ -9469,6 +9517,7 @@ static void STBIR__CODER_NAME(stbir__decode_half_float_linear)( float * decodep,
9469
9517
  input += stbir__coder_min_num;
9470
9518
  }
9471
9519
  #endif
9520
+ return decode_end;
9472
9521
  }
9473
9522
 
9474
9523
  static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp, int width_times_channels, float const * encode )
@@ -9555,7 +9604,7 @@ static void STBIR__CODER_NAME( stbir__encode_half_float_linear )( void * outputp
9555
9604
  #endif
9556
9605
  }
9557
9606
 
9558
- static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int width_times_channels, void const * inputp )
9607
+ static float * STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int width_times_channels, void const * inputp )
9559
9608
  {
9560
9609
  #ifdef stbir__decode_swizzle
9561
9610
  float STBIR_STREAMOUT_PTR( * ) decode = decodep;
@@ -9609,7 +9658,7 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int
9609
9658
  decode = decode_end; // backup and do last couple
9610
9659
  input = end_input_m16;
9611
9660
  }
9612
- return;
9661
+ return decode_end + 16;
9613
9662
  }
9614
9663
  #endif
9615
9664
 
@@ -9647,12 +9696,15 @@ static void STBIR__CODER_NAME(stbir__decode_float_linear)( float * decodep, int
9647
9696
  input += stbir__coder_min_num;
9648
9697
  }
9649
9698
  #endif
9699
+ return decode_end;
9650
9700
 
9651
9701
  #else
9652
9702
 
9653
9703
  if ( (void*)decodep != inputp )
9654
9704
  STBIR_MEMCPY( decodep, inputp, width_times_channels * sizeof( float ) );
9655
9705
 
9706
+ return decodep + width_times_channels;
9707
+
9656
9708
  #endif
9657
9709
  }
9658
9710