sequenzo 0.1.31__cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. _sequenzo_fastcluster.cpython-310-darwin.so +0 -0
  2. sequenzo/__init__.py +349 -0
  3. sequenzo/big_data/__init__.py +12 -0
  4. sequenzo/big_data/clara/__init__.py +26 -0
  5. sequenzo/big_data/clara/clara.py +476 -0
  6. sequenzo/big_data/clara/utils/__init__.py +27 -0
  7. sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  8. sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  9. sequenzo/big_data/clara/utils/get_weighted_diss.cpython-310-darwin.so +0 -0
  10. sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  11. sequenzo/big_data/clara/visualization.py +88 -0
  12. sequenzo/clustering/KMedoids.py +178 -0
  13. sequenzo/clustering/__init__.py +30 -0
  14. sequenzo/clustering/clustering_c_code.cpython-310-darwin.so +0 -0
  15. sequenzo/clustering/hierarchical_clustering.py +1256 -0
  16. sequenzo/clustering/sequenzo_fastcluster/fastcluster.py +495 -0
  17. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster.cpp +1877 -0
  18. sequenzo/clustering/sequenzo_fastcluster/src/fastcluster_python.cpp +1264 -0
  19. sequenzo/clustering/src/KMedoid.cpp +263 -0
  20. sequenzo/clustering/src/PAM.cpp +237 -0
  21. sequenzo/clustering/src/PAMonce.cpp +265 -0
  22. sequenzo/clustering/src/cluster_quality.cpp +496 -0
  23. sequenzo/clustering/src/cluster_quality.h +128 -0
  24. sequenzo/clustering/src/cluster_quality_backup.cpp +570 -0
  25. sequenzo/clustering/src/module.cpp +228 -0
  26. sequenzo/clustering/src/weightedinertia.cpp +111 -0
  27. sequenzo/clustering/utils/__init__.py +27 -0
  28. sequenzo/clustering/utils/disscenter.py +122 -0
  29. sequenzo/data_preprocessing/__init__.py +22 -0
  30. sequenzo/data_preprocessing/helpers.py +303 -0
  31. sequenzo/datasets/__init__.py +41 -0
  32. sequenzo/datasets/biofam.csv +2001 -0
  33. sequenzo/datasets/biofam_child_domain.csv +2001 -0
  34. sequenzo/datasets/biofam_left_domain.csv +2001 -0
  35. sequenzo/datasets/biofam_married_domain.csv +2001 -0
  36. sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  37. sequenzo/datasets/country_co2_emissions.csv +194 -0
  38. sequenzo/datasets/country_co2_emissions_global_deciles.csv +195 -0
  39. sequenzo/datasets/country_co2_emissions_global_quintiles.csv +195 -0
  40. sequenzo/datasets/country_co2_emissions_local_deciles.csv +195 -0
  41. sequenzo/datasets/country_co2_emissions_local_quintiles.csv +195 -0
  42. sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  43. sequenzo/datasets/dyadic_children.csv +61 -0
  44. sequenzo/datasets/dyadic_parents.csv +61 -0
  45. sequenzo/datasets/mvad.csv +713 -0
  46. sequenzo/datasets/pairfam_activity_by_month.csv +1028 -0
  47. sequenzo/datasets/pairfam_activity_by_year.csv +1028 -0
  48. sequenzo/datasets/pairfam_family_by_month.csv +1028 -0
  49. sequenzo/datasets/pairfam_family_by_year.csv +1028 -0
  50. sequenzo/datasets/political_science_aid_shock.csv +166 -0
  51. sequenzo/datasets/political_science_donor_fragmentation.csv +157 -0
  52. sequenzo/define_sequence_data.py +1400 -0
  53. sequenzo/dissimilarity_measures/__init__.py +31 -0
  54. sequenzo/dissimilarity_measures/c_code.cpython-310-darwin.so +0 -0
  55. sequenzo/dissimilarity_measures/get_distance_matrix.py +762 -0
  56. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +246 -0
  57. sequenzo/dissimilarity_measures/src/DHDdistance.cpp +148 -0
  58. sequenzo/dissimilarity_measures/src/LCPdistance.cpp +114 -0
  59. sequenzo/dissimilarity_measures/src/LCPspellDistance.cpp +215 -0
  60. sequenzo/dissimilarity_measures/src/OMdistance.cpp +247 -0
  61. sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +281 -0
  62. sequenzo/dissimilarity_measures/src/__init__.py +0 -0
  63. sequenzo/dissimilarity_measures/src/dist2matrix.cpp +63 -0
  64. sequenzo/dissimilarity_measures/src/dp_utils.h +160 -0
  65. sequenzo/dissimilarity_measures/src/module.cpp +40 -0
  66. sequenzo/dissimilarity_measures/src/setup.py +30 -0
  67. sequenzo/dissimilarity_measures/src/utils.h +25 -0
  68. sequenzo/dissimilarity_measures/src/xsimd/.github/cmake-test/main.cpp +6 -0
  69. sequenzo/dissimilarity_measures/src/xsimd/benchmark/main.cpp +159 -0
  70. sequenzo/dissimilarity_measures/src/xsimd/benchmark/xsimd_benchmark.hpp +565 -0
  71. sequenzo/dissimilarity_measures/src/xsimd/docs/source/conf.py +37 -0
  72. sequenzo/dissimilarity_measures/src/xsimd/examples/mandelbrot.cpp +330 -0
  73. sequenzo/dissimilarity_measures/src/xsimd/examples/pico_bench.hpp +246 -0
  74. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_arithmetic.hpp +266 -0
  75. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_complex.hpp +112 -0
  76. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_details.hpp +323 -0
  77. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_logical.hpp +218 -0
  78. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_math.hpp +2583 -0
  79. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_memory.hpp +880 -0
  80. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_rounding.hpp +72 -0
  81. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_swizzle.hpp +174 -0
  82. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/common/xsimd_common_trigo.hpp +978 -0
  83. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx.hpp +1924 -0
  84. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx2.hpp +1144 -0
  85. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512bw.hpp +656 -0
  86. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512cd.hpp +28 -0
  87. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512dq.hpp +244 -0
  88. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512er.hpp +20 -0
  89. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512f.hpp +2650 -0
  90. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512ifma.hpp +20 -0
  91. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512pf.hpp +20 -0
  92. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi.hpp +77 -0
  93. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vbmi2.hpp +131 -0
  94. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512bw.hpp +20 -0
  95. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avx512vnni_avx512vbmi2.hpp +20 -0
  96. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_avxvnni.hpp +20 -0
  97. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common.hpp +24 -0
  98. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_common_fwd.hpp +77 -0
  99. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_constants.hpp +393 -0
  100. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_emulated.hpp +788 -0
  101. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx.hpp +93 -0
  102. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_avx2.hpp +46 -0
  103. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma3_sse.hpp +97 -0
  104. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_fma4.hpp +92 -0
  105. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_i8mm_neon64.hpp +17 -0
  106. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_isa.hpp +142 -0
  107. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon.hpp +3142 -0
  108. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_neon64.hpp +1543 -0
  109. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_rvv.hpp +1513 -0
  110. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_scalar.hpp +1260 -0
  111. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse2.hpp +2024 -0
  112. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse3.hpp +67 -0
  113. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_1.hpp +339 -0
  114. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sse4_2.hpp +44 -0
  115. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_ssse3.hpp +186 -0
  116. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_sve.hpp +1155 -0
  117. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_vsx.hpp +892 -0
  118. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/arch/xsimd_wasm.hpp +1780 -0
  119. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_arch.hpp +240 -0
  120. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_config.hpp +484 -0
  121. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_cpuid.hpp +269 -0
  122. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/config/xsimd_inline.hpp +27 -0
  123. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/math/xsimd_rem_pio2.hpp +719 -0
  124. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp +349 -0
  125. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/memory/xsimd_alignment.hpp +91 -0
  126. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_all_registers.hpp +55 -0
  127. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_api.hpp +2765 -0
  128. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx2_register.hpp +44 -0
  129. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512bw_register.hpp +51 -0
  130. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512cd_register.hpp +51 -0
  131. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512dq_register.hpp +51 -0
  132. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512er_register.hpp +51 -0
  133. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512f_register.hpp +77 -0
  134. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512ifma_register.hpp +51 -0
  135. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512pf_register.hpp +51 -0
  136. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi2_register.hpp +51 -0
  137. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vbmi_register.hpp +51 -0
  138. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512bw_register.hpp +54 -0
  139. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx512vnni_avx512vbmi2_register.hpp +53 -0
  140. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avx_register.hpp +64 -0
  141. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_avxvnni_register.hpp +44 -0
  142. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch.hpp +1524 -0
  143. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_batch_constant.hpp +300 -0
  144. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_common_arch.hpp +47 -0
  145. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_emulated_register.hpp +80 -0
  146. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx2_register.hpp +50 -0
  147. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_avx_register.hpp +50 -0
  148. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma3_sse_register.hpp +50 -0
  149. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_fma4_register.hpp +50 -0
  150. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_i8mm_neon64_register.hpp +55 -0
  151. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon64_register.hpp +55 -0
  152. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_neon_register.hpp +154 -0
  153. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_register.hpp +94 -0
  154. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_rvv_register.hpp +506 -0
  155. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse2_register.hpp +59 -0
  156. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse3_register.hpp +49 -0
  157. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_1_register.hpp +48 -0
  158. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sse4_2_register.hpp +48 -0
  159. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_ssse3_register.hpp +48 -0
  160. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_sve_register.hpp +156 -0
  161. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_traits.hpp +337 -0
  162. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_utils.hpp +536 -0
  163. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_vsx_register.hpp +77 -0
  164. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/types/xsimd_wasm_register.hpp +59 -0
  165. sequenzo/dissimilarity_measures/src/xsimd/include/xsimd/xsimd.hpp +75 -0
  166. sequenzo/dissimilarity_measures/src/xsimd/test/architectures/dummy.cpp +7 -0
  167. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set.cpp +13 -0
  168. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean.cpp +24 -0
  169. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_aligned.cpp +25 -0
  170. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_arch_independent.cpp +28 -0
  171. sequenzo/dissimilarity_measures/src/xsimd/test/doc/explicit_use_of_an_instruction_set_mean_tag_dispatch.cpp +25 -0
  172. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_abstract_batches.cpp +7 -0
  173. sequenzo/dissimilarity_measures/src/xsimd/test/doc/manipulating_parametric_batches.cpp +8 -0
  174. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum.hpp +31 -0
  175. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_avx2.cpp +3 -0
  176. sequenzo/dissimilarity_measures/src/xsimd/test/doc/sum_sse2.cpp +3 -0
  177. sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp +11 -0
  178. sequenzo/dissimilarity_measures/src/xsimd/test/main.cpp +31 -0
  179. sequenzo/dissimilarity_measures/src/xsimd/test/test_api.cpp +230 -0
  180. sequenzo/dissimilarity_measures/src/xsimd/test/test_arch.cpp +217 -0
  181. sequenzo/dissimilarity_measures/src/xsimd/test/test_basic_math.cpp +183 -0
  182. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch.cpp +1049 -0
  183. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_bool.cpp +508 -0
  184. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_cast.cpp +409 -0
  185. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_complex.cpp +712 -0
  186. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_constant.cpp +286 -0
  187. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_float.cpp +141 -0
  188. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_int.cpp +365 -0
  189. sequenzo/dissimilarity_measures/src/xsimd/test/test_batch_manip.cpp +308 -0
  190. sequenzo/dissimilarity_measures/src/xsimd/test/test_bitwise_cast.cpp +222 -0
  191. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_exponential.cpp +226 -0
  192. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_hyperbolic.cpp +183 -0
  193. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_power.cpp +265 -0
  194. sequenzo/dissimilarity_measures/src/xsimd/test/test_complex_trigonometric.cpp +236 -0
  195. sequenzo/dissimilarity_measures/src/xsimd/test/test_conversion.cpp +248 -0
  196. sequenzo/dissimilarity_measures/src/xsimd/test/test_custom_default_arch.cpp +28 -0
  197. sequenzo/dissimilarity_measures/src/xsimd/test/test_error_gamma.cpp +170 -0
  198. sequenzo/dissimilarity_measures/src/xsimd/test/test_explicit_batch_instantiation.cpp +32 -0
  199. sequenzo/dissimilarity_measures/src/xsimd/test/test_exponential.cpp +202 -0
  200. sequenzo/dissimilarity_measures/src/xsimd/test/test_extract_pair.cpp +92 -0
  201. sequenzo/dissimilarity_measures/src/xsimd/test/test_fp_manipulation.cpp +77 -0
  202. sequenzo/dissimilarity_measures/src/xsimd/test/test_gnu_source.cpp +30 -0
  203. sequenzo/dissimilarity_measures/src/xsimd/test/test_hyperbolic.cpp +167 -0
  204. sequenzo/dissimilarity_measures/src/xsimd/test/test_load_store.cpp +304 -0
  205. sequenzo/dissimilarity_measures/src/xsimd/test/test_memory.cpp +61 -0
  206. sequenzo/dissimilarity_measures/src/xsimd/test/test_poly_evaluation.cpp +64 -0
  207. sequenzo/dissimilarity_measures/src/xsimd/test/test_power.cpp +184 -0
  208. sequenzo/dissimilarity_measures/src/xsimd/test/test_rounding.cpp +199 -0
  209. sequenzo/dissimilarity_measures/src/xsimd/test/test_select.cpp +101 -0
  210. sequenzo/dissimilarity_measures/src/xsimd/test/test_shuffle.cpp +760 -0
  211. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.cpp +4 -0
  212. sequenzo/dissimilarity_measures/src/xsimd/test/test_sum.hpp +34 -0
  213. sequenzo/dissimilarity_measures/src/xsimd/test/test_traits.cpp +172 -0
  214. sequenzo/dissimilarity_measures/src/xsimd/test/test_trigonometric.cpp +208 -0
  215. sequenzo/dissimilarity_measures/src/xsimd/test/test_utils.hpp +611 -0
  216. sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py +123 -0
  217. sequenzo/dissimilarity_measures/src/xsimd/test/test_xsimd_api.cpp +1460 -0
  218. sequenzo/dissimilarity_measures/utils/__init__.py +16 -0
  219. sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py +44 -0
  220. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cpython-310-darwin.so +0 -0
  221. sequenzo/dissimilarity_measures/utils/seqconc.cpython-310-darwin.so +0 -0
  222. sequenzo/dissimilarity_measures/utils/seqdss.cpython-310-darwin.so +0 -0
  223. sequenzo/dissimilarity_measures/utils/seqdur.cpython-310-darwin.so +0 -0
  224. sequenzo/dissimilarity_measures/utils/seqlength.cpython-310-darwin.so +0 -0
  225. sequenzo/multidomain/__init__.py +23 -0
  226. sequenzo/multidomain/association_between_domains.py +311 -0
  227. sequenzo/multidomain/cat.py +597 -0
  228. sequenzo/multidomain/combt.py +519 -0
  229. sequenzo/multidomain/dat.py +81 -0
  230. sequenzo/multidomain/idcd.py +139 -0
  231. sequenzo/multidomain/linked_polyad.py +292 -0
  232. sequenzo/openmp_setup.py +233 -0
  233. sequenzo/prefix_tree/__init__.py +62 -0
  234. sequenzo/prefix_tree/hub.py +114 -0
  235. sequenzo/prefix_tree/individual_level_indicators.py +1321 -0
  236. sequenzo/prefix_tree/spell_individual_level_indicators.py +580 -0
  237. sequenzo/prefix_tree/spell_level_indicators.py +297 -0
  238. sequenzo/prefix_tree/system_level_indicators.py +544 -0
  239. sequenzo/prefix_tree/utils.py +54 -0
  240. sequenzo/seqhmm/__init__.py +95 -0
  241. sequenzo/seqhmm/advanced_optimization.py +305 -0
  242. sequenzo/seqhmm/bootstrap.py +411 -0
  243. sequenzo/seqhmm/build_hmm.py +142 -0
  244. sequenzo/seqhmm/build_mhmm.py +136 -0
  245. sequenzo/seqhmm/build_nhmm.py +121 -0
  246. sequenzo/seqhmm/fit_mhmm.py +62 -0
  247. sequenzo/seqhmm/fit_model.py +61 -0
  248. sequenzo/seqhmm/fit_nhmm.py +76 -0
  249. sequenzo/seqhmm/formulas.py +289 -0
  250. sequenzo/seqhmm/forward_backward_nhmm.py +276 -0
  251. sequenzo/seqhmm/gradients_nhmm.py +306 -0
  252. sequenzo/seqhmm/hmm.py +291 -0
  253. sequenzo/seqhmm/mhmm.py +314 -0
  254. sequenzo/seqhmm/model_comparison.py +238 -0
  255. sequenzo/seqhmm/multichannel_em.py +282 -0
  256. sequenzo/seqhmm/multichannel_utils.py +138 -0
  257. sequenzo/seqhmm/nhmm.py +270 -0
  258. sequenzo/seqhmm/nhmm_utils.py +191 -0
  259. sequenzo/seqhmm/predict.py +137 -0
  260. sequenzo/seqhmm/predict_mhmm.py +142 -0
  261. sequenzo/seqhmm/simulate.py +878 -0
  262. sequenzo/seqhmm/utils.py +218 -0
  263. sequenzo/seqhmm/visualization.py +910 -0
  264. sequenzo/sequence_characteristics/__init__.py +40 -0
  265. sequenzo/sequence_characteristics/complexity_index.py +49 -0
  266. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +220 -0
  267. sequenzo/sequence_characteristics/plot_characteristics.py +593 -0
  268. sequenzo/sequence_characteristics/simple_characteristics.py +311 -0
  269. sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py +39 -0
  270. sequenzo/sequence_characteristics/turbulence.py +155 -0
  271. sequenzo/sequence_characteristics/variance_of_spell_durations.py +86 -0
  272. sequenzo/sequence_characteristics/within_sequence_entropy.py +43 -0
  273. sequenzo/suffix_tree/__init__.py +66 -0
  274. sequenzo/suffix_tree/hub.py +114 -0
  275. sequenzo/suffix_tree/individual_level_indicators.py +1679 -0
  276. sequenzo/suffix_tree/spell_individual_level_indicators.py +493 -0
  277. sequenzo/suffix_tree/spell_level_indicators.py +248 -0
  278. sequenzo/suffix_tree/system_level_indicators.py +535 -0
  279. sequenzo/suffix_tree/utils.py +56 -0
  280. sequenzo/version_check.py +283 -0
  281. sequenzo/visualization/__init__.py +29 -0
  282. sequenzo/visualization/plot_mean_time.py +222 -0
  283. sequenzo/visualization/plot_modal_state.py +276 -0
  284. sequenzo/visualization/plot_most_frequent_sequences.py +147 -0
  285. sequenzo/visualization/plot_relative_frequency.py +405 -0
  286. sequenzo/visualization/plot_sequence_index.py +1175 -0
  287. sequenzo/visualization/plot_single_medoid.py +153 -0
  288. sequenzo/visualization/plot_state_distribution.py +651 -0
  289. sequenzo/visualization/plot_transition_matrix.py +190 -0
  290. sequenzo/visualization/utils/__init__.py +23 -0
  291. sequenzo/visualization/utils/utils.py +310 -0
  292. sequenzo/with_event_history_analysis/__init__.py +35 -0
  293. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +850 -0
  294. sequenzo/with_event_history_analysis/sequence_history_analysis.py +283 -0
  295. sequenzo-0.1.31.dist-info/METADATA +286 -0
  296. sequenzo-0.1.31.dist-info/RECORD +299 -0
  297. sequenzo-0.1.31.dist-info/WHEEL +5 -0
  298. sequenzo-0.1.31.dist-info/licenses/LICENSE +28 -0
  299. sequenzo-0.1.31.dist-info/top_level.txt +2 -0
@@ -0,0 +1,2583 @@
1
+ /***************************************************************************
2
+ * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3
+ * Martin Renou *
4
+ * Copyright (c) QuantStack *
5
+ * Copyright (c) Serge Guelton *
6
+ * *
7
+ * Distributed under the terms of the BSD 3-Clause License. *
8
+ * *
9
+ * The full license is in the file LICENSE, distributed with this software. *
10
+ ****************************************************************************/
11
+
12
+ #ifndef XSIMD_COMMON_MATH_HPP
13
+ #define XSIMD_COMMON_MATH_HPP
14
+
15
+ #include "../xsimd_scalar.hpp"
16
+ #include "./xsimd_common_details.hpp"
17
+ #include "./xsimd_common_trigo.hpp"
18
+
19
+ #include <type_traits>
20
+
21
+ namespace xsimd
22
+ {
23
+
24
+ namespace kernel
25
+ {
26
+
27
+ using namespace types;
28
+ // abs
29
+ template <class A, class T, class>
30
+ XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<common>) noexcept
31
+ {
32
+ if (std::is_unsigned<T>::value)
33
+ return self;
34
+ else
35
+ {
36
+ auto sign = bitofsign(self);
37
+ auto inv = self ^ sign;
38
+ return inv - sign;
39
+ }
40
+ }
41
+
42
+ template <class A, class T>
43
+ XSIMD_INLINE batch<T, A> abs(batch<std::complex<T>, A> const& z, requires_arch<common>) noexcept
44
+ {
45
+ return hypot(z.real(), z.imag());
46
+ }
47
+
48
+ // avg
49
+ namespace detail
50
+ {
51
+ template <class A, class T>
52
+ XSIMD_INLINE batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::false_type) noexcept
53
+ {
54
+ return (x & y) + ((x ^ y) >> 1);
55
+ }
56
+
57
+ template <class A, class T>
58
+ XSIMD_INLINE batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::true_type, std::true_type) noexcept
59
+ {
60
+ // Inspired by
61
+ // https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
62
+ auto t = (x & y) + ((x ^ y) >> 1);
63
+ auto t_u = bitwise_cast<typename std::make_unsigned<T>::type>(t);
64
+ auto avg = t + (bitwise_cast<T>(t_u >> (8 * sizeof(T) - 1)) & (x ^ y));
65
+ return avg;
66
+ }
67
+
68
+ template <class A, class T>
69
+ XSIMD_INLINE batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, std::false_type, std::true_type) noexcept
70
+ {
71
+ return (x + y) / 2;
72
+ }
73
+ }
74
+
75
+ template <class A, class T>
76
+ XSIMD_INLINE batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y, requires_arch<common>) noexcept
77
+ {
78
+ return detail::avg(x, y, typename std::is_integral<T>::type {}, typename std::is_signed<T>::type {});
79
+ }
80
+
81
+ // avgr
82
+ namespace detail
83
+ {
84
+ template <class A, class T>
85
+ XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::true_type) noexcept
86
+ {
87
+ constexpr unsigned shift = 8 * sizeof(T) - 1;
88
+ auto adj = std::is_signed<T>::value ? ((x ^ y) & 0x1) : (((x ^ y) << shift) >> shift);
89
+ return ::xsimd::kernel::avg(x, y, A {}) + adj;
90
+ }
91
+
92
+ template <class A, class T>
93
+ XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, std::false_type) noexcept
94
+ {
95
+ return ::xsimd::kernel::avg(x, y, A {});
96
+ }
97
+ }
98
+
99
+ template <class A, class T>
100
+ XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y, requires_arch<common>) noexcept
101
+ {
102
+ return detail::avgr(x, y, typename std::is_integral<T>::type {});
103
+ }
104
+
105
+ // batch_cast
106
+ template <class A, class T>
107
+ XSIMD_INLINE batch<T, A> batch_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<common>) noexcept
108
+ {
109
+ return self;
110
+ }
111
+
112
+ namespace detail
113
+ {
114
+ template <class A, class T_out, class T_in>
115
+ XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const& out, requires_arch<common>, with_fast_conversion) noexcept
116
+ {
117
+ return fast_cast(self, out, A {});
118
+ }
119
+ #if defined(__clang__) || __GNUC__
120
+ template <class A, class T_out, class T_in>
121
+ XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<common>, with_slow_conversion) noexcept
122
+ __attribute__((no_sanitize("undefined")));
123
+ #endif
124
+ template <class A, class T_out, class T_in>
125
+ XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<common>, with_slow_conversion) noexcept
126
+ {
127
+ static_assert(!std::is_same<T_in, T_out>::value, "there should be no conversion for this type combination");
128
+ using batch_type_in = batch<T_in, A>;
129
+ using batch_type_out = batch<T_out, A>;
130
+ static_assert(batch_type_in::size == batch_type_out::size, "compatible sizes");
131
+ alignas(A::alignment()) T_in buffer_in[batch_type_in::size];
132
+ alignas(A::alignment()) T_out buffer_out[batch_type_out::size];
133
+ self.store_aligned(&buffer_in[0]);
134
+ for (size_t i = 0; i < batch_type_in::size; ++i)
135
+ buffer_out[i] = static_cast<T_out>(buffer_in[i]);
136
+ return batch_type_out::load_aligned(buffer_out);
137
+ }
138
+
139
+ }
140
+
141
+ template <class A, class T_out, class T_in>
142
+ XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const& out, requires_arch<common>) noexcept
143
+ {
144
+ return detail::batch_cast(self, out, A {}, detail::conversion_type<A, T_in, T_out> {});
145
+ }
146
+
147
+ // bitofsign
148
+ template <class A, class T>
149
+ XSIMD_INLINE batch<T, A> bitofsign(batch<T, A> const& self, requires_arch<common>) noexcept
150
+ {
151
+ static_assert(std::is_integral<T>::value, "int type implementation");
152
+ if (std::is_unsigned<T>::value)
153
+ return batch<T, A>(0);
154
+ else
155
+ return self >> (T)(8 * sizeof(T) - 1);
156
+ }
157
+
158
+ template <class A>
159
+ XSIMD_INLINE batch<float, A> bitofsign(batch<float, A> const& self, requires_arch<common>) noexcept
160
+ {
161
+ return self & constants::signmask<batch<float, A>>();
162
+ }
163
+ template <class A>
164
+ XSIMD_INLINE batch<double, A> bitofsign(batch<double, A> const& self, requires_arch<common>) noexcept
165
+ {
166
+ return self & constants::signmask<batch<double, A>>();
167
+ }
168
+
169
+ // bitwise_cast
170
+ template <class A, class T>
171
+ XSIMD_INLINE batch<T, A> bitwise_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<common>) noexcept
172
+ {
173
+ return self;
174
+ }
175
+
176
+ // cbrt
177
+ /* origin: boost/simd/arch/common/simd/function/cbrt.hpp */
178
+ /*
179
+ * ====================================================
180
+ * copyright 2016 NumScale SAS
181
+ *
182
+ * Distributed under the Boost Software License, Version 1.0.
183
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
184
+ * ====================================================
185
+ */
186
+ template <class A>
187
+ XSIMD_INLINE batch<float, A> cbrt(batch<float, A> const& self, requires_arch<common>) noexcept
188
+ {
189
+ using batch_type = batch<float, A>;
190
+ batch_type z = abs(self);
191
+ #ifndef XSIMD_NO_DENORMALS
192
+ auto denormal = z < constants::smallestposval<batch_type>();
193
+ z = select(denormal, z * constants::twotonmb<batch_type>(), z);
194
+ batch_type f = select(denormal, constants::twotonmbo3<batch_type>(), batch_type(1.));
195
+ #endif
196
+ const batch_type CBRT2(bit_cast<float>(0x3fa14518));
197
+ const batch_type CBRT4(bit_cast<float>(0x3fcb2ff5));
198
+ const batch_type CBRT2I(bit_cast<float>(0x3f4b2ff5));
199
+ const batch_type CBRT4I(bit_cast<float>(0x3f214518));
200
+ using i_type = as_integer_t<batch_type>;
201
+ i_type e;
202
+ batch_type x = frexp(z, e);
203
+ x = detail::horner<batch_type,
204
+ 0x3ece0609,
205
+ 0x3f91eb77,
206
+ 0xbf745265,
207
+ 0x3f0bf0fe,
208
+ 0xbe09e49a>(x);
209
+ auto flag = e >= i_type(0);
210
+ i_type e1 = abs(e);
211
+ i_type rem = e1;
212
+ e1 /= i_type(3);
213
+ rem -= e1 * i_type(3);
214
+ e = e1 * sign(e);
215
+ const batch_type cbrt2 = select(batch_bool_cast<float>(flag), CBRT2, CBRT2I);
216
+ const batch_type cbrt4 = select(batch_bool_cast<float>(flag), CBRT4, CBRT4I);
217
+ batch_type fact = select(batch_bool_cast<float>(rem == i_type(1)), cbrt2, batch_type(1.));
218
+ fact = select(batch_bool_cast<float>(rem == i_type(2)), cbrt4, fact);
219
+ x = ldexp(x * fact, e);
220
+ x -= (x - z / (x * x)) * batch_type(1.f / 3.f);
221
+ #ifndef XSIMD_NO_DENORMALS
222
+ x = (x | bitofsign(self)) * f;
223
+ #else
224
+ x = x | bitofsign(self);
225
+ #endif
226
+ #ifndef XSIMD_NO_INFINITIES
227
+ return select(self == batch_type(0.) || isinf(self), self, x);
228
+ #else
229
+ return select(self == batch_type(0.), self, x);
230
+ #endif
231
+ }
232
+
233
+ template <class A>
234
+ XSIMD_INLINE batch<double, A> cbrt(batch<double, A> const& self, requires_arch<common>) noexcept
235
+ {
236
+ using batch_type = batch<double, A>;
237
+ batch_type z = abs(self);
238
+ #ifndef XSIMD_NO_DENORMALS
239
+ auto denormal = z < constants::smallestposval<batch_type>();
240
+ z = select(denormal, z * constants::twotonmb<batch_type>(), z);
241
+ batch_type f = select(denormal, constants::twotonmbo3<batch_type>(), batch_type(1.));
242
+ #endif
243
+ const batch_type CBRT2(bit_cast<double>(int64_t(0x3ff428a2f98d728b)));
244
+ const batch_type CBRT4(bit_cast<double>(int64_t(0x3ff965fea53d6e3d)));
245
+ const batch_type CBRT2I(bit_cast<double>(int64_t(0x3fe965fea53d6e3d)));
246
+ const batch_type CBRT4I(bit_cast<double>(int64_t(0x3fe428a2f98d728b)));
247
+ using i_type = as_integer_t<batch_type>;
248
+ i_type e;
249
+ batch_type x = frexp(z, e);
250
+ x = detail::horner<batch_type,
251
+ 0x3fd9c0c12122a4feull,
252
+ 0x3ff23d6ee505873aull,
253
+ 0xbfee8a4ca3ba37b8ull,
254
+ 0x3fe17e1fc7e59d58ull,
255
+ 0xbfc13c93386fdff6ull>(x);
256
+ auto flag = e >= typename i_type::value_type(0);
257
+ i_type e1 = abs(e);
258
+ i_type rem = e1;
259
+ e1 /= i_type(3);
260
+ rem -= e1 * i_type(3);
261
+ e = e1 * sign(e);
262
+ const batch_type cbrt2 = select(batch_bool_cast<double>(flag), CBRT2, CBRT2I);
263
+ const batch_type cbrt4 = select(batch_bool_cast<double>(flag), CBRT4, CBRT4I);
264
+ batch_type fact = select(batch_bool_cast<double>(rem == i_type(1)), cbrt2, batch_type(1.));
265
+ fact = select(batch_bool_cast<double>(rem == i_type(2)), cbrt4, fact);
266
+ x = ldexp(x * fact, e);
267
+ x -= (x - z / (x * x)) * batch_type(1. / 3.);
268
+ x -= (x - z / (x * x)) * batch_type(1. / 3.);
269
+ #ifndef XSIMD_NO_DENORMALS
270
+ x = (x | bitofsign(self)) * f;
271
+ #else
272
+ x = x | bitofsign(self);
273
+ #endif
274
+ #ifndef XSIMD_NO_INFINITIES
275
+ return select(self == batch_type(0.) || isinf(self), self, x);
276
+ #else
277
+ return select(self == batch_type(0.), self, x);
278
+ #endif
279
+ }
280
+
281
+ // clip
282
+ template <class A, class T>
283
+ XSIMD_INLINE batch<T, A> clip(batch<T, A> const& self, batch<T, A> const& lo, batch<T, A> const& hi, requires_arch<common>) noexcept
284
+ {
285
+ return min(hi, max(self, lo));
286
+ }
287
+
288
+ // copysign
289
+ template <class A, class T, class _ = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
290
+ XSIMD_INLINE batch<T, A> copysign(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
291
+ {
292
+ return abs(self) | bitofsign(other);
293
+ }
294
+
295
+ // erf
296
+
297
+ namespace detail
298
+ {
299
+ /* origin: boost/simd/arch/common/detail/common/erf_kernel.hpp */
300
+ /*
301
+ * ====================================================
302
+ * copyright 2016 NumScale SAS
303
+ *
304
+ * Distributed under the Boost Software License, Version 1.0.
305
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
306
+ * ====================================================
307
+ */
308
+ template <class B>
309
+ struct erf_kernel;
310
+
311
+ template <class A>
312
+ struct erf_kernel<batch<float, A>>
313
+ {
314
+ using batch_type = batch<float, A>;
315
+ // computes erf(a0)/a0
316
+ // x is sqr(a0) and 0 <= abs(a0) <= 2/3
317
+ static XSIMD_INLINE batch_type erf1(const batch_type& x) noexcept
318
+ {
319
+ return detail::horner<batch_type,
320
+ 0x3f906eba, // 1.128379154774254e+00
321
+ 0xbec0937e, // -3.761252839094832e-01
322
+ 0x3de70f22, // 1.128218315189123e-01
323
+ 0xbcdb61f4, // -2.678010670585737e-02
324
+ 0x3ba4468d, // 5.013293006147870e-03
325
+ 0xba1fc83b // -6.095205117313012e-04
326
+ >(x);
327
+ }
328
+
329
+ // computes erfc(x)*exp(sqr(x))
330
+ // x >= 2/3
331
+ static XSIMD_INLINE batch_type erfc2(const batch_type& x) noexcept
332
+ {
333
+ return detail::horner<batch_type,
334
+ 0x3f0a0e8b, // 5.392844046572836e-01
335
+ 0xbf918a62, // -1.137035586823118e+00
336
+ 0x3e243828, // 1.603704761054187e-01
337
+ 0x3ec4ca6e, // 3.843569094305250e-01
338
+ 0x3e1175c7, // 1.420508523645926e-01
339
+ 0x3e2006f0, // 1.562764709849380e-01
340
+ 0xbfaea865, // -1.364514006347145e+00
341
+ 0x4050b063, // 3.260765682222576e+00
342
+ 0xc0cd1a85, // -6.409487379234005e+00
343
+ 0x40d67e3b, // 6.702908785399893e+00
344
+ 0xc0283611 // -2.628299919293280e+00
345
+ >(x);
346
+ }
347
+
348
+ static XSIMD_INLINE batch_type erfc3(const batch_type& x) noexcept
349
+ {
350
+ return (batch_type(1.) - x) * detail::horner<batch_type,
351
+ 0x3f7ffffe, // 9.9999988e-01
352
+ 0xbe036d7e, // -1.2834737e-01
353
+ 0xbfa11698, // -1.2585020e+00
354
+ 0xbffc9284, // -1.9732213e+00
355
+ 0xc016c985, // -2.3560498e+00
356
+ 0x3f2cff3b, // 6.7576951e-01
357
+ 0xc010d956, // -2.2632651e+00
358
+ 0x401b5680, // 2.4271545e+00
359
+ 0x41aa8e55 // 2.1319498e+01
360
+ >(x);
361
+ }
362
+ };
363
+
364
+ template <class A>
365
+ struct erf_kernel<batch<double, A>>
366
+ {
367
+ using batch_type = batch<double, A>;
368
+ // computes erf(a0)/a0
369
+ // x is sqr(a0) and 0 <= abs(a0) <= 0.65
370
+ static XSIMD_INLINE batch_type erf1(const batch_type& x) noexcept
371
+ {
372
+ return detail::horner<batch_type,
373
+ 0x3ff20dd750429b61ull, // 1.12837916709551
374
+ 0x3fc16500f106c0a5ull, // 0.135894887627278
375
+ 0x3fa4a59a4f02579cull, // 4.03259488531795E-02
376
+ 0x3f53b7664358865aull, // 1.20339380863079E-03
377
+ 0x3f110512d5b20332ull // 6.49254556481904E-05
378
+ >(x)
379
+ / detail::horner<batch_type,
380
+ 0x3ff0000000000000ull, // 1
381
+ 0x3fdd0a84eb1ca867ull, // 0.453767041780003
382
+ 0x3fb64536ca92ea2full, // 8.69936222615386E-02
383
+ 0x3f8166f75999dbd1ull, // 8.49717371168693E-03
384
+ 0x3f37ea4332348252ull // 3.64915280629351E-04
385
+ >(x);
386
+ }
387
+
388
+ // computes erfc(x)*exp(x*x)
389
+ // 0.65 <= abs(x) <= 2.2
390
+ static XSIMD_INLINE batch_type erfc2(const batch_type& x) noexcept
391
+ {
392
+ return detail::horner<batch_type,
393
+ 0x3feffffffbbb552bull, // 0.999999992049799
394
+ 0x3ff54dfe9b258a60ull, // 1.33154163936765
395
+ 0x3fec1986509e687bull, // 0.878115804155882
396
+ 0x3fd53dd7a67c7e9full, // 0.331899559578213
397
+ 0x3fb2488a6b5cb5e5ull, // 7.14193832506776E-02
398
+ 0x3f7cf4cfe0aacbb4ull, // 7.06940843763253E-03
399
+ 0x0ull // 0
400
+ >(x)
401
+ / detail::horner<batch_type,
402
+ 0x3ff0000000000000ull, // 1
403
+ 0x4003adeae79b9708ull, // 2.45992070144246
404
+ 0x40053b1052dca8bdull, // 2.65383972869776
405
+ 0x3ff9e677c2777c3cull, // 1.61876655543871
406
+ 0x3fe307622fcff772ull, // 0.594651311286482
407
+ 0x3fc033c113a7deeeull, // 0.126579413030178
408
+ 0x3f89a996639b0d00ull // 1.25304936549413E-02
409
+ >(x);
410
+ }
411
+
412
+ // computes erfc(x)*exp(x*x)
413
+ // 2.2 <= abs(x) <= 6
414
+ static XSIMD_INLINE batch_type erfc3(const batch_type& x) noexcept
415
+ {
416
+ return detail::horner<batch_type,
417
+ 0x3fefff5a9e697ae2ull, // 0.99992114009714
418
+ 0x3ff9fa202deb88e5ull, // 1.62356584489367
419
+ 0x3ff44744306832aeull, // 1.26739901455873
420
+ 0x3fe29be1cff90d94ull, // 0.581528574177741
421
+ 0x3fc42210f88b9d43ull, // 0.157289620742839
422
+ 0x3f971d0907ea7a92ull, // 2.25716982919218E-02
423
+ 0x0ll // 0
424
+ >(x)
425
+ / detail::horner<batch_type,
426
+ 0x3ff0000000000000ull, // 1
427
+ 0x400602f24bf3fdb6ull, // 2.75143870676376
428
+ 0x400afd487397568full, // 3.37367334657285
429
+ 0x400315ffdfd5ce91ull, // 2.38574194785344
430
+ 0x3ff0cfd4cb6cde9full, // 1.05074004614827
431
+ 0x3fd1d7ab774bb837ull, // 0.278788439273629
432
+ 0x3fa47bd61bbb3843ull // 4.00072964526861E-02
433
+ >(x);
434
+ }
435
+
436
+ // computes erfc(rx)*exp(rx*rx)
437
+ // x >= 6 rx = 1/x
438
+ static XSIMD_INLINE batch_type erfc4(const batch_type& x) noexcept
439
+ {
440
+ return detail::horner<batch_type,
441
+ 0xbc7e4ad1ec7d0000ll, // -2.627435221016534e-17
442
+ 0x3fe20dd750429a16ll, // 5.641895835477182e-01
443
+ 0x3db60000e984b501ll, // 2.000889609806154e-11
444
+ 0xbfd20dd753ae5dfdll, // -2.820947949598745e-01
445
+ 0x3e907e71e046a820ll, // 2.457786367990903e-07
446
+ 0x3fdb1494cac06d39ll, // 4.231311779019112e-01
447
+ 0x3f34a451701654f1ll, // 3.149699042180451e-04
448
+ 0xbff105e6b8ef1a63ll, // -1.063940737150596e+00
449
+ 0x3fb505a857e9ccc8ll, // 8.211757799454056e-02
450
+ 0x40074fbabc514212ll, // 2.913930388669777e+00
451
+ 0x4015ac7631f7ac4fll, // 5.418419628850713e+00
452
+ 0xc0457e03041e9d8bll, // -4.298446704382794e+01
453
+ 0x4055803d26c4ec4fll, // 8.600373238783617e+01
454
+ 0xc0505fce04ec4ec5ll // -6.549694941594051e+01
455
+ >(x);
456
+ }
457
+ };
458
+ }
459
+ /* origin: boost/simd/arch/common/simd/function/erf.hpp */
460
+ /*
461
+ * ====================================================
462
+ * copyright 2016 NumScale SAS
463
+ *
464
+ * Distributed under the Boost Software License, Version 1.0.
465
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
466
+ * ====================================================
467
+ */
468
+
469
+ template <class A>
470
+ XSIMD_INLINE batch<float, A> erf(batch<float, A> const& self, requires_arch<common>) noexcept
471
+ {
472
+ using batch_type = batch<float, A>;
473
+ batch_type x = abs(self);
474
+ batch_type r1(0.);
475
+ auto test1 = x < batch_type(2.f / 3.f);
476
+ if (any(test1))
477
+ {
478
+ r1 = self * detail::erf_kernel<batch_type>::erf1(x * x);
479
+ if (all(test1))
480
+ return r1;
481
+ }
482
+ batch_type z = x / (batch_type(1.) + x);
483
+ z -= batch_type(0.4f);
484
+ batch_type r2 = batch_type(1.) - exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
485
+ r2 = select(self < batch_type(0.), -r2, r2);
486
+ r1 = select(test1, r1, r2);
487
+ #ifndef XSIMD_NO_INFINITIES
488
+ r1 = select(xsimd::isinf(self), sign(self), r1);
489
+ #endif
490
+ return r1;
491
+ }
492
+
493
+ template <class A>
494
+ XSIMD_INLINE batch<double, A> erf(batch<double, A> const& self, requires_arch<common>) noexcept
495
+ {
496
+ using batch_type = batch<double, A>;
497
+ batch_type x = abs(self);
498
+ batch_type xx = x * x;
499
+ batch_type lim1(0.65);
500
+ batch_type lim2(2.2);
501
+ auto test1 = x < lim1;
502
+ batch_type r1(0.);
503
+ if (any(test1))
504
+ {
505
+ r1 = self * detail::erf_kernel<batch_type>::erf1(xx);
506
+ if (all(test1))
507
+ return r1;
508
+ }
509
+ auto test2 = x < lim2;
510
+ auto test3 = test2 && !test1;
511
+ batch_type ex = exp(-xx);
512
+ if (any(test3))
513
+ {
514
+ batch_type z = batch_type(1.) - ex * detail::erf_kernel<batch_type>::erfc2(x);
515
+ batch_type r2 = select(self < batch_type(0.), -z, z);
516
+ r1 = select(test1, r1, r2);
517
+ if (all(test1 || test3))
518
+ return r1;
519
+ }
520
+ batch_type z = batch_type(1.) - ex * detail::erf_kernel<batch_type>::erfc3(x);
521
+ z = select(self < batch_type(0.), -z, z);
522
+ #ifndef XSIMD_NO_INFINITIES
523
+ z = select(xsimd::isinf(self), sign(self), z);
524
+ #endif
525
+ return select(test2, r1, z);
526
+ }
527
+
528
+ // erfc
529
+ template <class A>
530
+ XSIMD_INLINE batch<float, A> erfc(batch<float, A> const& self, requires_arch<common>) noexcept
531
+ {
532
+ using batch_type = batch<float, A>;
533
+ batch_type x = abs(self);
534
+ auto test0 = self < batch_type(0.);
535
+ batch_type r1(0.);
536
+ auto test1 = 3.f * x < 2.f;
537
+ batch_type z = x / (batch_type(1.) + x);
538
+ if (any(test1))
539
+ {
540
+ r1 = detail::erf_kernel<batch_type>::erfc3(z);
541
+ if (all(test1))
542
+ return select(test0, batch_type(2.) - r1, r1);
543
+ }
544
+
545
+ z -= batch_type(0.4f);
546
+ batch_type r2 = exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
547
+ r1 = select(test1, r1, r2);
548
+ #ifndef XSIMD_NO_INFINITIES
549
+ r1 = select(x == constants::infinity<batch_type>(), batch_type(0.), r1);
550
+ #endif
551
+ return select(test0, batch_type(2.) - r1, r1);
552
+ }
553
+
554
+ template <class A>
555
+ XSIMD_INLINE batch<double, A> erfc(batch<double, A> const& self, requires_arch<common>) noexcept
556
+ {
557
+ using batch_type = batch<double, A>;
558
+ batch_type x = abs(self);
559
+ batch_type xx = x * x;
560
+ batch_type lim1(0.65);
561
+ batch_type lim2(2.2);
562
+ auto test0 = self < batch_type(0.);
563
+ auto test1 = x < lim1;
564
+ batch_type r1(0.);
565
+ if (any(test1))
566
+ {
567
+ r1 = batch_type(1.) - x * detail::erf_kernel<batch_type>::erf1(xx);
568
+ if (all(test1))
569
+ return select(test0, batch_type(2.) - r1, r1);
570
+ }
571
+ auto test2 = x < lim2;
572
+ auto test3 = test2 && !test1;
573
+ batch_type ex = exp(-xx);
574
+ if (any(test3))
575
+ {
576
+ batch_type z = ex * detail::erf_kernel<batch_type>::erfc2(x);
577
+ r1 = select(test1, r1, z);
578
+ if (all(test1 || test3))
579
+ return select(test0, batch_type(2.) - r1, r1);
580
+ }
581
+ batch_type z = ex * detail::erf_kernel<batch_type>::erfc3(x);
582
+ r1 = select(test2, r1, z);
583
+ #ifndef XSIMD_NO_INFINITIES
584
+ r1 = select(x == constants::infinity<batch_type>(), batch_type(0.), r1);
585
+ #endif
586
+ return select(test0, batch_type(2.) - r1, r1);
587
+ }
588
+
589
+ // estrin
590
+ namespace detail
591
+ {
592
+
593
+ template <class B>
594
+ struct estrin
595
+ {
596
+ B x;
597
+
598
+ template <typename... Ts>
599
+ XSIMD_INLINE B operator()(const Ts&... coefs) noexcept
600
+ {
601
+ return eval(coefs...);
602
+ }
603
+
604
+ private:
605
+ XSIMD_INLINE B eval(const B& c0) noexcept
606
+ {
607
+ return c0;
608
+ }
609
+
610
+ XSIMD_INLINE B eval(const B& c0, const B& c1) noexcept
611
+ {
612
+ return fma(x, c1, c0);
613
+ }
614
+
615
+ template <size_t... Is, class Tuple>
616
+ XSIMD_INLINE B eval(::xsimd::detail::index_sequence<Is...>, const Tuple& tuple)
617
+ {
618
+ return estrin { x * x }(std::get<Is>(tuple)...);
619
+ }
620
+
621
+ template <class... Args>
622
+ XSIMD_INLINE B eval(const std::tuple<Args...>& tuple) noexcept
623
+ {
624
+ return eval(::xsimd::detail::make_index_sequence<sizeof...(Args)>(), tuple);
625
+ }
626
+
627
+ template <class... Args>
628
+ XSIMD_INLINE B eval(const std::tuple<Args...>& tuple, const B& c0) noexcept
629
+ {
630
+ return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0))));
631
+ }
632
+
633
+ template <class... Args>
634
+ XSIMD_INLINE B eval(const std::tuple<Args...>& tuple, const B& c0, const B& c1) noexcept
635
+ {
636
+ return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0, c1))));
637
+ }
638
+
639
+ template <class... Args, class... Ts>
640
+ XSIMD_INLINE B eval(const std::tuple<Args...>& tuple, const B& c0, const B& c1, const Ts&... coefs) noexcept
641
+ {
642
+ return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0, c1))), coefs...);
643
+ }
644
+
645
+ template <class... Ts>
646
+ XSIMD_INLINE B eval(const B& c0, const B& c1, const Ts&... coefs) noexcept
647
+ {
648
+ return eval(std::make_tuple(eval(c0, c1)), coefs...);
649
+ }
650
+ };
651
+ }
652
+
653
+ template <class T, class A, uint64_t... Coefs>
654
+ XSIMD_INLINE batch<T, A> estrin(const batch<T, A>& self) noexcept
655
+ {
656
+ using batch_type = batch<T, A>;
657
+ return detail::estrin<batch_type> { self }(detail::coef<batch_type, Coefs>()...);
658
+ }
659
+
660
+ // exp
661
+ /* origin: boost/simd/arch/common/detail/simd/expo_base.hpp */
662
+ /*
663
+ * ====================================================
664
+ * copyright 2016 NumScale SAS
665
+ *
666
+ * Distributed under the Boost Software License, Version 1.0.
667
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
668
+ * ====================================================
669
+ */
670
+ namespace detail
671
+ {
672
+ enum exp_reduction_tag
673
+ {
674
+ exp_tag,
675
+ exp2_tag,
676
+ exp10_tag
677
+ };
678
+
679
+ template <class B, exp_reduction_tag Tag>
680
+ struct exp_reduction_base;
681
+
682
+ template <class B>
683
+ struct exp_reduction_base<B, exp_tag>
684
+ {
685
+ static constexpr B maxlog() noexcept
686
+ {
687
+ return constants::maxlog<B>();
688
+ }
689
+
690
+ static constexpr B minlog() noexcept
691
+ {
692
+ return constants::minlog<B>();
693
+ }
694
+ };
695
+
696
+ template <class B>
697
+ struct exp_reduction_base<B, exp10_tag>
698
+ {
699
+ static constexpr B maxlog() noexcept
700
+ {
701
+ return constants::maxlog10<B>();
702
+ }
703
+
704
+ static constexpr B minlog() noexcept
705
+ {
706
+ return constants::minlog10<B>();
707
+ }
708
+ };
709
+
710
+ template <class B>
711
+ struct exp_reduction_base<B, exp2_tag>
712
+ {
713
+ static constexpr B maxlog() noexcept
714
+ {
715
+ return constants::maxlog2<B>();
716
+ }
717
+
718
+ static constexpr B minlog() noexcept
719
+ {
720
+ return constants::minlog2<B>();
721
+ }
722
+ };
723
+
724
+ template <class T, class A, exp_reduction_tag Tag>
725
+ struct exp_reduction;
726
+
727
+ template <class A>
728
+ struct exp_reduction<float, A, exp_tag> : exp_reduction_base<batch<float, A>, exp_tag>
729
+ {
730
+ using batch_type = batch<float, A>;
731
+ static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept
732
+ {
733
+ batch_type y = detail::horner<batch_type,
734
+ 0x3f000000, // 5.0000000e-01
735
+ 0x3e2aa9a5, // 1.6666277e-01
736
+ 0x3d2aa957, // 4.1665401e-02
737
+ 0x3c098d8b, // 8.3955629e-03
738
+ 0x3ab778cf // 1.3997796e-03
739
+ >(x);
740
+ return ++fma(y, x * x, x);
741
+ }
742
+
743
+ static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type& x) noexcept
744
+ {
745
+ batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
746
+ x = fnma(k, constants::log_2hi<batch_type>(), a);
747
+ x = fnma(k, constants::log_2lo<batch_type>(), x);
748
+ return k;
749
+ }
750
+ };
751
+
752
+ template <class A>
753
+ struct exp_reduction<float, A, exp10_tag> : exp_reduction_base<batch<float, A>, exp10_tag>
754
+ {
755
+ using batch_type = batch<float, A>;
756
+ static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept
757
+ {
758
+ return ++(detail::horner<batch_type,
759
+ 0x40135d8e, // 2.3025851e+00
760
+ 0x4029a926, // 2.6509490e+00
761
+ 0x400237da, // 2.0346589e+00
762
+ 0x3f95eb4c, // 1.1712432e+00
763
+ 0x3f0aacef, // 5.4170126e-01
764
+ 0x3e54dff1 // 2.0788552e-01
765
+ >(x)
766
+ * x);
767
+ }
768
+
769
+ static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type& x) noexcept
770
+ {
771
+ batch_type k = nearbyint(constants::invlog10_2<batch_type>() * a);
772
+ x = fnma(k, constants::log10_2hi<batch_type>(), a);
773
+ x -= k * constants::log10_2lo<batch_type>();
774
+ return k;
775
+ }
776
+ };
777
+
778
+ template <class A>
779
+ struct exp_reduction<float, A, exp2_tag> : exp_reduction_base<batch<float, A>, exp2_tag>
780
+ {
781
+ using batch_type = batch<float, A>;
782
+ static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept
783
+ {
784
+ batch_type y = detail::horner<batch_type,
785
+ 0x3e75fdf1, // 2.4022652e-01
786
+ 0x3d6356eb, // 5.5502813e-02
787
+ 0x3c1d9422, // 9.6178371e-03
788
+ 0x3ab01218, // 1.3433127e-03
789
+ 0x3922c8c4 // 1.5524315e-04
790
+ >(x);
791
+ return ++fma(y, x * x, x * constants::log_2<batch_type>());
792
+ }
793
+
794
+ static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type& x) noexcept
795
+ {
796
+ batch_type k = nearbyint(a);
797
+ x = (a - k);
798
+ return k;
799
+ }
800
+ };
801
+
802
+ template <class A>
803
+ struct exp_reduction<double, A, exp_tag> : exp_reduction_base<batch<double, A>, exp_tag>
804
+ {
805
+ using batch_type = batch<double, A>;
806
+ static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept
807
+ {
808
+ batch_type t = x * x;
809
+ return fnma(t,
810
+ detail::horner<batch_type,
811
+ 0x3fc555555555553eull,
812
+ 0xbf66c16c16bebd93ull,
813
+ 0x3f11566aaf25de2cull,
814
+ 0xbebbbd41c5d26bf1ull,
815
+ 0x3e66376972bea4d0ull>(t),
816
+ x);
817
+ }
818
+
819
+ static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type& hi, batch_type& lo, batch_type& x) noexcept
820
+ {
821
+ batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
822
+ hi = fnma(k, constants::log_2hi<batch_type>(), a);
823
+ lo = k * constants::log_2lo<batch_type>();
824
+ x = hi - lo;
825
+ return k;
826
+ }
827
+
828
+ static XSIMD_INLINE batch_type finalize(const batch_type& x, const batch_type& c, const batch_type& hi, const batch_type& lo) noexcept
829
+ {
830
+ return batch_type(1.) - (((lo - (x * c) / (batch_type(2.) - c)) - hi));
831
+ }
832
+ };
833
+
834
+ template <class A>
835
+ struct exp_reduction<double, A, exp10_tag> : exp_reduction_base<batch<double, A>, exp10_tag>
836
+ {
837
+ using batch_type = batch<double, A>;
838
+ static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept
839
+ {
840
+ batch_type xx = x * x;
841
+ batch_type px = x * detail::horner<batch_type, 0x40a2b4798e134a01ull, 0x40796b7a050349e4ull, 0x40277d9474c55934ull, 0x3fa4fd75f3062dd4ull>(xx);
842
+ batch_type x2 = px / (detail::horner1<batch_type, 0x40a03f37650df6e2ull, 0x4093e05eefd67782ull, 0x405545fdce51ca08ull>(xx) - px);
843
+ return ++(x2 + x2);
844
+ }
845
+
846
+ static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept
847
+ {
848
+ batch_type k = nearbyint(constants::invlog10_2<batch_type>() * a);
849
+ x = fnma(k, constants::log10_2hi<batch_type>(), a);
850
+ x = fnma(k, constants::log10_2lo<batch_type>(), x);
851
+ return k;
852
+ }
853
+
854
+ static XSIMD_INLINE batch_type finalize(const batch_type&, const batch_type& c, const batch_type&, const batch_type&) noexcept
855
+ {
856
+ return c;
857
+ }
858
+ };
859
+
860
+ template <class A>
861
+ struct exp_reduction<double, A, exp2_tag> : exp_reduction_base<batch<double, A>, exp2_tag>
862
+ {
863
+ using batch_type = batch<double, A>;
864
+ static XSIMD_INLINE batch_type approx(const batch_type& x) noexcept
865
+ {
866
+ batch_type t = x * x;
867
+ return fnma(t,
868
+ detail::horner<batch_type,
869
+ 0x3fc555555555553eull,
870
+ 0xbf66c16c16bebd93ull,
871
+ 0x3f11566aaf25de2cull,
872
+ 0xbebbbd41c5d26bf1ull,
873
+ 0x3e66376972bea4d0ull>(t),
874
+ x);
875
+ }
876
+
877
+ static XSIMD_INLINE batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept
878
+ {
879
+ batch_type k = nearbyint(a);
880
+ x = (a - k) * constants::log_2<batch_type>();
881
+ return k;
882
+ }
883
+
884
+ static XSIMD_INLINE batch_type finalize(const batch_type& x, const batch_type& c, const batch_type&, const batch_type&) noexcept
885
+ {
886
+ return batch_type(1.) + x + x * c / (batch_type(2.) - c);
887
+ }
888
+ };
889
+
890
+ template <exp_reduction_tag Tag, class A>
891
+ XSIMD_INLINE batch<float, A> exp(batch<float, A> const& self) noexcept
892
+ {
893
+ using batch_type = batch<float, A>;
894
+ using reducer_t = exp_reduction<float, A, Tag>;
895
+ batch_type x;
896
+ batch_type k = reducer_t::reduce(self, x);
897
+ x = reducer_t::approx(x);
898
+ x = select(self <= reducer_t::minlog(), batch_type(0.), ldexp(x, to_int(k)));
899
+ #ifndef __FAST_MATH__
900
+ x = select(self >= reducer_t::maxlog(), constants::infinity<batch_type>(), x);
901
+ #endif
902
+ return x;
903
+ }
904
+
905
+ template <exp_reduction_tag Tag, class A>
906
+ XSIMD_INLINE batch<double, A> exp(batch<double, A> const& self) noexcept
907
+ {
908
+ using batch_type = batch<double, A>;
909
+ using reducer_t = exp_reduction<double, A, Tag>;
910
+ batch_type hi, lo, x;
911
+ batch_type k = reducer_t::reduce(self, hi, lo, x);
912
+ batch_type c = reducer_t::approx(x);
913
+ c = reducer_t::finalize(x, c, hi, lo);
914
+ c = select(self <= reducer_t::minlog(), batch_type(0.), ldexp(c, to_int(k)));
915
+ #ifndef __FAST_MATH__
916
+ c = select(self >= reducer_t::maxlog(), constants::infinity<batch_type>(), c);
917
+ #endif
918
+ return c;
919
+ }
920
+ }
921
+
922
+ template <class A, class T>
923
+ XSIMD_INLINE batch<T, A> exp(batch<T, A> const& self, requires_arch<common>) noexcept
924
+ {
925
+ return detail::exp<detail::exp_tag>(self);
926
+ }
927
+
928
+ template <class A, class T>
929
+ XSIMD_INLINE batch<std::complex<T>, A> exp(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept
930
+ {
931
+ using batch_type = batch<std::complex<T>, A>;
932
+ auto isincos = sincos(self.imag());
933
+ return exp(self.real()) * batch_type(std::get<1>(isincos), std::get<0>(isincos));
934
+ }
935
+
936
+ // exp10
937
+ template <class A, class T>
938
+ XSIMD_INLINE batch<T, A> exp10(batch<T, A> const& self, requires_arch<common>) noexcept
939
+ {
940
+ return detail::exp<detail::exp10_tag>(self);
941
+ }
942
+
943
+ // exp2
944
+ template <class A, class T>
945
+ XSIMD_INLINE batch<T, A> exp2(batch<T, A> const& self, requires_arch<common>) noexcept
946
+ {
947
+ return detail::exp<detail::exp2_tag>(self);
948
+ }
949
+
950
+ // expm1
951
+ namespace detail
952
+ {
953
+ /* origin: boost/simd/arch/common/detail/common/expm1_kernel.hpp */
954
+ /*
955
+ * ====================================================
956
+ * copyright 2016 NumScale SAS
957
+ *
958
+ * Distributed under the Boost Software License, Version 1.0.
959
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
960
+ * ====================================================
961
+ */
962
+ template <class A>
963
+ static XSIMD_INLINE batch<float, A> expm1(const batch<float, A>& a) noexcept
964
+ {
965
+ using batch_type = batch<float, A>;
966
+ batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
967
+ batch_type x = fnma(k, constants::log_2hi<batch_type>(), a);
968
+ x = fnma(k, constants::log_2lo<batch_type>(), x);
969
+ batch_type hx = x * batch_type(0.5);
970
+ batch_type hxs = x * hx;
971
+ batch_type r = detail::horner<batch_type,
972
+ 0X3F800000UL, // 1
973
+ 0XBD08887FUL, // -3.3333298E-02
974
+ 0X3ACF6DB4UL // 1.582554E-03
975
+ >(hxs);
976
+ batch_type t = fnma(r, hx, batch_type(3.));
977
+ batch_type e = hxs * ((r - t) / (batch_type(6.) - x * t));
978
+ e = fms(x, e, hxs);
979
+ using i_type = as_integer_t<batch_type>;
980
+ i_type ik = to_int(k);
981
+ batch_type two2mk = ::xsimd::bitwise_cast<float>((constants::maxexponent<batch_type>() - ik) << constants::nmb<batch_type>());
982
+ batch_type y = batch_type(1.) - two2mk - (e - x);
983
+ return ldexp(y, ik);
984
+ }
985
+
986
+ template <class A>
987
+ static XSIMD_INLINE batch<double, A> expm1(const batch<double, A>& a) noexcept
988
+ {
989
+ using batch_type = batch<double, A>;
990
+ batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
991
+ batch_type hi = fnma(k, constants::log_2hi<batch_type>(), a);
992
+ batch_type lo = k * constants::log_2lo<batch_type>();
993
+ batch_type x = hi - lo;
994
+ batch_type hxs = x * x * batch_type(0.5);
995
+ batch_type r = detail::horner<batch_type,
996
+ 0X3FF0000000000000ULL,
997
+ 0XBFA11111111110F4ULL,
998
+ 0X3F5A01A019FE5585ULL,
999
+ 0XBF14CE199EAADBB7ULL,
1000
+ 0X3ED0CFCA86E65239ULL,
1001
+ 0XBE8AFDB76E09C32DULL>(hxs);
1002
+ batch_type t = batch_type(3.) - r * batch_type(0.5) * x;
1003
+ batch_type e = hxs * ((r - t) / (batch_type(6) - x * t));
1004
+ batch_type c = (hi - x) - lo;
1005
+ e = (x * (e - c) - c) - hxs;
1006
+ using i_type = as_integer_t<batch_type>;
1007
+ i_type ik = to_int(k);
1008
+ batch_type two2mk = ::xsimd::bitwise_cast<double>((constants::maxexponent<batch_type>() - ik) << constants::nmb<batch_type>());
1009
+ batch_type ct1 = batch_type(1.) - two2mk - (e - x);
1010
+ batch_type ct2 = ++(x - (e + two2mk));
1011
+ batch_type y = select(k < batch_type(20.), ct1, ct2);
1012
+ return ldexp(y, ik);
1013
+ }
1014
+
1015
+ }
1016
+
1017
+ template <class A, class T>
1018
+ XSIMD_INLINE batch<T, A> expm1(batch<T, A> const& self, requires_arch<common>) noexcept
1019
+ {
1020
+ using batch_type = batch<T, A>;
1021
+ auto x = detail::expm1(self);
1022
+ #ifndef __FAST_MATH__
1023
+ x = select(self > constants::maxlog<batch_type>(), constants::infinity<batch_type>(), x);
1024
+ #endif
1025
+ return select(self < constants::logeps<batch_type>(), batch_type(-1.), x);
1026
+ }
1027
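
expm1 gets its own kernel above rather than being computed as exp(x) - 1 because that subtraction cancels almost all significant digits when x is tiny. A standalone scalar illustration (not part of the package):

#include <cmath>
#include <cstdio>

int main()
{
    double x = 1e-12;
    std::printf("exp(x) - 1 = %.17g\n", std::exp(x) - 1.0); // only a few correct digits survive
    std::printf("expm1(x)   = %.17g\n", std::expm1(x));     // ~ x + x*x/2, full precision
}
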
+
1028
+ template <class A, class T>
1029
+ XSIMD_INLINE batch<std::complex<T>, A> expm1(const batch<std::complex<T>, A>& z, requires_arch<common>) noexcept
1030
+ {
1031
+ using batch_type = batch<std::complex<T>, A>;
1032
+ using real_batch = typename batch_type::real_batch;
1033
+ real_batch isin = sin(z.imag());
1034
+ real_batch rem1 = expm1(z.real());
1035
+ real_batch re = rem1 + 1.;
1036
+ real_batch si = sin(z.imag() * 0.5);
1037
+ return { rem1 - 2. * re * si * si, re * isin };
1038
+ }
1039
+
1040
+ // polar
1041
+ template <class A, class T>
1042
+ XSIMD_INLINE batch<std::complex<T>, A> polar(const batch<T, A>& r, const batch<T, A>& theta, requires_arch<common>) noexcept
1043
+ {
1044
+ auto sincosTheta = sincos(theta);
1045
+ return { r * sincosTheta.second, r * sincosTheta.first };
1046
+ }
1047
+
1048
+ // fdim
1049
+ template <class A, class T>
1050
+ XSIMD_INLINE batch<T, A> fdim(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
1051
+ {
1052
+ return fmax(batch<T, A>(0), self - other);
1053
+ }
1054
+
1055
+ // fmod
1056
+ template <class A, class T>
1057
+ XSIMD_INLINE batch<T, A> fmod(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
1058
+ {
1059
+ return fnma(trunc(self / other), other, self);
1060
+ }
1061
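
The fmod overload above is the identity x - trunc(x / y) * y evaluated with a fused multiply-add. Scalar equivalent for reference (illustrative only):

#include <cmath>
#include <cstdio>

int main()
{
    double x = 7.5, y = 2.0;
    double via_identity = x - std::trunc(x / y) * y;        // what the fnma above computes
    std::printf("%g %g\n", via_identity, std::fmod(x, y));  // both 1.5
}
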
+
1062
+ // frexp
1063
+ /* origin: boost/simd/arch/common/simd/function/ifrexp.hpp */
1064
+ /*
1065
+ * ====================================================
1066
+ * copyright 2016 NumScale SAS
1067
+ *
1068
+ * Distributed under the Boost Software License, Version 1.0.
1069
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
1070
+ * ====================================================
1071
+ */
1072
+ template <class A, class T>
1073
+ XSIMD_INLINE batch<T, A> frexp(const batch<T, A>& self, batch<as_integer_t<T>, A>& exp, requires_arch<common>) noexcept
1074
+ {
1075
+ using batch_type = batch<T, A>;
1076
+ using int_type = as_integer_t<T>;
1077
+ using i_type = batch<int_type, A>;
1078
+ i_type m1f = constants::mask1frexp<batch_type>();
1079
+ i_type r1 = m1f & ::xsimd::bitwise_cast<int_type>(self);
1080
+ batch_type x = self & ::xsimd::bitwise_cast<T>(~m1f);
1081
+ exp = (r1 >> constants::nmb<batch_type>()) - constants::maxexponentm1<batch_type>();
1082
+ exp = select(batch_bool_cast<typename i_type::value_type>(self != batch_type(0.)), exp, i_type(typename i_type::value_type(0)));
1083
+ return select((self != batch_type(0.)), x | ::xsimd::bitwise_cast<T>(constants::mask2frexp<batch_type>()), batch_type(0.));
1084
+ }
1085
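
The frexp overload above works directly on the IEEE-754 bit pattern: it reads the exponent field, then replaces it with the exponent of 0.5 so the returned mantissa lies in [0.5, 1). A scalar sketch of the same bit manipulation (zero and denormals ignored, names illustrative):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    double v = 48.0; // 0.75 * 2^6
    std::uint64_t bits;
    std::memcpy(&bits, &v, sizeof bits);
    int e = static_cast<int>((bits >> 52) & 0x7ff) - 1022;  // exponent such that v = m * 2^e
    bits = (bits & ~(0x7ffull << 52)) | (0x3feull << 52);   // overwrite exponent with that of 0.5
    double m;
    std::memcpy(&m, &bits, sizeof m);
    int e_ref;
    double m_ref = std::frexp(v, &e_ref);
    std::printf("bit trick: %g * 2^%d, std::frexp: %g * 2^%d\n", m, e, m_ref, e_ref);
}
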
+
1086
+ // from bool
1087
+ template <class A, class T>
1088
+ XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<common>) noexcept
1089
+ {
1090
+ return batch<T, A>((typename batch<T, A>::register_type)self.data) & batch<T, A>(1);
1091
+ }
1092
+
1093
+ // horner
1094
+ template <class T, class A, uint64_t... Coefs>
1095
+ XSIMD_INLINE batch<T, A> horner(const batch<T, A>& self) noexcept
1096
+ {
1097
+ return detail::horner<batch<T, A>, Coefs...>(self);
1098
+ }
1099
+
1100
+ // hypot
1101
+ template <class A, class T>
1102
+ XSIMD_INLINE batch<T, A> hypot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
1103
+ {
1104
+ return sqrt(fma(self, self, other * other));
1105
+ }
1106
+
1107
+ // ipow
1108
+ template <class A, class T, class ITy>
1109
+ XSIMD_INLINE batch<T, A> ipow(batch<T, A> const& self, ITy other, requires_arch<common>) noexcept
1110
+ {
1111
+ return ::xsimd::detail::ipow(self, other);
1112
+ }
1113
+
1114
+ // ldexp
1115
+ /* origin: boost/simd/arch/common/simd/function/ldexp.hpp */
1116
+ /*
1117
+ * ====================================================
1118
+ * copyright 2016 NumScale SAS
1119
+ *
1120
+ * Distributed under the Boost Software License, Version 1.0.
1121
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
1122
+ * ====================================================
1123
+ */
1124
+ template <class A, class T>
1125
+ XSIMD_INLINE batch<T, A> ldexp(const batch<T, A>& self, const batch<as_integer_t<T>, A>& other, requires_arch<common>) noexcept
1126
+ {
1127
+ using batch_type = batch<T, A>;
1128
+ using itype = as_integer_t<batch_type>;
1129
+ itype ik = other + constants::maxexponent<T>();
1130
+ ik = ik << constants::nmb<T>();
1131
+ return self * ::xsimd::bitwise_cast<T>(ik);
1132
+ }
1133
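
ldexp above builds 2^n by writing n plus the exponent bias straight into the exponent field and multiplying. The same trick for a scalar double, valid only while n + 1023 stays inside the normal exponent range (illustrative only):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    double x = 1.25;
    int n = 10;
    std::uint64_t bits = static_cast<std::uint64_t>(n + 1023) << 52; // bit pattern of 2^n
    double two_n;
    std::memcpy(&two_n, &bits, sizeof two_n);
    std::printf("%g\n", x * two_n); // 1280
}
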
+
1134
+ // lgamma
1135
+ template <class A, class T>
1136
+ XSIMD_INLINE batch<T, A> lgamma(batch<T, A> const& self, requires_arch<common>) noexcept;
1137
+
1138
+ namespace detail
1139
+ {
1140
+ /* origin: boost/simd/arch/common/detail/common/gammaln_kernel.hpp */
1141
+ /*
1142
+ * ====================================================
1143
+ * copyright 2016 NumScale SAS
1144
+ *
1145
+ * Distributed under the Boost Software License, Version 1.0.
1146
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
1147
+ * ====================================================
1148
+ */
1149
+ template <class A>
1150
+ static XSIMD_INLINE batch<float, A> gammalnB(const batch<float, A>& x) noexcept
1151
+ {
1152
+ return horner<batch<float, A>,
1153
+ 0x3ed87730, // 4.227843421859038E-001
1154
+ 0x3ea51a64, // 3.224669577325661E-001,
1155
+ 0xbd89f07e, // -6.735323259371034E-002,
1156
+ 0x3ca89ed8, // 2.058355474821512E-002,
1157
+ 0xbbf164fd, // -7.366775108654962E-003,
1158
+ 0x3b3ba883, // 2.863437556468661E-003,
1159
+ 0xbaabeab1, // -1.311620815545743E-003,
1160
+ 0x3a1ebb94 // 6.055172732649237E-004
1161
+ >(x);
1162
+ }
1163
+
1164
+ template <class A>
1165
+ static XSIMD_INLINE batch<float, A> gammalnC(const batch<float, A>& x) noexcept
1166
+ {
1167
+ return horner<batch<float, A>,
1168
+ 0xbf13c468, // -5.772156501719101E-001
1169
+ 0x3f528d34, // 8.224670749082976E-001,
1170
+ 0xbecd27a8, // -4.006931650563372E-001,
1171
+ 0x3e8a898b, // 2.705806208275915E-001,
1172
+ 0xbe53c04f, // -2.067882815621965E-001,
1173
+ 0x3e2d4dab, // 1.692415923504637E-001,
1174
+ 0xbe22d329, // -1.590086327657347E-001,
1175
+ 0x3e0c3c4f // 1.369488127325832E-001
1176
+ >(x);
1177
+ }
1178
+
1179
+ template <class A>
1180
+ static XSIMD_INLINE batch<float, A> gammaln2(const batch<float, A>& x) noexcept
1181
+ {
1182
+ return horner<batch<float, A>,
1183
+ 0x3daaaa94, // 8.333316229807355E-002f
1184
+ 0xbb358701, // -2.769887652139868E-003f,
1185
+ 0x3a31fd69 // 6.789774945028216E-004f
1186
+ >(x);
1187
+ }
1188
+
1189
+ template <class A>
1190
+ static XSIMD_INLINE batch<double, A> gammaln1(const batch<double, A>& x) noexcept
1191
+ {
1192
+ return horner<batch<double, A>,
1193
+ 0xc12a0c675418055eull, // -8.53555664245765465627E5
1194
+ 0xc13a45890219f20bull, // -1.72173700820839662146E6,
1195
+ 0xc131bc82f994db51ull, // -1.16237097492762307383E6,
1196
+ 0xc1143d73f89089e5ull, // -3.31612992738871184744E5,
1197
+ 0xc0e2f234355bb93eull, // -3.88016315134637840924E4,
1198
+ 0xc09589018ff36761ull // -1.37825152569120859100E3
1199
+ >(x)
1200
+ / horner<batch<double, A>,
1201
+ 0xc13ece4b6a11e14aull, // -2.01889141433532773231E6
1202
+ 0xc1435255892ff34cull, // -2.53252307177582951285E6,
1203
+ 0xc131628671950043ull, // -1.13933444367982507207E6,
1204
+ 0xc10aeb84b9744c9bull, // -2.20528590553854454839E5,
1205
+ 0xc0d0aa0d7b89d757ull, // -1.70642106651881159223E4,
1206
+ 0xc075fd0d1cf312b2ull, // -3.51815701436523470549E2,
1207
+ 0x3ff0000000000000ull // 1.00000000000000000000E0
1208
+ >(x);
1209
+ }
1210
+
1211
+ template <class A>
1212
+ static XSIMD_INLINE batch<double, A> gammalnA(const batch<double, A>& x) noexcept
1213
+ {
1214
+ return horner<batch<double, A>,
1215
+ 0x3fb555555555554bull, // 8.33333333333331927722E-2
1216
+ 0xbf66c16c16b0a5a1ull, // -2.77777777730099687205E-3,
1217
+ 0x3f4a019f20dc5ebbull, // 7.93650340457716943945E-4,
1218
+ 0xbf437fbdb580e943ull, // -5.95061904284301438324E-4,
1219
+ 0x3f4a985027336661ull // 8.11614167470508450300E-4
1220
+ >(x);
1221
+ }
1222
+
1223
+ /* origin: boost/simd/arch/common/simd/function/gammaln.hpp */
1224
+ /*
1225
+ * ====================================================
1226
+ * copyright 2016 NumScale SAS
1227
+ *
1228
+ * Distributed under the Boost Software License, Version 1.0.
1229
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
1230
+ * ====================================================
1231
+ */
1232
+ template <class B>
1233
+ struct lgamma_impl;
1234
+
1235
+ template <class A>
1236
+ struct lgamma_impl<batch<float, A>>
1237
+ {
1238
+ using batch_type = batch<float, A>;
1239
+ static XSIMD_INLINE batch_type compute(const batch_type& a) noexcept
1240
+ {
1241
+ auto inf_result = (a <= batch_type(0.)) && is_flint(a);
1242
+ batch_type x = select(inf_result, constants::nan<batch_type>(), a);
1243
+ batch_type q = abs(x);
1244
+ #ifndef XSIMD_NO_INFINITIES
1245
+ inf_result = (x == constants::infinity<batch_type>()) || inf_result;
1246
+ #endif
1247
+ auto ltza = a < batch_type(0.);
1248
+ batch_type r(0);
1249
+ batch_type r1 = other(q);
1250
+ if (any(ltza))
1251
+ {
1252
+ #ifdef __FAST_MATH__
1253
+ r = negative(q, r1);
1254
+ #else
1255
+ r = select(inf_result, constants::infinity<batch_type>(), negative(q, r1));
1256
+ #endif
1257
+ if (all(ltza))
1258
+ return r;
1259
+ }
1260
+ batch_type r2 = select(ltza, r, r1);
1261
+ #ifdef __FAST_MATH__
1262
+ return r2;
1263
+ #else
1264
+ return select(a == constants::minusinfinity<batch_type>(), constants::nan<batch_type>(), select(inf_result, constants::infinity<batch_type>(), r2));
1265
+ #endif
1266
+ }
1267
+
1268
+ private:
1269
+ static XSIMD_INLINE batch_type negative(const batch_type& q, const batch_type& w) noexcept
1270
+ {
1271
+ batch_type p = floor(q);
1272
+ batch_type z = q - p;
1273
+ auto test2 = z < batch_type(0.5);
1274
+ z = select(test2, z - batch_type(1.), z);
1275
+ z = q * sin(z, trigo_pi_tag());
1276
+ return -log(constants::invpi<batch_type>() * abs(z)) - w;
1277
+ }
1278
+
1279
+ static XSIMD_INLINE batch_type other(const batch_type& x) noexcept
1280
+ {
1281
+ auto xlt650 = (x < batch_type(6.5));
1282
+ batch_type r0x = x;
1283
+ batch_type r0z = x;
1284
+ batch_type r0s = batch_type(1.);
1285
+ batch_type r1 = batch_type(0.);
1286
+ batch_type p = constants::nan<batch_type>();
1287
+ if (any(xlt650))
1288
+ {
1289
+ batch_type z = batch_type(1.);
1290
+ batch_type tx = select(xlt650, x, batch_type(0.));
1291
+ batch_type nx = batch_type(0.);
1292
+ const batch_type _075 = batch_type(0.75);
1293
+ const batch_type _150 = batch_type(1.50);
1294
+ const batch_type _125 = batch_type(1.25);
1295
+ const batch_type _250 = batch_type(2.50);
1296
+ auto xge150 = (x >= _150);
1297
+ auto txgt250 = (tx > _250);
1298
+
1299
+ // x >= 1.5
1300
+ while (any(xge150 && txgt250))
1301
+ {
1302
+ nx = select(txgt250, nx - batch_type(1.), nx);
1303
+ tx = select(txgt250, x + nx, tx);
1304
+ z = select(txgt250, z * tx, z);
1305
+ txgt250 = (tx > _250);
1306
+ }
1307
+ r0x = select(xge150, x + nx - batch_type(2.), x);
1308
+ r0z = select(xge150, z, r0z);
1309
+ r0s = select(xge150, batch_type(1.), r0s);
1310
+
1311
+ // x >= 1.25 && x < 1.5
1312
+ auto xge125 = (x >= _125);
1313
+ auto xge125t = xge125 && !xge150;
1314
+ if (any(xge125))
1315
+ {
1316
+ r0x = select(xge125t, x - batch_type(1.), r0x);
1317
+ r0z = select(xge125t, z * x, r0z);
1318
+ r0s = select(xge125t, batch_type(-1.), r0s);
1319
+ }
1320
+
1321
+ // x >= 0.75 && x < 1.25
1322
+ batch_bool<float, A> kernelC(false);
1323
+ auto xge075 = (x >= _075);
1324
+ auto xge075t = xge075 && !xge125;
1325
+ if (any(xge075t))
1326
+ {
1327
+ kernelC = xge075t;
1328
+ r0x = select(xge075t, x - batch_type(1.), x);
1329
+ r0z = select(xge075t, batch_type(1.), r0z);
1330
+ r0s = select(xge075t, batch_type(-1.), r0s);
1331
+ p = gammalnC(r0x);
1332
+ }
1333
+
1334
+ // tx < 1.5 && x < 0.75
1335
+ auto txlt150 = (tx < _150) && !xge075;
1336
+ if (any(txlt150))
1337
+ {
1338
+ auto orig = txlt150;
1339
+ while (any(txlt150))
1340
+ {
1341
+ z = select(txlt150, z * tx, z);
1342
+ nx = select(txlt150, nx + batch_type(1.), nx);
1343
+ tx = select(txlt150, x + nx, tx);
1344
+ txlt150 = (tx < _150) && !xge075;
1345
+ }
1346
+ r0x = select(orig, r0x + nx - batch_type(2.), r0x);
1347
+ r0z = select(orig, z, r0z);
1348
+ r0s = select(orig, batch_type(-1.), r0s);
1349
+ }
1350
+ p = select(kernelC, p, gammalnB(r0x));
1351
+ if (all(xlt650))
1352
+ return fma(r0x, p, r0s * log(abs(r0z)));
1353
+ }
1354
+ r0z = select(xlt650, abs(r0z), x);
1355
+ batch_type m = log(r0z);
1356
+ r1 = fma(r0x, p, r0s * m);
1357
+ batch_type r2 = fma(x - batch_type(0.5), m, constants::logsqrt2pi<batch_type>() - x);
1358
+ r2 += gammaln2(batch_type(1.) / (x * x)) / x;
1359
+ return select(xlt650, r1, r2);
1360
+ }
1361
+ };
1362
+
1363
+ template <class A>
1364
+ struct lgamma_impl<batch<double, A>>
1365
+ {
1366
+ using batch_type = batch<double, A>;
1367
+
1368
+ static XSIMD_INLINE batch_type compute(const batch_type& a) noexcept
1369
+ {
1370
+ auto inf_result = (a <= batch_type(0.)) && is_flint(a);
1371
+ batch_type x = select(inf_result, constants::nan<batch_type>(), a);
1372
+ batch_type q = abs(x);
1373
+ #ifndef XSIMD_NO_INFINITIES
1374
+ inf_result = (q == constants::infinity<batch_type>());
1375
+ #endif
1376
+ auto test = (a < batch_type(-34.));
1377
+ batch_type r = constants::nan<batch_type>();
1378
+ if (any(test))
1379
+ {
1380
+ r = large_negative(q);
1381
+ if (all(test))
1382
+ return select(inf_result, constants::nan<batch_type>(), r);
1383
+ }
1384
+ batch_type r1 = other(a);
1385
+ batch_type r2 = select(test, r, r1);
1386
+ #ifdef __FAST_MATH__
1387
+ return r2;
1388
+ #else
1389
+ return select(a == constants::minusinfinity<batch_type>(), constants::nan<batch_type>(), select(inf_result, constants::infinity<batch_type>(), r2));
1390
+ #endif
1391
+ }
1392
+
1393
+ private:
1394
+ // FIXME: cannot mark this one as XSIMD_INLINE because there's a
1395
+ // recursive loop on `lgamma'.
1396
+ static inline batch_type large_negative(const batch_type& q) noexcept
1397
+ {
1398
+ batch_type w = lgamma(q);
1399
+ batch_type p = floor(q);
1400
+ batch_type z = q - p;
1401
+ auto test2 = (z < batch_type(0.5));
1402
+ z = select(test2, z - batch_type(1.), z);
1403
+ z = q * sin(z, trigo_pi_tag());
1404
+ z = abs(z);
1405
+ return constants::logpi<batch_type>() - log(z) - w;
1406
+ }
1407
+
1408
+ static XSIMD_INLINE batch_type other(const batch_type& xx) noexcept
1409
+ {
1410
+ batch_type x = xx;
1411
+ auto test = (x < batch_type(13.));
1412
+ batch_type r1 = batch_type(0.);
1413
+ if (any(test))
1414
+ {
1415
+ batch_type z = batch_type(1.);
1416
+ batch_type p = batch_type(0.);
1417
+ batch_type u = select(test, x, batch_type(0.));
1418
+ auto test1 = (u >= batch_type(3.));
1419
+ while (any(test1))
1420
+ {
1421
+ p = select(test1, p - batch_type(1.), p);
1422
+ u = select(test1, x + p, u);
1423
+ z = select(test1, z * u, z);
1424
+ test1 = (u >= batch_type(3.));
1425
+ }
1426
+
1427
+ auto test2 = (u < batch_type(2.));
1428
+ while (any(test2))
1429
+ {
1430
+ z = select(test2, z / u, z);
1431
+ p = select(test2, p + batch_type(1.), p);
1432
+ u = select(test2, x + p, u);
1433
+ test2 = (u < batch_type(2.));
1434
+ }
1435
+
1436
+ z = abs(z);
1437
+ x += p - batch_type(2.);
1438
+ r1 = x * gammaln1(x) + log(z);
1439
+ if (all(test))
1440
+ return r1;
1441
+ }
1442
+ batch_type r2 = fma(xx - batch_type(0.5), log(xx), constants::logsqrt2pi<batch_type>() - xx);
1443
+ batch_type p = batch_type(1.) / (xx * xx);
1444
+ r2 += gammalnA(p) / xx;
1445
+ return select(test, r1, r2);
1446
+ }
1447
+ };
1448
+ }
1449
+
1450
+ template <class A, class T>
1451
+ XSIMD_INLINE batch<T, A> lgamma(batch<T, A> const& self, requires_arch<common>) noexcept
1452
+ {
1453
+ return detail::lgamma_impl<batch<T, A>>::compute(self);
1454
+ }
1455
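
The negative-argument branches of lgamma_impl above rely on the reflection formula Γ(x)·Γ(1−x) = π / sin(πx), applied to logarithms of absolute values. A scalar check of that identity (illustrative, not part of xsimd):

#include <cmath>
#include <cstdio>

int main()
{
    const double pi = 3.141592653589793;
    double x = -2.3;                                          // negative, non-integer
    double lhs = std::lgamma(x) + std::lgamma(1.0 - x);       // log|Gamma(x) * Gamma(1 - x)|
    double rhs = std::log(pi / std::fabs(std::sin(pi * x)));  // log|pi / sin(pi x)|
    std::printf("%.12g %.12g\n", lhs, rhs);                   // the two agree
}
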
+
1456
+ // log
1457
+ /* origin: boost/simd/arch/common/simd/function/log.hpp */
1458
+ /*
1459
+ * ====================================================
1460
+ * copyright 2016 NumScale SAS
1461
+ *
1462
+ * Distributed under the Boost Software License, Version 1.0.
1463
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
1464
+ * ====================================================
1465
+ */
1466
+ template <class A>
1467
+ XSIMD_INLINE batch<float, A> log(batch<float, A> const& self, requires_arch<common>) noexcept
1468
+ {
1469
+ using batch_type = batch<float, A>;
1470
+ using int_type = as_integer_t<float>;
1471
+ using i_type = batch<int_type, A>;
1472
+ batch_type x = self;
1473
+ i_type k(0);
1474
+ auto isnez = (self != batch_type(0.));
1475
+ #ifndef XSIMD_NO_DENORMALS
1476
+ auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1477
+ if (any(test))
1478
+ {
1479
+ k = select(batch_bool_cast<int_type>(test), k - i_type(23), k);
1480
+ x = select(test, x * batch_type(8388608ul), x);
1481
+ }
1482
+ #endif
1483
+ i_type ix = ::xsimd::bitwise_cast<int_type>(x);
1484
+ ix += 0x3f800000 - 0x3f3504f3;
1485
+ k += (ix >> 23) - 0x7f;
1486
+ ix = (ix & i_type(0x007fffff)) + 0x3f3504f3;
1487
+ x = ::xsimd::bitwise_cast<float>(ix);
1488
+ batch_type f = --x;
1489
+ batch_type s = f / (batch_type(2.) + f);
1490
+ batch_type z = s * s;
1491
+ batch_type w = z * z;
1492
+ batch_type t1 = w * detail::horner<batch_type, 0x3eccce13, 0x3e789e26>(w);
1493
+ batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa, 0x3e91e9ee>(w);
1494
+ batch_type R = t2 + t1;
1495
+ batch_type hfsq = batch_type(0.5) * f * f;
1496
+ batch_type dk = to_float(k);
1497
+ batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
1498
+ #ifdef __FAST_MATH__
1499
+ return r;
1500
+ #else
1501
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1502
+ return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1503
+ #endif
1504
+ }
1505
+
1506
+ template <class A>
1507
+ XSIMD_INLINE batch<double, A> log(batch<double, A> const& self, requires_arch<common>) noexcept
1508
+ {
1509
+ using batch_type = batch<double, A>;
1510
+ using int_type = as_integer_t<double>;
1511
+ using i_type = batch<int_type, A>;
1512
+
1513
+ batch_type x = self;
1514
+ i_type hx = ::xsimd::bitwise_cast<int_type>(x) >> 32;
1515
+ i_type k(0);
1516
+ auto isnez = (self != batch_type(0.));
1517
+ #ifndef XSIMD_NO_DENORMALS
1518
+ auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1519
+ if (any(test))
1520
+ {
1521
+ k = select(batch_bool_cast<int_type>(test), k - i_type(54), k);
1522
+ x = select(test, x * batch_type(18014398509481984ull), x);
1523
+ }
1524
+ #endif
1525
+ hx += 0x3ff00000 - 0x3fe6a09e;
1526
+ k += (hx >> 20) - 0x3ff;
1527
+ batch_type dk = to_float(k);
1528
+ hx = (hx & i_type(0x000fffff)) + 0x3fe6a09e;
1529
+ x = ::xsimd::bitwise_cast<double>(hx << 32 | (i_type(0xffffffff) & ::xsimd::bitwise_cast<int_type>(x)));
1530
+
1531
+ batch_type f = --x;
1532
+ batch_type hfsq = batch_type(0.5) * f * f;
1533
+ batch_type s = f / (batch_type(2.) + f);
1534
+ batch_type z = s * s;
1535
+ batch_type w = z * z;
1536
+
1537
+ batch_type t1 = w * detail::horner<batch_type, 0x3fd999999997fa04ll, 0x3fcc71c51d8e78afll, 0x3fc39a09d078c69fll>(w);
1538
+ batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll, 0x3fd2492494229359ll, 0x3fc7466496cb03dell, 0x3fc2f112df3e5244ll>(w);
1539
+ batch_type R = t2 + t1;
1540
+ batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
1541
+ #ifdef __FAST_MATH__
1542
+ return r;
1543
+ #else
1544
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1545
+ return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1546
+ #endif
1547
+ }
1548
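
Both log kernels above split the input into x = m · 2^k with m kept near 1 (roughly in [√2/2, √2)), so that log(x) = k·log 2 + log(m) and the polynomial only has to cover a short interval. A standalone scalar sketch of the same decomposition (illustrative names; std::log stands in for the polynomial on the residual part):

#include <cmath>
#include <cstdio>

int main()
{
    double v = 123.456;
    int k;
    double m = std::frexp(v, &k);                      // v = m * 2^k, m in [0.5, 1)
    if (m < 0.7071067811865476) { m *= 2.0; --k; }     // recentre m around 1
    double r = k * 0.6931471805599453 + std::log(m);   // k*log(2) + log(m)
    std::printf("%.15g %.15g\n", r, std::log(v));
}
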
+
1549
+ template <class A, class T>
1550
+ XSIMD_INLINE batch<std::complex<T>, A> log(const batch<std::complex<T>, A>& z, requires_arch<common>) noexcept
1551
+ {
1552
+ return batch<std::complex<T>, A>(log(abs(z)), atan2(z.imag(), z.real()));
1553
+ }
1554
+
1555
+ // log2
1556
+ template <class A>
1557
+ XSIMD_INLINE batch<float, A> log2(batch<float, A> const& self, requires_arch<common>) noexcept
1558
+ {
1559
+ using batch_type = batch<float, A>;
1560
+ using int_type = as_integer_t<float>;
1561
+ using i_type = batch<int_type, A>;
1562
+ batch_type x = self;
1563
+ i_type k(0);
1564
+ auto isnez = (self != batch_type(0.));
1565
+ #ifndef XSIMD_NO_DENORMALS
1566
+ auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1567
+ if (any(test))
1568
+ {
1569
+ k = select(batch_bool_cast<int_type>(test), k - i_type(25), k);
1570
+ x = select(test, x * batch_type(33554432ul), x);
1571
+ }
1572
+ #endif
1573
+ i_type ix = ::xsimd::bitwise_cast<int_type>(x);
1574
+ ix += 0x3f800000 - 0x3f3504f3;
1575
+ k += (ix >> 23) - 0x7f;
1576
+ ix = (ix & i_type(0x007fffff)) + 0x3f3504f3;
1577
+ x = ::xsimd::bitwise_cast<float>(ix);
1578
+ batch_type f = --x;
1579
+ batch_type s = f / (batch_type(2.) + f);
1580
+ batch_type z = s * s;
1581
+ batch_type w = z * z;
1582
+ batch_type t1 = w * detail::horner<batch_type, 0x3eccce13, 0x3e789e26>(w);
1583
+ batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa, 0x3e91e9ee>(w);
1584
+ batch_type R = t1 + t2;
1585
+ batch_type hfsq = batch_type(0.5) * f * f;
1586
+ batch_type dk = to_float(k);
1587
+ batch_type r = fma(fms(s, hfsq + R, hfsq) + f, constants::invlog_2<batch_type>(), dk);
1588
+ #ifdef __FAST_MATH__
1589
+ return r;
1590
+ #else
1591
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1592
+ return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1593
+ #endif
1594
+ }
1595
+
1596
+ template <class A>
1597
+ XSIMD_INLINE batch<double, A> log2(batch<double, A> const& self, requires_arch<common>) noexcept
1598
+ {
1599
+ using batch_type = batch<double, A>;
1600
+ using int_type = as_integer_t<double>;
1601
+ using i_type = batch<int_type, A>;
1602
+ batch_type x = self;
1603
+ i_type hx = ::xsimd::bitwise_cast<int_type>(x) >> 32;
1604
+ i_type k(0);
1605
+ auto isnez = (self != batch_type(0.));
1606
+ #ifndef XSIMD_NO_DENORMALS
1607
+ auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1608
+ if (any(test))
1609
+ {
1610
+ k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(54), k);
1611
+ x = select(test, x * batch_type(18014398509481984ull), x);
1612
+ }
1613
+ #endif
1614
+ hx += 0x3ff00000 - 0x3fe6a09e;
1615
+ k += (hx >> 20) - 0x3ff;
1616
+ hx = (hx & i_type(0x000fffff)) + 0x3fe6a09e;
1617
+ x = ::xsimd::bitwise_cast<double>(hx << 32 | (i_type(0xffffffff) & ::xsimd::bitwise_cast<int_type>(x)));
1618
+ batch_type f = --x;
1619
+ batch_type s = f / (batch_type(2.) + f);
1620
+ batch_type z = s * s;
1621
+ batch_type w = z * z;
1622
+ batch_type t1 = w * detail::horner<batch_type, 0x3fd999999997fa04ll, 0x3fcc71c51d8e78afll, 0x3fc39a09d078c69fll>(w);
1623
+ batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll, 0x3fd2492494229359ll, 0x3fc7466496cb03dell, 0x3fc2f112df3e5244ll>(w);
1624
+ batch_type R = t2 + t1;
1625
+ batch_type hfsq = batch_type(0.5) * f * f;
1626
+ batch_type hi = f - hfsq;
1627
+ hi = hi & ::xsimd::bitwise_cast<double>((constants::allbits<i_type>() << 32));
1628
+ batch_type lo = fma(s, hfsq + R, f - hi - hfsq);
1629
+ batch_type val_hi = hi * constants::invlog_2hi<batch_type>();
1630
+ batch_type val_lo = fma(lo + hi, constants::invlog_2lo<batch_type>(), lo * constants::invlog_2hi<batch_type>());
1631
+ batch_type dk = to_float(k);
1632
+ batch_type w1 = dk + val_hi;
1633
+ val_lo += (dk - w1) + val_hi;
1634
+ val_hi = w1;
1635
+ batch_type r = val_lo + val_hi;
1636
+ #ifdef __FAST_MATH__
1637
+ return r;
1638
+ #else
1639
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1640
+ return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1641
+ #endif
1642
+ }
1643
+
1644
+ namespace detail
1645
+ {
1646
+ template <class T, class A>
1647
+ XSIMD_INLINE batch<T, A> logN_complex_impl(const batch<T, A>& z, typename batch<T, A>::value_type base) noexcept
1648
+ {
1649
+ using batch_type = batch<T, A>;
1650
+ using rv_type = typename batch_type::value_type;
1651
+ return log(z) / batch_type(rv_type(base));
1652
+ }
1653
+ }
1654
+
1655
+ template <class A, class T>
1656
+ XSIMD_INLINE batch<std::complex<T>, A> log2(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept
1657
+ {
1658
+ return detail::logN_complex_impl(self, std::log(2));
1659
+ }
1660
+
1661
+ // log10
1662
+ /* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */
1663
+ /*
1664
+ * ====================================================
1665
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1666
+ *
1667
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
1668
+ * Permission to use, copy, modify, and distribute this
1669
+ * software is freely granted, provided that this notice
1670
+ * is preserved.
1671
+ * ====================================================
1672
+ */
1673
+ template <class A>
1674
+ XSIMD_INLINE batch<float, A> log10(batch<float, A> const& self, requires_arch<common>) noexcept
1675
+ {
1676
+ using batch_type = batch<float, A>;
1677
+ const batch_type
1678
+ ivln10hi(4.3432617188e-01f),
1679
+ ivln10lo(-3.1689971365e-05f),
1680
+ log10_2hi(3.0102920532e-01f),
1681
+ log10_2lo(7.9034151668e-07f);
1682
+ using int_type = as_integer_t<float>;
1683
+ using i_type = batch<int_type, A>;
1684
+ batch_type x = self;
1685
+ i_type k(0);
1686
+ auto isnez = (self != batch_type(0.));
1687
+ #ifndef XSIMD_NO_DENORMALS
1688
+ auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1689
+ if (any(test))
1690
+ {
1691
+ k = select(batch_bool_cast<int_type>(test), k - i_type(25), k);
1692
+ x = select(test, x * batch_type(33554432ul), x);
1693
+ }
1694
+ #endif
1695
+ i_type ix = ::xsimd::bitwise_cast<int_type>(x);
1696
+ ix += 0x3f800000 - 0x3f3504f3;
1697
+ k += (ix >> 23) - 0x7f;
1698
+ ix = (ix & i_type(0x007fffff)) + 0x3f3504f3;
1699
+ x = ::xsimd::bitwise_cast<float>(ix);
1700
+ batch_type f = --x;
1701
+ batch_type s = f / (batch_type(2.) + f);
1702
+ batch_type z = s * s;
1703
+ batch_type w = z * z;
1704
+ batch_type t1 = w * detail::horner<batch_type, 0x3eccce13, 0x3e789e26>(w);
1705
+ batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa, 0x3e91e9ee>(w);
1706
+ batch_type R = t2 + t1;
1707
+ batch_type dk = to_float(k);
1708
+ batch_type hfsq = batch_type(0.5) * f * f;
1709
+ batch_type hibits = f - hfsq;
1710
+ hibits &= ::xsimd::bitwise_cast<float>(i_type(0xfffff000));
1711
+ batch_type lobits = fma(s, hfsq + R, f - hibits - hfsq);
1712
+ batch_type r = fma(dk, log10_2hi,
1713
+ fma(hibits, ivln10hi,
1714
+ fma(lobits, ivln10hi,
1715
+ fma(lobits + hibits, ivln10lo, dk * log10_2lo))));
1716
+ #ifndef XSIMD_NO_INFINITIES
1717
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1718
+ #else
1719
+ assert(all(isnez) && "Calling log10 on a batch with zero value while XSIMD_NO_INFINITIES is active");
1720
+ batch_type zz = r;
1721
+ #endif
1722
+ #ifndef XSIMD_NO_NANS
1723
+ return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1724
+ #else
1725
+ assert(all(self >= batch_type(0.)) && "Calling log10 on a batch with negative value while XSIMD_NO_NANS is active");
1726
+ return zz;
1727
+ #endif
1728
+ }
1729
+
1730
+ template <class A>
1731
+ XSIMD_INLINE batch<double, A> log10(batch<double, A> const& self, requires_arch<common>) noexcept
1732
+ {
1733
+ using batch_type = batch<double, A>;
1734
+ const batch_type
1735
+ ivln10hi(4.34294481878168880939e-01),
1736
+ ivln10lo(2.50829467116452752298e-11),
1737
+ log10_2hi(3.01029995663611771306e-01),
1738
+ log10_2lo(3.69423907715893078616e-13);
1739
+ using int_type = as_integer_t<double>;
1740
+ using i_type = batch<int_type, A>;
1741
+ batch_type x = self;
1742
+ i_type hx = ::xsimd::bitwise_cast<int_type>(x) >> 32;
1743
+ i_type k(0);
1744
+ auto isnez = (self != batch_type(0.));
1745
+ #ifndef XSIMD_NO_DENORMALS
1746
+ auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1747
+ if (any(test))
1748
+ {
1749
+ k = select(batch_bool_cast<int_type>(test), k - i_type(54), k);
1750
+ x = select(test, x * batch_type(18014398509481984ull), x);
1751
+ }
1752
+ #endif
1753
+ hx += 0x3ff00000 - 0x3fe6a09e;
1754
+ k += (hx >> 20) - 0x3ff;
1755
+ hx = (hx & i_type(0x000fffff)) + 0x3fe6a09e;
1756
+ x = ::xsimd::bitwise_cast<double>(hx << 32 | (i_type(0xffffffff) & ::xsimd::bitwise_cast<int_type>(x)));
1757
+ batch_type f = --x;
1758
+ batch_type dk = to_float(k);
1759
+ batch_type s = f / (batch_type(2.) + f);
1760
+ batch_type z = s * s;
1761
+ batch_type w = z * z;
1762
+ batch_type t1 = w * detail::horner<batch_type, 0x3fd999999997fa04ll, 0x3fcc71c51d8e78afll, 0x3fc39a09d078c69fll>(w);
1763
+ batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll, 0x3fd2492494229359ll, 0x3fc7466496cb03dell, 0x3fc2f112df3e5244ll>(w);
1764
+ batch_type R = t2 + t1;
1765
+ batch_type hfsq = batch_type(0.5) * f * f;
1766
+ batch_type hi = f - hfsq;
1767
+ hi = hi & ::xsimd::bitwise_cast<double>(constants::allbits<i_type>() << 32);
1768
+ batch_type lo = f - hi - hfsq + s * (hfsq + R);
1769
+ batch_type val_hi = hi * ivln10hi;
1770
+ batch_type y = dk * log10_2hi;
1771
+ batch_type val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
1772
+ batch_type w1 = y + val_hi;
1773
+ val_lo += (y - w1) + val_hi;
1774
+ val_hi = w1;
1775
+ batch_type r = val_lo + val_hi;
1776
+ #ifdef __FAST_MATH__
1777
+ return r;
1778
+ #else
1779
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1780
+ return select(!(self >= batch_type(0.)), constants::nan<batch_type>(), zz);
1781
+ #endif
1782
+ }
1783
+
1784
+ template <class A, class T>
1785
+ XSIMD_INLINE batch<std::complex<T>, A> log10(const batch<std::complex<T>, A>& z, requires_arch<common>) noexcept
1786
+ {
1787
+ return detail::logN_complex_impl(z, std::log(10));
1788
+ }
1789
+
1790
+ // log1p
1791
+ /* origin: boost/simd/arch/common/simd/function/log1p.hpp */
1792
+ /*
1793
+ * ====================================================
1794
+ * copyright 2016 NumScale SAS
1795
+ *
1796
+ * Distributed under the Boost Software License, Version 1.0.
1797
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
1798
+ * ====================================================
1799
+ */
1800
+ template <class A>
1801
+ XSIMD_INLINE batch<float, A> log1p(batch<float, A> const& self, requires_arch<common>) noexcept
1802
+ {
1803
+ using batch_type = batch<float, A>;
1804
+ using int_type = as_integer_t<float>;
1805
+ using i_type = batch<int_type, A>;
1806
+ const batch_type uf = self + batch_type(1.);
1807
+ auto isnez = (uf != batch_type(0.));
1808
+ i_type iu = ::xsimd::bitwise_cast<int_type>(uf);
1809
+ iu += 0x3f800000 - 0x3f3504f3;
1810
+ i_type k = (iu >> 23) - 0x7f;
1811
+ iu = (iu & i_type(0x007fffff)) + 0x3f3504f3;
1812
+ batch_type f = --(::xsimd::bitwise_cast<float>(iu));
1813
+ batch_type s = f / (batch_type(2.) + f);
1814
+ batch_type z = s * s;
1815
+ batch_type w = z * z;
1816
+ batch_type t1 = w * detail::horner<batch_type, 0x3eccce13, 0x3e789e26>(w);
1817
+ batch_type t2 = z * detail::horner<batch_type, 0x3f2aaaaa, 0x3e91e9ee>(w);
1818
+ batch_type R = t2 + t1;
1819
+ batch_type hfsq = batch_type(0.5) * f * f;
1820
+ batch_type dk = to_float(k);
1821
+ /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
1822
+ batch_type c = select(batch_bool_cast<float>(k >= i_type(2)), batch_type(1.) - (uf - self), self - (uf - batch_type(1.))) / uf;
1823
+ batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
1824
+ #ifdef __FAST_MATH__
1825
+ return r;
1826
+ #else
1827
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1828
+ return select(!(uf >= batch_type(0.)), constants::nan<batch_type>(), zz);
1829
+ #endif
1830
+ }
1831
+
1832
+ template <class A>
1833
+ XSIMD_INLINE batch<double, A> log1p(batch<double, A> const& self, requires_arch<common>) noexcept
1834
+ {
1835
+ using batch_type = batch<double, A>;
1836
+ using int_type = as_integer_t<double>;
1837
+ using i_type = batch<int_type, A>;
1838
+ const batch_type uf = self + batch_type(1.);
1839
+ auto isnez = (uf != batch_type(0.));
1840
+ i_type hu = ::xsimd::bitwise_cast<int_type>(uf) >> 32;
1841
+ hu += 0x3ff00000 - 0x3fe6a09e;
1842
+ i_type k = (hu >> 20) - 0x3ff;
1843
+ /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */
1844
+ batch_type c = select(batch_bool_cast<double>(k >= i_type(2)), batch_type(1.) - (uf - self), self - (uf - batch_type(1.))) / uf;
1845
+ hu = (hu & i_type(0x000fffff)) + 0x3fe6a09e;
1846
+ batch_type f = ::xsimd::bitwise_cast<double>((hu << 32) | (i_type(0xffffffff) & ::xsimd::bitwise_cast<int_type>(uf)));
1847
+ f = --f;
1848
+ batch_type hfsq = batch_type(0.5) * f * f;
1849
+ batch_type s = f / (batch_type(2.) + f);
1850
+ batch_type z = s * s;
1851
+ batch_type w = z * z;
1852
+ batch_type t1 = w * detail::horner<batch_type, 0x3fd999999997fa04ll, 0x3fcc71c51d8e78afll, 0x3fc39a09d078c69fll>(w);
1853
+ batch_type t2 = z * detail::horner<batch_type, 0x3fe5555555555593ll, 0x3fd2492494229359ll, 0x3fc7466496cb03dell, 0x3fc2f112df3e5244ll>(w);
1854
+ batch_type R = t2 + t1;
1855
+ batch_type dk = to_float(k);
1856
+ batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, hfsq + R, dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
1857
+ #ifdef __FAST_MATH__
1858
+ return r;
1859
+ #else
1860
+ batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1861
+ return select(!(uf >= batch_type(0.)), constants::nan<batch_type>(), zz);
1862
+ #endif
1863
+ }
1864
+
1865
+ template <class A, class T>
1866
+ XSIMD_INLINE batch<std::complex<T>, A> log1p(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept
1867
+ {
1868
+ using batch_type = batch<std::complex<T>, A>;
1869
+ using real_batch = typename batch_type::real_batch;
1870
+ batch_type u = 1 + self;
1871
+ batch_type logu = log(u);
1872
+ return select(u == batch_type(1.),
1873
+ self,
1874
+ select(u.real() <= real_batch(0.),
1875
+ logu,
1876
+ logu * self / (u - batch_type(1.))));
1877
+ }
1878
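
As with expm1, log1p earns a dedicated kernel (including the correction term c noted in the comments above) because forming 1 + x first rounds away small x. Scalar illustration (not part of the package):

#include <cmath>
#include <cstdio>

int main()
{
    double x = 1e-13;
    std::printf("log(1 + x) = %.17g\n", std::log(1.0 + x)); // 1 + x already lost most of x
    std::printf("log1p(x)   = %.17g\n", std::log1p(x));     // ~ x - x*x/2, full precision
}
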
+
1879
+ // mod
1880
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1881
+ XSIMD_INLINE batch<T, A> mod(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
1882
+ {
1883
+ return detail::apply([](T x, T y) noexcept -> T
1884
+ { return x % y; },
1885
+ self, other);
1886
+ }
1887
+
1888
+ // nearbyint
1889
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1890
+ XSIMD_INLINE batch<T, A> nearbyint(batch<T, A> const& self, requires_arch<common>) noexcept
1891
+ {
1892
+ return self;
1893
+ }
1894
+ namespace detail
1895
+ {
1896
+ template <class A, class T>
1897
+ XSIMD_INLINE batch<T, A> nearbyintf(batch<T, A> const& self) noexcept
1898
+ {
1899
+ using batch_type = batch<T, A>;
1900
+ batch_type s = bitofsign(self);
1901
+ batch_type v = self ^ s;
1902
+ batch_type t2n = constants::twotonmb<batch_type>();
1903
+ // Under fast-math, reordering is possible and the compiler optimizes d
1904
+ // to v. That's not what we want, so prevent compiler optimization here.
1905
+ // FIXME: it may be better to emit a memory barrier here (?).
1906
+ #ifdef __FAST_MATH__
1907
+ batch_type d0 = v + t2n;
1908
+ asm volatile("" ::"r"(&d0) : "memory");
1909
+ batch_type d = d0 - t2n;
1910
+ #else
1911
+ batch_type d0 = v + t2n;
1912
+ batch_type d = d0 - t2n;
1913
+ #endif
1914
+ return s ^ select(v < t2n, d, v);
1915
+ }
1916
+ }
1917
+ template <class A>
1918
+ XSIMD_INLINE batch<float, A> nearbyint(batch<float, A> const& self, requires_arch<common>) noexcept
1919
+ {
1920
+ return detail::nearbyintf(self);
1921
+ }
1922
+ template <class A>
1923
+ XSIMD_INLINE batch<double, A> nearbyint(batch<double, A> const& self, requires_arch<common>) noexcept
1924
+ {
1925
+ return detail::nearbyintf(self);
1926
+ }
1927
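
nearbyintf above rounds by adding and then subtracting twotonmb (2^23 for float, 2^52 for double): once the value is shifted into that magnitude range, the fractional bits fall off the end of the mantissa. Scalar form of the trick, assuming the default round-to-nearest mode and no fast-math reassociation (illustrative only):

#include <cmath>
#include <cstdio>

int main()
{
    double v = 2.5;
    double t2n = 4503599627370496.0;  // 2^52, the double analogue of twotonmb
    double rounded = (v + t2n) - t2n; // fractional bits drop out of the mantissa
    std::printf("%g %g\n", rounded, std::nearbyint(v)); // 2 2 (ties to even)
}
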
+
1928
+ // nearbyint_as_int
1929
+ template <class T, class A, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1930
+ XSIMD_INLINE batch<T, A> nearbyint_as_int(batch<T, A> const& self, requires_arch<common>) noexcept
1931
+ {
1932
+ return self;
1933
+ }
1934
+
1935
+ // nearbyint_as_int
1936
+ template <class A>
1937
+ XSIMD_INLINE batch<as_integer_t<float>, A>
1938
+ nearbyint_as_int(batch<float, A> const& self, requires_arch<common>) noexcept
1939
+ {
1940
+ using U = as_integer_t<float>;
1941
+ return kernel::detail::apply_transform<U>([](float x) noexcept -> U
1942
+ { return std::nearbyintf(x); },
1943
+ self);
1944
+ }
1945
+
1946
+ template <class A>
1947
+ XSIMD_INLINE batch<as_integer_t<double>, A>
1948
+ nearbyint_as_int(batch<double, A> const& self, requires_arch<common>) noexcept
1949
+ {
1950
+ using U = as_integer_t<double>;
1951
+ return kernel::detail::apply_transform<U>([](double x) noexcept -> U
1952
+ { return std::nearbyint(x); },
1953
+ self);
1954
+ }
1955
+
1956
+ // nextafter
1957
+ namespace detail
1958
+ {
1959
+ template <class T, class A, bool is_int = std::is_integral<T>::value>
1960
+ struct nextafter_kernel
1961
+ {
1962
+ using batch_type = batch<T, A>;
1963
+
1964
+ static XSIMD_INLINE batch_type next(batch_type const& b) noexcept
1965
+ {
1966
+ return b;
1967
+ }
1968
+
1969
+ static XSIMD_INLINE batch_type prev(batch_type const& b) noexcept
1970
+ {
1971
+ return b;
1972
+ }
1973
+ };
1974
+
1975
+ template <class T, class A>
1976
+ struct bitwise_cast_batch;
1977
+
1978
+ template <class A>
1979
+ struct bitwise_cast_batch<float, A>
1980
+ {
1981
+ using type = batch<int32_t, A>;
1982
+ };
1983
+
1984
+ template <class A>
1985
+ struct bitwise_cast_batch<double, A>
1986
+ {
1987
+ using type = batch<int64_t, A>;
1988
+ };
1989
+
1990
+ template <class T, class A>
1991
+ struct nextafter_kernel<T, A, false>
1992
+ {
1993
+ using batch_type = batch<T, A>;
1994
+ using int_batch = typename bitwise_cast_batch<T, A>::type;
1995
+ using int_type = typename int_batch::value_type;
1996
+
1997
+ static XSIMD_INLINE batch_type next(const batch_type& b) noexcept
1998
+ {
1999
+ batch_type n = ::xsimd::bitwise_cast<T>(::xsimd::bitwise_cast<int_type>(b) + int_type(1));
2000
+ #ifdef __FAST_MATH__
2001
+ return n;
2002
+ #else
2003
+ return select(b == constants::infinity<batch_type>(), b, n);
2004
+ #endif
2005
+ }
2006
+
2007
+ static XSIMD_INLINE batch_type prev(const batch_type& b) noexcept
2008
+ {
2009
+ batch_type p = ::xsimd::bitwise_cast<T>(::xsimd::bitwise_cast<int_type>(b) - int_type(1));
2010
+ #ifdef __FAST_MATH__
2011
+ return p;
2012
+ #else
2013
+ return select(b == constants::minusinfinity<batch_type>(), b, p);
2014
+ #endif
2015
+ }
2016
+ };
2017
+ }
2018
+ template <class A, class T>
2019
+ XSIMD_INLINE batch<T, A> nextafter(batch<T, A> const& from, batch<T, A> const& to, requires_arch<common>) noexcept
2020
+ {
2021
+ using kernel = detail::nextafter_kernel<T, A>;
2022
+ return select(from == to, from,
2023
+ select(to > from, kernel::next(from), kernel::prev(from)));
2024
+ }
2025
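
The nextafter kernel above steps to the adjacent float by incrementing or decrementing the raw bit pattern, which works because the bit-pattern ordering of finite positive IEEE-754 values matches their numeric ordering. A scalar sketch for the positive case (illustrative):

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    float f = 1.0f;
    std::uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);
    ++bits;                                   // next representable positive float
    float up;
    std::memcpy(&up, &bits, sizeof up);
    std::printf("%.9g %.9g\n", up, std::nextafter(1.0f, 2.0f)); // both 1 + 2^-23
}
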
+
2026
+ // pow
2027
+ /* origin: boost/simd/arch/common/simd/function/pow.hpp*/
2028
+ /*
2029
+ * ====================================================
2030
+ * copyright 2016 NumScale SAS
2031
+ *
2032
+ * Distributed under the Boost Software License, Version 1.0.
2033
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
2034
+ * ====================================================
2035
+ */
2036
+ template <class A, class T>
2037
+ XSIMD_INLINE batch<T, A> pow(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
2038
+ {
2039
+ using batch_type = batch<T, A>;
2040
+ const auto zero = batch_type(0.);
2041
+ auto negself = self < zero;
2042
+ auto iszeropowpos = self == zero && other >= zero;
2043
+ auto adj_self = select(iszeropowpos, batch_type(1), abs(self));
2044
+ batch_type z = exp(other * log(adj_self));
2045
+ z = select(iszeropowpos, zero, z);
2046
+ z = select(is_odd(other) && negself, -z, z);
2047
+ auto invalid = negself && !(is_flint(other) || isinf(other));
2048
+ return select(invalid, constants::nan<batch_type>(), z);
2049
+ }
2050
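
The real pow above uses |x|^y = exp(y · log|x|), restores the sign when the base is negative and the exponent is an odd integer, and returns NaN for the remaining negative-base cases. Scalar version of that identity, with a simplified odd-integer test (illustrative only):

#include <cmath>
#include <cstdio>

int main()
{
    double x = -2.0, y = 3.0;
    double z = std::exp(y * std::log(std::fabs(x)));  // |x|^y
    bool y_is_odd_int = (y == std::floor(y)) && (std::fmod(std::fabs(y), 2.0) == 1.0);
    if (x < 0.0 && y_is_odd_int) z = -z;              // restore the sign
    std::printf("%g %g\n", z, std::pow(x, y));        // both ~ -8
}
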
+
2051
+ template <class A, class T>
2052
+ XSIMD_INLINE batch<std::complex<T>, A> pow(const batch<std::complex<T>, A>& a, const batch<std::complex<T>, A>& z, requires_arch<common>) noexcept
2053
+ {
2054
+ using cplx_batch = batch<std::complex<T>, A>;
2055
+ using real_batch = typename cplx_batch::real_batch;
2056
+ real_batch absa = abs(a);
2057
+ real_batch arga = arg(a);
2058
+ real_batch x = z.real();
2059
+ real_batch y = z.imag();
2060
+ real_batch r = pow(absa, x);
2061
+ real_batch theta = x * arga;
2062
+ real_batch ze(0);
2063
+ auto cond = (y == ze);
2064
+ r = select(cond, r, r * exp(-y * arga));
2065
+ theta = select(cond, theta, theta + y * log(absa));
2066
+ auto sincosTheta = xsimd::sincos(theta);
2067
+ return select(absa == ze, cplx_batch(ze), cplx_batch(r * sincosTheta.second, r * sincosTheta.first));
2068
+ }
2069
+
2070
+ template <class A, class T>
2071
+ inline batch<std::complex<T>, A> pow(const batch<std::complex<T>, A>& a, const batch<T, A>& z, requires_arch<common>) noexcept
2072
+ {
2073
+ using cplx_batch = batch<std::complex<T>, A>;
2074
+
2075
+ auto absa = abs(a);
2076
+ auto arga = arg(a);
2077
+ auto r = pow(absa, z);
2078
+
2079
+ auto theta = z * arga;
2080
+ auto sincosTheta = xsimd::sincos(theta);
2081
+ return select(absa == 0, cplx_batch(0), cplx_batch(r * sincosTheta.second, r * sincosTheta.first));
2082
+ }
2083
+
2084
+ template <class A, class T>
2085
+ inline batch<std::complex<T>, A> pow(const batch<T, A>& a, const batch<std::complex<T>, A>& z, requires_arch<common>) noexcept
2086
+ {
2087
+ return pow(batch<std::complex<T>, A> { a, batch<T, A> {} }, z);
2088
+ }
2089
+
2090
+ // reciprocal
2091
+ template <class T, class A, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
2092
+ XSIMD_INLINE batch<T, A> reciprocal(batch<T, A> const& self,
2093
+ requires_arch<common>) noexcept
2094
+ {
2095
+ using batch_type = batch<T, A>;
2096
+ return div(batch_type(1), self);
2097
+ }
2098
+
2099
+ // reduce_add
2100
+ template <class A, class T>
2101
+ XSIMD_INLINE std::complex<T> reduce_add(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept
2102
+ {
2103
+ return { reduce_add(self.real()), reduce_add(self.imag()) };
2104
+ }
2105
+
2106
+ template <class A, class T, class /*=typename std::enable_if<std::is_scalar<T>::value, void>::type*/>
2107
+ XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<common>) noexcept
2108
+ {
2109
+ alignas(A::alignment()) T buffer[batch<T, A>::size];
2110
+ self.store_aligned(buffer);
2111
+ T res = 0;
2112
+ for (T val : buffer)
2113
+ {
2114
+ res += val;
2115
+ }
2116
+ return res;
2117
+ }
2118
+
2119
+ namespace detail
2120
+ {
2121
+ template <class T, T N>
2122
+ struct split_high
2123
+ {
2124
+ static constexpr T get(T i, T)
2125
+ {
2126
+ return i < N ? (i + N) : ((i % N) + N);
2127
+ }
2128
+ };
2129
+
2130
+ template <class Op, class A, class T>
2131
+ XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>) noexcept
2132
+ {
2133
+ return ::xsimd::kernel::first(self, A {});
2134
+ }
2135
+
2136
+ template <class Op, class A, class T, unsigned Lvl>
2137
+ XSIMD_INLINE T reduce(Op op, batch<T, A> const& self, std::integral_constant<unsigned, Lvl>) noexcept
2138
+ {
2139
+ using index_type = as_unsigned_integer_t<T>;
2140
+ batch<T, A> split = swizzle(self, make_batch_constant<index_type, split_high<index_type, Lvl / 2>, A>());
2141
+ return reduce(op, op(split, self), std::integral_constant<unsigned, Lvl / 2>());
2142
+ }
2143
+ }
2144
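
detail::reduce above performs a log2(size) tree reduction: swizzle moves the upper half of the batch onto the lower half, the operation combines the two halves, and the width halves on each step until one lane remains. Plain-array sketch of the same pattern (illustrative only):

#include <algorithm>
#include <cstdio>

int main()
{
    float lane[8] = { 3.f, 7.f, 1.f, 9.f, 4.f, 8.f, 2.f, 6.f };
    // fold the upper half onto the lower half, halving the width each step
    for (int width = 8 / 2; width >= 1; width /= 2)
        for (int i = 0; i < width; ++i)
            lane[i] = std::max(lane[i], lane[i + width]);
    std::printf("%g\n", lane[0]); // 9
}
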
+
2145
+ // reduce_max
2146
+ template <class A, class T>
2147
+ XSIMD_INLINE T reduce_max(batch<T, A> const& self, requires_arch<common>) noexcept
2148
+ {
2149
+ return detail::reduce([](batch<T, A> const& x, batch<T, A> const& y)
2150
+ { return max(x, y); },
2151
+ self, std::integral_constant<unsigned, batch<T, A>::size>());
2152
+ }
2153
+
2154
+ // reduce_min
2155
+ template <class A, class T>
2156
+ XSIMD_INLINE T reduce_min(batch<T, A> const& self, requires_arch<common>) noexcept
2157
+ {
2158
+ return detail::reduce([](batch<T, A> const& x, batch<T, A> const& y)
2159
+ { return min(x, y); },
2160
+ self, std::integral_constant<unsigned, batch<T, A>::size>());
2161
+ }
2162
+
2163
+ // reduce_mul
2164
+ template <class A, class T>
2165
+ XSIMD_INLINE std::complex<T> reduce_mul(batch<std::complex<T>, A> const& self, requires_arch<common>) noexcept
2166
+ {
2167
+ // FIXME: could do better
2168
+ alignas(A::alignment()) std::complex<T> buffer[batch<std::complex<T>, A>::size];
2169
+ self.store_aligned(buffer);
2170
+ std::complex<T> res = 1;
2171
+ for (auto val : buffer)
2172
+ {
2173
+ res *= val;
2174
+ }
2175
+ return res;
2176
+ }
2177
+
2178
+ template <class A, class T, class /*=typename std::enable_if<std::is_scalar<T>::value, void>::type*/>
2179
+ XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<common>) noexcept
2180
+ {
2181
+ alignas(A::alignment()) T buffer[batch<T, A>::size];
2182
+ self.store_aligned(buffer);
2183
+ T res = 1;
2184
+ for (T val : buffer)
2185
+ {
2186
+ res *= val;
2187
+ }
2188
+ return res;
2189
+ }
2190
+
2191
+ // remainder
2192
+ template <class A>
2193
+ XSIMD_INLINE batch<float, A> remainder(batch<float, A> const& self, batch<float, A> const& other, requires_arch<common>) noexcept
2194
+ {
2195
+ return fnma(nearbyint(self / other), other, self);
2196
+ }
2197
+ template <class A>
2198
+ XSIMD_INLINE batch<double, A> remainder(batch<double, A> const& self, batch<double, A> const& other, requires_arch<common>) noexcept
2199
+ {
2200
+ return fnma(nearbyint(self / other), other, self);
2201
+ }
2202
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2203
+ XSIMD_INLINE batch<T, A> remainder(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept
2204
+ {
2205
+ auto mod = self % other;
2206
+ return select(mod <= other / 2, mod, mod - other);
2207
+ }
2208
+
2209
+ // select
2210
+ template <class A, class T>
2211
+ XSIMD_INLINE batch<std::complex<T>, A> select(batch_bool<T, A> const& cond, batch<std::complex<T>, A> const& true_br, batch<std::complex<T>, A> const& false_br, requires_arch<common>) noexcept
2212
+ {
2213
+ return { select(cond, true_br.real(), false_br.real()), select(cond, true_br.imag(), false_br.imag()) };
2214
+ }
2215
+
2216
+ // sign
2217
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2218
+ XSIMD_INLINE batch<T, A> sign(batch<T, A> const& self, requires_arch<common>) noexcept
2219
+ {
2220
+ using batch_type = batch<T, A>;
2221
+ batch_type res = select(self > batch_type(0), batch_type(1), batch_type(0)) - select(self < batch_type(0), batch_type(1), batch_type(0));
2222
+ return res;
2223
+ }
2224
+
2225
+ namespace detail
2226
+ {
2227
+ template <class T, class A>
2228
+ XSIMD_INLINE batch<T, A> signf(batch<T, A> const& self) noexcept
2229
+ {
2230
+ using batch_type = batch<T, A>;
2231
+ batch_type res = select(self > batch_type(0.f), batch_type(1.f), batch_type(0.f)) - select(self < batch_type(0.f), batch_type(1.f), batch_type(0.f));
2232
+ #ifdef XSIMD_NO_NANS
2233
+ return res;
2234
+ #else
2235
+ return select(isnan(self), constants::nan<batch_type>(), res);
2236
+ #endif
2237
+ }
2238
+ }
2239
+
2240
+ template <class A>
2241
+ XSIMD_INLINE batch<float, A> sign(batch<float, A> const& self, requires_arch<common>) noexcept
2242
+ {
2243
+ return detail::signf(self);
2244
+ }
2245
+ template <class A>
2246
+ XSIMD_INLINE batch<double, A> sign(batch<double, A> const& self, requires_arch<common>) noexcept
2247
+ {
2248
+ return detail::signf(self);
2249
+ }
2250
+ template <class A, class T>
2251
+ XSIMD_INLINE batch<std::complex<T>, A> sign(const batch<std::complex<T>, A>& z, requires_arch<common>) noexcept
2252
+ {
2253
+ using batch_type = batch<std::complex<T>, A>;
2254
+ using real_batch = typename batch_type::real_batch;
2255
+ auto rz = z.real();
2256
+ auto iz = z.imag();
2257
+ return select(rz != real_batch(0.),
2258
+ batch_type(sign(rz)),
2259
+ batch_type(sign(iz)));
2260
+ }
2261
+
2262
+ // signnz
2263
+ template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2264
+ XSIMD_INLINE batch<T, A> signnz(batch<T, A> const& self, requires_arch<common>) noexcept
2265
+ {
2266
+ using batch_type = batch<T, A>;
2267
+ return (self >> (sizeof(T) * 8 - 1)) | batch_type(1.);
2268
+ }
2269
+
2270
+ namespace detail
2271
+ {
2272
+ template <class T, class A>
2273
+ XSIMD_INLINE batch<T, A> signnzf(batch<T, A> const& self) noexcept
2274
+ {
2275
+ using batch_type = batch<T, A>;
2276
+ #ifndef XSIMD_NO_NANS
2277
+ return select(isnan(self), constants::nan<batch_type>(), batch_type(1.) | (constants::signmask<batch_type>() & self));
2278
+ #else
2279
+ return batch_type(1.) | (constants::signmask<batch_type>() & self);
2280
+ #endif
2281
+ }
2282
+ }
2283
+
2284
+ template <class A>
2285
+ XSIMD_INLINE batch<float, A> signnz(batch<float, A> const& self, requires_arch<common>) noexcept
2286
+ {
2287
+ return detail::signnzf(self);
2288
+ }
2289
+ template <class A>
2290
+ XSIMD_INLINE batch<double, A> signnz(batch<double, A> const& self, requires_arch<common>) noexcept
2291
+ {
2292
+ return detail::signnzf(self);
2293
+ }
2294
+
2295
+ // sqrt
2296
+ template <class A, class T>
2297
+ XSIMD_INLINE batch<std::complex<T>, A> sqrt(batch<std::complex<T>, A> const& z, requires_arch<common>) noexcept
2298
+ {
2299
+
2300
+ constexpr T csqrt_scale_factor = std::is_same<T, float>::value ? 6.7108864e7f : 1.8014398509481984e16;
2301
+ constexpr T csqrt_scale = std::is_same<T, float>::value ? 1.220703125e-4f : 7.450580596923828125e-9;
2302
+ using batch_type = batch<std::complex<T>, A>;
2303
+ using real_batch = batch<T, A>;
2304
+ real_batch x = z.real();
2305
+ real_batch y = z.imag();
2306
+ real_batch sqrt_x = sqrt(fabs(x));
2307
+ real_batch sqrt_hy = sqrt(0.5 * fabs(y));
2308
+ auto cond = (fabs(x) > real_batch(4.) || fabs(y) > real_batch(4.));
2309
+ x = select(cond, x * 0.25, x * csqrt_scale_factor);
2310
+ y = select(cond, y * 0.25, y * csqrt_scale_factor);
2311
+ real_batch scale = select(cond, real_batch(2.), real_batch(csqrt_scale));
2312
+ real_batch r = abs(batch_type(x, y));
2313
+
2314
+ auto condxp = x > real_batch(0.);
2315
+ real_batch t0 = select(condxp, xsimd::sqrt(0.5 * (r + x)), xsimd::sqrt(0.5 * (r - x)));
2316
+ real_batch r0 = scale * fabs((0.5 * y) / t0);
2317
+ t0 *= scale;
2318
+ real_batch t = select(condxp, t0, r0);
2319
+ r = select(condxp, r0, t0);
2320
+ batch_type resg = select(y < real_batch(0.), batch_type(t, -r), batch_type(t, r));
2321
+ real_batch ze(0.);
2322
+
2323
+ return select(y == ze,
2324
+ select(x == ze,
2325
+ batch_type(ze, ze),
2326
+ select(x < ze, batch_type(ze, sqrt_x), batch_type(sqrt_x, ze))),
2327
+ select(x == ze,
2328
+ select(y > ze, batch_type(sqrt_hy, sqrt_hy), batch_type(sqrt_hy, -sqrt_hy)),
2329
+ resg));
2330
+ }
2331
+
2332
+ // tgamma
2333
+
2334
+ namespace detail
2335
+ {
2336
+ /* origin: boost/simd/arch/common/detail/common/stirling_kernel.hpp */
2337
+ /*
2338
+ * ====================================================
2339
+ * copyright 2016 NumScale SAS
2340
+ *
2341
+ * Distributed under the Boost Software License, Version 1.0.
2342
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
2343
+ * ====================================================
2344
+ */
2345
+ template <class B>
2346
+ struct stirling_kernel;
2347
+
2348
+ template <class A>
2349
+ struct stirling_kernel<batch<float, A>>
2350
+ {
2351
+ using batch_type = batch<float, A>;
2352
+ static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept
2353
+ {
2354
+ return horner<batch_type,
2355
+ 0x3daaaaab,
2356
+ 0x3b638e39,
2357
+ 0xbb2fb930,
2358
+ 0xb970b359>(x);
2359
+ }
2360
+
2361
+ static XSIMD_INLINE batch_type split_limit() noexcept
2362
+ {
2363
+ return batch_type(bit_cast<float>(uint32_t(0x41d628f6)));
2364
+ }
2365
+
2366
+ static XSIMD_INLINE batch_type large_limit() noexcept
2367
+ {
2368
+ return batch_type(bit_cast<float>(uint32_t(0x420c28f3)));
2369
+ }
2370
+ };
2371
+
2372
+ template <class A>
2373
+ struct stirling_kernel<batch<double, A>>
2374
+ {
2375
+ using batch_type = batch<double, A>;
2376
+ static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept
2377
+ {
2378
+ return horner<batch_type,
2379
+ 0x3fb5555555555986ull, // 8.33333333333482257126E-2
2380
+ 0x3f6c71c71b98c5fdull, // 3.47222221605458667310E-3
2381
+ 0xbf65f72607d44fd7ull, // -2.68132617805781232825E-3
2382
+ 0xbf2e166b27e61d7cull, // -2.29549961613378126380E-4
2383
+ 0x3f49cc72592d7293ull // 7.87311395793093628397E-4
2384
+ >(x);
2385
+ }
2386
+
2387
+ static XSIMD_INLINE batch_type split_limit() noexcept
2388
+ {
2389
+ return batch_type(bit_cast<double>(uint64_t(0x4061e083ba3443d4)));
2390
+ }
2391
+
2392
+ static XSIMD_INLINE batch_type large_limit() noexcept
2393
+ {
2394
+ return batch_type(bit_cast<double>(uint64_t(0x4065800000000000)));
2395
+ }
2396
+ };
2397
+
+ /* origin: boost/simd/arch/common/simd/function/stirling.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ template <class T, class A>
+ XSIMD_INLINE batch<T, A> stirling(const batch<T, A>& a) noexcept
+ {
+ using batch_type = batch<T, A>;
+ const batch_type stirlingsplitlim = stirling_kernel<batch_type>::split_limit();
+ const batch_type stirlinglargelim = stirling_kernel<batch_type>::large_limit();
+ batch_type x = select(a >= batch_type(0.), a, constants::nan<batch_type>());
+ batch_type w = batch_type(1.) / x;
+ w = fma(w, stirling_kernel<batch_type>::compute(w), batch_type(1.));
+ batch_type y = exp(-x);
+ auto test = (x < stirlingsplitlim);
+ batch_type z = x - batch_type(0.5);
+ z = select(test, z, batch_type(0.5) * z);
+ batch_type v = exp(z * log(abs(x)));
+ y *= v;
+ y = select(test, y, y * v);
+ y *= constants::sqrt_2pi<batch_type>() * w;
+ #ifdef __FAST_MATH__
+ return y;
+ #else
+ y = select(isinf(x), x, y);
+ return select(x > stirlinglargelim, constants::infinity<batch_type>(), y);
+ #endif
+ }
+
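stirling() evaluates Stirling's approximation Γ(x) ≈ sqrt(2π) · x^(x - 1/2) · e^(-x) · w, where w = 1 + 1/(12x) + 1/(288x²) - ... is the series computed by stirling_kernel::compute; for x above split_limit() the factor x^(x - 1/2) is built as the square of exp(0.5·(x - 1/2)·log x) so the intermediate exp does not overflow. A plain scalar sketch of the same formula, without the overflow splitting (illustrative only):

#include <cmath>

// Stirling approximation to gamma(x) for moderately large positive x.
double stirling_gamma_sketch(double x)
{
    const double pi = 3.14159265358979323846;
    double w = 1.0 / x;
    // First terms of the Stirling series; the leading coefficients match the
    // double-precision stirling_kernel above (1/12, 1/288, -139/51840).
    w = 1.0 + w * (1.0 / 12.0 + w * (1.0 / 288.0 + w * (-139.0 / 51840.0)));
    return std::sqrt(2.0 * pi) * std::pow(x, x - 0.5) * std::exp(-x) * w;
}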
+ /* origin: boost/simd/arch/common/detail/common/gamma_kernel.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ template <class B>
+ struct tgamma_kernel;
+
+ template <class A>
+ struct tgamma_kernel<batch<float, A>>
+ {
+ using batch_type = batch<float, A>;
+ static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept
+ {
+ return horner<batch_type,
+ 0x3f800000UL, // 9.999999757445841E-01
+ 0x3ed87799UL, // 4.227874605370421E-01
+ 0x3ed2d411UL, // 4.117741948434743E-01
+ 0x3da82a34UL, // 8.211174403261340E-02
+ 0x3d93ae7cUL, // 7.211014349068177E-02
+ 0x3b91db14UL, // 4.451165155708328E-03
+ 0x3ba90c99UL, // 5.158972571345137E-03
+ 0x3ad28b22UL // 1.606319369134976E-03
+ >(x);
+ }
+ };
+
+ template <class A>
+ struct tgamma_kernel<batch<double, A>>
+ {
+ using batch_type = batch<double, A>;
+ static XSIMD_INLINE batch_type compute(const batch_type& x) noexcept
+ {
+ return horner<batch_type,
+ 0x3ff0000000000000ULL, // 9.99999999999999996796E-1
+ 0x3fdfa1373993e312ULL, // 4.94214826801497100753E-1
+ 0x3fca8da9dcae7d31ULL, // 2.07448227648435975150E-1
+ 0x3fa863d918c423d3ULL, // 4.76367800457137231464E-2
+ 0x3f8557cde9db14b0ULL, // 1.04213797561761569935E-2
+ 0x3f5384e3e686bfabULL, // 1.19135147006586384913E-3
+ 0x3f24fcb839982153ULL // 1.60119522476751861407E-4
+ >(x)
+ / horner<batch_type,
+ 0x3ff0000000000000ULL, // 1.00000000000000000320E00
+ 0x3fb24944c9cd3c51ULL, // 7.14304917030273074085E-2
+ 0xbfce071a9d4287c2ULL, // -2.34591795718243348568E-1
+ 0x3fa25779e33fde67ULL, // 3.58236398605498653373E-2
+ 0x3f8831ed5b1bb117ULL, // 1.18139785222060435552E-2
+ 0xBf7240e4e750b44aULL, // -4.45641913851797240494E-3
+ 0x3f41ae8a29152573ULL, // 5.39605580493303397842E-4
+ 0xbef8487a8400d3aFULL // -2.31581873324120129819E-5
+ >(x);
+ }
+ };
+
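tgamma_other (below) reduces each lane into [2, 3) using Γ(x+1) = x·Γ(x) and then applies the rational approximation P(x-2)/Q(x-2) implemented by tgamma_kernel. A scalar sketch of the double-precision path, built from the coefficient values in the comments above (illustrative only; the library evaluates the bit-exact hex constants with horner, and handles poles and large negatives separately):

// Rational approximation of gamma on [2, 3); w = x - 2.
double tgamma_kernel_sketch(double w)
{
    static const double P[] = { 9.99999999999999996796e-1, 4.94214826801497100753e-1,
                                2.07448227648435975150e-1, 4.76367800457137231464e-2,
                                1.04213797561761569935e-2, 1.19135147006586384913e-3,
                                1.60119522476751861407e-4 };
    static const double Q[] = { 1.00000000000000000320e0,  7.14304917030273074085e-2,
                               -2.34591795718243348568e-1, 3.58236398605498653373e-2,
                                1.18139785222060435552e-2, -4.45641913851797240494e-3,
                                5.39605580493303397842e-4, -2.31581873324120129819e-5 };
    double num = 0.0, den = 0.0;
    for (int i = 6; i >= 0; --i) num = num * w + P[i];   // Horner, constant term last
    for (int i = 7; i >= 0; --i) den = den * w + Q[i];
    return num / den;
}

// Argument reduction into [2, 3) for non-pole arguments, mirroring the
// while loops in tgamma_other below.
double tgamma_sketch(double x)
{
    double z = 1.0;
    while (x >= 3.0) { x -= 1.0; z *= x; }   // gamma(x) = (x-1) * gamma(x-1)
    while (x < 2.0)  { z /= x;  x += 1.0; }  // gamma(x) = gamma(x+1) / x
    return z * tgamma_kernel_sketch(x - 2.0);
}

As a sanity check, tgamma_sketch(4.0) returns 6.0 and tgamma_sketch(0.5) returns roughly 1.7724539 (√π).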
+ /* origin: boost/simd/arch/common/simd/function/gamma.hpp */
+ /*
+ * ====================================================
+ * copyright 2016 NumScale SAS
+ *
+ * Distributed under the Boost Software License, Version 1.0.
+ * (See copy at http://boost.org/LICENSE_1_0.txt)
+ * ====================================================
+ */
+ template <class B>
+ XSIMD_INLINE B tgamma_large_negative(const B& a) noexcept
+ {
+ B st = stirling(a);
+ B p = floor(a);
+ B sgngam = select(is_even(p), -B(1.), B(1.));
+ B z = a - p;
+ auto test2 = z < B(0.5);
+ z = select(test2, z - B(1.), z);
+ z = a * sin(z, trigo_pi_tag());
+ z = abs(z);
+ return sgngam * constants::pi<B>() / (z * st);
+ }
+
+ template <class B, class BB>
+ XSIMD_INLINE B tgamma_other(const B& a, const BB& test) noexcept
+ {
+ B x = select(test, B(2.), a);
+ #ifndef XSIMD_NO_INFINITIES
+ auto inf_result = (a == constants::infinity<B>());
+ x = select(inf_result, B(2.), x);
+ #endif
+ B z = B(1.);
+ auto test1 = (x >= B(3.));
+ while (any(test1))
+ {
+ x = select(test1, x - B(1.), x);
+ z = select(test1, z * x, z);
+ test1 = (x >= B(3.));
+ }
+ test1 = (x < B(0.));
+ while (any(test1))
+ {
+ z = select(test1, z / x, z);
+ x = select(test1, x + B(1.), x);
+ test1 = (x < B(0.));
+ }
+ auto test2 = (x < B(2.));
+ while (any(test2))
+ {
+ z = select(test2, z / x, z);
+ x = select(test2, x + B(1.), x);
+ test2 = (x < B(2.));
+ }
+ x = z * tgamma_kernel<B>::compute(x - B(2.));
+ #ifndef XSIMD_NO_INFINITIES
+ return select(inf_result, a, x);
+ #else
+ return x;
+ #endif
+ }
+ }
+
+ template <class A, class T>
+ XSIMD_INLINE batch<T, A> tgamma(batch<T, A> const& self, requires_arch<common>) noexcept
+ {
+ using batch_type = batch<T, A>;
+ auto nan_result = (self < batch_type(0.) && is_flint(self));
+ #ifndef XSIMD_NO_NANS
+ nan_result = isnan(self) || nan_result;
+ #endif
+ batch_type q = abs(self);
+ auto test = (self < batch_type(-33.));
+ batch_type r = constants::nan<batch_type>();
+ if (any(test))
+ {
+ r = detail::tgamma_large_negative(q);
+ if (all(test))
+ return select(nan_result, constants::nan<batch_type>(), r);
+ }
+ batch_type r1 = detail::tgamma_other(self, test);
+ batch_type r2 = select(test, r, r1);
+ #ifdef __FAST_MATH__
+ return r2;
+ #else
+ return select(self == batch_type(0.), copysign(constants::infinity<batch_type>(), self), select(nan_result, constants::nan<batch_type>(), r2));
+ #endif
+ }
+
+ }
+
+ }
+
+ #endif
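A hypothetical call site for the tgamma dispatcher defined above, assuming the usual public xsimd header is on the include path (the exact include prefix inside this vendored copy may differ):

#include <xsimd/xsimd.hpp>
#include <cstdio>

int main()
{
    xsimd::batch<double> x(4.0);                 // broadcast 4.0 into every lane
    xsimd::batch<double> g = xsimd::tgamma(x);   // gamma(4) = 3! = 6 per lane
    std::printf("%f\n", g.get(0));
    return 0;
}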