numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (171) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +4 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  129. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  130. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  134. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  135. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  139. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  140. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  141. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  142. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  143. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  145. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  146. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  147. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  148. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  151. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  152. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  153. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  154. numba_cuda/numba/cuda/tests/support.py +55 -15
  155. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  156. numba_cuda/numba/cuda/types.py +56 -0
  157. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  158. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  159. numba_cuda/numba/cuda/typing/context.py +751 -0
  160. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  161. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  162. numba_cuda/numba/cuda/typing/templates.py +7 -6
  163. numba_cuda/numba/cuda/ufuncs.py +3 -3
  164. numba_cuda/numba/cuda/utils.py +6 -112
  165. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
  167. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  168. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1807 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """Codegen for functions used as kernels in NumPy functions
5
+
6
+ Typically, the kernels of several ufuncs that can't map directly to
7
+ Python builtins
8
+ """
9
+
10
+ import math
11
+
12
+ import llvmlite.ir
13
+ import numpy as np
14
+
15
+ from numba.core.extending import overload
16
+ from numba.core.imputils import impl_ret_untracked
17
+ from numba.core import typing, types, errors
18
+ from numba.cuda import cgutils
19
+ from numba.core.extending import register_jitable
20
+ from numba.np import npdatetime
21
+ from numba.np.math import cmathimpl, mathimpl, numbers
22
+ from numba.np.numpy_support import numpy_version
23
+
24
+ # some NumPy constants. Note that we could generate some of them using
25
+ # the math library, but having the values copied from npy_math seems to
26
+ # yield more accurate results
27
+ _NPY_LOG2E = 1.442695040888963407359924681001892137 # math.log(math.e, 2)
28
+ _NPY_LOG10E = 0.434294481903251827651128918916605082 # math.log(math.e, 10)
29
+ _NPY_LOGE2 = 0.693147180559945309417232121458176568 # math.log(2)
30
+
31
+
32
+ def _check_arity_and_homogeneity(sig, args, arity, return_type=None):
33
+ """checks that the following are true:
34
+ - args and sig.args have arg_count elements
35
+ - all input types are homogeneous
36
+ - return type is 'return_type' if provided, otherwise it must be
37
+ homogeneous with the input types.
38
+ """
39
+ assert len(args) == arity
40
+ assert len(sig.args) == arity
41
+ ty = sig.args[0]
42
+ if return_type is None:
43
+ return_type = ty
44
+ # must have homogeneous args
45
+ if not (
46
+ all(arg == ty for arg in sig.args) and sig.return_type == return_type
47
+ ):
48
+ import inspect
49
+
50
+ fname = inspect.currentframe().f_back.f_code.co_name
51
+ msg = "{0} called with invalid types: {1}".format(fname, sig)
52
+ assert False, msg
53
+
54
+
55
+ cast_arg_ty = types.float64
56
+
57
+
58
def _call_func_by_name_with_cast(
    context, builder, sig, args, func_name, ty=cast_arg_ty
):
    """Call the external function ``func_name`` with all operands cast to ``ty``.

    It is quite common in NumPy to have loops implemented as a call to the
    double version of the function, wrapped in casts. This helper emits that
    pattern: every argument is cast from its type in ``sig.args`` to ``ty``,
    the function (declared as taking ``len(sig.args)`` values of ``ty`` and
    returning ``ty``) is called, and the result is cast to
    ``sig.return_type``.

    ``ty`` defaults to the module-level ``cast_arg_ty``.
    """
    mod = builder.module
    lty = context.get_argument_type(ty)
    fnty = llvmlite.ir.FunctionType(lty, [lty] * len(sig.args))
    fn = cgutils.insert_pure_function(mod, fnty, name=func_name)
    cast_args = [
        context.cast(builder, arg, argty, ty)
        for arg, argty in zip(args, sig.args)
    ]

    result = builder.call(fn, cast_args)
    # The callee was declared as returning ``ty``, so the result must be cast
    # from ``ty`` (not from a hard-coded float64) to stay correct when a
    # caller overrides the default.
    return context.cast(builder, result, ty, sig.return_type)
75
+
76
+
77
def _dispatch_func_by_name_type(context, builder, sig, args, table, user_name):
    """Call the external function that ``table`` associates with ``sig.args[0]``.

    ``table`` is indexed with the first argument type to obtain the name of
    the function to call; ``user_name`` is only used to build the error
    message. Raises ``errors.LoweringError`` when the type is not a key of
    ``table``. Returns the value produced by the call.
    """
    # for most cases the functions are homogeneous on all their types.
    # this code dispatches on the first argument type as it is the most useful
    # for our uses (all cases but ldexp are homogeneous in all types, and
    # dispatching on the first argument type works for ldexp as well)
    #
    # assumes that the function pointed by func_name has the type
    # signature sig (but needs translation to llvm types).

    ty = sig.args[0]
    try:
        func_name = table[ty]
    except KeyError as e:
        msg = "No {0} function for real type {1}".format(user_name, str(e))
        raise errors.LoweringError(msg)

    mod = builder.module
    if ty in types.complex_domain:
        # In numba struct types are always passed by pointer. So the call has to
        # be transformed from "result = func(ops...)" to "func(&result, ops...).
        # note that the result value pointer as first argument is the convention
        # used by numba.

        # First, prepare the return value
        out = context.make_complex(builder, ty)
        # Each operand is placed in its own stack slot so that a pointer to
        # it can be passed to the callee.
        ptrargs = [cgutils.alloca_once_value(builder, arg) for arg in args]
        call_args = [out._getpointer()] + ptrargs
        # get_value_as_argument for struct types like complex allocate stack space
        # and initialize with the value, the return value is the pointer to that
        # allocated space (ie: pointer to a copy of the value in the stack).
        # get_argument_type returns a pointer to the struct type in consonance.
        call_argtys = [ty] + list(sig.args)
        call_argltys = [
            context.get_value_type(ty).as_pointer() for ty in call_argtys
        ]
        fnty = llvmlite.ir.FunctionType(llvmlite.ir.VoidType(), call_argltys)
        # Note: the function isn't pure here (it writes to its pointer args)
        fn = cgutils.get_or_insert_function(mod, fnty, func_name)
        builder.call(fn, call_args)
        # The callee wrote its result through the first pointer argument.
        retval = builder.load(call_args[0])
    else:
        # Non-complex types: declare the function with the argument/return
        # types of sig and call it by value.
        argtypes = [context.get_argument_type(aty) for aty in sig.args]
        restype = context.get_argument_type(sig.return_type)
        fnty = llvmlite.ir.FunctionType(restype, argtypes)
        fn = cgutils.insert_pure_function(mod, fnty, name=func_name)
        retval = context.call_external_function(builder, fn, sig.args, args)
    return retval
124
+
125
+
126
+ ########################################################################
127
+ # Division kernels inspired by NumPy loops.c.src code
128
+ #
129
+ # The builtins are not applicable as they rely on a test for zero in the
130
+ # denominator. If it is zero the appropriate exception is raised.
131
+ # In NumPy, a division by zero does not raise an exception, but instead
132
+ # generates a known value. Note that a division by zero in any of the
133
+ # operations of a vector may raise an exception or issue a warning
134
+ # depending on the np.seterr configuration. This is not supported
135
+ # right now (and in any case, it won't be handled by these functions
136
+ # either)
137
+
138
+
139
def np_int_sdiv_impl(context, builder, sig, args):
    """Emit signed integer floor division that never traps.

    The result is 0 when the denominator is 0, or when the numerator is the
    minimum representable value and the denominator is -1. Otherwise it is
    ``sdiv(num, den)``, decremented by one when the operands have different
    signs and the remainder is non-zero.
    """
    # based on the actual code in NumPy loops.c.src for signed integer types
    _check_arity_and_homogeneity(sig, args, 2)

    num, den = args
    ty = sig.args[0]  # any arg type will do, homogeneous

    ZERO = context.get_constant(ty, 0)
    MINUS_ONE = context.get_constant(ty, -1)
    # Bit pattern with only the top bit set, i.e. the most negative value of
    # the operand width.
    MIN_INT = context.get_constant(ty, 1 << (den.type.width - 1))
    den_is_zero = builder.icmp_unsigned("==", ZERO, den)
    den_is_minus_one = builder.icmp_unsigned("==", MINUS_ONE, den)
    num_is_min_int = builder.icmp_unsigned("==", MIN_INT, num)
    could_cause_sigfpe = builder.and_(den_is_minus_one, num_is_min_int)
    force_zero = builder.or_(den_is_zero, could_cause_sigfpe)
    with builder.if_else(force_zero, likely=False) as (then, otherwise):
        with then:
            # Nothing to compute: this block only contributes ZERO to the phi.
            bb_then = builder.basic_block
        with otherwise:
            bb_otherwise = builder.basic_block
            div = builder.sdiv(num, den)
            mod = builder.srem(num, den)
            num_gt_zero = builder.icmp_signed(">", num, ZERO)
            den_gt_zero = builder.icmp_signed(">", den, ZERO)
            not_same_sign = builder.xor(num_gt_zero, den_gt_zero)
            mod_not_zero = builder.icmp_unsigned("!=", mod, ZERO)
            needs_fixing = builder.and_(not_same_sign, mod_not_zero)
            # Adjust the truncated quotient down by one to obtain the floor.
            fix_value = builder.select(needs_fixing, MINUS_ONE, ZERO)
            result_otherwise = builder.add(div, fix_value)

    # Merge the two paths into a single value.
    result = builder.phi(ZERO.type)
    result.add_incoming(ZERO, bb_then)
    result.add_incoming(result_otherwise, bb_otherwise)

    return result
174
+
175
+
176
def np_int_srem_impl(context, builder, sig, args):
    """Emit a signed integer remainder that yields 0 for a zero denominator.

    For a non-zero denominator the result is ``srem(num, den)``, with the
    denominator added when the operands have different signs and the
    remainder is non-zero.
    """
    # based on the actual code in NumPy loops.c.src for signed integers
    _check_arity_and_homogeneity(sig, args, 2)

    num, den = args
    ty = sig.args[0]  # any arg type will do, homogeneous

    ZERO = context.get_constant(ty, 0)
    den_not_zero = builder.icmp_unsigned("!=", ZERO, den)
    # Block from which the phi receives ZERO when the guarded body is skipped.
    bb_no_if = builder.basic_block
    # NOTE(review): the guarded path is the non-zero-denominator case, yet it
    # is emitted through `if_unlikely` — confirm the hint is intentional.
    with cgutils.if_unlikely(builder, den_not_zero):
        bb_if = builder.basic_block
        mod = builder.srem(num, den)
        num_gt_zero = builder.icmp_signed(">", num, ZERO)
        den_gt_zero = builder.icmp_signed(">", den, ZERO)
        not_same_sign = builder.xor(num_gt_zero, den_gt_zero)
        mod_not_zero = builder.icmp_unsigned("!=", mod, ZERO)
        needs_fixing = builder.and_(not_same_sign, mod_not_zero)
        fix_value = builder.select(needs_fixing, den, ZERO)
        final_mod = builder.add(fix_value, mod)

    result = builder.phi(ZERO.type)
    result.add_incoming(ZERO, bb_no_if)
    result.add_incoming(final_mod, bb_if)

    return result
202
+
203
+
204
def np_int_sdivrem_impl(context, builder, sig, args):
    """Emit signed integer divmod as a (quotient, remainder) tuple.

    The quotient comes from ``np_int_sdiv_impl`` and the remainder from
    ``np_int_srem_impl``; each is lowered with a signature built from the
    matching element of the tuple return type and the original argument
    types.
    """
    pair_ty = sig.return_type
    operand_tys = sig.args

    quotient_sig = pair_ty[0](*operand_tys)
    quotient = np_int_sdiv_impl(context, builder, quotient_sig, args)

    remainder_sig = pair_ty[1](*operand_tys)
    remainder = np_int_srem_impl(context, builder, remainder_sig, args)

    return context.make_tuple(builder, pair_ty, [quotient, remainder])
212
+
213
+
214
def np_int_udiv_impl(context, builder, sig, args):
    """Emit an unsigned integer division that yields 0 for a zero denominator.

    Returns ``udiv(num, den)`` when the denominator is non-zero and the
    constant 0 otherwise.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    num, den = args
    ty = sig.args[0]  # any arg type will do, homogeneous

    ZERO = context.get_constant(ty, 0)
    div_by_zero = builder.icmp_unsigned("==", ZERO, den)
    with builder.if_else(div_by_zero, likely=False) as (then, otherwise):
        with then:
            # division by zero
            bb_then = builder.basic_block
        with otherwise:
            # divide!
            div = builder.udiv(num, den)
            bb_otherwise = builder.basic_block

    # Merge the two paths into a single value.
    result = builder.phi(ZERO.type)
    result.add_incoming(ZERO, bb_then)
    result.add_incoming(div, bb_otherwise)
    return result
235
+
236
+
237
def np_int_urem_impl(context, builder, sig, args):
    """Emit an unsigned integer remainder that yields 0 for a zero denominator.

    Returns ``urem(num, den)`` when the denominator is non-zero and the
    constant 0 otherwise.
    """
    # based on the actual code in NumPy loops.c.src for unsigned integers
    # NOTE(review): this function is also bound to np_int_fmod_impl in this
    # module; urem interprets its operands as unsigned, so confirm that
    # signed types are never routed here through that alias.
    _check_arity_and_homogeneity(sig, args, 2)

    num, den = args
    ty = sig.args[0]  # any arg type will do, homogeneous

    ZERO = context.get_constant(ty, 0)
    den_not_zero = builder.icmp_unsigned("!=", ZERO, den)
    # Block from which the phi receives ZERO when the guarded body is skipped.
    bb_no_if = builder.basic_block
    with cgutils.if_unlikely(builder, den_not_zero):
        bb_if = builder.basic_block
        mod = builder.urem(num, den)

    result = builder.phi(ZERO.type)
    result.add_incoming(ZERO, bb_no_if)
    result.add_incoming(mod, bb_if)

    return result
256
+
257
+
258
def np_int_udivrem_impl(context, builder, sig, args):
    """Emit unsigned integer divmod as a (quotient, remainder) tuple.

    The quotient comes from ``np_int_udiv_impl`` and the remainder from
    ``np_int_urem_impl``; each is lowered with a signature built from the
    matching element of the tuple return type and the original argument
    types.
    """
    pair_ty = sig.return_type
    operand_tys = sig.args

    quotient_sig = pair_ty[0](*operand_tys)
    quotient = np_int_udiv_impl(context, builder, quotient_sig, args)

    remainder_sig = pair_ty[1](*operand_tys)
    remainder = np_int_urem_impl(context, builder, remainder_sig, args)

    return context.make_tuple(builder, pair_ty, [quotient, remainder])
266
+
267
+
268
+ # implementation of int_fmod is in fact the same as the unsigned remainder,
269
+ # that is: urem with a special case returning 0 when the denominator is 0.
270
+ np_int_fmod_impl = np_int_urem_impl
271
+
272
+
273
def np_real_div_impl(context, builder, sig, args):
    """Emit a floating point division of two homogeneous real operands.

    A plain ``fdiv`` is used: in NumPy real division has the same semantics
    as fdiv with respect to producing NaN, +inf and -inf.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    quotient = builder.fdiv(*args)
    return quotient
278
+
279
+
280
def np_real_mod_impl(context, builder, sig, args):
    """Emit a floating point remainder whose sign follows the denominator.

    Computes ``frem(in1, in2)`` and adds ``in2`` to it when the remainder is
    non-zero and its "is negative" test differs from that of ``in2``.
    """
    # note: this maps to NumPy remainder, which has the same semantics as Python
    # based on code in loops.c.src
    _check_arity_and_homogeneity(sig, args, 2)
    in1, in2 = args
    ty = sig.args[0]

    ZERO = context.get_constant(ty, 0.0)
    res = builder.frem(in1, in2)
    res_ne_zero = builder.fcmp_ordered("!=", res, ZERO)
    den_lt_zero = builder.fcmp_ordered("<", in2, ZERO)
    res_lt_zero = builder.fcmp_ordered("<", res, ZERO)
    # A correction is needed only for a non-zero remainder whose sign test
    # disagrees with the denominator's.
    needs_fixing = builder.and_(
        res_ne_zero, builder.xor(den_lt_zero, res_lt_zero)
    )
    fix_value = builder.select(needs_fixing, in2, ZERO)

    return builder.fadd(res, fix_value)
298
+
299
+
300
def np_real_fmod_impl(context, builder, sig, args):
    """Emit ``frem`` on two homogeneous real operands, with no correction."""
    _check_arity_and_homogeneity(sig, args, 2)
    remainder = builder.frem(*args)
    return remainder
303
+
304
+
305
def _fabs(context, builder, arg):
    """Emit a select-based absolute value of the floating point ``arg``.

    Yields ``0.0 - arg`` when ``arg`` compares (ordered) less than zero and
    ``arg`` itself otherwise. ``context`` is accepted but not used.
    """
    zero = llvmlite.ir.Constant(arg.type, 0.0)
    negated = builder.fsub(zero, arg)
    is_negative = builder.fcmp_ordered("<", arg, zero)
    return builder.select(is_negative, negated, arg)
310
+
311
+
312
def np_complex_div_impl(context, builder, sig, args):
    """Emit a complex division of ``args[0]`` by ``args[1]``.

    Three cases are generated, selected on the magnitudes of the
    denominator's components: a zero denominator (component-wise ``fdiv`` by
    the zero magnitudes), ``|den.real| >= |den.imag|`` and
    ``|den.imag| > |den.real|``. Returns the value of the complex result.
    """
    # Extracted from numpy/core/src/umath/loops.c.src,
    # inspired by complex_div_impl
    # variables named coherent with loops.c.src
    # This is implemented using the approach described in
    #   R.L. Smith. Algorithm 116: Complex division.
    #   Communications of the ACM, 5(8):435, 1962

    # Both operands are unpacked using the type of the first argument.
    in1, in2 = [
        context.make_complex(builder, sig.args[0], value=arg) for arg in args
    ]

    in1r = in1.real  # numerator.real
    in1i = in1.imag  # numerator.imag
    in2r = in2.real  # denominator.real
    in2i = in2.imag  # denominator.imag
    ftype = in1r.type
    assert all([i.type == ftype for i in [in1r, in1i, in2r, in2i]]), (
        "mismatched types"
    )
    out = context.make_helper(builder, sig.return_type)

    ZERO = llvmlite.ir.Constant(ftype, 0.0)
    ONE = llvmlite.ir.Constant(ftype, 1.0)

    # if abs(denominator.real) >= abs(denominator.imag)
    in2r_abs = _fabs(context, builder, in2r)
    in2i_abs = _fabs(context, builder, in2i)
    in2r_abs_ge_in2i_abs = builder.fcmp_ordered(">=", in2r_abs, in2i_abs)
    with builder.if_else(in2r_abs_ge_in2i_abs) as (then, otherwise):
        with then:
            # if abs(denominator.real) == 0 and abs(denominator.imag) == 0
            in2r_is_zero = builder.fcmp_ordered("==", in2r_abs, ZERO)
            in2i_is_zero = builder.fcmp_ordered("==", in2i_abs, ZERO)
            in2_is_zero = builder.and_(in2r_is_zero, in2i_is_zero)
            with builder.if_else(in2_is_zero) as (inn_then, inn_otherwise):
                with inn_then:
                    # division by 0.
                    # fdiv generates the appropriate NAN/INF/NINF
                    out.real = builder.fdiv(in1r, in2r_abs)
                    out.imag = builder.fdiv(in1i, in2i_abs)
                with inn_otherwise:
                    # general case for:
                    # abs(denominator.real) > abs(denominator.imag)
                    rat = builder.fdiv(in2i, in2r)
                    # scl = 1.0/(in2r + in2i*rat)
                    tmp1 = builder.fmul(in2i, rat)
                    tmp2 = builder.fadd(in2r, tmp1)
                    scl = builder.fdiv(ONE, tmp2)
                    # out.real = (in1r + in1i*rat)*scl
                    # out.imag = (in1i - in1r*rat)*scl
                    tmp3 = builder.fmul(in1i, rat)
                    tmp4 = builder.fmul(in1r, rat)
                    tmp5 = builder.fadd(in1r, tmp3)
                    tmp6 = builder.fsub(in1i, tmp4)
                    out.real = builder.fmul(tmp5, scl)
                    out.imag = builder.fmul(tmp6, scl)
        with otherwise:
            # general case for:
            # abs(denominator.imag) > abs(denominator.real)
            rat = builder.fdiv(in2r, in2i)
            # scl = 1.0/(in2i + in2r*rat)
            tmp1 = builder.fmul(in2r, rat)
            tmp2 = builder.fadd(in2i, tmp1)
            scl = builder.fdiv(ONE, tmp2)
            # out.real = (in1r*rat + in1i)*scl
            # out.imag = (in1i*rat - in1r)*scl
            tmp3 = builder.fmul(in1r, rat)
            tmp4 = builder.fmul(in1i, rat)
            tmp5 = builder.fadd(tmp3, in1i)
            tmp6 = builder.fsub(tmp4, in1r)
            out.real = builder.fmul(tmp5, scl)
            out.imag = builder.fmul(tmp6, scl)

    return out._getvalue()
387
+
388
+
389
+ ########################################################################
390
+ # NumPy logaddexp
391
+
392
+
393
def _npy_logaddexp(x1, x2):
    """Empty placeholder for the logaddexp kernel.

    NOTE(review): a function with the same name and body is defined again
    further down in this module, before the name is passed to
    ``_generate_logaddexp``; this first definition looks redundant.
    """
    pass
395
+
396
+
397
def _generate_logaddexp(fnoverload, const, log1pfn, expfn):
    """Register a CUDA-target overload of ``fnoverload`` computing a logaddexp.

    ``const`` is the value added when both operands are equal, ``log1pfn``
    and ``expfn`` are the log1p-like and exp-like functions used by the
    generated implementation. The overload only applies when both argument
    types are equal.
    """
    # Code generation for logaddexp and logaddexp2 is based on:
    # https://github.com/numpy/numpy/blob/12c2b7dd62fc0c14b81c8892ed5f4f59cc94d09c/numpy/core/src/npymath/npy_math_internal.h.src#L467-L507

    @overload(fnoverload, target="cuda")
    def ol_npy_logaddexp(x1, x2):
        # Decline (return None) for mixed argument types.
        if x1 != x2:
            return
        # Convert the constant using the argument type.
        shift = x1(const)

        def impl(x1, x2):
            x, y = x1, x2
            if x == y:
                # Handles infinities of the same sign without warnings
                return x + shift
            else:
                tmp = x - y
                if tmp > 0:
                    return x + log1pfn(expfn(-tmp))
                elif tmp <= 0:
                    return y + log1pfn(expfn(tmp))
                else:
                    # NaN
                    return tmp

        return impl
423
+
424
+
425
def _npy_logaddexp(x1, x2):
    """Empty placeholder for the logaddexp kernel.

    The name is passed to ``_generate_logaddexp`` right after this
    definition and is resolved by ``np_real_logaddexp_impl``.
    """
    pass
427
+
428
+
429
+ _generate_logaddexp(_npy_logaddexp, _NPY_LOGE2, np.log1p, np.exp)
430
+
431
+
432
def np_real_logaddexp_impl(context, builder, sig, args):
    """Lower a real logaddexp by calling the ``_npy_logaddexp`` implementation.

    The placeholder is resolved to its function type, the call is typed
    against the argument types of ``sig``, and the resulting implementation
    is invoked on ``args``.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    typingctx = context.typing_context
    stub_ty = typingctx.resolve_value_type(_npy_logaddexp)
    call_sig = stub_ty.get_call_type(typingctx, tuple(sig.args), {})
    lowered = context.get_function(stub_ty, call_sig)
    return lowered(builder, args)
439
+
440
+
441
+ ########################################################################
442
+ # NumPy logaddexp2
443
def _npy_logaddexp2(x1, x2):
    """Empty placeholder for the logaddexp2 kernel.

    The name is passed to ``_generate_logaddexp`` later in this module and
    is resolved by ``np_real_logaddexp2_impl``.
    """
    pass
445
+
446
+
447
def npy_log2_1p(x):
    """Empty placeholder; the CUDA implementation is attached via ``@overload``."""
    pass
449
+
450
+
451
+ # The following npy_log2_1p function is a translation of:
452
+ # https://github.com/numpy/numpy/blob/12c2b7dd62fc0c14b81c8892ed5f4f59cc94d09c/numpy/core/src/npymath/npy_math_internal.h.src#L457-L460
453
+
454
+
455
@overload(npy_log2_1p, target="cuda")
def ol_npy_log2_1p(x):
    """CUDA overload of ``npy_log2_1p``: ``_NPY_LOG2E * np.log1p(x)``."""
    # Convert the constant using the argument type.
    LOG2E = x(_NPY_LOG2E)

    def impl(x):
        return LOG2E * np.log1p(x)

    return impl
463
+
464
+
465
+ _generate_logaddexp(_npy_logaddexp2, 1.0, npy_log2_1p, np.exp2)
466
+
467
+
468
def np_real_logaddexp2_impl(context, builder, sig, args):
    """Lower a real logaddexp2 by calling the ``_npy_logaddexp2`` implementation.

    The placeholder is resolved to its function type, the call is typed
    against the argument types of ``sig``, and the resulting implementation
    is invoked on ``args``.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    typingctx = context.typing_context
    stub_ty = typingctx.resolve_value_type(_npy_logaddexp2)
    call_sig = stub_ty.get_call_type(typingctx, tuple(sig.args), {})
    lowered = context.get_function(stub_ty, call_sig)
    return lowered(builder, args)
475
+
476
+
477
+ ########################################################################
478
+ # true div kernels
479
+
480
+
481
def np_int_truediv_impl(context, builder, sig, args):
    """Emit integer true division: cast both operands to float64 and ``fdiv``.

    No check for a zero denominator is made; fdiv handles division by zero
    in the way NumPy expects. Integer truediv always yields a double.
    """
    numerator, denominator = args
    expected_lltype = numerator.type
    assert all(
        operand.type == expected_lltype for operand in args
    ), "must have homogeneous types"
    numerator_ty, denominator_ty = sig.args

    target = types.float64
    num_f64 = context.cast(builder, numerator, numerator_ty, target)
    den_f64 = context.cast(builder, denominator, denominator_ty, target)
    return builder.fdiv(num_f64, den_f64)
494
+
495
+
496
+ ########################################################################
497
+ # floor div kernels
498
+
499
+
500
def np_real_floor_div_impl(context, builder, sig, args):
    """Emit a real floor division: ``np_real_div_impl`` followed by a floor.

    The floor is lowered through ``np_real_floor_impl`` with a unary
    signature built from the return type of ``sig``.
    """
    quotient = np_real_div_impl(context, builder, sig, args)
    result_ty = sig.return_type
    floor_sig = typing.signature(result_ty, result_ty)
    return np_real_floor_impl(context, builder, floor_sig, (quotient,))
504
+
505
+
506
def np_real_divmod_impl(context, builder, sig, args):
    """Emit real divmod as a (floor quotient, remainder) tuple.

    The quotient comes from ``np_real_floor_div_impl`` and the remainder
    from ``np_real_mod_impl``; each is lowered with a signature built from
    the matching element of the tuple return type and the original argument
    types.
    """
    pair_ty = sig.return_type
    operand_tys = sig.args

    quotient_sig = pair_ty[0](*operand_tys)
    quotient = np_real_floor_div_impl(context, builder, quotient_sig, args)

    remainder_sig = pair_ty[1](*operand_tys)
    remainder = np_real_mod_impl(context, builder, remainder_sig, args)

    return context.make_tuple(builder, pair_ty, [quotient, remainder])
514
+
515
+
516
def np_complex_floor_div_impl(context, builder, sig, args):
    """Emit a complex floor division of ``args[0]`` by ``args[1]``.

    Only the real part of the quotient is computed and floored (through
    ``np_real_floor_impl``); the imaginary part of the result is set to
    zero. Returns the value of the complex result.
    """
    # this is based on the complex floor divide in Numpy's loops.c.src
    # This is basically a full complex division with a complex floor
    # applied.
    # The complex floor seems to be defined as the real floor applied
    # with the real part and zero in the imaginary part. Fully developed
    # so it avoids computing anything related to the imaginary result.
    float_kind = sig.args[0].underlying_float
    floor_sig = typing.signature(float_kind, float_kind)

    # Both operands are unpacked using the type of the first argument.
    in1, in2 = [
        context.make_complex(builder, sig.args[0], value=arg) for arg in args
    ]

    in1r = in1.real
    in1i = in1.imag
    in2r = in2.real
    in2i = in2.imag
    ftype = in1r.type
    assert all([i.type == ftype for i in [in1r, in1i, in2r, in2i]]), (
        "mismatched types"
    )

    ZERO = llvmlite.ir.Constant(ftype, 0.0)

    out = context.make_helper(builder, sig.return_type)
    out.imag = ZERO

    # Branch on which denominator component has the larger magnitude.
    in2r_abs = _fabs(context, builder, in2r)
    in2i_abs = _fabs(context, builder, in2i)
    in2r_abs_ge_in2i_abs = builder.fcmp_ordered(">=", in2r_abs, in2i_abs)

    with builder.if_else(in2r_abs_ge_in2i_abs) as (then, otherwise):
        with then:
            rat = builder.fdiv(in2i, in2r)
            # out.real = floor((in1r+in1i*rat)/(in2r + in2i*rat))
            tmp1 = builder.fmul(in1i, rat)
            tmp2 = builder.fmul(in2i, rat)
            tmp3 = builder.fadd(in1r, tmp1)
            tmp4 = builder.fadd(in2r, tmp2)
            tmp5 = builder.fdiv(tmp3, tmp4)
            out.real = np_real_floor_impl(context, builder, floor_sig, (tmp5,))
        with otherwise:
            rat = builder.fdiv(in2r, in2i)
            # out.real = floor((in1i + in1r*rat)/(in2i + in2r*rat))
            tmp1 = builder.fmul(in1r, rat)
            tmp2 = builder.fmul(in2r, rat)
            tmp3 = builder.fadd(in1i, tmp1)
            tmp4 = builder.fadd(in2i, tmp2)
            tmp5 = builder.fdiv(tmp3, tmp4)
            out.real = np_real_floor_impl(context, builder, floor_sig, (tmp5,))
    return out._getvalue()
568
+
569
+
570
+ ########################################################################
571
+ # numpy power funcs
572
+
573
+
574
def np_complex_power_impl(context, builder, sig, args):
    """Emit NumPy-style ``power`` for complex operands.

    The two-argument call is validated with
    ``_check_arity_and_homogeneity`` and then forwarded unchanged to
    ``numbers.complex_power_impl``.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    result = numbers.complex_power_impl(context, builder, sig, args)
    return result
578
+
579
+
580
+ ########################################################################
581
+ # numpy float power funcs
582
+
583
+
584
def real_float_power_impl(context, builder, sig, args):
    """Emit NumPy-style ``float_power`` for real operands.

    The two-argument call is validated with
    ``_check_arity_and_homogeneity`` and then forwarded unchanged to
    ``numbers.real_power_impl``.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    result = numbers.real_power_impl(context, builder, sig, args)
    return result
588
+
589
+
590
def np_complex_float_power_impl(context, builder, sig, args):
    """Emit NumPy-style ``float_power`` for complex operands.

    The two-argument call is validated with
    ``_check_arity_and_homogeneity`` and then forwarded unchanged to
    ``numbers.complex_power_impl``.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    result = numbers.complex_power_impl(context, builder, sig, args)
    return result
594
+
595
+
596
+ ########################################################################
597
+ # numpy greatest common divisor
598
+
599
+
600
def np_gcd_impl(context, builder, sig, args):
    """Emit NumPy-style ``gcd`` by delegating to ``mathimpl.gcd_impl``.

    The call is first validated as a two-argument call with
    ``_check_arity_and_homogeneity``.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    result = mathimpl.gcd_impl(context, builder, sig, args)
    return result
603
+
604
+
605
+ ########################################################################
606
+ # numpy lowest common multiple
607
+
608
+
609
def np_lcm_impl(context, builder, sig, args):
    """Emit NumPy-style ``lcm`` for two operands of the same type.

    Both argument types must equal the return type. The computation is
    the pure-Python ``lcm`` helper below, compiled through
    ``context.compile_internal``; its result is passed through
    ``impl_ret_untracked``.
    """
    lhs_ty, rhs_ty = sig.args
    assert lhs_ty == rhs_ty == sig.return_type
    # The values are not used directly; the unpack only enforces that
    # exactly two arguments were supplied.
    _lhs, _rhs = args

    def lcm(a, b):
        """
        Like gcd, heavily cribbed from Julia.
        """
        return 0 if a == 0 else abs(a * (b // np.gcd(b, a)))

    compiled = context.compile_internal(builder, lcm, sig, args)
    return impl_ret_untracked(context, builder, sig.return_type, compiled)
622
+
623
+
624
+ ########################################################################
625
+ # Numpy style complex sign
626
+
627
+
628
def np_complex_sign_impl(context, builder, sig, args):
    """Emit NumPy-style ``sign`` for a complex operand.

    Two strategies are used depending on ``numpy_version``:

    * NumPy >= 2.0: compile ``z / hypot(z.real, z.imag)``, returning
      ``0 + 0j`` when the magnitude is zero.
    * Older NumPy: compare the operand against complex zero with the
      lexicographic complex predicates and pick the real part with
      selects (0 when equal, 1 when greater, -1 when less, NaN
      otherwise); the imaginary part is always zero.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    if numpy_version >= (2, 0):
        # NumPy >= 2.0.0
        def complex_sign(z):
            magnitude = math.hypot(z.real, z.imag)
            if magnitude == 0:
                return 0 + 0j
            else:
                return z / magnitude

        compiled = context.compile_internal(builder, complex_sign, sig, args)
        return impl_ret_untracked(context, builder, sig.return_type, compiled)

    # Pre-2.0 behaviour, implemented via selects balancing the 4 cases.
    operand = args[0]
    complex_ty = sig.args[0]
    result = context.make_complex(builder, complex_ty)
    real_ty = complex_ty.underlying_float

    zero = context.get_constant(real_ty, 0.0)
    one = context.get_constant(real_ty, 1.0)
    minus_one = context.get_constant(real_ty, -1.0)
    nan = context.get_constant(real_ty, float("nan"))

    # ``result`` temporarily holds complex zero so that it can serve as the
    # right-hand side of the comparisons below.
    result.real = zero
    result.imag = zero
    cmp_sig = typing.signature(types.boolean, complex_ty, complex_ty)
    cmp_args = [operand, result._getvalue()]
    is_ge = np_complex_ge_impl(context, builder, cmp_sig, cmp_args)
    is_eq = np_complex_eq_impl(context, builder, cmp_sig, cmp_args)
    is_lt = np_complex_lt_impl(context, builder, cmp_sig, cmp_args)

    real_if_ge = builder.select(is_eq, zero, one)
    real_if_not_ge = builder.select(is_lt, minus_one, nan)
    result.real = builder.select(is_ge, real_if_ge, real_if_not_ge)

    return result._getvalue()
668
+
669
+
670
+ ########################################################################
671
+ # Numpy rint
672
+
673
+
674
def np_real_rint_impl(context, builder, sig, args):
    """Emit ``rint`` for one real operand via the ``llvm.rint`` intrinsic."""
    _check_arity_and_homogeneity(sig, args, 1)

    intrinsic_name = "llvm.rint"
    return mathimpl.call_fp_intrinsic(builder, intrinsic_name, args)
678
+
679
+
680
def np_complex_rint_impl(context, builder, sig, args):
    """Emit ``rint`` for a complex operand.

    Based on code in NumPy's funcs.inc.src: the result is built by
    applying the real ``rint`` to the real and imaginary parts
    independently.
    """
    _check_arity_and_homogeneity(sig, args, 1)
    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    operand = context.make_complex(builder, complex_ty, value=args[0])
    result = context.make_complex(builder, complex_ty)

    real_unary_sig = typing.signature(real_ty, real_ty)
    result.real = np_real_rint_impl(
        context, builder, real_unary_sig, [operand.real]
    )
    result.imag = np_real_rint_impl(
        context, builder, real_unary_sig, [operand.imag]
    )
    return result._getvalue()
694
+
695
+
696
+ ########################################################################
697
+ # NumPy exp
698
+
699
+
700
def np_real_exp_impl(context, builder, sig, args):
    """Emit ``exp`` for one real operand via ``mathimpl.exp_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.exp_impl(context, builder, sig, args)
    return result
703
+
704
+
705
def np_complex_exp_impl(context, builder, sig, args):
    """Emit ``exp`` for one complex operand via ``cmathimpl.exp_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = cmathimpl.exp_impl(context, builder, sig, args)
    return result
708
+
709
+
710
+ ########################################################################
711
+ # NumPy exp2
712
+
713
+
714
def np_real_exp2_impl(context, builder, sig, args):
    """Emit ``exp2`` for one real operand via the ``llvm.exp2`` intrinsic.

    The intrinsic is declared in the builder's module with a signature
    that takes and returns the operand's LLVM type, then called on the
    operand.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    operand = args[0]
    operand_llty = operand.type
    intrinsic_ty = llvmlite.ir.FunctionType(operand_llty, [operand_llty])
    intrinsic = cgutils.insert_pure_function(
        builder.module, intrinsic_ty, name="llvm.exp2"
    )
    return builder.call(intrinsic, [operand])
726
+
727
+
728
def np_complex_exp2_impl(context, builder, sig, args):
    """Emit ``exp2`` for a complex operand.

    Both parts of the operand are multiplied by ``_NPY_LOGE2`` and the
    scaled value is handed to the complex ``exp`` implementation.
    """
    _check_arity_and_homogeneity(sig, args, 1)
    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    operand = context.make_complex(builder, complex_ty, value=args[0])
    scaled = context.make_complex(builder, complex_ty)
    ln2 = context.get_constant(real_ty, _NPY_LOGE2)
    scaled.real = builder.fmul(ln2, operand.real)
    scaled.imag = builder.fmul(ln2, operand.imag)
    return np_complex_exp_impl(context, builder, sig, [scaled._getvalue()])
738
+
739
+
740
+ ########################################################################
741
+ # NumPy log
742
+
743
+
744
def np_real_log_impl(context, builder, sig, args):
    """Emit ``log`` for one real operand via ``mathimpl.log_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.log_impl(context, builder, sig, args)
    return result
747
+
748
+
749
def np_complex_log_impl(context, builder, sig, args):
    """Emit ``log`` for one complex operand via ``cmathimpl.log_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = cmathimpl.log_impl(context, builder, sig, args)
    return result
752
+
753
+
754
+ ########################################################################
755
+ # NumPy log2
756
+
757
+
758
def np_real_log2_impl(context, builder, sig, args):
    """Emit ``log2`` for one real operand via the ``llvm.log2`` intrinsic.

    The intrinsic is declared in the builder's module with a signature
    that takes and returns the operand's LLVM type, then called on the
    operand.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    operand = args[0]
    operand_llty = operand.type
    intrinsic_ty = llvmlite.ir.FunctionType(operand_llty, [operand_llty])
    intrinsic = cgutils.insert_pure_function(
        builder.module, intrinsic_ty, name="llvm.log2"
    )
    return builder.call(intrinsic, [operand])
770
+
771
+
772
def np_complex_log2_impl(context, builder, sig, args):
    """Emit ``log2`` for a complex operand.

    The complex natural log is computed first; both of its parts are
    then multiplied by ``_NPY_LOG2E``.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    natural_log = np_complex_log_impl(context, builder, sig, args)
    result = context.make_complex(builder, complex_ty, value=natural_log)
    scale = context.get_constant(real_ty, _NPY_LOG2E)
    result.real = builder.fmul(scale, result.real)
    result.imag = builder.fmul(scale, result.imag)
    return result._getvalue()
783
+
784
+
785
+ ########################################################################
786
+ # NumPy log10
787
+
788
+
789
def np_real_log10_impl(context, builder, sig, args):
    """Emit ``log10`` for one real operand via ``mathimpl.log10_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.log10_impl(context, builder, sig, args)
    return result
792
+
793
+
794
def np_complex_log10_impl(context, builder, sig, args):
    """Emit ``log10`` for a complex operand.

    The complex natural log is computed first; both of its parts are
    then multiplied by ``_NPY_LOG10E``.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    natural_log = np_complex_log_impl(context, builder, sig, args)
    result = context.make_complex(builder, complex_ty, value=natural_log)
    scale = context.get_constant(real_ty, _NPY_LOG10E)
    result.real = builder.fmul(scale, result.real)
    result.imag = builder.fmul(scale, result.imag)
    return result._getvalue()
805
+
806
+
807
+ ########################################################################
808
+ # NumPy expm1
809
+
810
+
811
def np_real_expm1_impl(context, builder, sig, args):
    """Emit ``expm1`` for one real operand via ``mathimpl.expm1_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.expm1_impl(context, builder, sig, args)
    return result
814
+
815
+
816
def np_complex_expm1_impl(context, builder, sig, args):
    """Emit ``expm1`` for a complex operand.

    Based on nc_expm1 in funcs.inc.src. With ``a = exp(real)``, the result
    is ``a * cos(imag) - 1`` for the real part and ``a * sin(imag)`` for
    the imaginary part.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    real_unary_sig = typing.signature(real_ty, real_ty)

    minus_one = context.get_constant(real_ty, -1.0)
    operand = context.make_complex(builder, complex_ty, value=args[0])
    exp_real = np_real_exp_impl(
        context, builder, real_unary_sig, [operand.real]
    )
    result = context.make_complex(builder, complex_ty)
    cos_imag = np_real_cos_impl(
        context, builder, real_unary_sig, [operand.imag]
    )
    sin_imag = np_real_sin_impl(
        context, builder, real_unary_sig, [operand.imag]
    )
    exp_cos = builder.fmul(exp_real, cos_imag)
    result.imag = builder.fmul(exp_real, sin_imag)
    result.real = builder.fadd(exp_cos, minus_one)

    return result._getvalue()
835
+
836
+
837
+ ########################################################################
838
+ # NumPy log1p
839
+
840
+
841
def np_real_log1p_impl(context, builder, sig, args):
    """Emit ``log1p`` for one real operand via ``mathimpl.log1p_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.log1p_impl(context, builder, sig, args)
    return result
844
+
845
+
846
def np_complex_log1p_impl(context, builder, sig, args):
    """Emit ``log1p`` for a complex operand.

    Based on NumPy's nc_log1p in funcs.inc.src. With ``r = real + 1``:
    the real part of the result is ``log(hypot(r, imag))`` and the
    imaginary part is ``atan2(imag, r)``.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    real_unary_sig = typing.signature(real_ty, real_ty)
    real_binary_sig = typing.signature(real_ty, real_ty, real_ty)

    one = context.get_constant(real_ty, 1.0)
    operand = context.make_complex(builder, complex_ty, value=args[0])
    result = context.make_complex(builder, complex_ty)
    real_plus_one = builder.fadd(operand.real, one)
    magnitude = np_real_hypot_impl(
        context, builder, real_binary_sig, [real_plus_one, operand.imag]
    )
    result.imag = np_real_atan2_impl(
        context, builder, real_binary_sig, [operand.imag, real_plus_one]
    )
    result.real = np_real_log_impl(
        context, builder, real_unary_sig, [magnitude]
    )

    return result._getvalue()
868
+
869
+
870
+ ########################################################################
871
+ # NumPy sqrt
872
+
873
+
874
def np_real_sqrt_impl(context, builder, sig, args):
    """Emit ``sqrt`` for one real operand via ``mathimpl.sqrt_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.sqrt_impl(context, builder, sig, args)
    return result
877
+
878
+
879
def np_complex_sqrt_impl(context, builder, sig, args):
    """Emit ``sqrt`` for one complex operand via ``cmathimpl.sqrt_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = cmathimpl.sqrt_impl(context, builder, sig, args)
    return result
882
+
883
+
884
+ ########################################################################
885
+ # NumPy square
886
+
887
+
888
def np_int_square_impl(context, builder, sig, args):
    """Emit ``square`` for an integer operand as ``x * x`` (``builder.mul``)."""
    _check_arity_and_homogeneity(sig, args, 1)
    operand = args[0]
    return builder.mul(operand, operand)
891
+
892
+
893
def np_real_square_impl(context, builder, sig, args):
    """Emit ``square`` for a real operand as ``x * x`` (``builder.fmul``)."""
    _check_arity_and_homogeneity(sig, args, 1)
    operand = args[0]
    return builder.fmul(operand, operand)
896
+
897
+
898
def np_complex_square_impl(context, builder, sig, args):
    """Emit ``square`` for a complex operand.

    The operand is multiplied by itself with ``numbers.complex_mul_impl``
    using a binary signature built from the return type.
    """
    _check_arity_and_homogeneity(sig, args, 1)
    ret_ty = sig.return_type
    mul_sig = typing.signature(ret_ty, ret_ty, ret_ty)
    operand = args[0]
    return numbers.complex_mul_impl(
        context, builder, mul_sig, [operand, operand]
    )
904
+
905
+
906
+ ########################################################################
907
+ # NumPy cbrt
908
+
909
+
910
def np_real_cbrt_impl(context, builder, sig, args):
    """Emit ``cbrt`` for one real operand.

    The work is done by two nested Python functions compiled through
    ``context.compile_internal``: ``_cbrt`` returns NaN for a NaN input
    and otherwise defers to ``cbrt``, which raises the absolute value to
    the power 1/3 and restores the sign for negative inputs.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    # We enable fastmath here to force np.power(x, 1/3) to generate a
    # call to libm cbrt function
    @register_jitable(fastmath=True)
    def cbrt(x):
        if x < 0:
            return -np.power(-x, 1.0 / 3.0)
        else:
            return np.power(x, 1.0 / 3.0)

    # NaN is filtered out here, before the fastmath-enabled helper runs.
    def _cbrt(x):
        if np.isnan(x):
            return np.nan
        return cbrt(x)

    compiled = context.compile_internal(builder, _cbrt, sig, args)
    return compiled
928
+
929
+
930
+ ########################################################################
931
+ # NumPy reciprocal
932
+
933
+
934
def np_int_reciprocal_impl(context, builder, sig, args):
    """Emit ``reciprocal`` for an integer operand.

    Based on the implementation in loops.c.src: the operand is cast to
    float64, ``1 / x`` is computed there, and the quotient is cast back
    to the return type.
    """
    _check_arity_and_homogeneity(sig, args, 1)
    int_ty = sig.return_type
    f64 = types.float64

    operand_f64 = context.cast(builder, args[0], int_ty, f64)
    one = context.get_constant(f64, 1)
    quotient_f64 = builder.fdiv(one, operand_f64)
    return context.cast(builder, quotient_f64, f64, int_ty)
945
+
946
+
947
def np_real_reciprocal_impl(context, builder, sig, args):
    """Emit ``reciprocal`` for a real operand as ``1.0 / x``."""
    _check_arity_and_homogeneity(sig, args, 1)
    one = context.get_constant(sig.return_type, 1.0)
    operand = args[0]
    return builder.fdiv(one, operand)
951
+
952
+
953
def np_complex_reciprocal_impl(context, builder, sig, args):
    """Emit ``reciprocal`` for a complex operand.

    Based on the implementation in loops.c.src: basically the same Smith
    method used for division, but with the numerator substituted by 1.0.
    The branch is chosen by comparing ``|imag| <= |real|`` so that the
    ratio is formed by dividing by the part with the larger magnitude.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float

    zero = context.get_constant(real_ty, 0.0)
    one = context.get_constant(real_ty, 1.0)
    operand = context.make_complex(builder, complex_ty, value=args[0])
    result = context.make_complex(builder, complex_ty)
    real_part = operand.real
    imag_part = operand.imag
    real_abs = _fabs(context, builder, real_part)
    imag_abs = _fabs(context, builder, imag_part)
    imag_abs_le_real_abs = builder.fcmp_ordered("<=", imag_abs, real_abs)

    with builder.if_else(imag_abs_le_real_abs) as (real_larger, imag_larger):
        with real_larger:
            # ratio = imag / real; denom = real + imag * ratio
            ratio = builder.fdiv(imag_part, real_part)
            denom = builder.fadd(real_part, builder.fmul(imag_part, ratio))
            inv_denom = builder.fdiv(one, denom)
            neg_ratio = builder.fsub(zero, ratio)
            result.real = inv_denom
            result.imag = builder.fmul(neg_ratio, inv_denom)
        with imag_larger:
            # ratio = real / imag; denom = real * ratio + imag
            ratio = builder.fdiv(real_part, imag_part)
            denom = builder.fadd(builder.fmul(real_part, ratio), imag_part)
            inv_denom = builder.fdiv(one, denom)
            result.real = builder.fmul(ratio, inv_denom)
            result.imag = builder.fsub(zero, inv_denom)

    return result._getvalue()
990
+
991
+
992
+ ########################################################################
993
+ # NumPy sin
994
+
995
+
996
def np_real_sin_impl(context, builder, sig, args):
    """Emit ``sin`` for one real operand via ``mathimpl.sin_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.sin_impl(context, builder, sig, args)
    return result
999
+
1000
+
1001
def np_complex_sin_impl(context, builder, sig, args):
    """Emit ``sin`` for one complex operand via ``cmathimpl.sin_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = cmathimpl.sin_impl(context, builder, sig, args)
    return result
1004
+
1005
+
1006
+ ########################################################################
1007
+ # NumPy cos
1008
+
1009
+
1010
def np_real_cos_impl(context, builder, sig, args):
    """Emit ``cos`` for one real operand via ``mathimpl.cos_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.cos_impl(context, builder, sig, args)
    return result
1013
+
1014
+
1015
def np_complex_cos_impl(context, builder, sig, args):
    """Emit ``cos`` for one complex operand via ``cmathimpl.cos_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = cmathimpl.cos_impl(context, builder, sig, args)
    return result
1018
+
1019
+
1020
+ ########################################################################
1021
+ # NumPy tan
1022
+
1023
+
1024
def np_real_tan_impl(context, builder, sig, args):
    """Emit ``tan`` for one real operand via ``mathimpl.tan_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.tan_impl(context, builder, sig, args)
    return result
1027
+
1028
+
1029
+ ########################################################################
1030
+ # NumPy asin
1031
+
1032
+
1033
def np_real_asin_impl(context, builder, sig, args):
    """Emit ``arcsin`` for one real operand via ``mathimpl.asin_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.asin_impl(context, builder, sig, args)
    return result
1036
+
1037
+
1038
+ ########################################################################
1039
+ # NumPy acos
1040
+
1041
+
1042
def np_real_acos_impl(context, builder, sig, args):
    """Emit ``arccos`` for one real operand via ``mathimpl.acos_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.acos_impl(context, builder, sig, args)
    return result
1045
+
1046
+
1047
+ ########################################################################
1048
+ # NumPy atan
1049
+
1050
+
1051
def np_real_atan_impl(context, builder, sig, args):
    """Emit ``arctan`` for one real operand via ``mathimpl.atan_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.atan_impl(context, builder, sig, args)
    return result
1054
+
1055
+
1056
+ ########################################################################
1057
+ # NumPy atan2
1058
+
1059
+
1060
def np_real_atan2_impl(context, builder, sig, args):
    """Emit ``arctan2`` for two real operands via ``mathimpl.atan2_float_impl``."""
    _check_arity_and_homogeneity(sig, args, 2)
    result = mathimpl.atan2_float_impl(context, builder, sig, args)
    return result
1063
+
1064
+
1065
+ ########################################################################
1066
+ # NumPy hypot
1067
+
1068
+
1069
def np_real_hypot_impl(context, builder, sig, args):
    """Emit ``hypot`` for two real operands via ``mathimpl.hypot_float_impl``."""
    _check_arity_and_homogeneity(sig, args, 2)
    result = mathimpl.hypot_float_impl(context, builder, sig, args)
    return result
1072
+
1073
+
1074
+ ########################################################################
1075
+ # NumPy sinh
1076
+
1077
+
1078
def np_real_sinh_impl(context, builder, sig, args):
    """Emit ``sinh`` for one real operand via ``mathimpl.sinh_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.sinh_impl(context, builder, sig, args)
    return result
1081
+
1082
+
1083
def np_complex_sinh_impl(context, builder, sig, args):
    """Emit ``sinh`` for a complex operand.

    npymath does not provide a complex sinh, so the code in funcs.inc.src
    is translated here: for ``x = xr + xi*j`` the result is
    ``cos(xi) * sinh(xr) + sin(xi) * cosh(xr) * j``.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    real_unary_sig = typing.signature(real_ty, real_ty)
    operand = context.make_complex(builder, complex_ty, args[0])
    result = context.make_complex(builder, complex_ty)
    real_part = operand.real
    imag_part = operand.imag

    sin_imag = np_real_sin_impl(context, builder, real_unary_sig, [imag_part])
    sinh_real = np_real_sinh_impl(
        context, builder, real_unary_sig, [real_part]
    )
    cos_imag = np_real_cos_impl(context, builder, real_unary_sig, [imag_part])
    cosh_real = np_real_cosh_impl(
        context, builder, real_unary_sig, [real_part]
    )

    result.real = builder.fmul(cos_imag, sinh_real)
    result.imag = builder.fmul(sin_imag, cosh_real)

    return result._getvalue()
1105
+
1106
+
1107
+ ########################################################################
1108
+ # NumPy cosh
1109
+
1110
+
1111
def np_real_cosh_impl(context, builder, sig, args):
    """Emit ``cosh`` for one real operand via ``mathimpl.cosh_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.cosh_impl(context, builder, sig, args)
    return result
1114
+
1115
+
1116
def np_complex_cosh_impl(context, builder, sig, args):
    """Emit ``cosh`` for a complex operand.

    npymath does not provide a complex cosh, so the code in funcs.inc.src
    is translated here: for ``x = xr + xi*j`` the result is
    ``cos(xi) * cosh(xr) + sin(xi) * sinh(xr) * j``.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    real_unary_sig = typing.signature(real_ty, real_ty)
    operand = context.make_complex(builder, complex_ty, args[0])
    result = context.make_complex(builder, complex_ty)
    real_part = operand.real
    imag_part = operand.imag

    cos_imag = np_real_cos_impl(context, builder, real_unary_sig, [imag_part])
    cosh_real = np_real_cosh_impl(
        context, builder, real_unary_sig, [real_part]
    )
    sin_imag = np_real_sin_impl(context, builder, real_unary_sig, [imag_part])
    sinh_real = np_real_sinh_impl(
        context, builder, real_unary_sig, [real_part]
    )

    result.real = builder.fmul(cos_imag, cosh_real)
    result.imag = builder.fmul(sin_imag, sinh_real)

    return result._getvalue()
1138
+
1139
+
1140
+ ########################################################################
1141
+ # NumPy tanh
1142
+
1143
+
1144
def np_real_tanh_impl(context, builder, sig, args):
    """Emit ``tanh`` for one real operand via ``mathimpl.tanh_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.tanh_impl(context, builder, sig, args)
    return result
1147
+
1148
+
1149
def np_complex_tanh_impl(context, builder, sig, args):
    """Emit ``tanh`` for a complex operand.

    npymath does not provide complex tan functions, so the code in
    funcs.inc.src for tanh is translated here. The numerator
    ``num_re + num_im*j`` and the denominator ``den_re + den_im*j`` are
    built from sin/cos of the imaginary part and sinh/cosh of the real
    part; the quotient is then formed by multiplying the numerator with
    the conjugate of the denominator and scaling by
    ``1 / (den_re**2 + den_im**2)``.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    real_ty = complex_ty.underlying_float
    real_unary_sig = typing.signature(real_ty, real_ty)
    one = context.get_constant(real_ty, 1.0)
    operand = context.make_complex(builder, complex_ty, args[0])
    result = context.make_complex(builder, complex_ty)

    real_part = operand.real
    imag_part = operand.imag
    sin_imag = np_real_sin_impl(context, builder, real_unary_sig, [imag_part])
    cos_imag = np_real_cos_impl(context, builder, real_unary_sig, [imag_part])
    sinh_real = np_real_sinh_impl(
        context, builder, real_unary_sig, [real_part]
    )
    cosh_real = np_real_cosh_impl(
        context, builder, real_unary_sig, [real_part]
    )
    num_re = builder.fmul(cos_imag, sinh_real)
    num_im = builder.fmul(sin_imag, cosh_real)
    den_re = builder.fmul(cos_imag, cosh_real)
    # note: opposite sign from code in funcs.inc.src
    den_im = builder.fmul(sin_imag, sinh_real)
    den_re_sq = builder.fmul(den_re, den_re)
    den_im_sq = builder.fmul(den_im, den_im)
    den_norm = builder.fadd(den_re_sq, den_im_sq)
    inv_den_norm = builder.fdiv(one, den_norm)
    num_re_den_re = builder.fmul(num_re, den_re)
    num_im_den_im = builder.fmul(num_im, den_im)
    num_im_den_re = builder.fmul(num_im, den_re)
    num_re_den_im = builder.fmul(num_re, den_im)
    quot_re = builder.fadd(num_re_den_re, num_im_den_im)
    quot_im = builder.fsub(num_im_den_re, num_re_den_im)
    result.real = builder.fmul(quot_re, inv_den_norm)
    result.imag = builder.fmul(quot_im, inv_den_norm)

    return result._getvalue()
1185
+
1186
+
1187
+ ########################################################################
1188
+ # NumPy asinh
1189
+
1190
+
1191
def np_real_asinh_impl(context, builder, sig, args):
    """Emit ``arcsinh`` for one real operand via ``mathimpl.asinh_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.asinh_impl(context, builder, sig, args)
    return result
1194
+
1195
+
1196
+ ########################################################################
1197
+ # NumPy acosh
1198
+
1199
+
1200
def np_real_acosh_impl(context, builder, sig, args):
    """Emit ``arccosh`` for one real operand via ``mathimpl.acosh_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.acosh_impl(context, builder, sig, args)
    return result
1203
+
1204
+
1205
def np_complex_acosh_impl(context, builder, sig, args):
    """Emit ``arccosh`` for a complex operand.

    npymath does not provide a complex acosh, so the code in funcs.inc.src
    is translated here as ``log(x + sqrt(x + 1) * sqrt(x - 1))``, composed
    from the complex add/sub/mul, sqrt and log implementations.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    complex_ty = sig.args[0]
    complex_binary_sig = typing.signature(complex_ty, complex_ty, complex_ty)

    one = context.get_constant_generic(builder, complex_ty, 1.0 + 0.0j)
    operand = args[0]

    plus_one = numbers.complex_add_impl(
        context, builder, complex_binary_sig, [operand, one]
    )
    minus_one = numbers.complex_sub_impl(
        context, builder, complex_binary_sig, [operand, one]
    )
    sqrt_plus_one = np_complex_sqrt_impl(context, builder, sig, [plus_one])
    sqrt_minus_one = np_complex_sqrt_impl(context, builder, sig, [minus_one])
    sqrt_product = numbers.complex_mul_impl(
        context, builder, complex_binary_sig, [sqrt_plus_one, sqrt_minus_one]
    )
    log_operand = numbers.complex_add_impl(
        context, builder, complex_binary_sig, [operand, sqrt_product]
    )

    return np_complex_log_impl(context, builder, sig, [log_operand])
1229
+
1230
+
1231
+ ########################################################################
1232
+ # NumPy atanh
1233
+
1234
+
1235
def np_real_atanh_impl(context, builder, sig, args):
    """Emit ``arctanh`` for one real operand via ``mathimpl.atanh_impl``."""
    _check_arity_and_homogeneity(sig, args, 1)
    result = mathimpl.atanh_impl(context, builder, sig, args)
    return result
1238
+
1239
+
1240
+ ########################################################################
1241
+ # NumPy floor
1242
+
1243
+
1244
def np_real_floor_impl(context, builder, sig, args):
    """Emit ``floor`` for one real operand via the ``llvm.floor`` intrinsic."""
    _check_arity_and_homogeneity(sig, args, 1)

    intrinsic_name = "llvm.floor"
    return mathimpl.call_fp_intrinsic(builder, intrinsic_name, args)
1248
+
1249
+
1250
+ ########################################################################
1251
+ # NumPy ceil
1252
+
1253
+
1254
def np_real_ceil_impl(context, builder, sig, args):
    """Emit ``ceil`` for one real operand via the ``llvm.ceil`` intrinsic."""
    _check_arity_and_homogeneity(sig, args, 1)

    intrinsic_name = "llvm.ceil"
    return mathimpl.call_fp_intrinsic(builder, intrinsic_name, args)
1258
+
1259
+
1260
+ ########################################################################
1261
+ # NumPy trunc
1262
+
1263
+
1264
def np_real_trunc_impl(context, builder, sig, args):
    """Emit ``trunc`` for one real operand via the ``llvm.trunc`` intrinsic."""
    _check_arity_and_homogeneity(sig, args, 1)

    intrinsic_name = "llvm.trunc"
    return mathimpl.call_fp_intrinsic(builder, intrinsic_name, args)
1268
+
1269
+
1270
+ ########################################################################
1271
+ # NumPy fabs
1272
+
1273
+
1274
def np_real_fabs_impl(context, builder, sig, args):
    """Emit ``fabs`` for one real operand via the ``llvm.fabs`` intrinsic."""
    _check_arity_and_homogeneity(sig, args, 1)

    intrinsic_name = "llvm.fabs"
    return mathimpl.call_fp_intrinsic(builder, intrinsic_name, args)
1278
+
1279
+
1280
+ ########################################################################
1281
+ # NumPy style predicates
1282
+
1283
+
1284
+ # For real and integer types rely on numbers... but complex ordering in
1285
+ # NumPy is lexicographic (while Python does not provide ordering).
1286
def np_complex_ge_impl(context, builder, sig, args):
    """Emit the lexicographic ``>=`` predicate for two complex operands.

    Equivalent to macro CGE in NumPy's loops.c.src:
    ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi >= yi))
    """
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)

    complex_ty = sig.args[0]
    lhs, rhs = (
        context.make_complex(builder, complex_ty, value=val) for val in args
    )
    lhs_re = lhs.real
    lhs_im = lhs.imag
    rhs_re = rhs.real
    rhs_im = rhs.imag

    re_gt = builder.fcmp_ordered(">", lhs_re, rhs_re)
    im_both_ordered = builder.fcmp_ordered("ord", lhs_im, rhs_im)
    re_eq = builder.fcmp_ordered("==", lhs_re, rhs_re)
    im_ge = builder.fcmp_ordered(">=", lhs_im, rhs_im)
    real_decides = builder.and_(re_gt, im_both_ordered)
    imag_decides = builder.and_(re_eq, im_ge)
    return builder.or_(real_decides, imag_decides)
1305
+
1306
+
1307
def np_complex_le_impl(context, builder, sig, args):
    """Emit the lexicographic ``<=`` predicate for two complex operands.

    Equivalent to macro CLE in NumPy's loops.c.src:
    ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi <= yi))
    """
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)

    complex_ty = sig.args[0]
    lhs, rhs = (
        context.make_complex(builder, complex_ty, value=val) for val in args
    )
    lhs_re = lhs.real
    lhs_im = lhs.imag
    rhs_re = rhs.real
    rhs_im = rhs.imag

    re_lt = builder.fcmp_ordered("<", lhs_re, rhs_re)
    im_both_ordered = builder.fcmp_ordered("ord", lhs_im, rhs_im)
    re_eq = builder.fcmp_ordered("==", lhs_re, rhs_re)
    im_le = builder.fcmp_ordered("<=", lhs_im, rhs_im)
    real_decides = builder.and_(re_lt, im_both_ordered)
    imag_decides = builder.and_(re_eq, im_le)
    return builder.or_(real_decides, imag_decides)
1326
+
1327
+
1328
def np_complex_gt_impl(context, builder, sig, args):
    """Emit the lexicographic ``>`` predicate for two complex operands.

    Equivalent to macro CGT in NumPy's loops.c.src:
    ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi > yi))
    """
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)

    complex_ty = sig.args[0]
    lhs, rhs = (
        context.make_complex(builder, complex_ty, value=val) for val in args
    )
    lhs_re = lhs.real
    lhs_im = lhs.imag
    rhs_re = rhs.real
    rhs_im = rhs.imag

    re_gt = builder.fcmp_ordered(">", lhs_re, rhs_re)
    im_both_ordered = builder.fcmp_ordered("ord", lhs_im, rhs_im)
    re_eq = builder.fcmp_ordered("==", lhs_re, rhs_re)
    im_gt = builder.fcmp_ordered(">", lhs_im, rhs_im)
    real_decides = builder.and_(re_gt, im_both_ordered)
    imag_decides = builder.and_(re_eq, im_gt)
    return builder.or_(real_decides, imag_decides)
1347
+
1348
+
1349
def np_complex_lt_impl(context, builder, sig, args):
    """Emit the lexicographic ``<`` predicate for two complex operands.

    Equivalent to macro CLT in NumPy's loops.c.src:
    ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) || (xr == yr && xi < yi))
    """
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)

    complex_ty = sig.args[0]
    lhs, rhs = (
        context.make_complex(builder, complex_ty, value=val) for val in args
    )
    lhs_re = lhs.real
    lhs_im = lhs.imag
    rhs_re = rhs.real
    rhs_im = rhs.imag

    re_lt = builder.fcmp_ordered("<", lhs_re, rhs_re)
    im_both_ordered = builder.fcmp_ordered("ord", lhs_im, rhs_im)
    re_eq = builder.fcmp_ordered("==", lhs_re, rhs_re)
    im_lt = builder.fcmp_ordered("<", lhs_im, rhs_im)
    real_decides = builder.and_(re_lt, im_both_ordered)
    imag_decides = builder.and_(re_eq, im_lt)
    return builder.or_(real_decides, imag_decides)
1368
+
1369
+
1370
def np_complex_eq_impl(context, builder, sig, args):
    """Emit the ``==`` predicate for two complex operands.

    Equivalent to macro CEQ in NumPy's loops.c.src:
    (xr == yr && xi == yi), using ordered float comparisons.
    """
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)

    complex_ty = sig.args[0]
    lhs, rhs = (
        context.make_complex(builder, complex_ty, value=val) for val in args
    )
    lhs_re = lhs.real
    lhs_im = lhs.imag
    rhs_re = rhs.real
    rhs_im = rhs.imag

    re_eq = builder.fcmp_ordered("==", lhs_re, rhs_re)
    im_eq = builder.fcmp_ordered("==", lhs_im, rhs_im)
    return builder.and_(re_eq, im_eq)
1385
+
1386
+
1387
def np_complex_ne_impl(context, builder, sig, args):
    """Emit the ``!=`` predicate for two complex operands.

    Equivalent to macro CNE in NumPy's loops.c.src:
    (xr != yr || xi != yi), using unordered float comparisons.
    """
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)

    complex_ty = sig.args[0]
    lhs, rhs = (
        context.make_complex(builder, complex_ty, value=val) for val in args
    )
    lhs_re = lhs.real
    lhs_im = lhs.imag
    rhs_re = rhs.real
    rhs_im = rhs.imag

    re_ne = builder.fcmp_unordered("!=", lhs_re, rhs_re)
    im_ne = builder.fcmp_unordered("!=", lhs_im, rhs_im)
    return builder.or_(re_ne, im_ne)
1402
+
1403
+
1404
+ ########################################################################
1405
+ # NumPy logical algebra
1406
+
1407
+ # these are made generic for all types for now, assuming that
1408
+ # cgutils.is_true works in the underlying types.
1409
+
1410
+
1411
def _complex_is_true(context, builder, ty, val):
    """Emit the truth value of a complex number.

    The value counts as true when ``cgutils.is_true`` holds for its real
    part or for its imaginary part.
    """
    unpacked = context.make_complex(builder, ty, value=val)
    real_truthy = cgutils.is_true(builder, unpacked.real)
    imag_truthy = cgutils.is_true(builder, unpacked.imag)
    return builder.or_(real_truthy, imag_truthy)
1416
+
1417
+
1418
def np_logical_and_impl(context, builder, sig, args):
    """Emit ``logical_and``: AND of ``cgutils.is_true`` on both operands."""
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
    lhs_truthy = cgutils.is_true(builder, args[0])
    rhs_truthy = cgutils.is_true(builder, args[1])
    return builder.and_(lhs_truthy, rhs_truthy)
1423
+
1424
+
1425
def np_complex_logical_and_impl(context, builder, sig, args):
    """Emit ``logical_and`` for complex operands via ``_complex_is_true``."""
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
    lhs_truthy = _complex_is_true(context, builder, sig.args[0], args[0])
    rhs_truthy = _complex_is_true(context, builder, sig.args[1], args[1])
    return builder.and_(lhs_truthy, rhs_truthy)
1430
+
1431
+
1432
def np_logical_or_impl(context, builder, sig, args):
    """Emit ``logical_or``: OR of ``cgutils.is_true`` on both operands."""
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
    lhs_truthy = cgutils.is_true(builder, args[0])
    rhs_truthy = cgutils.is_true(builder, args[1])
    return builder.or_(lhs_truthy, rhs_truthy)
1437
+
1438
+
1439
def np_complex_logical_or_impl(context, builder, sig, args):
    """Emit ``logical_or`` for complex operands via ``_complex_is_true``."""
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
    lhs_truthy = _complex_is_true(context, builder, sig.args[0], args[0])
    rhs_truthy = _complex_is_true(context, builder, sig.args[1], args[1])
    return builder.or_(lhs_truthy, rhs_truthy)
1444
+
1445
+
1446
def np_logical_xor_impl(context, builder, sig, args):
    """Emit ``logical_xor``: XOR of ``cgutils.is_true`` on both operands."""
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
    lhs_truthy = cgutils.is_true(builder, args[0])
    rhs_truthy = cgutils.is_true(builder, args[1])
    return builder.xor(lhs_truthy, rhs_truthy)
1451
+
1452
+
1453
def np_complex_logical_xor_impl(context, builder, sig, args):
    """Emit ``logical_xor`` for complex operands via ``_complex_is_true``."""
    _check_arity_and_homogeneity(sig, args, 2, return_type=types.boolean)
    lhs_truthy = _complex_is_true(context, builder, sig.args[0], args[0])
    rhs_truthy = _complex_is_true(context, builder, sig.args[1], args[1])
    return builder.xor(lhs_truthy, rhs_truthy)
1458
+
1459
+
1460
def np_logical_not_impl(context, builder, sig, args):
    """Emit ``logical_not`` as ``cgutils.is_false`` of the single operand."""
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    operand = args[0]
    return cgutils.is_false(builder, operand)
1463
+
1464
+
1465
def np_complex_logical_not_impl(context, builder, sig, args):
    """Emit the logical NOT of a single complex operand.

    The operand's truth bit comes from ``_complex_is_true`` and is then
    inverted with ``builder.not_``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    operand_truth = _complex_is_true(context, builder, sig.args[0], args[0])
    negated = builder.not_(operand_truth)
    return negated
1469
+
1470
+
1471
+ ########################################################################
1472
+ # NumPy style max/min
1473
+ #
1474
+ # There are 2 different sets of functions to perform max and min in
1475
+ # NumPy: maximum/minimum and fmax/fmin.
1476
+ # The two sets differ in the way NaNs are handled, so the actual differences
1477
+ # come into play only on float/complex numbers. The functions used for
1478
+ # integers are shared. For booleans maximum is equivalent to or, and
1479
+ # minimum is equivalent to and. Datetime support will go elsewhere.
1480
+
1481
+
1482
def np_int_smax_impl(context, builder, sig, args):
    """Emit the maximum of two integers using a signed comparison.

    Selects the first operand when it is ``>=`` the second (signed),
    otherwise the second.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    lhs, rhs = args
    lhs_is_bigger = builder.icmp_signed(">=", lhs, rhs)
    larger = builder.select(lhs_is_bigger, lhs, rhs)
    return larger
1487
+
1488
+
1489
def np_int_umax_impl(context, builder, sig, args):
    """Emit the maximum of two integers using an unsigned comparison.

    Selects the first operand when it is ``>=`` the second (unsigned),
    otherwise the second.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    lhs, rhs = args
    lhs_is_bigger = builder.icmp_unsigned(">=", lhs, rhs)
    larger = builder.select(lhs_is_bigger, lhs, rhs)
    return larger
1494
+
1495
+
1496
def np_real_maximum_impl(context, builder, sig, args):
    """Emit a NaN-propagating maximum of two real values.

    ``maximum`` prefers NaN: when either operand is NaN the result is a
    NaN (the first operand if it is NaN, otherwise the second). With no
    NaN involved, the first operand wins when it is ``>=`` the second.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    lhs, rhs = args

    # NaN path: "uno" compares true when at least one operand is NaN.
    lhs_is_nan = builder.fcmp_unordered("uno", lhs, lhs)
    either_is_nan = builder.fcmp_unordered("uno", lhs, rhs)
    when_nan = builder.select(lhs_is_nan, lhs, rhs)

    # Regular path: plain ordered comparison.
    lhs_ge_rhs = builder.fcmp_ordered(">=", lhs, rhs)
    when_ordered = builder.select(lhs_ge_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1509
+
1510
+
1511
def np_real_fmax_impl(context, builder, sig, args):
    """Emit a NaN-avoiding maximum of two real values.

    ``fmax`` prefers non-NaN: when the second operand is NaN the first is
    returned, otherwise (only the first is NaN) the second is returned.
    With no NaN involved, the first operand wins when it is ``>=`` the
    second.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    lhs, rhs = args

    # NaN path: "uno" compares true when at least one operand is NaN.
    rhs_is_nan = builder.fcmp_unordered("uno", rhs, rhs)
    either_is_nan = builder.fcmp_unordered("uno", lhs, rhs)
    when_nan = builder.select(rhs_is_nan, lhs, rhs)

    # Regular path: plain ordered comparison.
    lhs_ge_rhs = builder.fcmp_ordered(">=", lhs, rhs)
    when_ordered = builder.select(lhs_ge_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1524
+
1525
+
1526
def np_complex_maximum_impl(context, builder, sig, args):
    """Emit a NaN-propagating maximum of two complex values.

    ``maximum`` prefers NaN. Complex numbers have more than one kind of
    NaN; following NumPy's documentation, the NaN from the first argument
    is returned when both arguments are NaN, and when only one argument
    is NaN that one is returned.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    complex_ty = sig.args[0]
    # Signatures for the helper impls: unary isnan and binary comparison.
    unary_bool_sig = typing.signature(types.boolean, complex_ty)
    binary_bool_sig = typing.signature(types.boolean, complex_ty, complex_ty)
    lhs, rhs = args

    lhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [lhs])
    rhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [rhs])
    either_is_nan = builder.or_(lhs_is_nan, rhs_is_nan)
    when_nan = builder.select(lhs_is_nan, lhs, rhs)

    lhs_ge_rhs = np_complex_ge_impl(context, builder, binary_bool_sig, args)
    when_ordered = builder.select(lhs_ge_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1546
+
1547
+
1548
def np_complex_fmax_impl(context, builder, sig, args):
    """Emit a NaN-avoiding maximum of two complex values.

    ``fmax`` prefers non-NaN. Complex numbers have more than one kind of
    NaN; following NumPy's documentation, the NaN from the first argument
    is returned when both arguments are NaN.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    complex_ty = sig.args[0]
    # Signatures for the helper impls: unary isnan and binary comparison.
    unary_bool_sig = typing.signature(types.boolean, complex_ty)
    binary_bool_sig = typing.signature(types.boolean, complex_ty, complex_ty)
    lhs, rhs = args

    lhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [lhs])
    rhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [rhs])
    either_is_nan = builder.or_(lhs_is_nan, rhs_is_nan)
    # If the second operand is NaN fall back to the first, else the second.
    when_nan = builder.select(rhs_is_nan, lhs, rhs)

    lhs_ge_rhs = np_complex_ge_impl(context, builder, binary_bool_sig, args)
    when_ordered = builder.select(lhs_ge_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1567
+
1568
+
1569
def np_int_smin_impl(context, builder, sig, args):
    """Emit the minimum of two integers using a signed comparison.

    Selects the first operand when it is ``<=`` the second (signed),
    otherwise the second.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    lhs, rhs = args
    lhs_is_smaller = builder.icmp_signed("<=", lhs, rhs)
    smaller = builder.select(lhs_is_smaller, lhs, rhs)
    return smaller
1574
+
1575
+
1576
def np_int_umin_impl(context, builder, sig, args):
    """Emit the minimum of two integers using an unsigned comparison.

    Selects the first operand when it is ``<=`` the second (unsigned),
    otherwise the second.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    lhs, rhs = args
    lhs_is_smaller = builder.icmp_unsigned("<=", lhs, rhs)
    smaller = builder.select(lhs_is_smaller, lhs, rhs)
    return smaller
1581
+
1582
+
1583
def np_real_minimum_impl(context, builder, sig, args):
    """Emit a NaN-propagating minimum of two real values.

    ``minimum`` prefers NaN: when either operand is NaN the result is a
    NaN (the first operand if it is NaN, otherwise the second). With no
    NaN involved, the first operand wins when it is ``<=`` the second.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    lhs, rhs = args

    # NaN path: "uno" compares true when at least one operand is NaN.
    lhs_is_nan = builder.fcmp_unordered("uno", lhs, lhs)
    either_is_nan = builder.fcmp_unordered("uno", lhs, rhs)
    when_nan = builder.select(lhs_is_nan, lhs, rhs)

    # Regular path: plain ordered comparison.
    lhs_le_rhs = builder.fcmp_ordered("<=", lhs, rhs)
    when_ordered = builder.select(lhs_le_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1596
+
1597
+
1598
def np_real_fmin_impl(context, builder, sig, args):
    """Emit a NaN-avoiding minimum of two real values.

    ``fmin`` prefers non-NaN: when the first operand is NaN the second is
    returned, otherwise (only the second is NaN) the first is returned.
    With no NaN involved, the first operand wins when it is ``<=`` the
    second.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    lhs, rhs = args

    # NaN path: "uno" compares true when at least one operand is NaN.
    lhs_is_nan = builder.fcmp_unordered("uno", lhs, lhs)
    either_is_nan = builder.fcmp_unordered("uno", lhs, rhs)
    when_nan = builder.select(lhs_is_nan, rhs, lhs)

    # Regular path: plain ordered comparison.
    lhs_le_rhs = builder.fcmp_ordered("<=", lhs, rhs)
    when_ordered = builder.select(lhs_le_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1611
+
1612
+
1613
def np_complex_minimum_impl(context, builder, sig, args):
    """Emit a NaN-propagating minimum of two complex values.

    ``minimum`` prefers NaN. Complex numbers have more than one kind of
    NaN; following NumPy's documentation, the NaN from the first argument
    is returned when both arguments are NaN, and when only one argument
    is NaN that one is returned.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    complex_ty = sig.args[0]
    # Signatures for the helper impls: unary isnan and binary comparison.
    unary_bool_sig = typing.signature(types.boolean, complex_ty)
    binary_bool_sig = typing.signature(types.boolean, complex_ty, complex_ty)
    lhs, rhs = args

    lhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [lhs])
    rhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [rhs])
    either_is_nan = builder.or_(lhs_is_nan, rhs_is_nan)
    when_nan = builder.select(lhs_is_nan, lhs, rhs)

    lhs_le_rhs = np_complex_le_impl(context, builder, binary_bool_sig, args)
    when_ordered = builder.select(lhs_le_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1633
+
1634
+
1635
def np_complex_fmin_impl(context, builder, sig, args):
    """Emit a NaN-avoiding minimum of two complex values.

    ``fmin`` prefers non-NaN. Complex numbers have more than one kind of
    NaN; following NumPy's documentation, the NaN from the first argument
    is returned when both arguments are NaN.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    complex_ty = sig.args[0]
    # Signatures for the helper impls: unary isnan and binary comparison.
    unary_bool_sig = typing.signature(types.boolean, complex_ty)
    binary_bool_sig = typing.signature(types.boolean, complex_ty, complex_ty)
    lhs, rhs = args

    lhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [lhs])
    rhs_is_nan = np_complex_isnan_impl(context, builder, unary_bool_sig, [rhs])
    either_is_nan = builder.or_(lhs_is_nan, rhs_is_nan)
    # If the second operand is NaN fall back to the first, else the second.
    when_nan = builder.select(rhs_is_nan, lhs, rhs)

    lhs_le_rhs = np_complex_le_impl(context, builder, binary_bool_sig, args)
    when_ordered = builder.select(lhs_le_rhs, lhs, rhs)

    return builder.select(either_is_nan, when_nan, when_ordered)
1654
+
1655
+
1656
+ ########################################################################
1657
+ # NumPy floating point misc
1658
+
1659
+
1660
def np_int_isnan_impl(context, builder, sig, args):
    """Return the constant false bit for the integer ``isnan`` lowering."""
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    never_nan = cgutils.false_bit
    return never_nan
1663
+
1664
+
1665
def np_real_isnan_impl(context, builder, sig, args):
    """Emit the NaN test for a real operand via ``mathimpl.is_nan``."""
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    operand = args[0]
    return mathimpl.is_nan(builder, operand)
1668
+
1669
+
1670
def np_complex_isnan_impl(context, builder, sig, args):
    """Emit the NaN test for a complex operand.

    The raw value is wrapped with ``context.make_complex`` and handed to
    ``cmathimpl.is_nan``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)

    [raw_value] = args
    [complex_ty] = sig.args
    wrapped = context.make_complex(builder, complex_ty, value=raw_value)
    return cmathimpl.is_nan(builder, wrapped)
1677
+
1678
+
1679
def np_int_isfinite_impl(context, builder, sig, args):
    """Return the constant true bit for the integer ``isfinite`` lowering."""
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    always_finite = cgutils.true_bit
    return always_finite
1682
+
1683
+
1684
def np_datetime_isfinite_impl(context, builder, sig, args):
    """Emit the ``isfinite`` test for a datetime-like operand.

    The operand counts as finite when it differs from ``npdatetime.NAT``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    # Use the signed comparison helper, matching np_datetime_isnat_impl.
    # For "!=" llvmlite emits the same `icmp ne` either way, so this only
    # removes the signed/unsigned inconsistency between the two NAT checks.
    return builder.icmp_signed("!=", args[0], npdatetime.NAT)
1687
+
1688
+
1689
def np_datetime_isnat_impl(context, builder, sig, args):
    """Emit the ``isnat`` test: true when the operand equals
    ``npdatetime.NAT``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    operand = args[0]
    is_nat = builder.icmp_signed("==", operand, npdatetime.NAT)
    return is_nat
1692
+
1693
+
1694
def np_real_isfinite_impl(context, builder, sig, args):
    """Emit the finiteness test for a real operand via
    ``mathimpl.is_finite``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    operand = args[0]
    return mathimpl.is_finite(builder, operand)
1697
+
1698
+
1699
def np_complex_isfinite_impl(context, builder, sig, args):
    """Emit the finiteness test for a complex operand.

    The raw value is wrapped with ``context.make_complex`` and handed to
    ``cmathimpl.is_finite``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    [raw_value] = args
    [complex_ty] = sig.args
    wrapped = context.make_complex(builder, complex_ty, value=raw_value)
    return cmathimpl.is_finite(builder, wrapped)
1705
+
1706
+
1707
def np_int_isinf_impl(context, builder, sig, args):
    """Return the constant false bit for the integer ``isinf`` lowering."""
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    never_inf = cgutils.false_bit
    return never_inf
1710
+
1711
+
1712
def np_real_isinf_impl(context, builder, sig, args):
    """Emit the infinity test for a real operand via ``mathimpl.is_inf``."""
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    operand = args[0]
    return mathimpl.is_inf(builder, operand)
1715
+
1716
+
1717
def np_complex_isinf_impl(context, builder, sig, args):
    """Emit the infinity test for a complex operand.

    The raw value is wrapped with ``context.make_complex`` and handed to
    ``cmathimpl.is_inf``.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    [raw_value] = args
    [complex_ty] = sig.args
    wrapped = context.make_complex(builder, complex_ty, value=raw_value)
    return cmathimpl.is_inf(builder, wrapped)
1723
+
1724
+
1725
def np_real_signbit_impl(context, builder, sig, args):
    """Emit the sign-bit test for a real operand.

    LLVM offers no signbit intrinsic, so the float is bitcast to an
    unsigned integer of the same width, masked down to its top bit, and
    compared against zero.
    """
    _check_arity_and_homogeneity(sig, args, 1, return_type=types.boolean)
    # One top-bit mask per supported float type.
    sign_masks = {
        types.float16: context.get_constant(types.uint16, 0x8000),
        types.float32: context.get_constant(types.uint32, 0x80000000),
        types.float64: context.get_constant(types.uint64, 0x8000000000000000),
    }
    float_ty = sig.args[0]
    # Unsigned integer type with the same bit width as the float.
    uint_ty = getattr(types, f"uint{float_ty.bitwidth}")
    ll_uint_ty = context.get_value_type(uint_ty)
    raw_bits = builder.bitcast(args[0], ll_uint_ty)
    sign_only = builder.and_(raw_bits, sign_masks[float_ty])
    zero = sign_only.type(0)
    return builder.icmp_unsigned("!=", sign_only, zero)
1742
+
1743
+
1744
def np_real_copysign_impl(context, builder, sig, args):
    """Emit ``copysign`` for two real operands.

    After the arity/homogeneity check this simply forwards to
    ``mathimpl.copysign_float_impl``.
    """
    _check_arity_and_homogeneity(sig, args, 2)
    result = mathimpl.copysign_float_impl(context, builder, sig, args)
    return result
1748
+
1749
+
1750
def np_real_nextafter_impl(context, builder, sig, args):
    """Emit ``nextafter`` for two real operands.

    The external function name is chosen per float type and the call is
    delegated to ``_dispatch_func_by_name_type``.
    """
    _check_arity_and_homogeneity(sig, args, 2)

    # Float type -> name of the external function to call.
    names_by_type = {
        types.float32: "numba_nextafterf",
        types.float64: "numba_nextafter",
    }

    result = _dispatch_func_by_name_type(
        context, builder, sig, args, names_by_type, "nextafter"
    )
    return result
1761
+
1762
+
1763
def np_real_spacing_impl(context, builder, sig, args):
    """Emit ``spacing`` for a real operand.

    The result is computed as ``nextafter(x, copysign(inf, x)) - x``.

    This is different to how NumPy does it: NumPy has a specialisation of
    nextafter called _next, which is used. See:
    https://github.com/numpy/numpy/blob/12c2b7dd62fc0c14b81c8892ed5f4f59cc94d09c/numpy/core/src/npymath/ieee754.c.src#L32-L38
    Numba elects to use `nextafter` for a similar behaviour to save
    translating this very involved function. Further, the NumPy comments
    note that there is a lot of redundancy present between the two.

    ``args`` may be any sequence (list or tuple) holding the single
    operand.
    """
    _check_arity_and_homogeneity(sig, args, 1)

    # Float type -> name of the external nextafter function to call.
    dispatch_table = {
        types.float32: "numba_nextafterf",
        types.float64: "numba_nextafter",
    }

    [ty] = sig.args
    # nextafter takes two operands of the same type as the input.
    inner_sig = typing.signature(sig.return_type, ty, ty)
    x = args[0]
    ll_ty = x.type
    ll_inf = ll_ty(np.inf)
    fnty = llvmlite.ir.FunctionType(ll_ty, [ll_ty, ll_ty])
    fn = cgutils.insert_pure_function(
        builder.module, fnty, name="llvm.copysign"
    )
    # Infinity carrying the sign of x: the direction nextafter steps in.
    ll_sinf = builder.call(fn, [ll_inf, x])
    # Build a new list by unpacking rather than ``args + [...]``, which
    # raises TypeError when the caller passes ``args`` as a tuple.
    inner_args = [*args, ll_sinf]
    nextafter = _dispatch_func_by_name_type(
        context, builder, inner_sig, inner_args, dispatch_table, "nextafter"
    )
    return builder.fsub(nextafter, x)
1793
+
1794
+
1795
def np_real_ldexp_impl(context, builder, sig, args):
    """Emit ``ldexp`` for a real value and an integer exponent.

    Unlike the other ufunc impls here, the arguments are not homogeneous:
    the second argument may come as an 'i' or an 'l'. It is cast to
    ``types.intc`` before the call is delegated to ``mathimpl.ldexp_impl``
    with a ``(float, intc) -> float`` signature.
    """
    value, exponent = args
    value_ty, exponent_ty = sig.args
    # The delegated implementation is given a C int exponent.
    c_int_exponent = context.cast(builder, exponent, exponent_ty, types.intc)
    delegated_sig = typing.signature(value_ty, value_ty, types.intc)
    delegated_args = (value, c_int_exponent)
    return mathimpl.ldexp_impl(context, builder, delegated_sig, delegated_args)