numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
|
|
1
1
|
import numpy as np
|
2
|
-
from numba.cuda.testing import (
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
from numba.cuda.testing import (
|
3
|
+
skip_unless_cc_53,
|
4
|
+
unittest,
|
5
|
+
CUDATestCase,
|
6
|
+
skip_on_cudasim,
|
7
|
+
)
|
6
8
|
from numba.np import numpy_support
|
7
9
|
from numba import cuda, float32, float64, int32, vectorize, void, int64
|
8
10
|
import math
|
@@ -253,8 +255,10 @@ class TestCudaMath(CUDATestCase):
|
|
253
255
|
def unary_bool_special_values(self, func, npfunc, npdtype, npmtype):
|
254
256
|
fi = np.finfo(npdtype)
|
255
257
|
denorm = fi.tiny / 4
|
256
|
-
A = np.array(
|
257
|
-
|
258
|
+
A = np.array(
|
259
|
+
[0.0, denorm, fi.tiny, 0.5, 1.0, fi.max, np.inf, np.nan],
|
260
|
+
dtype=npdtype,
|
261
|
+
)
|
258
262
|
B = np.empty_like(A, dtype=np.int32)
|
259
263
|
cfunc = cuda.jit((npmtype[::1], int32[::1]))(func)
|
260
264
|
|
@@ -314,7 +318,7 @@ class TestCudaMath(CUDATestCase):
|
|
314
318
|
cfunc[1, nelem](A, A, B)
|
315
319
|
np.testing.assert_allclose(npfunc(A, A), B)
|
316
320
|
|
317
|
-
|
321
|
+
# ---------------------------------------------------------------------------
|
318
322
|
# test_math_acos
|
319
323
|
|
320
324
|
def test_math_acos(self):
|
@@ -325,7 +329,7 @@ class TestCudaMath(CUDATestCase):
|
|
325
329
|
self.unary_template_int64(math_acos, np.arccos, start=0, stop=0)
|
326
330
|
self.unary_template_uint64(math_acos, np.arccos, start=0, stop=0)
|
327
331
|
|
328
|
-
|
332
|
+
# ---------------------------------------------------------------------------
|
329
333
|
# test_math_asin
|
330
334
|
|
331
335
|
def test_math_asin(self):
|
@@ -336,7 +340,7 @@ class TestCudaMath(CUDATestCase):
|
|
336
340
|
self.unary_template_int64(math_asin, np.arcsin, start=0, stop=0)
|
337
341
|
self.unary_template_uint64(math_asin, np.arcsin, start=0, stop=0)
|
338
342
|
|
339
|
-
|
343
|
+
# ---------------------------------------------------------------------------
|
340
344
|
# test_math_atan
|
341
345
|
|
342
346
|
def test_math_atan(self):
|
@@ -345,7 +349,7 @@ class TestCudaMath(CUDATestCase):
|
|
345
349
|
self.unary_template_int64(math_atan, np.arctan)
|
346
350
|
self.unary_template_uint64(math_atan, np.arctan)
|
347
351
|
|
348
|
-
|
352
|
+
# ---------------------------------------------------------------------------
|
349
353
|
# test_math_acosh
|
350
354
|
|
351
355
|
def test_math_acosh(self):
|
@@ -354,7 +358,7 @@ class TestCudaMath(CUDATestCase):
|
|
354
358
|
self.unary_template_int64(math_acosh, np.arccosh, start=1, stop=2)
|
355
359
|
self.unary_template_uint64(math_acosh, np.arccosh, start=1, stop=2)
|
356
360
|
|
357
|
-
|
361
|
+
# ---------------------------------------------------------------------------
|
358
362
|
# test_math_asinh
|
359
363
|
|
360
364
|
def test_math_asinh(self):
|
@@ -363,16 +367,16 @@ class TestCudaMath(CUDATestCase):
|
|
363
367
|
self.unary_template_int64(math_asinh, np.arcsinh)
|
364
368
|
self.unary_template_uint64(math_asinh, np.arcsinh)
|
365
369
|
|
366
|
-
|
370
|
+
# ---------------------------------------------------------------------------
|
367
371
|
# test_math_atanh
|
368
372
|
|
369
373
|
def test_math_atanh(self):
|
370
|
-
self.unary_template_float32(math_atanh, np.arctanh, start=0, stop
|
371
|
-
self.unary_template_float64(math_atanh, np.arctanh, start=0, stop
|
372
|
-
self.unary_template_int64(math_atanh, np.arctanh, start=0, stop
|
373
|
-
self.unary_template_uint64(math_atanh, np.arctanh, start=0, stop
|
374
|
+
self.unary_template_float32(math_atanh, np.arctanh, start=0, stop=0.9)
|
375
|
+
self.unary_template_float64(math_atanh, np.arctanh, start=0, stop=0.9)
|
376
|
+
self.unary_template_int64(math_atanh, np.arctanh, start=0, stop=0.9)
|
377
|
+
self.unary_template_uint64(math_atanh, np.arctanh, start=0, stop=0.9)
|
374
378
|
|
375
|
-
|
379
|
+
# ---------------------------------------------------------------------------
|
376
380
|
# test_math_cos
|
377
381
|
|
378
382
|
def test_math_cos(self):
|
@@ -399,7 +403,7 @@ class TestCudaMath(CUDATestCase):
|
|
399
403
|
def test_math_fp16_trunc(self):
|
400
404
|
self.unary_template_float16(math_trunc, np.trunc)
|
401
405
|
|
402
|
-
|
406
|
+
# ---------------------------------------------------------------------------
|
403
407
|
# test_math_sin
|
404
408
|
|
405
409
|
def test_math_sin(self):
|
@@ -408,7 +412,7 @@ class TestCudaMath(CUDATestCase):
|
|
408
412
|
self.unary_template_int64(math_sin, np.sin)
|
409
413
|
self.unary_template_uint64(math_sin, np.sin)
|
410
414
|
|
411
|
-
|
415
|
+
# ---------------------------------------------------------------------------
|
412
416
|
# test_math_tan
|
413
417
|
|
414
418
|
def test_math_tan(self):
|
@@ -417,7 +421,7 @@ class TestCudaMath(CUDATestCase):
|
|
417
421
|
self.unary_template_int64(math_tan, np.tan)
|
418
422
|
self.unary_template_uint64(math_tan, np.tan)
|
419
423
|
|
420
|
-
|
424
|
+
# ---------------------------------------------------------------------------
|
421
425
|
# test_math_cosh
|
422
426
|
|
423
427
|
def test_math_cosh(self):
|
@@ -426,7 +430,7 @@ class TestCudaMath(CUDATestCase):
|
|
426
430
|
self.unary_template_int64(math_cosh, np.cosh)
|
427
431
|
self.unary_template_uint64(math_cosh, np.cosh)
|
428
432
|
|
429
|
-
|
433
|
+
# ---------------------------------------------------------------------------
|
430
434
|
# test_math_sinh
|
431
435
|
|
432
436
|
def test_math_sinh(self):
|
@@ -435,7 +439,7 @@ class TestCudaMath(CUDATestCase):
|
|
435
439
|
self.unary_template_int64(math_sinh, np.sinh)
|
436
440
|
self.unary_template_uint64(math_sinh, np.sinh)
|
437
441
|
|
438
|
-
|
442
|
+
# ---------------------------------------------------------------------------
|
439
443
|
# test_math_tanh
|
440
444
|
|
441
445
|
def test_math_tanh(self):
|
@@ -444,7 +448,7 @@ class TestCudaMath(CUDATestCase):
|
|
444
448
|
self.unary_template_int64(math_tanh, np.tanh)
|
445
449
|
self.unary_template_uint64(math_tanh, np.tanh)
|
446
450
|
|
447
|
-
|
451
|
+
# ---------------------------------------------------------------------------
|
448
452
|
# test_math_atan2
|
449
453
|
|
450
454
|
def test_math_atan2(self):
|
@@ -453,31 +457,33 @@ class TestCudaMath(CUDATestCase):
|
|
453
457
|
self.binary_template_int64(math_atan2, np.arctan2)
|
454
458
|
self.binary_template_uint64(math_atan2, np.arctan2)
|
455
459
|
|
456
|
-
|
460
|
+
# ---------------------------------------------------------------------------
|
457
461
|
# test_math_erf
|
458
462
|
|
459
463
|
def test_math_erf(self):
|
460
464
|
@vectorize
|
461
465
|
def ufunc(x):
|
462
466
|
return math.erf(x)
|
467
|
+
|
463
468
|
self.unary_template_float32(math_erf, ufunc)
|
464
469
|
self.unary_template_float64(math_erf, ufunc)
|
465
470
|
self.unary_template_int64(math_erf, ufunc)
|
466
471
|
self.unary_template_uint64(math_erf, ufunc)
|
467
472
|
|
468
|
-
|
473
|
+
# ---------------------------------------------------------------------------
|
469
474
|
# test_math_erfc
|
470
475
|
|
471
476
|
def test_math_erfc(self):
|
472
477
|
@vectorize
|
473
478
|
def ufunc(x):
|
474
479
|
return math.erfc(x)
|
480
|
+
|
475
481
|
self.unary_template_float32(math_erfc, ufunc)
|
476
482
|
self.unary_template_float64(math_erfc, ufunc)
|
477
483
|
self.unary_template_int64(math_erfc, ufunc)
|
478
484
|
self.unary_template_uint64(math_erfc, ufunc)
|
479
485
|
|
480
|
-
|
486
|
+
# ---------------------------------------------------------------------------
|
481
487
|
# test_math_exp
|
482
488
|
|
483
489
|
def test_math_exp(self):
|
@@ -486,7 +492,7 @@ class TestCudaMath(CUDATestCase):
|
|
486
492
|
self.unary_template_int64(math_exp, np.exp)
|
487
493
|
self.unary_template_uint64(math_exp, np.exp)
|
488
494
|
|
489
|
-
|
495
|
+
# ---------------------------------------------------------------------------
|
490
496
|
# test_math_expm1
|
491
497
|
|
492
498
|
def test_math_expm1(self):
|
@@ -495,7 +501,7 @@ class TestCudaMath(CUDATestCase):
|
|
495
501
|
self.unary_template_int64(math_expm1, np.expm1)
|
496
502
|
self.unary_template_uint64(math_expm1, np.expm1)
|
497
503
|
|
498
|
-
|
504
|
+
# ---------------------------------------------------------------------------
|
499
505
|
# test_math_fabs
|
500
506
|
|
501
507
|
def test_math_fabs(self):
|
@@ -504,31 +510,33 @@ class TestCudaMath(CUDATestCase):
|
|
504
510
|
self.unary_template_int64(math_fabs, np.fabs, start=-1)
|
505
511
|
self.unary_template_uint64(math_fabs, np.fabs, start=-1)
|
506
512
|
|
507
|
-
|
513
|
+
# ---------------------------------------------------------------------------
|
508
514
|
# test_math_gamma
|
509
515
|
|
510
516
|
def test_math_gamma(self):
|
511
517
|
@vectorize
|
512
518
|
def ufunc(x):
|
513
519
|
return math.gamma(x)
|
520
|
+
|
514
521
|
self.unary_template_float32(math_gamma, ufunc, start=0.1)
|
515
522
|
self.unary_template_float64(math_gamma, ufunc, start=0.1)
|
516
523
|
self.unary_template_int64(math_gamma, ufunc, start=1)
|
517
524
|
self.unary_template_uint64(math_gamma, ufunc, start=1)
|
518
525
|
|
519
|
-
|
526
|
+
# ---------------------------------------------------------------------------
|
520
527
|
# test_math_lgamma
|
521
528
|
|
522
529
|
def test_math_lgamma(self):
|
523
530
|
@vectorize
|
524
531
|
def ufunc(x):
|
525
532
|
return math.lgamma(x)
|
533
|
+
|
526
534
|
self.unary_template_float32(math_lgamma, ufunc, start=0.1)
|
527
535
|
self.unary_template_float64(math_lgamma, ufunc, start=0.1)
|
528
536
|
self.unary_template_int64(math_lgamma, ufunc, start=1)
|
529
537
|
self.unary_template_uint64(math_lgamma, ufunc, start=1)
|
530
538
|
|
531
|
-
|
539
|
+
# ---------------------------------------------------------------------------
|
532
540
|
# test_math_log
|
533
541
|
|
534
542
|
def test_math_log(self):
|
@@ -537,7 +545,7 @@ class TestCudaMath(CUDATestCase):
|
|
537
545
|
self.unary_template_int64(math_log, np.log, start=1)
|
538
546
|
self.unary_template_uint64(math_log, np.log, start=1)
|
539
547
|
|
540
|
-
|
548
|
+
# ---------------------------------------------------------------------------
|
541
549
|
# test_math_log2
|
542
550
|
|
543
551
|
def test_math_log2(self):
|
@@ -546,7 +554,7 @@ class TestCudaMath(CUDATestCase):
|
|
546
554
|
self.unary_template_int64(math_log2, np.log2, start=1)
|
547
555
|
self.unary_template_uint64(math_log2, np.log2, start=1)
|
548
556
|
|
549
|
-
|
557
|
+
# ---------------------------------------------------------------------------
|
550
558
|
# test_math_log10
|
551
559
|
|
552
560
|
def test_math_log10(self):
|
@@ -555,7 +563,7 @@ class TestCudaMath(CUDATestCase):
|
|
555
563
|
self.unary_template_int64(math_log10, np.log10, start=1)
|
556
564
|
self.unary_template_uint64(math_log10, np.log10, start=1)
|
557
565
|
|
558
|
-
|
566
|
+
# ---------------------------------------------------------------------------
|
559
567
|
# test_math_log1p
|
560
568
|
|
561
569
|
def test_math_log1p(self):
|
@@ -564,7 +572,7 @@ class TestCudaMath(CUDATestCase):
|
|
564
572
|
self.unary_template_int64(math_log1p, np.log1p)
|
565
573
|
self.unary_template_uint64(math_log1p, np.log1p)
|
566
574
|
|
567
|
-
|
575
|
+
# ---------------------------------------------------------------------------
|
568
576
|
# test_math_remainder
|
569
577
|
|
570
578
|
def test_math_remainder(self):
|
@@ -573,16 +581,17 @@ class TestCudaMath(CUDATestCase):
|
|
573
581
|
self.binary_template_int64(math_remainder, np.remainder, start=1)
|
574
582
|
self.binary_template_uint64(math_remainder, np.remainder, start=1)
|
575
583
|
|
576
|
-
@skip_on_cudasim(
|
584
|
+
@skip_on_cudasim("math.remainder(0, 0) raises a ValueError on CUDASim")
|
577
585
|
def test_math_remainder_0_0(self):
|
578
586
|
@cuda.jit(void(float64[::1], int64, int64))
|
579
587
|
def test_0_0(r, x, y):
|
580
588
|
r[0] = math.remainder(x, y)
|
589
|
+
|
581
590
|
r = np.zeros(1, np.float64)
|
582
591
|
test_0_0[1, 1](r, 0, 0)
|
583
592
|
self.assertTrue(np.isnan(r[0]))
|
584
593
|
|
585
|
-
|
594
|
+
# ---------------------------------------------------------------------------
|
586
595
|
# test_math_sqrt
|
587
596
|
|
588
597
|
def test_math_sqrt(self):
|
@@ -591,7 +600,7 @@ class TestCudaMath(CUDATestCase):
|
|
591
600
|
self.unary_template_int64(math_sqrt, np.sqrt)
|
592
601
|
self.unary_template_uint64(math_sqrt, np.sqrt)
|
593
602
|
|
594
|
-
|
603
|
+
# ---------------------------------------------------------------------------
|
595
604
|
# test_math_hypot
|
596
605
|
|
597
606
|
def test_math_hypot(self):
|
@@ -600,7 +609,7 @@ class TestCudaMath(CUDATestCase):
|
|
600
609
|
self.binary_template_int64(math_hypot, np.hypot)
|
601
610
|
self.binary_template_uint64(math_hypot, np.hypot)
|
602
611
|
|
603
|
-
|
612
|
+
# ---------------------------------------------------------------------------
|
604
613
|
# test_math_pow
|
605
614
|
|
606
615
|
def pow_template_int32(self, npdtype):
|
@@ -626,14 +635,14 @@ class TestCudaMath(CUDATestCase):
|
|
626
635
|
self.pow_template_int32(np.float32)
|
627
636
|
self.pow_template_int32(np.float64)
|
628
637
|
|
629
|
-
|
638
|
+
# ---------------------------------------------------------------------------
|
630
639
|
# test_math_pow_binop
|
631
640
|
|
632
641
|
def test_math_pow_binop(self):
|
633
642
|
self.binary_template_float32(math_pow_binop, np.power)
|
634
643
|
self.binary_template_float64(math_pow_binop, np.power)
|
635
644
|
|
636
|
-
|
645
|
+
# ---------------------------------------------------------------------------
|
637
646
|
# test_math_ceil
|
638
647
|
|
639
648
|
def test_math_ceil(self):
|
@@ -642,7 +651,7 @@ class TestCudaMath(CUDATestCase):
|
|
642
651
|
self.unary_template_int64(math_ceil, np.ceil)
|
643
652
|
self.unary_template_uint64(math_ceil, np.ceil)
|
644
653
|
|
645
|
-
|
654
|
+
# ---------------------------------------------------------------------------
|
646
655
|
# test_math_floor
|
647
656
|
|
648
657
|
def test_math_floor(self):
|
@@ -651,7 +660,7 @@ class TestCudaMath(CUDATestCase):
|
|
651
660
|
self.unary_template_int64(math_floor, np.floor)
|
652
661
|
self.unary_template_uint64(math_floor, np.floor)
|
653
662
|
|
654
|
-
|
663
|
+
# ---------------------------------------------------------------------------
|
655
664
|
# test_math_trunc
|
656
665
|
#
|
657
666
|
# Note that math.trunc() is only supported on NumPy float64s, and not
|
@@ -663,20 +672,20 @@ class TestCudaMath(CUDATestCase):
|
|
663
672
|
def test_math_trunc(self):
|
664
673
|
self.unary_template_float64(math_trunc, np.trunc)
|
665
674
|
|
666
|
-
@skip_on_cudasim(
|
675
|
+
@skip_on_cudasim("trunc only supported on NumPy float64")
|
667
676
|
def test_math_trunc_non_float64(self):
|
668
677
|
self.unary_template_float32(math_trunc, np.trunc)
|
669
678
|
self.unary_template_int64(math_trunc, np.trunc)
|
670
679
|
self.unary_template_uint64(math_trunc, np.trunc)
|
671
680
|
|
672
|
-
|
681
|
+
# ---------------------------------------------------------------------------
|
673
682
|
# test_math_copysign
|
674
683
|
|
675
684
|
def test_math_copysign(self):
|
676
685
|
self.binary_template_float32(math_copysign, np.copysign, start=-1)
|
677
686
|
self.binary_template_float64(math_copysign, np.copysign, start=-1)
|
678
687
|
|
679
|
-
|
688
|
+
# ---------------------------------------------------------------------------
|
680
689
|
# test_math_modf
|
681
690
|
|
682
691
|
def test_math_modf(self):
|
@@ -696,45 +705,53 @@ class TestCudaMath(CUDATestCase):
|
|
696
705
|
cfunc = cuda.jit((arytype, arytype, arytype))(math_modf)
|
697
706
|
cfunc[1, len(A)](A, B, C)
|
698
707
|
D, E = np.modf(A)
|
699
|
-
self.assertTrue(np.array_equal(B,D))
|
700
|
-
self.assertTrue(np.array_equal(C,E))
|
708
|
+
self.assertTrue(np.array_equal(B, D))
|
709
|
+
self.assertTrue(np.array_equal(C, E))
|
701
710
|
|
702
711
|
nelem = 50
|
703
|
-
#32 bit float
|
712
|
+
# 32 bit float
|
704
713
|
with self.subTest("float32 modf on simple float"):
|
705
|
-
modf_template_compare(
|
706
|
-
|
714
|
+
modf_template_compare(
|
715
|
+
np.linspace(0, 10, nelem), dtype=np.float32, arytype=float32[:]
|
716
|
+
)
|
707
717
|
with self.subTest("float32 modf on +- infinity"):
|
708
|
-
modf_template_compare(
|
709
|
-
|
718
|
+
modf_template_compare(
|
719
|
+
np.array([np.inf, -np.inf]),
|
720
|
+
dtype=np.float32,
|
721
|
+
arytype=float32[:],
|
722
|
+
)
|
710
723
|
with self.subTest("float32 modf on nan"):
|
711
724
|
modf_template_nan(dtype=np.float32, arytype=float32[:])
|
712
725
|
|
713
|
-
#64 bit float
|
726
|
+
# 64 bit float
|
714
727
|
with self.subTest("float64 modf on simple float"):
|
715
|
-
modf_template_compare(
|
716
|
-
|
728
|
+
modf_template_compare(
|
729
|
+
np.linspace(0, 10, nelem), dtype=np.float64, arytype=float64[:]
|
730
|
+
)
|
717
731
|
with self.subTest("float64 modf on +- infinity"):
|
718
|
-
modf_template_compare(
|
719
|
-
|
732
|
+
modf_template_compare(
|
733
|
+
np.array([np.inf, -np.inf]),
|
734
|
+
dtype=np.float64,
|
735
|
+
arytype=float64[:],
|
736
|
+
)
|
720
737
|
with self.subTest("float64 modf on nan"):
|
721
738
|
modf_template_nan(dtype=np.float64, arytype=float64[:])
|
722
739
|
|
723
|
-
|
740
|
+
# ---------------------------------------------------------------------------
|
724
741
|
# test_math_fmod
|
725
742
|
|
726
743
|
def test_math_fmod(self):
|
727
744
|
self.binary_template_float32(math_fmod, np.fmod, start=1)
|
728
745
|
self.binary_template_float64(math_fmod, np.fmod, start=1)
|
729
746
|
|
730
|
-
|
747
|
+
# ---------------------------------------------------------------------------
|
731
748
|
# test_math_mod_binop
|
732
749
|
|
733
750
|
def test_math_mod_binop(self):
|
734
751
|
self.binary_template_float32(math_mod_binop, np.fmod, start=1)
|
735
752
|
self.binary_template_float64(math_mod_binop, np.fmod, start=1)
|
736
753
|
|
737
|
-
|
754
|
+
# ---------------------------------------------------------------------------
|
738
755
|
# test_math_isnan
|
739
756
|
|
740
757
|
def test_math_isnan(self):
|
@@ -745,7 +762,7 @@ class TestCudaMath(CUDATestCase):
|
|
745
762
|
self.unary_bool_special_values_float32(math_isnan, np.isnan)
|
746
763
|
self.unary_bool_special_values_float64(math_isnan, np.isnan)
|
747
764
|
|
748
|
-
|
765
|
+
# ---------------------------------------------------------------------------
|
749
766
|
# test_math_isinf
|
750
767
|
|
751
768
|
def test_math_isinf(self):
|
@@ -756,7 +773,7 @@ class TestCudaMath(CUDATestCase):
|
|
756
773
|
self.unary_bool_special_values_float32(math_isinf, np.isinf)
|
757
774
|
self.unary_bool_special_values_float64(math_isinf, np.isinf)
|
758
775
|
|
759
|
-
|
776
|
+
# ---------------------------------------------------------------------------
|
760
777
|
# test_math_isfinite
|
761
778
|
|
762
779
|
def test_math_isfinite(self):
|
@@ -767,14 +784,14 @@ class TestCudaMath(CUDATestCase):
|
|
767
784
|
self.unary_bool_special_values_float32(math_isfinite, np.isfinite)
|
768
785
|
self.unary_bool_special_values_float64(math_isfinite, np.isfinite)
|
769
786
|
|
770
|
-
|
787
|
+
# ---------------------------------------------------------------------------
|
771
788
|
# test_math_degrees
|
772
789
|
|
773
790
|
def test_math_degrees(self):
|
774
791
|
self.unary_bool_template_float32(math_degrees, np.degrees)
|
775
792
|
self.unary_bool_template_float64(math_degrees, np.degrees)
|
776
793
|
|
777
|
-
|
794
|
+
# ---------------------------------------------------------------------------
|
778
795
|
# test_math_radians
|
779
796
|
|
780
797
|
def test_math_radians(self):
|
@@ -782,5 +799,5 @@ class TestCudaMath(CUDATestCase):
|
|
782
799
|
self.unary_bool_template_float64(math_radians, np.radians)
|
783
800
|
|
784
801
|
|
785
|
-
if __name__ ==
|
802
|
+
if __name__ == "__main__":
|
786
803
|
unittest.main()
|
@@ -15,9 +15,7 @@ SM_SIZE = (tpb, tpb)
|
|
15
15
|
|
16
16
|
|
17
17
|
class TestCudaMatMul(CUDATestCase):
|
18
|
-
|
19
18
|
def test_func(self):
|
20
|
-
|
21
19
|
@cuda.jit(void(float32[:, ::1], float32[:, ::1], float32[:, ::1]))
|
22
20
|
def cu_square_matrix_mul(A, B, C):
|
23
21
|
sA = cuda.shared.array(shape=SM_SIZE, dtype=float32)
|
@@ -70,5 +68,5 @@ class TestCudaMatMul(CUDATestCase):
|
|
70
68
|
np.testing.assert_allclose(C, Cans, rtol=1e-5)
|
71
69
|
|
72
70
|
|
73
|
-
if __name__ ==
|
71
|
+
if __name__ == "__main__":
|
74
72
|
unittest.main()
|
@@ -22,20 +22,21 @@ def builtin_min(A, B, C):
|
|
22
22
|
C[i] = float64(min(A[i], B[i]))
|
23
23
|
|
24
24
|
|
25
|
-
@skip_on_cudasim(
|
25
|
+
@skip_on_cudasim("Tests PTX emission")
|
26
26
|
class TestCudaMinMax(CUDATestCase):
|
27
27
|
def _run(
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
self,
|
29
|
+
kernel,
|
30
|
+
numpy_equivalent,
|
31
|
+
ptx_instruction,
|
32
|
+
dtype_left,
|
33
|
+
dtype_right,
|
34
|
+
n=5,
|
35
|
+
):
|
35
36
|
kernel = cuda.jit(kernel)
|
36
37
|
|
37
38
|
c = np.zeros(n, dtype=np.float64)
|
38
|
-
a = np.arange(n, dtype=dtype_left) + .5
|
39
|
+
a = np.arange(n, dtype=dtype_left) + 0.5
|
39
40
|
b = np.full(n, fill_value=2, dtype=dtype_right)
|
40
41
|
|
41
42
|
kernel[1, c.shape](a, b, c)
|
@@ -45,69 +46,29 @@ class TestCudaMinMax(CUDATestCase):
|
|
45
46
|
self.assertIn(ptx_instruction, ptx)
|
46
47
|
|
47
48
|
def test_max_f8f8(self):
|
48
|
-
self._run(
|
49
|
-
builtin_max,
|
50
|
-
np.maximum,
|
51
|
-
'max.f64',
|
52
|
-
np.float64,
|
53
|
-
np.float64)
|
49
|
+
self._run(builtin_max, np.maximum, "max.f64", np.float64, np.float64)
|
54
50
|
|
55
51
|
def test_max_f4f8(self):
|
56
|
-
self._run(
|
57
|
-
builtin_max,
|
58
|
-
np.maximum,
|
59
|
-
'max.f64',
|
60
|
-
np.float32,
|
61
|
-
np.float64)
|
52
|
+
self._run(builtin_max, np.maximum, "max.f64", np.float32, np.float64)
|
62
53
|
|
63
54
|
def test_max_f8f4(self):
|
64
|
-
self._run(
|
65
|
-
builtin_max,
|
66
|
-
np.maximum,
|
67
|
-
'max.f64',
|
68
|
-
np.float64,
|
69
|
-
np.float32)
|
55
|
+
self._run(builtin_max, np.maximum, "max.f64", np.float64, np.float32)
|
70
56
|
|
71
57
|
def test_max_f4f4(self):
|
72
|
-
self._run(
|
73
|
-
builtin_max,
|
74
|
-
np.maximum,
|
75
|
-
'max.f32',
|
76
|
-
np.float32,
|
77
|
-
np.float32)
|
58
|
+
self._run(builtin_max, np.maximum, "max.f32", np.float32, np.float32)
|
78
59
|
|
79
60
|
def test_min_f8f8(self):
|
80
|
-
self._run(
|
81
|
-
builtin_min,
|
82
|
-
np.minimum,
|
83
|
-
'min.f64',
|
84
|
-
np.float64,
|
85
|
-
np.float64)
|
61
|
+
self._run(builtin_min, np.minimum, "min.f64", np.float64, np.float64)
|
86
62
|
|
87
63
|
def test_min_f4f8(self):
|
88
|
-
self._run(
|
89
|
-
builtin_min,
|
90
|
-
np.minimum,
|
91
|
-
'min.f64',
|
92
|
-
np.float32,
|
93
|
-
np.float64)
|
64
|
+
self._run(builtin_min, np.minimum, "min.f64", np.float32, np.float64)
|
94
65
|
|
95
66
|
def test_min_f8f4(self):
|
96
|
-
self._run(
|
97
|
-
builtin_min,
|
98
|
-
np.minimum,
|
99
|
-
'min.f64',
|
100
|
-
np.float64,
|
101
|
-
np.float32)
|
67
|
+
self._run(builtin_min, np.minimum, "min.f64", np.float64, np.float32)
|
102
68
|
|
103
69
|
def test_min_f4f4(self):
|
104
|
-
self._run(
|
105
|
-
builtin_min,
|
106
|
-
np.minimum,
|
107
|
-
'min.f32',
|
108
|
-
np.float32,
|
109
|
-
np.float32)
|
70
|
+
self._run(builtin_min, np.minimum, "min.f32", np.float32, np.float32)
|
110
71
|
|
111
72
|
|
112
|
-
if __name__ ==
|
73
|
+
if __name__ == "__main__":
|
113
74
|
unittest.main()
|
@@ -5,11 +5,11 @@ from numba.cuda.testing import unittest, CUDATestCase
|
|
5
5
|
|
6
6
|
class TestCudaMonteCarlo(CUDATestCase):
|
7
7
|
def test_montecarlo(self):
|
8
|
-
"""Just make sure we can compile this
|
9
|
-
"""
|
8
|
+
"""Just make sure we can compile this"""
|
10
9
|
|
11
10
|
@cuda.jit(
|
12
|
-
|
11
|
+
"void(double[:], double[:], double, double, double, double[:])"
|
12
|
+
)
|
13
13
|
def step(last, paths, dt, c0, c1, normdist):
|
14
14
|
i = cuda.grid(1)
|
15
15
|
if i >= paths.shape[0]:
|
@@ -18,5 +18,5 @@ class TestCudaMonteCarlo(CUDATestCase):
|
|
18
18
|
paths[i] = last[i] * math.exp(c0 * dt + c1 * noise)
|
19
19
|
|
20
20
|
|
21
|
-
if __name__ ==
|
21
|
+
if __name__ == "__main__":
|
22
22
|
unittest.main()
|
@@ -47,7 +47,7 @@ class TestMultiGPUContext(CUDATestCase):
|
|
47
47
|
copy_plus_1[1, N](A, B)
|
48
48
|
check(A, B)
|
49
49
|
|
50
|
-
@skip_on_cudasim(
|
50
|
+
@skip_on_cudasim("Simulator does not support multiple threads")
|
51
51
|
def test_multithreaded(self):
|
52
52
|
def work(gpu, dA, results, ridx):
|
53
53
|
try:
|
@@ -64,9 +64,12 @@ class TestMultiGPUContext(CUDATestCase):
|
|
64
64
|
|
65
65
|
nthreads = 10
|
66
66
|
results = [None] * nthreads
|
67
|
-
threads = [
|
68
|
-
|
69
|
-
|
67
|
+
threads = [
|
68
|
+
threading.Thread(
|
69
|
+
target=work, args=(cuda.gpus.current, dA, results, i)
|
70
|
+
)
|
71
|
+
for i in range(nthreads)
|
72
|
+
]
|
70
73
|
for th in threads:
|
71
74
|
th.start()
|
72
75
|
|
@@ -81,7 +84,6 @@ class TestMultiGPUContext(CUDATestCase):
|
|
81
84
|
|
82
85
|
@unittest.skipIf(len(cuda.gpus) < 2, "need more than 1 gpus")
|
83
86
|
def test_with_context(self):
|
84
|
-
|
85
87
|
@cuda.jit
|
86
88
|
def vector_add_scalar(arr, val):
|
87
89
|
i = cuda.grid(1)
|
@@ -115,7 +117,7 @@ class TestMultiGPUContext(CUDATestCase):
|
|
115
117
|
with cuda.gpus[0]:
|
116
118
|
ctx = cuda.current_context()
|
117
119
|
if not ctx.can_access_peer(1):
|
118
|
-
self.skipTest(
|
120
|
+
self.skipTest("Peer access between GPUs disabled")
|
119
121
|
|
120
122
|
# 1. Create a range in an array
|
121
123
|
hostarr = np.arange(10, dtype=np.float32)
|
@@ -136,5 +138,5 @@ class TestMultiGPUContext(CUDATestCase):
|
|
136
138
|
np.testing.assert_equal(arr2.copy_to_host(), hostarr)
|
137
139
|
|
138
140
|
|
139
|
-
if __name__ ==
|
141
|
+
if __name__ == "__main__":
|
140
142
|
unittest.main()
|