numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.1.dist-info/METADATA +0 -10
- numba_cuda-0.0.1.dist-info/RECORD +0 -5
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
4
|
+
from numba.tests.support import captured_stdout
|
5
|
+
|
6
|
+
|
7
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestMonteCarlo(CUDATestCase):
    """
    Test monte-carlo integration

    The ``ex_montecarlo.*.begin`` / ``.end`` comments in the test body look
    like snippet markers for the documentation; keep them in place when
    editing.
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test suite
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        super().setUp()

    def tearDown(self):
        # No exception type, value, or traceback
        self._captured_stdout.__exit__(None, None, None)
        super().tearDown()

    def test_ex_montecarlo(self):
        """Integrate 1/x over [1, 2] and [2, 3] and compare to known values."""
        # ex_montecarlo.import.begin
        import numba
        import numpy as np
        from numba import cuda
        from numba.cuda.random import (
            create_xoroshiro128p_states,
            xoroshiro128p_uniform_float32,
        )
        # ex_montecarlo.import.end

        # ex_montecarlo.define.begin
        # number of samples, higher will lead to a more accurate answer
        nsamps = 1000000
        # ex_montecarlo.define.end

        # ex_montecarlo.kernel.begin
        @cuda.jit
        def mc_integrator_kernel(out, rng_states, lower_lim, upper_lim):
            """
            kernel to draw random samples and evaluate the function to
            be integrated at those sample values
            """
            size = len(out)

            gid = cuda.grid(1)
            if gid < size:
                # draw a sample between 0 and 1 on this thread
                samp = xoroshiro128p_uniform_float32(rng_states, gid)

                # normalize this sample to the limit range
                samp = samp * (upper_lim - lower_lim) + lower_lim

                # evaluate the function to be
                # integrated at the normalized
                # value of the sample
                # (`func` is defined further down, before the first launch)
                y = func(samp)
                out[gid] = y
        # ex_montecarlo.kernel.end

        # ex_montecarlo.callfunc.begin
        @cuda.reduce
        def sum_reduce(a, b):
            return a + b

        def mc_integrate(lower_lim, upper_lim, nsamps):
            """
            approximate the definite integral of `func` from
            `lower_lim` to `upper_lim` using `nsamps` random samples
            """
            out = cuda.to_device(np.zeros(nsamps, dtype="float32"))
            rng_states = create_xoroshiro128p_states(nsamps, seed=42)

            mc_integrator_kernel.forall(nsamps)(
                out, rng_states, lower_lim, upper_lim
            )
            # normalization factor to convert the sum of the samples
            # to the integral estimate: (b - a) / N, i.e. the interval
            # width times the mean of the N sampled function values
            factor = (upper_lim - lower_lim) / nsamps

            return sum_reduce(out) * factor
        # ex_montecarlo.callfunc.end

        # ex_montecarlo.launch.begin
        # define a function to integrate
        @numba.jit
        def func(x):
            return 1.0 / x

        mc_integrate(1, 2, nsamps)  # approximately 0.693 (exact: ln 2)
        mc_integrate(2, 3, nsamps)  # approximately 0.405 (exact: ln 1.5)
        # ex_montecarlo.launch.end

        # values computed independently using maple
        np.testing.assert_allclose(
            mc_integrate(1, 2, nsamps), 0.69315, atol=0.001
        )
        np.testing.assert_allclose(
            mc_integrate(2, 3, nsamps), 0.4055, atol=0.001
        )
|
106
|
+
|
107
|
+
|
108
|
+
if __name__ == "__main__":
|
109
|
+
unittest.main()
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Contents in this file are referenced from the sphinx-generated docs.
|
2
|
+
# "magictoken" is used for markers as beginning and ending of example text.
|
3
|
+
|
4
|
+
import unittest
|
5
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
6
|
+
|
7
|
+
|
8
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestRandom(CUDATestCase):
    """Documentation example: fill a 3D device array with random floats."""

    def test_ex_3d_grid(self):
        """Run the 3D-grid RNG example and sanity-check the generated data."""
        # magictoken.ex_3d_grid.begin
        from numba import cuda
        from numba.cuda.random import (create_xoroshiro128p_states,
                                       xoroshiro128p_uniform_float32)
        import numpy as np

        @cuda.jit
        def random_3d(arr, rng_states):
            # Where this thread starts along each axis, and how far the
            # whole grid advances per step along each axis
            x0, y0, z0 = cuda.grid(3)
            step_x, step_y, step_z = cuda.gridsize(3)

            # Flatten the 3D thread position into one index, used to pick
            # this thread's RNG state
            flat_id = x0 + step_x * (y0 + step_y * z0)

            # Walk the array in grid-sized steps, writing one random value
            # into every entry this thread visits
            for zi in range(z0, arr.shape[0], step_z):
                for yi in range(y0, arr.shape[1], step_y):
                    for xi in range(x0, arr.shape[2], step_x):
                        arr[zi, yi, xi] = xoroshiro128p_uniform_float32(
                            rng_states, flat_id
                        )

        # Shape of the array to fill
        shape = (701, 900, 719)

        # Launch configuration: threads per block and blocks per grid
        block_dim = (8, 8, 8)
        grid_dim = (16, 16, 16)

        # One RNG state is needed per launched thread
        threads_per_block = block_dim[0] * block_dim[1] * block_dim[2]
        blocks_per_grid = grid_dim[0] * grid_dim[1] * grid_dim[2]
        nthreads = threads_per_block * blocks_per_grid
        rng_states = create_xoroshiro128p_states(nthreads, seed=1)

        # Allocate the output on the device and fill it
        arr = cuda.device_array(shape, dtype=np.float32)
        random_3d[grid_dim, block_dim](arr, rng_states)
        # magictoken.ex_3d_grid.end

        # Basic checks: mean close to 0.5, every value within [0, 1]
        host_arr = arr.copy_to_host()
        mean = np.mean(host_arr)
        self.assertGreater(mean, 0.49)
        self.assertLess(mean, 0.51)
        self.assertTrue(np.all(host_arr <= 1.0))
        self.assertTrue(np.all(host_arr >= 0.0))
|
56
|
+
|
57
|
+
|
58
|
+
if __name__ == '__main__':
|
59
|
+
unittest.main()
|
@@ -0,0 +1,76 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
4
|
+
from numba.tests.support import captured_stdout
|
5
|
+
|
6
|
+
|
7
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestReduction(CUDATestCase):
    """
    Test shared memory reduction
    """

    def setUp(self):
        # Swallow anything the example prints so the test run stays quiet
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        super().setUp()

    def tearDown(self):
        # Leave the capture context as if no exception had occurred
        self._captured_stdout.__exit__(None, None, None)
        super().tearDown()

    def test_ex_reduction(self):
        """Sum 0..1023 on the device and compare with the host-side sum."""
        # ex_reduction.import.begin
        import numpy as np
        from numba import cuda
        from numba.types import int32
        # ex_reduction.import.end

        # ex_reduction.allocate.begin
        # generate data
        a = cuda.to_device(np.arange(1024))
        nelem = len(a)
        # ex_reduction.allocate.end

        # ex_reduction.kernel.begin
        @cuda.jit
        def array_sum(data):
            lane = cuda.threadIdx.x
            count = len(data)
            if lane < count:
                pos = cuda.grid(1)

                # Stage the input in a shared-memory scratch array
                scratch = cuda.shared.array(nelem, int32)
                scratch[lane] = data[pos]

                # All staged values must be visible to every
                # thread before any of them is combined
                cuda.syncthreads()

                offset = 1
                while offset < cuda.blockDim.x:
                    if lane % (2 * offset) == 0:
                        # Fold in the partial sum `offset` slots away
                        scratch[lane] += scratch[lane + offset]
                    offset *= 2
                    cuda.syncthreads()

                # Slot zero now holds the total; publish it
                if lane == 0:
                    data[lane] = scratch[lane]
        # ex_reduction.kernel.end

        # ex_reduction.launch.begin
        array_sum[1, nelem](a)
        print(a[0])                  # 523776
        print(sum(np.arange(1024)))  # 523776
        # ex_reduction.launch.end

        np.testing.assert_equal(a[0], sum(np.arange(1024)))
|
73
|
+
|
74
|
+
|
75
|
+
if __name__ == "__main__":
|
76
|
+
unittest.main()
|
@@ -0,0 +1,130 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from numba.cuda.testing import (CUDATestCase, skip_if_cudadevrt_missing,
|
4
|
+
skip_on_cudasim, skip_unless_cc_60,
|
5
|
+
skip_if_mvc_enabled)
|
6
|
+
from numba.tests.support import captured_stdout
|
7
|
+
|
8
|
+
|
9
|
+
@skip_if_cudadevrt_missing
@skip_unless_cc_60
@skip_if_mvc_enabled('CG not supported with MVC')
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestSessionization(CUDATestCase):
    """
    Test click stream sessionization
    """

    def setUp(self):
        # Prevent output from this test showing up when running the test suite
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        super().setUp()

    def tearDown(self):
        # No exception type, value, or traceback
        self._captured_stdout.__exit__(None, None, None)
        super().tearDown()

    def test_ex_sessionize(self):
        """Label each click with the index of the first click of its session."""
        # ex_sessionize.import.begin
        import numpy as np
        from numba import cuda

        # Set the timeout to one hour
        session_timeout = np.int64(np.timedelta64("3600", "s"))
        # ex_sessionize.import.end

        # ex_sessionize.allocate.begin
        # Generate data
        ids = cuda.to_device(
            np.array(
                [
                    1, 1, 1, 1, 1, 1,
                    2, 2, 2,
                    3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
                    4, 4, 4, 4, 4, 4, 4, 4, 4,
                ]
            )
        )
        sec = cuda.to_device(
            np.array(
                [
                    1, 2, 3, 5000, 5001, 5002, 1,
                    2, 3, 1, 2, 5000, 5001, 10000,
                    10001, 10002, 10003, 15000, 150001,
                    1, 5000, 50001, 15000, 20000,
                    25000, 25001, 25002, 25003,
                ],
                dtype="datetime64[ns]",
            ).astype(
                "int64"
            )  # Cast to int64 for compatibility
        )
        # Create a vector to hold the results
        results = cuda.to_device(np.zeros(len(ids)))
        # ex_sessionize.allocate.end

        # ex_sessionize.kernel.begin
        @cuda.jit
        def sessionize(user_id, timestamp, results):
            """
            Write into every element of `results` the index of the element
            that starts its session. `results` must be zero-filled on entry:
            zero is used as the "not yet labelled" marker.
            """
            gid = cuda.grid(1)
            size = len(user_id)

            if gid >= size:
                return

            # Determine session boundaries
            is_first_datapoint = gid == 0
            if not is_first_datapoint:
                new_user = user_id[gid] != user_id[gid - 1]
                timed_out = (
                    timestamp[gid] - timestamp[gid - 1] > session_timeout
                )
                is_sess_boundary = new_user or timed_out
            else:
                is_sess_boundary = True

            # Determine session labels
            if is_sess_boundary:
                # This thread marks the start of a session
                results[gid] = gid

                # Make sure all session boundaries are written
                # before populating the session id
                grid = cuda.cg.this_grid()
                grid.sync()

                look_ahead = 1
                # Check elements 'forward' of this one until a new
                # session boundary or the end of the data is found.
                # The bounds test comes first so that a session starting
                # on (or just before) the last element never reads past
                # the end of `results`, and an already-labelled last
                # element is never overwritten.
                while (
                    gid + look_ahead < size
                    and results[gid + look_ahead] == 0
                ):
                    results[gid + look_ahead] = gid
                    look_ahead += 1
        # ex_sessionize.kernel.end

        # ex_sessionize.launch.begin
        sessionize.forall(len(ids))(ids, sec, results)

        print(results.copy_to_host())
        # array([ 0.,  0.,  0.,  3.,  3.,  3.,
        #         6.,  6.,  6.,  9.,  9., 11.,
        #         11., 13., 13., 13., 13., 17.,
        #         18., 19., 20., 21., 21., 23.,
        #         24., 24., 24., 24.])
        # ex_sessionize.launch.end

        expect = [
            0, 0, 0, 3, 3, 3, 6, 6, 6, 9, 9,
            11, 11, 13, 13, 13, 13, 17, 18, 19, 20, 21,
            21, 23, 24, 24, 24, 24
        ]
        np.testing.assert_equal(expect, results.copy_to_host())
|
127
|
+
|
128
|
+
|
129
|
+
if __name__ == "__main__":
|
130
|
+
unittest.main()
|
@@ -0,0 +1,50 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
4
|
+
from numba.tests.support import captured_stdout
|
5
|
+
|
6
|
+
|
7
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestUFunc(CUDATestCase):
    """
    Test calling a UFunc
    """

    def setUp(self):
        # Swallow anything the example prints so
        # the test run stays quiet
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        super().setUp()

    def tearDown(self):
        # Leave the capture context as if no exception had occurred
        self._captured_stdout.__exit__(None, None, None)
        super().tearDown()

    def test_ex_cuda_ufunc_call(self):
        """Call ``np.sin`` from inside a kernel and compare with NumPy."""
        # ex_cuda_ufunc.begin
        import numpy as np
        from numba import cuda

        # Kernel that applies a ufunc (here: sin) to a whole array
        @cuda.jit
        def sin_kernel(out, values):
            # sin(values), element by element, stored into out
            np.sin(values, out)

        # Input values and an output buffer of the same shape and dtype
        x = np.arange(10, dtype=np.float32) - 5
        r = np.zeros_like(x)

        # One block of one thread runs the ufunc call
        sin_kernel[1, 1](r, x)

        # The values written by the kernel should agree with what
        # NumPy's own sin ufunc produces for the same input
        expected = np.sin(x)
        np.testing.assert_allclose(r, expected)
        # ex_cuda_ufunc.end
|
47
|
+
|
48
|
+
|
49
|
+
if __name__ == "__main__":
|
50
|
+
unittest.main()
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import unittest
|
2
|
+
|
3
|
+
from numba.cuda.testing import CUDATestCase, skip_on_cudasim
|
4
|
+
from numba.tests.support import captured_stdout
|
5
|
+
|
6
|
+
|
7
|
+
@skip_on_cudasim("cudasim doesn't support cuda import at non-top-level")
class TestVecAdd(CUDATestCase):
    """
    Test simple vector addition
    """

    def setUp(self):
        # Swallow anything the example prints so
        # the test run stays quiet
        self._captured_stdout = captured_stdout()
        self._captured_stdout.__enter__()
        super().setUp()

    def tearDown(self):
        # Leave the capture context as if no exception had occurred
        self._captured_stdout.__exit__(None, None, None)
        super().tearDown()

    def test_ex_vecadd(self):
        """Add two random vectors on the device using both launch styles."""
        # ex_vecadd.import.begin
        import numpy as np
        from numba import cuda
        # ex_vecadd.import.end

        # ex_vecadd.kernel.begin
        @cuda.jit
        def f(a, b, c):
            # global index of this thread, i.e.
            # threadIdx.x + (blockIdx.x * blockDim.x)
            idx = cuda.grid(1)
            length = len(c)

            # surplus threads past the end of the output do nothing
            if idx >= length:
                return

            c[idx] = a[idx] + b[idx]
        # ex_vecadd.kernel.end

        # Fixed seed so every run sees the same input data
        np.random.seed(1)

        # ex_vecadd.allocate.begin
        N = 100000
        a = cuda.to_device(np.random.random(N))
        b = cuda.to_device(np.random.random(N))
        c = cuda.device_array_like(a)
        # ex_vecadd.allocate.end

        # ex_vecadd.forall.begin
        f.forall(len(a))(a, b, c)
        print(c.copy_to_host())
        # ex_vecadd.forall.end

        # ex_vecadd.launch.begin
        # Block size large enough to hold several warps
        nthreads = 256
        # Number of blocks chosen so the grid spans the full vector
        nblocks = (len(a) // nthreads) + 1
        f[nblocks, nthreads](a, b, c)
        print(c.copy_to_host())
        # ex_vecadd.launch.end

        host_sum = a.copy_to_host() + b.copy_to_host()
        np.testing.assert_equal(c.copy_to_host(), host_sum)
|
70
|
+
|
71
|
+
|
72
|
+
if __name__ == "__main__":
|
73
|
+
unittest.main()
|