numba-cuda 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/compiler.py +35 -3
  3. numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
  4. numba_cuda/numba/cuda/cuda_paths.py +2 -0
  5. numba_cuda/numba/cuda/cudadecl.py +0 -42
  6. numba_cuda/numba/cuda/cudadrv/linkable_code.py +11 -2
  7. numba_cuda/numba/cuda/cudadrv/nvrtc.py +10 -3
  8. numba_cuda/numba/cuda/cudaimpl.py +0 -63
  9. numba_cuda/numba/cuda/debuginfo.py +92 -2
  10. numba_cuda/numba/cuda/decorators.py +27 -1
  11. numba_cuda/numba/cuda/device_init.py +4 -5
  12. numba_cuda/numba/cuda/dispatcher.py +4 -3
  13. numba_cuda/numba/cuda/extending.py +54 -0
  14. numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
  15. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
  16. numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +550 -387
  17. numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +465 -316
  18. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  19. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  20. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  21. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  22. numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
  23. numba_cuda/numba/cuda/intrinsics.py +172 -1
  24. numba_cuda/numba/cuda/lowering.py +43 -0
  25. numba_cuda/numba/cuda/stubs.py +0 -11
  26. numba_cuda/numba/cuda/target.py +28 -0
  27. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -2
  28. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +1 -1
  29. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
  30. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +1 -1
  31. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +46 -0
  32. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +18 -0
  33. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -2
  34. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +156 -0
  35. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  36. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +50 -5
  37. numba_cuda/numba/cuda/vector_types.py +3 -1
  38. numba_cuda/numba/cuda/vectorizers.py +1 -1
  39. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.1.dist-info}/METADATA +1 -1
  40. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.1.dist-info}/RECORD +43 -33
  41. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.1.dist-info}/WHEEL +1 -1
  42. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.1.dist-info}/licenses/LICENSE +0 -0
  43. {numba_cuda-0.9.0.dist-info → numba_cuda-0.10.1.dist-info}/top_level.txt +0 -0
@@ -35,15 +35,17 @@ if not config.ENABLE_CUDASIM:
35
35
  from numba.core import cgutils
36
36
  from numba.core.extending import (
37
37
  lower_builtin,
38
- make_attribute_wrapper,
39
38
  models,
40
- register_model,
41
39
  type_callable,
42
40
  typeof_impl,
43
41
  )
44
42
  from numba.core.typing.templates import AttributeTemplate
45
43
  from numba.cuda.cudadecl import registry as cuda_registry
46
44
  from numba.cuda.cudaimpl import lower_attr as cuda_lower_attr
45
+ from numba.cuda.extending import (
46
+ register_model,
47
+ make_attribute_wrapper,
48
+ )
47
49
 
48
50
  class IntervalType(types.Type):
49
51
  def __init__(self):
@@ -0,0 +1,156 @@
1
+ import re
2
+ import numpy as np
3
+ from numba import cuda, types
4
+ from numba.cuda.testing import (
5
+ unittest,
6
+ CUDATestCase,
7
+ skip_on_cudasim,
8
+ )
9
+
10
+
11
+ @skip_on_cudasim("Cudasim does not support inline and forceinline")
12
+ class TestCudaInline(CUDATestCase):
13
+ def _test_call_inline(self, inline):
14
+ """Test @cuda.jit(inline=...)"""
15
+ a = np.ones(2, dtype=np.int32)
16
+
17
+ sig = (types.int32[::1],)
18
+
19
+ @cuda.jit(inline=inline)
20
+ def set_zero(a):
21
+ a[0] = 0
22
+
23
+ @cuda.jit(sig)
24
+ def call_set_zero(a):
25
+ set_zero(a)
26
+
27
+ call_set_zero[1, 2](a)
28
+
29
+ expected = np.arange(2, dtype=np.int32)
30
+ self.assertTrue(np.all(a == expected))
31
+
32
+ llvm_ir = call_set_zero.inspect_llvm(sig)
33
+ pat = r"call [a-zA-Z0-9]* @"
34
+ match = re.compile(pat).search(llvm_ir)
35
+
36
+ if inline == "always" or inline is True:
37
+ # check that call was inlined
38
+ self.assertIsNone(match, msg=llvm_ir)
39
+ else:
40
+ assert inline == "never" or inline is False
41
+
42
+ # check that call was not inlined
43
+ self.assertIsNotNone(match, msg=llvm_ir)
44
+
45
+ # alwaysinline should not be in the IR when the inline kwarg is used
46
+ self.assertNotIn("alwaysinline", llvm_ir)
47
+
48
+ def test_call_inline_always(self):
49
+ self._test_call_inline("always")
50
+
51
+ def test_call_inline_never(self):
52
+ self._test_call_inline("never")
53
+
54
+ def test_call_inline_true(self):
55
+ self._test_call_inline(True)
56
+
57
+ def test_call_inline_false(self):
58
+ self._test_call_inline(False)
59
+
60
+ def _test_call_forceinline(self, forceinline):
61
+ """Test @cuda.jit(forceinline=...)"""
62
+ a = np.ones(2, dtype=np.int32)
63
+
64
+ sig = (types.int32[::1],)
65
+
66
+ @cuda.jit(forceinline=forceinline)
67
+ def set_zero(a):
68
+ a[0] = 0
69
+
70
+ @cuda.jit(sig)
71
+ def call_set_zero(a):
72
+ set_zero(a)
73
+
74
+ call_set_zero[1, 2](a)
75
+
76
+ expected = np.arange(2, dtype=np.int32)
77
+ self.assertTrue(np.all(a == expected))
78
+
79
+ llvm_ir = call_set_zero.inspect_llvm(sig)
80
+ pat = r"call [a-zA-Z0-9]* @"
81
+ match = re.compile(pat).search(llvm_ir)
82
+
83
+ # Check that call was not inlined at the Numba IR level - the call
84
+ # should still be present in the IR
85
+ self.assertIsNotNone(match)
86
+
87
+ # Check the definition of set_zero - it is a definition where the
88
+ # name does not include an underscore just before "set_zero", because
89
+ # that would match the "call_set_zero" definition
90
+ pat = r"define.*[^_]set_zero.*"
91
+ match = re.compile(pat).search(llvm_ir)
92
+ self.assertIsNotNone(match)
93
+ if forceinline:
94
+ self.assertIn("alwaysinline", match.group())
95
+ else:
96
+ self.assertNotIn("alwaysinline", match.group())
97
+
98
+ # The kernel, "call_set_zero", should never have "alwaysinline" set
99
+ pat = r"define.*call_set_zero.*"
100
+ match = re.compile(pat).search(llvm_ir)
101
+ self.assertIsNotNone(match)
102
+ self.assertNotIn("alwaysinline", match.group())
103
+
104
+ def test_call_forceinline_true(self):
105
+ self._test_call_forceinline(True)
106
+
107
+ def test_call_forceinline_false(self):
108
+ self._test_call_forceinline(False)
109
+
110
+ def test_compile_forceinline_ltoir_only(self):
111
+ def set_zero(a):
112
+ a[0] = 0
113
+
114
+ args = (types.float32[::1],)
115
+ msg = r"Can only designate forced inlining in LTO-IR"
116
+ with self.assertRaisesRegex(ValueError, msg):
117
+ cuda.compile(
118
+ set_zero,
119
+ args,
120
+ device=True,
121
+ forceinline=True,
122
+ )
123
+
124
+ def _compile_set_zero(self, forceinline):
125
+ def set_zero(a):
126
+ a[0] = 0
127
+
128
+ args = (types.float32[::1],)
129
+ ltoir, resty = cuda.compile(
130
+ set_zero,
131
+ args,
132
+ device=True,
133
+ output="ltoir",
134
+ forceinline=forceinline,
135
+ )
136
+
137
+ # Sanity check
138
+ self.assertEqual(resty, types.none)
139
+
140
+ return ltoir
141
+
142
+ def test_compile_forceinline(self):
143
+ ltoir_noinline = self._compile_set_zero(False)
144
+ ltoir_forceinline = self._compile_set_zero(True)
145
+
146
+ # As LTO-IR is opaque, the best we can do is check that changing the
147
+ # flag resulted in a change in the generated LTO-IR in some way.
148
+ self.assertNotEqual(
149
+ ltoir_noinline,
150
+ ltoir_forceinline,
151
+ "forceinline flag appeared to have no effect on LTO-IR",
152
+ )
153
+
154
+
155
+ if __name__ == "__main__":
156
+ unittest.main()
@@ -14,7 +14,7 @@ SM_SIZE = tpb, tpb
14
14
 
15
15
  class TestCudaLaplace(CUDATestCase):
16
16
  def test_laplace_small(self):
17
- @cuda.jit(float64(float64, float64), device=True, inline=True)
17
+ @cuda.jit(float64(float64, float64), device=True, inline="always")
18
18
  def get_max(a, b):
19
19
  if a > b:
20
20
  return a
@@ -1,6 +1,9 @@
1
+ import re
2
+
1
3
  import numpy as np
2
4
  from numba import cuda, int32, int64, float32, float64
3
5
  from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
6
+ from numba.cuda.compiler import compile_ptx
4
7
  from numba.core import config
5
8
 
6
9
 
@@ -144,6 +147,47 @@ class TestCudaWarpOperations(CUDATestCase):
144
147
  compiled[1, nelem](ary, xor)
145
148
  self.assertTrue(np.all(ary == exp))
146
149
 
150
+ def test_shfl_sync_const_mode_val(self):
151
+ # Test `mode` argument is constant in shfl_sync calls.
152
+ # Related to https://github.com/NVIDIA/numba-cuda/pull/231
153
+ subtest = [
154
+ (use_shfl_sync_idx, 4),
155
+ (use_shfl_sync_up, 4),
156
+ (use_shfl_sync_down, 4),
157
+ (use_shfl_sync_xor, 16),
158
+ ]
159
+
160
+ args_re = r"\((.*)\)"
161
+ m = re.compile(args_re)
162
+
163
+ for func, value in subtest:
164
+ with self.subTest(func=func.__name__):
165
+ compiled = cuda.jit("void(int32[:], int32)")(func)
166
+ nelem = 32
167
+ ary = np.empty(nelem, dtype=np.int32)
168
+ compiled[1, nelem](ary, value)
169
+ irs = next(iter(compiled.inspect_llvm().values()))
170
+
171
+ for ir in irs.split("\n"):
172
+ if "call" in ir and "llvm.nvvm.shfl.sync.i32" in ir:
173
+ args = m.search(ir).group(0)
174
+ arglist = args.split(",")
175
+ mode_arg = arglist[1]
176
+ self.assertNotIn("%", mode_arg)
177
+
178
+ def test_shfl_sync_const_mode_val_sm100(self):
179
+ # Test shfl_sync compiles with cc=(10, 0)
180
+ subtest = [
181
+ use_shfl_sync_idx,
182
+ use_shfl_sync_up,
183
+ use_shfl_sync_down,
184
+ use_shfl_sync_xor,
185
+ ]
186
+
187
+ for func in subtest:
188
+ with self.subTest(func=func.__name__):
189
+ compile_ptx(func, (int32[:], int32), cc=(10, 0))
190
+
147
191
  def test_shfl_sync_types(self):
148
192
  types = int32, int64, float32, float64
149
193
  values = (
@@ -153,11 +197,12 @@ class TestCudaWarpOperations(CUDATestCase):
153
197
  np.float64(np.pi),
154
198
  )
155
199
  for typ, val in zip(types, values):
156
- compiled = cuda.jit((typ[:], typ))(use_shfl_sync_with_val)
157
- nelem = 32
158
- ary = np.empty(nelem, dtype=val.dtype)
159
- compiled[1, nelem](ary, val)
160
- self.assertTrue(np.all(ary == val))
200
+ with self.subTest(typ=typ):
201
+ compiled = cuda.jit((typ[:], typ))(use_shfl_sync_with_val)
202
+ nelem = 32
203
+ ary = np.empty(nelem, dtype=val.dtype)
204
+ compiled[1, nelem](ary, val)
205
+ self.assertTrue(np.all(ary == val))
161
206
 
162
207
  def test_vote_sync_all(self):
163
208
  compiled = cuda.jit("void(int32[:], int32[:])")(use_vote_sync_all)
@@ -5,13 +5,15 @@ from typing import List, Tuple, Dict
5
5
 
6
6
  from numba import types
7
7
  from numba.core import cgutils
8
- from numba.core.extending import make_attribute_wrapper, models, register_model
8
+ from numba.core.extending import models
9
9
  from numba.core.imputils import Registry as ImplRegistry
10
10
  from numba.core.typing.templates import ConcreteTemplate
11
11
  from numba.core.typing.templates import Registry as TypingRegistry
12
12
  from numba.core.typing.templates import signature
13
13
  from numba.cuda import stubs
14
14
  from numba.cuda.errors import CudaLoweringError
15
+ from numba.cuda.extending import make_attribute_wrapper, register_model
16
+
15
17
 
16
18
  typing_registry = TypingRegistry()
17
19
  impl_registry = ImplRegistry()
@@ -206,7 +206,7 @@ def __vectorized_{name}({args}, __out__):
206
206
 
207
207
  class CUDAVectorize(deviceufunc.DeviceVectorize):
208
208
  def _compile_core(self, sig):
209
- cudevfn = cuda.jit(sig, device=True, inline=True)(self.pyfunc)
209
+ cudevfn = cuda.jit(sig, device=True, inline="always")(self.pyfunc)
210
210
  return cudevfn, cudevfn.overloads[sig.args].signature.return_type
211
211
 
212
212
  def _get_globals(self, corefn):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numba-cuda
3
- Version: 0.9.0
3
+ Version: 0.10.1
4
4
  Summary: CUDA target for Numba
5
5
  Author: Anaconda Inc., NVIDIA Corporation
6
6
  License: BSD 2-clause
@@ -1,6 +1,6 @@
1
1
  _numba_cuda_redirector.pth,sha256=cmfMMmV0JPh3yEpl4bGeM9AuXiVVMSo6Z_b7RaQL3XE,30
2
2
  _numba_cuda_redirector.py,sha256=n_r8MYbu5-vcXMnLJW147k8DnFXXvgb7nPIXnlXwTyQ,2659
3
- numba_cuda/VERSION,sha256=nYyU8a0-qWseKsSRT9pMuTx2tKPg2Mxt2JdtbAsifRU,6
3
+ numba_cuda/VERSION,sha256=9NQ54LUjIIoJ0ThiwWggzDAo_ZRBcxDOHVOjHRTWosQ,7
4
4
  numba_cuda/__init__.py,sha256=atXeUvJKR3JHcAiCFbXCVOJQUHgB1TulmsqSL_9RT3Q,114
5
5
  numba_cuda/_version.py,sha256=nzrrJXi85d18m6SPdsPsetJNClDETkmF1MrEhGLYDBs,734
6
6
  numba_cuda/numba/cuda/__init__.py,sha256=3siqMXEKqa9ezQ8RxPC3KMdebUjgJt-EKxxV4CX9818,607
@@ -9,30 +9,30 @@ numba_cuda/numba/cuda/api_util.py,sha256=jK8oUD3zf_D5IX7vbjc3uY_5kmOxwgEqO2m_lDH
9
9
  numba_cuda/numba/cuda/args.py,sha256=UlTHTJpwPeCtnW0Bb-Wetm5UO9TPR-PCgIt5ys8b8tQ,1894
10
10
  numba_cuda/numba/cuda/cg.py,sha256=azz1sIT_jXQfJEZfDjBeqboJc6Pu_NtrZxfE7D1eQLQ,1484
11
11
  numba_cuda/numba/cuda/codegen.py,sha256=4hAdztvCcpwVbWcl9b5zK9xu04f7mVMNAgekpfc-8uw,14049
12
- numba_cuda/numba/cuda/compiler.py,sha256=I4fviK5cuIr8zqXaJHTC0jDuH96E2IBRs58BqRQf0SU,24130
12
+ numba_cuda/numba/cuda/compiler.py,sha256=sFreZM07D8zp4QyUBL2IKoBtDjzdxj80wN4KUgEQOS8,25283
13
13
  numba_cuda/numba/cuda/cpp_function_wrappers.cu,sha256=8lUPmU6FURxphzEqkPLZRPYBCEK_wmDtHq2voPkckfs,950
14
- numba_cuda/numba/cuda/cuda_fp16.h,sha256=8Ss_QAi5Ij9Dv_o08ur-PovyLgH7VyDhdjsEqBf7xcI,126180
15
- numba_cuda/numba/cuda/cuda_fp16.hpp,sha256=ojrzEMG6WM7X3_jglFCKZ8-cORfHDJ53nGVCPpcJsZI,89127
16
- numba_cuda/numba/cuda/cuda_paths.py,sha256=RbYexjtLbhsywDc_eR1KGayBvZ3cBqwb4As0QKgtIAI,15812
17
- numba_cuda/numba/cuda/cudadecl.py,sha256=9R7T-d_8o67auSuXNQi4pI_sf64C9Ax4x6XKAMQgaw8,23313
18
- numba_cuda/numba/cuda/cudaimpl.py,sha256=eiF9KPhPAJUuWA_yB5ZXWYUweYLhTZ77TG5X84jtS88,38273
14
+ numba_cuda/numba/cuda/cuda_bf16.py,sha256=RfnWMV2_zSAW9FLN4JqfW6GfmWR8ZVO16e9Bw3jZnto,152203
15
+ numba_cuda/numba/cuda/cuda_paths.py,sha256=kMIJ_1yV2qtcKEM5rCgSDJ3Gz7bgxbfAWh54E5cDndg,15872
16
+ numba_cuda/numba/cuda/cudadecl.py,sha256=4DhYDnKg95AKsmDHetJvL1rfdvhnuz9PKS1Ncf4nO20,22343
17
+ numba_cuda/numba/cuda/cudaimpl.py,sha256=-a5dvGHORH4RypGliHqXvwG3Rc0CAJVntYGxoYHmbpc,35656
19
18
  numba_cuda/numba/cuda/cudamath.py,sha256=wbGjlyGVwcUAoQjgXIaAaasLdVuDSKHkf6KyID5IYBw,3979
20
- numba_cuda/numba/cuda/debuginfo.py,sha256=JCdtmIKGD8pob2lu2trwDavYovwsDqz6yQ6G0kuCGU8,1495
21
- numba_cuda/numba/cuda/decorators.py,sha256=W6S5G87Eo5c-y2nW8DbhNPhpSNNpd6bEGjRPbtHYyP8,8245
19
+ numba_cuda/numba/cuda/debuginfo.py,sha256=tWlRAC1-AsSQp0pG9kXQY9tlVdZPA-nDUJsrvru4eaM,4504
20
+ numba_cuda/numba/cuda/decorators.py,sha256=kqzbv7eEQSyQg2G_XtIyKIfvmm354jw2vZDlOmK-t9s,9454
22
21
  numba_cuda/numba/cuda/descriptor.py,sha256=t1rSVJSCAlVACC5_Un3FQ7iubdTTBe-euqz88cvs2tI,985
23
- numba_cuda/numba/cuda/device_init.py,sha256=lPh7zssW8q88B3oISb1muRq9unBY458u4VJeY3DveTM,3474
22
+ numba_cuda/numba/cuda/device_init.py,sha256=Rtwd6hQMHMLMkj6MXtndbWYFJfkIaRe0MwOIJF2nzhU,3449
24
23
  numba_cuda/numba/cuda/deviceufunc.py,sha256=zj9BbLiZD-dPttHew4olw8ANgR2nXnXEE9qjCeGLrQI,30731
25
- numba_cuda/numba/cuda/dispatcher.py,sha256=_lEKvUcystUwgMvEyT3lCuvi41OULn0VE3H36HQ21o8,44369
24
+ numba_cuda/numba/cuda/dispatcher.py,sha256=uX6ltCDQq9mIBqSHV6Ci-2mJtuAmeZXBb3yWp8gXZ2U,44426
26
25
  numba_cuda/numba/cuda/errors.py,sha256=WRso1Q_jCoWP5yrDBMhihRhhVtVo1-7KdN8QVE9j46o,1712
27
- numba_cuda/numba/cuda/extending.py,sha256=2g_YgNqTSnoe08s24XOnj5xNhLUsnS8JM96OpSJuj84,142
26
+ numba_cuda/numba/cuda/extending.py,sha256=VwuU5F0AQFlJsqaiwoWk-6Itihew1FsjVT_BVjhY8Us,2278
28
27
  numba_cuda/numba/cuda/initialize.py,sha256=0SnpjccQEYiWITIyfAJx833H1yhYFFDY42EpnwYyMn8,487
29
- numba_cuda/numba/cuda/intrinsic_wrapper.py,sha256=B7oUiPYxglegicKawNgYCEv7ddcC2gU7J9xK8HO1T0s,2239
30
- numba_cuda/numba/cuda/intrinsics.py,sha256=tmy4PqDuMkcUSxabMar-WGWaaiSWaoyyhY56wybQ2E8,6106
28
+ numba_cuda/numba/cuda/intrinsic_wrapper.py,sha256=-b7w5ywqW-upPG3WCKjRS9AWKzAmHKpie15BHnJd5vY,1075
29
+ numba_cuda/numba/cuda/intrinsics.py,sha256=G57JZ-DM-wgvJm8FeIpXErX1sQkkafP95Y1OG3CksR0,11896
31
30
  numba_cuda/numba/cuda/libdevice.py,sha256=jOeNrjs6K08lv7P6eENLIVUMP-WJRR86a7Hco1y1B94,61333
32
31
  numba_cuda/numba/cuda/libdevicedecl.py,sha256=xdZbb_rCaftMf8Pbw63g_Lr230N-1QoaYzBxq8udKTg,532
33
32
  numba_cuda/numba/cuda/libdevicefuncs.py,sha256=c80lGpGoFIYkAdgr4fzbxzdNCyJYrLdss64bwa0Mc6w,37471
34
33
  numba_cuda/numba/cuda/libdeviceimpl.py,sha256=m4Fog_OPPEg2RkOk7LEeqF26MK4aEFlKxITlSCZKMAo,2798
35
34
  numba_cuda/numba/cuda/locks.py,sha256=yF6WcwMyzauJ9H7JuCRq2Ynx7kFVAnlkkvmWp7UdZ5w,388
35
+ numba_cuda/numba/cuda/lowering.py,sha256=6XXpTRfTBTVHPh1M4jVAL9APvKk1UWSb-A5WJTEMsqQ,1602
36
36
  numba_cuda/numba/cuda/mathimpl.py,sha256=-8IOkhorbMg8iPBMIdgjk3qJZSyRWYJDwPAWrTMkODI,14356
37
37
  numba_cuda/numba/cuda/models.py,sha256=jbvmbL51mt0Z1nZTSiniBJTFhnOfPzzcVD6xCEpXDMA,1282
38
38
  numba_cuda/numba/cuda/nvvmutils.py,sha256=x-0nCqwkoB8DzX7bSrvTH0h-aKSDx0rVWKR7Eqx4ldA,7993
@@ -40,14 +40,14 @@ numba_cuda/numba/cuda/printimpl.py,sha256=AO_KvkKhlJacjoq8IV1nDm7YBNKnqN7SBkvTG1
40
40
  numba_cuda/numba/cuda/random.py,sha256=V30KaFdkuDyjxoP14awz-KkY3lRIXqIZuuH27UotINE,10451
41
41
  numba_cuda/numba/cuda/reshape_funcs.cu,sha256=frw1uoeMSYlkPC38LiKE8Tz2P70X2e4UZGyLKkaPzho,4326
42
42
  numba_cuda/numba/cuda/simulator_init.py,sha256=Hvzty6NJp1SeKspyb-b887xpeNLMMI0x9aPmV--X77E,450
43
- numba_cuda/numba/cuda/stubs.py,sha256=0IH4puoOizBShZV_bvYvXVCO0aSFdxckhwTdWm4zQuk,22389
44
- numba_cuda/numba/cuda/target.py,sha256=dtORdiwGyippoTOPvda_QTDd0YKk7-oLXXZvpt_c_HI,11285
43
+ numba_cuda/numba/cuda/stubs.py,sha256=mCS65wc4MuaDnL_XYYxkKbDOH991o6a0JsN9KrLNMGQ,22104
44
+ numba_cuda/numba/cuda/target.py,sha256=mSMnS-bSsC8_4KqkAsa1Byi2mO8jPJdKW3m31qxsxUE,12520
45
45
  numba_cuda/numba/cuda/testing.py,sha256=OR37AuDdzg7vLG4G_4s2uRAkNTScZc-BzHmTMJYuxhQ,6827
46
46
  numba_cuda/numba/cuda/types.py,sha256=hC1MUvgUwy-SLgbzFzXwssJzPR8BxQwqUcjwGJFzVac,1317
47
47
  numba_cuda/numba/cuda/ufuncs.py,sha256=AJifQgapyv62fdJeMm939R1I5TvIRmaA8dJ83Jy8DCw,23559
48
48
  numba_cuda/numba/cuda/utils.py,sha256=Bk9TZZerYrnAaeKjjAAYkbm6YoP0ptxcPrCysRi_nRI,631
49
- numba_cuda/numba/cuda/vector_types.py,sha256=8JYxlrR3EJTDiFYRcElopwvzVXZQslAPHLW9ZvWRJu0,6750
50
- numba_cuda/numba/cuda/vectorizers.py,sha256=4YFwbcJggu96raPhyHGUCeIWZi3VYfloZh7xlHUPakc,8383
49
+ numba_cuda/numba/cuda/vector_types.py,sha256=FlzOKufhvBnZ-VC-liA7y9is8BV-uj0fD-En_vP6zl0,6783
50
+ numba_cuda/numba/cuda/vectorizers.py,sha256=nEfQxjSA4oCX8ZzvoqjDRygDfwzxFVDXtnjx-K1aPqA,8387
51
51
  numba_cuda/numba/cuda/cudadrv/__init__.py,sha256=inat2K8K1OVrgDe64FK7CyRmyFyNKcNO4p2_L79yRZ0,201
52
52
  numba_cuda/numba/cuda/cudadrv/devicearray.py,sha256=6tF2TYnmjMbKk2fho1ONoD_QsRD9QVTT2kHP7x1u1J0,31556
53
53
  numba_cuda/numba/cuda/cudadrv/devices.py,sha256=k87EDIRhj1ncM9PxJCjZGPFfEks99vzmHlTc55GK5X0,8062
@@ -57,13 +57,21 @@ numba_cuda/numba/cuda/cudadrv/dummyarray.py,sha256=2jycZhniMy3ncoVWQG9D8dBehTEeo
57
57
  numba_cuda/numba/cuda/cudadrv/enums.py,sha256=raWKryxamWQZ5A8ivMpyYVhhwbSpaD9lu7l1_wl2W9M,23742
58
58
  numba_cuda/numba/cuda/cudadrv/error.py,sha256=C2tTPT5h3BGgzjaFTCqbY7hOk2PgkVh0iuM1EiRp1eI,583
59
59
  numba_cuda/numba/cuda/cudadrv/libs.py,sha256=qjknQxYXd2ucwDLQqzhWC_srNg6FnwvcVHIpKyPxJ9A,7287
60
- numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=ltsRRGFvuJ2nU5axf1rzVKR_EPRiImiz1q5-lYmZxJA,2256
60
+ numba_cuda/numba/cuda/cudadrv/linkable_code.py,sha256=bgXfXIVLx-R5BGr6aiORJ8uWakMl_2dh1SxDn9fH8EI,2582
61
61
  numba_cuda/numba/cuda/cudadrv/mappings.py,sha256=9uEs1KepeVGRbEpVhLjtxSsvZpZsbrHnPywmx--y88A,804
62
62
  numba_cuda/numba/cuda/cudadrv/ndarray.py,sha256=HtULWWFyDlgqvrH5459yyPTvU4UbUo2DSdtcNfvbH00,473
63
- numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=kkc4rElfZcvi3-lDoxd6DlYPDjQjKJGtgvQ1kS4_JnU,14096
63
+ numba_cuda/numba/cuda/cudadrv/nvrtc.py,sha256=6xtAR1af5BsBkDMJcQsTIUFFO02wwpfLClNIsh5L33Y,14324
64
64
  numba_cuda/numba/cuda/cudadrv/nvvm.py,sha256=7tTy6-VEbMBpDUmuSMnUwqPFfBndTh3aPq_n7nxhEA0,26344
65
65
  numba_cuda/numba/cuda/cudadrv/rtapi.py,sha256=J6PRGGK07XSLRzgCw5xs8VU5xVoqavvhojk1mxiQsi4,226
66
66
  numba_cuda/numba/cuda/cudadrv/runtime.py,sha256=CFumwg4iblWap_E7l7GM_hMYz1PsbH81-N0tZwFFooA,4372
67
+ numba_cuda/numba/cuda/include/11/cuda_bf16.h,sha256=Z7HGJEOhMjQzD0Gs0eq0qdzD-Wr8Zbty-FeeLtahN-s,138713
68
+ numba_cuda/numba/cuda/include/11/cuda_bf16.hpp,sha256=NQQkLp3doxSllrkNOjbl_fKigtGsKccNd1g-NJ7G-2k,101350
69
+ numba_cuda/numba/cuda/include/11/cuda_fp16.h,sha256=s77PdhSlN_b2KqymCX_gUwIc4JWcqOeAkvsgJDVQRhg,131916
70
+ numba_cuda/numba/cuda/include/11/cuda_fp16.hpp,sha256=evyhsAqX9BFZoLi4AtTjQuPebuXsmihSrm9fXVc1pKE,93578
71
+ numba_cuda/numba/cuda/include/12/cuda_bf16.h,sha256=B3m8XisQyXlu6dkJrgmWaiQYf9xWAJDNZK1qlCTIcBc,203796
72
+ numba_cuda/numba/cuda/include/12/cuda_bf16.hpp,sha256=mAeEsTy3N5ZrAM-r-7FTjP90sOLzFfKXY3CrFbaxceg,136542
73
+ numba_cuda/numba/cuda/include/12/cuda_fp16.h,sha256=5AlJ5h6KS8G-ecZWFzyJJhtLtSaFN3ahsevB7HwTnQ0,206162
74
+ numba_cuda/numba/cuda/include/12/cuda_fp16.hpp,sha256=o1ITDmuN67N8YUGUcvTpV3IdpS-6wwlm65M_H-8LYKs,120927
67
75
  numba_cuda/numba/cuda/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
76
  numba_cuda/numba/cuda/kernels/reduction.py,sha256=RsVubg8uNumxNxo9HBlFVCDicA-KZKsksKId0ktgQyY,9101
69
77
  numba_cuda/numba/cuda/kernels/transpose.py,sha256=FbtFmOqaj_e7ARR_kkiTpSvj4BJyqBta5ci1CWtJ690,2033
@@ -125,15 +133,16 @@ numba_cuda/numba/cuda/tests/cudadrv/test_streams.py,sha256=rrQEA8iawR6UyKnK2MdI5
125
133
  numba_cuda/numba/cuda/tests/cudapy/__init__.py,sha256=43EXdiXXRBd6yIcVGMrU9F_EJCD9Uw3mzOP3SB53AEE,260
126
134
  numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py,sha256=FnvjeqTZ-YBmroHctPrHgMHxnJ-HiT9KI79aHTej5G8,5840
127
135
  numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py,sha256=9CbjosLNPN5IzrD-15sD_4B0BMmjo02Y7faZiS82cyk,1143
128
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py,sha256=UATKkU16Ki5To99XFQ7tAT2DoMbB0ECDQuX3HtGwBh4,1606
136
+ numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py,sha256=2i_xq4B1t1tctr6ZrWA29ZHkmQlD_vCSewhr-AT9tMc,1651
129
137
  numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx,sha256=PKVafUhDH1SKRWXkt4N3v8SDMh4RyDFiJM-CMksa5uc,519
130
138
  numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py,sha256=wrWx8AeRhBHM74iYPKKrZqiyWrYCtQU3J-g3Zv7JmoY,1782
131
139
  numba_cuda/numba/cuda/tests/cudapy/test_alignment.py,sha256=RkhAcVkGtze8JpZTlYYvqTesDYE7xfKQZd1izgxDQpU,1219
132
140
  numba_cuda/numba/cuda/tests/cudapy/test_array.py,sha256=lT7XWXl0_lqtXyyXN-w0cd0wH7EBklRElYDnHUM5G1I,13215
133
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py,sha256=5gw1bW782Xjk06oJ2eOggCNJc9qLC7noE7fnpbf8AnM,4978
141
+ numba_cuda/numba/cuda/tests/cudapy/test_array_args.py,sha256=iiFrt5Yn7gfheAGOYG2VBeWeuW3JlBhRLXNfSz4cHAA,4982
134
142
  numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py,sha256=SWa1MvpwG07yBkrFIUeM9pm3BIwUbhttMNBdUW-CpSM,969
135
143
  numba_cuda/numba/cuda/tests/cudapy/test_atomics.py,sha256=agsfUN3WOoh6ICAECtuMuxZNcKq5ivK30Ew3h_m76m0,57689
136
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py,sha256=fvmmlRg93F4_I1E2_ALe2laZsP3O7ncTzxMW-mDyMkM,4413
144
+ numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py,sha256=NYLa_e60NYc63X7japCAsjUS84lXn92k4_S_E6-sEX4,6779
145
+ numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py,sha256=0_wr6MSeHh0QVzPeH8SB7j0Nv_RrPAK01hNoQ_dGT5I,4417
137
146
  numba_cuda/numba/cuda/tests/cudapy/test_boolean.py,sha256=j4mIOv4rJTLjJzpKk1O9UFLT41_iOQRtwsmteXdKZ-M,547
138
147
  numba_cuda/numba/cuda/tests/cudapy/test_caching.py,sha256=qbNisdxvoErKlDkD5dw7IkdJhfcQUpIdfHX11UzGBOo,18990
139
148
  numba_cuda/numba/cuda/tests/cudapy/test_casting.py,sha256=3LaN3ZsSuOZXAZXCV85wYyhh0ih7JqABnjGTa7Y2YBE,8748
@@ -148,13 +157,13 @@ numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py,sha256=RXCNHAZM3
148
157
  numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py,sha256=8prL2FTiaajW-UHSL9al-nBniygOfpdAOT_Dkej4PWI,2138
149
158
  numba_cuda/numba/cuda/tests/cudapy/test_datetime.py,sha256=MnOeDWMz-rL3-07FsswM06Laxmm0KjTmTwhrP3rmchQ,3526
150
159
  numba_cuda/numba/cuda/tests/cudapy/test_debug.py,sha256=1P369s02AvGu7fSIEe_YxSgh3c6S72Aw1gRgmepDbQY,3383
151
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=FV3rsPrmbAJakNPXKYrVe-T2m3njPMxn8k9l7vLcZG4,11070
160
+ numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py,sha256=796d8Oa1ZV2mZ9LTcwR3g6_j5sjSBk7kZEHYMOXPBfU,12606
152
161
  numba_cuda/numba/cuda/tests/cudapy/test_device_func.py,sha256=LNGBZfqFGUtVVQeC6FcHo8T3DbG-j6AjeBwJmwp9HH4,13157
153
162
  numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py,sha256=Oc6CdI1j9Ad_wklHdIYSMytrzUpzK6oXD0BGe45sTwg,26636
154
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=2P5hRmI77UVRQXfovefN98VkAyH8t9n8CTKPKCIQt5A,3562
163
+ numba_cuda/numba/cuda/tests/cudapy/test_enums.py,sha256=Yxac6S5P6C8GN0kMwieL3dQb1uogOVZQEx969B0AMpM,4533
155
164
  numba_cuda/numba/cuda/tests/cudapy/test_errors.py,sha256=w6ipW9UIvUD_ZIt_6fQ-uJsHyKLyHVqv2bym-9vyGyY,2757
156
165
  numba_cuda/numba/cuda/tests/cudapy/test_exception.py,sha256=W5NF022DOOTaEjFmhfr8BnfhRXvYyXHiGwznQrm_9T4,5507
157
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py,sha256=UcQVHvT-cS3Fx1oPClWMJfnpXcU_UfyfnZ3IQ-O17Zk,4099
166
+ numba_cuda/numba/cuda/tests/cudapy/test_extending.py,sha256=2QWcl8yJvp0A22V8qItJuzVvmt5Ng1JdhqR7hn5XX0E,4144
158
167
  numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py,sha256=fiUoOiwWjctZNFN-DGw1A8eGfHLqNulo2OQ7v1DFS9o,8552
159
168
  numba_cuda/numba/cuda/tests/cudapy/test_forall.py,sha256=Ory5s-_9MauSCP2RuWUEmcGFvP0kS7ytV-3iYPFYR6o,1470
160
169
  numba_cuda/numba/cuda/tests/cudapy/test_freevar.py,sha256=JvWn7Lw137HI61mouKnPvDxZIqLppiCF_351osxQQYE,753
@@ -164,12 +173,13 @@ numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py,sha256=Rl35HQdN6J3ZPjSLIz2mFJx
164
173
  numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py,sha256=vAP2ggp2arBqJS8kNbGeC5jrZuYzLtFstgvxX0PI-I0,5322
165
174
  numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py,sha256=1USofSlavYFaVhP8oep5oJ-CLzXxYwkI3EtOkY6jrVw,2610
166
175
  numba_cuda/numba/cuda/tests/cudapy/test_idiv.py,sha256=tTy7hN2LJ4897UzO3EUxjuUzbBcs9QITHJu3s_eknq0,1054
176
+ numba_cuda/numba/cuda/tests/cudapy/test_inline.py,sha256=T7DHquV_4HuX5fFQQS3kcZzgifTzwYbMFiY7SgQzoLA,4584
167
177
  numba_cuda/numba/cuda/tests/cudapy/test_inspect.py,sha256=L9-62nPmiWC90PST5EZrnGdAcrsbhMS_mbEkwdDkFQ0,4901
168
178
  numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py,sha256=uQ0S_XXds-F9Z5GhuFYzRVXu5XYD1ULa-y55Wi92i5I,36726
169
179
  numba_cuda/numba/cuda/tests/cudapy/test_ipc.py,sha256=bNT6UZgsgeVWyzBrlKXucQW6IKcD6NEmbwV5cFhf-7I,10553
170
180
  numba_cuda/numba/cuda/tests/cudapy/test_iterators.py,sha256=WCRkQfkEnB0d9aj55dVvyQzD4QxrOLubnlKO0xTiNto,2343
171
181
  numba_cuda/numba/cuda/tests/cudapy/test_lang.py,sha256=TP1spLeJfmBKKrU7G3bvkhNPvVm-oQX134taQsZeNbE,1693
172
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py,sha256=_S3AiFK9ws-3nB8jUy6rrtvlcB7eUD5Ylx7RS3uApu8,3199
182
+ numba_cuda/numba/cuda/tests/cudapy/test_laplace.py,sha256=ZE5dOhI3NUZUyvygM480DfFwP8dlV-s1lhfXq6Lm2ro,3203
173
183
  numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py,sha256=YVGdHBh2FOYxICTVugN14VGldvJyzOsdAnbH8TCZqMI,6531
174
184
  numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py,sha256=HuJWaeRDzQV-91PwpILxsM1HjvlFJp_w9qVXhJTxYgw,6693
175
185
  numba_cuda/numba/cuda/tests/cudapy/test_localmem.py,sha256=1i2ECv31bFnSBO_pVgmwtSz-pxyww1qs1MLj3FtY_FA,5368
@@ -209,7 +219,7 @@ numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py,sha256=ouA7bMO8L87mzQ
209
219
  numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py,sha256=gNbVk8-Uv3jm795-zWyig2JXb4Jo4iB5hyiHH6YLCiE,987
210
220
  numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py,sha256=VqBNr8SdEhldCZLP8MdxMrE2CXJUc6yqhGNB_Aj20wE,933
211
221
  numba_cuda/numba/cuda/tests/cudapy/test_warning.py,sha256=7xHFLmVvYlQUoOdush4lDBWwrBDe8Z9vN0NYciNP5QA,5716
212
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py,sha256=LeNFBU3BYKysQgcJQ8R_LvXqAX9PbZgDHx5OzcuPhlk,8999
222
+ numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py,sha256=sDMvdudkePl10Vq7rco67vGL7IDpCBWdOGQuL0UPeG4,10602
213
223
  numba_cuda/numba/cuda/tests/cudasim/__init__.py,sha256=GdfSq6pRVSOQwmgNi7ZFQ5l0yg4-2gNar_0Rz0buUpM,157
214
224
  numba_cuda/numba/cuda/tests/cudasim/support.py,sha256=JjRrfrrLKS0V5p6GX6ibs6QTuFb1NanKfBQSgbLeiHs,114
215
225
  numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py,sha256=-GJCl2c063Ig6EUB8w5L_0GcmXzTLatGe_ddEzdnbgc,3177
@@ -246,8 +256,8 @@ numba_cuda/numba/cuda/tests/test_binary_generation/Makefile,sha256=P2WzCc5d64JGq
246
256
  numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py,sha256=SE5FrbZdkVrnzS0R62YPPyH25r6Jevd2nuB6HRJ3PZ0,5011
247
257
  numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu,sha256=cUf-t6ZM9MK_x7X_aKwsrKW1LdR97XcpR-qnYr5faOE,453
248
258
  numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu,sha256=q3oxZziT8KDodeNcEBiWULH6vMrHCWucmJmtrg8C0d0,128
249
- numba_cuda-0.9.0.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
250
- numba_cuda-0.9.0.dist-info/METADATA,sha256=-r7qoK8WPBoXcxCVrYFLo5pdjRaycv2UZ8cchnR00rA,1858
251
- numba_cuda-0.9.0.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
252
- numba_cuda-0.9.0.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
253
- numba_cuda-0.9.0.dist-info/RECORD,,
259
+ numba_cuda-0.10.1.dist-info/licenses/LICENSE,sha256=eHeYE-XjASmwbxfsP5AImgfzRwZurZGqH1f6OFwJ4io,1326
260
+ numba_cuda-0.10.1.dist-info/METADATA,sha256=nP_9oLjsU48Y-dOmumPuN2JsiapA9t5ViCU_paTk7Uw,1859
261
+ numba_cuda-0.10.1.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
262
+ numba_cuda-0.10.1.dist-info/top_level.txt,sha256=C50SsH-8tXDmt7I0Y3nlJYhS5s6pqWflCPdobe9vx2M,11
263
+ numba_cuda-0.10.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5