numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
File without changes
@@ -0,0 +1,67 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ This file provides internal compiler utilities that support certain special
6
+ operations with bytes and workarounds for limitations enforced in userland.
7
+ """
8
+
9
+ from numba.core.extending import intrinsic
10
+ from llvmlite import ir
11
+ from numba.core import types
12
+ from numba.cuda import cgutils
13
+
14
+
15
@intrinsic
def grab_byte(typingctx, data, offset):
    """Load the byte found at index ``offset`` from the pointer ``data``.

    The intrinsic is always typed as ``uint8(voidptr, intp)``; the types of
    the arguments supplied at typing time are not inspected.
    """

    def codegen(context, builder, signature, args):
        base, index = args
        # View the incoming pointer as ``i8*`` so the GEP below steps one
        # byte per index unit.
        byte_ptr = builder.bitcast(base, ir.IntType(8).as_pointer())
        return builder.load(builder.gep(byte_ptr, [index]))

    return types.uint8(types.voidptr, types.intp), codegen
26
+
27
+
28
@intrinsic
def grab_uint64_t(typingctx, data, offset):
    """Load a 64-bit unsigned integer from the pointer ``data``.

    The pointer is reinterpreted as ``i64*`` before indexing, so ``offset``
    selects the ``offset``-th 64-bit element rather than a byte position.
    The intrinsic is always typed as ``uint64(voidptr, intp)``.
    """

    def codegen(context, builder, signature, args):
        base, index = args
        # Indexing happens on the ``i64*`` view, i.e. in 8-byte strides.
        word_ptr = builder.bitcast(base, ir.IntType(64).as_pointer())
        return builder.load(builder.gep(word_ptr, [index]))

    return types.uint64(types.voidptr, types.intp), codegen
39
+
40
+
41
@intrinsic
def memcpy_region(typingctx, dst, dst_offset, src, src_offset, nbytes, align):
    """Copy nbytes from *(src + src_offset) to *(dst + dst_offset)

    Typed as ``void(voidptr, intp, voidptr, intp, intp, intp)``. Both
    offsets are applied with a GEP directly on the incoming pointers.
    """

    def codegen(context, builder, signature, args):
        dst_base, dst_off, src_base, src_off, count, alignment = args
        source = builder.gep(src_base, [src_off])
        target = builder.gep(dst_base, [dst_off])
        # NOTE(review): ``alignment`` is forwarded as the fifth positional
        # argument of ``raw_memcpy``; confirm against ``cgutils`` that this
        # parameter really is an alignment and not a per-item size.
        cgutils.raw_memcpy(builder, target, source, count, alignment)
        return context.get_dummy_value()

    pointer, integer = types.voidptr, types.intp
    sig = types.void(pointer, integer, pointer, integer, integer, integer)
    return sig, codegen
@@ -0,0 +1,66 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Exception handling intrinsics.
6
+ """
7
+
8
+ from numba.core import types, errors
9
+ from numba.cuda import cgutils
10
+ from numba.core.extending import intrinsic
11
+
12
+
13
@intrinsic
def exception_check(typingctx):
    """Intrinsic that checks whether an exception is raised.

    Takes no arguments and is typed as returning ``types.boolean``; the
    generated value is whatever ``context.nrt.eh_check(builder)`` yields.
    """

    def codegen(context, builder, signature, args):
        return context.nrt.eh_check(builder)

    return types.boolean(), codegen
23
+
24
+
25
@intrinsic
def mark_try_block(typingctx):
    """Intrinsic that marks where a *try* block begins.

    Takes no arguments and is typed as returning ``types.none``.
    """

    def codegen(context, builder, signature, args):
        # Delegate to the NRT helper, then hand back the context's dummy
        # value since there is no result to return.
        context.nrt.eh_try(builder)
        return context.get_dummy_value()

    return types.none(), codegen
36
+
37
+
38
@intrinsic
def end_try_block(typingctx):
    """Intrinsic that marks where a *try* block ends.

    Takes no arguments and is typed as returning ``types.none``.
    """

    def codegen(context, builder, signature, args):
        # Delegate to the NRT helper, then hand back the context's dummy
        # value since there is no result to return.
        context.nrt.eh_end_try(builder)
        return context.get_dummy_value()

    return types.none(), codegen
49
+
50
+
51
@intrinsic
def exception_match(typingctx, exc_value, exc_class):
    """Basically do ``isinstance(exc_value, exc_class)`` for exception objects.
    Used in ``except Exception:`` syntax.

    Only ``Exception`` itself is accepted as the class to match against; any
    other class is rejected at typing time with ``errors.UnsupportedError``.
    The generated code does not look at the value and yields a constant
    true bit.
    """
    only_supported = Exception
    if exc_class.exc_class is not only_supported:
        raise errors.UnsupportedError(
            "Exception matching is limited to {}".format(only_supported)
        )

    def codegen(context, builder, signature, args):
        # Intentionally always True.
        return cgutils.true_bit

    return types.boolean(exc_value, exc_class), codegen
@@ -0,0 +1,98 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Helpers to see the refcount information of an object
6
+ """
7
+
8
+ from llvmlite import ir
9
+
10
+ from numba.core import types
11
+ from numba.cuda import cgutils
12
+ from numba.core.extending import intrinsic
13
+
14
# LLVM building blocks for the meminfo record described below.
_word_type = ir.IntType(64)
_pointer_type = ir.IntType(8).as_pointer()

# Layout used when a meminfo pointer is reinterpreted by the helpers in this
# module; they read only the first field (the reference count).
_meminfo_struct_type = ir.LiteralStructType(
    (
        _word_type,  # size_t refct
        _pointer_type,  # dtor_function dtor
        _pointer_type,  # void *dtor_info
        _pointer_type,  # void *data
        _word_type,  # size_t size
    )
)
26
+
27
+
28
@intrinsic
def dump_refcount(typingctx, obj):
    """Dump the refcount of an object to stdout.

    Returns True if and only if object is reference-counted and NRT is enabled.
    One `` | <type> refct=<count> [<meminfo pointer>]`` entry is printed for
    every meminfo reported for the object.
    """

    def codegen(context, builder, signature, args):
        (value,) = args
        (objty,) = signature.args
        # (type, meminfo) pairs; left empty when NRT is disabled.
        pairs = []
        if context.enable_nrt:
            pairs.extend(context.nrt.get_meminfos(builder, objty, value))

        if not pairs:
            return cgutils.false_bit

        pyapi = context.get_python_api(builder)
        gil_state = pyapi.gil_ensure()
        pyapi.print_string("dump refct of {}".format(objty))

        meminfo_ptr_type = _meminfo_struct_type.as_pointer()
        int32 = ir.IntType(32)
        for memty, meminfo in pairs:
            miptr = builder.bitcast(meminfo, meminfo_ptr_type)
            # Field 0 of the meminfo struct is the reference count.
            refct = builder.load(cgutils.gep_inbounds(builder, miptr, 0, 0))

            pyapi.print_string(" | {} refct=".format(memty))
            # "%zu" is not portable, so the count is truncated to 32 bits,
            # which is sufficient for a debugging aid.
            printed = cgutils.snprintf_stackbuffer(
                builder, 30, "%d [%p]", builder.trunc(refct, int32), miptr
            )
            pyapi.sys_write_stdout(printed)

        pyapi.print_string(";\n")
        pyapi.gil_release(gil_state)
        return cgutils.true_bit

    return types.bool_(obj), codegen
70
+
71
+
72
@intrinsic
def get_refcount(typingctx, obj):
    """Get the current refcount of an object.

    FIXME: only handles the first object

    The counter is read from field 0 of the first meminfo reported for
    ``obj`` and truncated to 32 bits to match the ``int32`` return type.

    Raises ``numba.core.errors.UnsupportedError`` during code generation
    when no meminfo is available for ``obj`` (NRT disabled, or nothing is
    reported for the object's type).
    """

    def codegen(context, builder, signature, args):
        [obj] = args
        [ty] = signature.args
        # A sequence of (type, meminfo)
        meminfos = []
        if context.enable_nrt:
            meminfos.extend(context.nrt.get_meminfos(builder, ty, obj))

        if not meminfos:
            # Without a meminfo there is no counter to read; fail with an
            # explicit message instead of an IndexError from indexing an
            # empty list.
            from numba.core import errors

            raise errors.UnsupportedError(
                "cannot get the refcount of {}: no meminfo is available "
                "(NRT disabled or type is not reference-counted)".format(ty)
            )

        # Only the first meminfo is ever returned, so only generate code
        # for that one.
        _, mi = meminfos[0]
        miptr = builder.bitcast(mi, _meminfo_struct_type.as_pointer())
        refctptr = cgutils.gep_inbounds(builder, miptr, 0, 0)
        refct = builder.load(refctptr)
        return builder.trunc(refct, ir.IntType(32))

    sig = types.int32(obj)
    return sig, codegen