numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (172)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +1 -1
  3. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
  4. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
  5. numba_cuda/numba/cuda/api.py +6 -1
  6. numba_cuda/numba/cuda/bf16.py +285 -2
  7. numba_cuda/numba/cuda/cgutils.py +2 -2
  8. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  9. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  10. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  11. numba_cuda/numba/cuda/codegen.py +1 -1
  12. numba_cuda/numba/cuda/compiler.py +373 -30
  13. numba_cuda/numba/cuda/core/analysis.py +319 -0
  14. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  15. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  16. numba_cuda/numba/cuda/core/base.py +1289 -0
  17. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  18. numba_cuda/numba/cuda/core/caching.py +2 -2
  19. numba_cuda/numba/cuda/core/compiler.py +6 -14
  20. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  21. numba_cuda/numba/cuda/core/config.py +747 -0
  22. numba_cuda/numba/cuda/core/consts.py +124 -0
  23. numba_cuda/numba/cuda/core/cpu.py +370 -0
  24. numba_cuda/numba/cuda/core/environment.py +68 -0
  25. numba_cuda/numba/cuda/core/event.py +511 -0
  26. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  27. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  28. numba_cuda/numba/cuda/core/interpreter.py +48 -26
  29. numba_cuda/numba/cuda/core/ir_utils.py +15 -26
  30. numba_cuda/numba/cuda/core/options.py +262 -0
  31. numba_cuda/numba/cuda/core/postproc.py +249 -0
  32. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  33. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  34. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  35. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  36. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  37. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  38. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  39. numba_cuda/numba/cuda/core/ssa.py +496 -0
  40. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  41. numba_cuda/numba/cuda/core/tracing.py +231 -0
  42. numba_cuda/numba/cuda/core/transforms.py +952 -0
  43. numba_cuda/numba/cuda/core/typed_passes.py +738 -7
  44. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  45. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  46. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  47. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  48. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  49. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  50. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  51. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  52. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  53. numba_cuda/numba/cuda/cuda_paths.py +422 -246
  54. numba_cuda/numba/cuda/cudadecl.py +1 -1
  55. numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
  56. numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
  57. numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
  58. numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
  59. numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
  60. numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
  61. numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
  62. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
  63. numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
  64. numba_cuda/numba/cuda/cudaimpl.py +5 -1
  65. numba_cuda/numba/cuda/debuginfo.py +85 -2
  66. numba_cuda/numba/cuda/decorators.py +3 -3
  67. numba_cuda/numba/cuda/descriptor.py +3 -4
  68. numba_cuda/numba/cuda/deviceufunc.py +66 -2
  69. numba_cuda/numba/cuda/dispatcher.py +18 -39
  70. numba_cuda/numba/cuda/flags.py +141 -1
  71. numba_cuda/numba/cuda/fp16.py +0 -2
  72. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  73. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  74. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  75. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  76. numba_cuda/numba/cuda/lowering.py +7 -144
  77. numba_cuda/numba/cuda/mathimpl.py +2 -1
  78. numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
  79. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  80. numba_cuda/numba/cuda/models.py +9 -1
  81. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  82. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  83. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  84. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  85. numba_cuda/numba/cuda/nvvmutils.py +1 -1
  86. numba_cuda/numba/cuda/printimpl.py +12 -1
  87. numba_cuda/numba/cuda/random.py +1 -1
  88. numba_cuda/numba/cuda/serialize.py +1 -1
  89. numba_cuda/numba/cuda/simulator/__init__.py +1 -1
  90. numba_cuda/numba/cuda/simulator/api.py +1 -1
  91. numba_cuda/numba/cuda/simulator/compiler.py +4 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
  93. numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
  94. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
  95. numba_cuda/numba/cuda/target.py +35 -17
  96. numba_cuda/numba/cuda/testing.py +7 -19
  97. numba_cuda/numba/cuda/tests/__init__.py +1 -1
  98. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  99. numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
  100. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
  101. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
  102. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
  103. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
  104. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
  105. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
  106. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
  107. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
  108. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
  109. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  110. numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
  111. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
  112. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
  113. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
  114. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
  115. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  116. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
  117. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
  118. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  119. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
  120. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
  121. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
  122. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
  123. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
  124. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
  125. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
  126. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
  127. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
  128. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
  129. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
  130. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
  131. numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
  132. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
  133. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
  134. numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
  135. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
  136. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
  137. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
  138. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
  139. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  140. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
  141. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  142. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
  143. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
  144. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
  145. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
  146. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
  147. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
  148. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
  149. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
  150. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
  151. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
  152. numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
  153. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
  154. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
  155. numba_cuda/numba/cuda/tests/support.py +55 -15
  156. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  157. numba_cuda/numba/cuda/types.py +56 -0
  158. numba_cuda/numba/cuda/typing/__init__.py +9 -1
  159. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  160. numba_cuda/numba/cuda/typing/context.py +751 -0
  161. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  162. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  163. numba_cuda/numba/cuda/typing/templates.py +7 -6
  164. numba_cuda/numba/cuda/ufuncs.py +3 -3
  165. numba_cuda/numba/cuda/utils.py +6 -112
  166. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
  167. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
  168. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
  169. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
  170. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
  171. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
  172. {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/api.py +6 -1
@@ -11,7 +11,7 @@ import os
11
11
  import numpy as np
12
12
 
13
13
  from .cudadrv import devicearray, devices, driver
14
- from numba.core import config
14
+ from numba.cuda.core import config
15
15
  from numba.cuda.api_util import prepare_shape_strides_dtype
16
16
 
17
17
  # NDarray device helper
@@ -508,6 +508,11 @@ def close():
508
508
  Explicitly clears all contexts in the current thread, and destroys all
509
509
  contexts if the current thread is the main thread.
510
510
  """
511
+ # Must clear memsys object in case it has been used already
512
+ from .memory_management import rtsys
513
+
514
+ rtsys.close()
515
+
511
516
  devices.reset()
512
517
 
513
518
 
numba_cuda/numba/cuda/bf16.py +285 -2
@@ -2,8 +2,116 @@
2
2
  # SPDX-License-Identifier: BSD-2-Clause
3
3
 
4
4
  from numba.cuda._internal.cuda_bf16 import (
5
- _type_class___nv_bfloat16,
5
+ typing_registry,
6
+ target_registry,
6
7
  nv_bfloat16 as bfloat16,
8
+ # Arithmetic intrinsics
9
+ __habs as habs,
10
+ __hadd as hadd,
11
+ __hsub as hsub,
12
+ __hmul as hmul,
13
+ __hadd_rn as hadd_rn,
14
+ __hsub_rn as hsub_rn,
15
+ __hmul_rn as hmul_rn,
16
+ __hdiv as hdiv,
17
+ __hadd_sat as hadd_sat,
18
+ __hsub_sat as hsub_sat,
19
+ __hmul_sat as hmul_sat,
20
+ __hfma as hfma,
21
+ __hfma_sat as hfma_sat,
22
+ __hneg as hneg,
23
+ __hfma_relu as hfma_relu,
24
+ # Comparison intrinsics
25
+ __heq as heq,
26
+ __hne as hne,
27
+ __hge as hge,
28
+ __hgt as hgt,
29
+ __hle as hle,
30
+ __hlt as hlt,
31
+ __hmax as hmax,
32
+ __hmin as hmin,
33
+ __hmax_nan as hmax_nan,
34
+ __hmin_nan as hmin_nan,
35
+ __hisinf as hisinf,
36
+ __hisnan as hisnan,
37
+ # Unordered comparison intrinsics
38
+ __hequ as hequ,
39
+ __hneu as hneu,
40
+ __hgeu as hgeu,
41
+ __hgtu as hgtu,
42
+ __hleu as hleu,
43
+ __hltu as hltu,
44
+ # Precision conversion and data movement
45
+ # - floating-point family
46
+ __bfloat162float as bfloat162float,
47
+ __float2bfloat16 as float2bfloat16,
48
+ __double2bfloat16 as double2bfloat16,
49
+ __float2bfloat16_rn as float2bfloat16_rn,
50
+ __float2bfloat16_rz as float2bfloat16_rz,
51
+ __float2bfloat16_rd as float2bfloat16_rd,
52
+ __float2bfloat16_ru as float2bfloat16_ru,
53
+ # - char family
54
+ __bfloat162char_rz as bfloat162char_rz,
55
+ __bfloat162uchar_rz as bfloat162uchar_rz,
56
+ # - int family (signed 32-bit)
57
+ __int2bfloat16_rn as int2bfloat16_rn,
58
+ __int2bfloat16_rz as int2bfloat16_rz,
59
+ __int2bfloat16_rd as int2bfloat16_rd,
60
+ __int2bfloat16_ru as int2bfloat16_ru,
61
+ __bfloat162int_rn as bfloat162int_rn,
62
+ __bfloat162int_rz as bfloat162int_rz,
63
+ __bfloat162int_rd as bfloat162int_rd,
64
+ __bfloat162int_ru as bfloat162int_ru,
65
+ # - short family (signed 16-bit)
66
+ __short2bfloat16_rn as short2bfloat16_rn,
67
+ __short2bfloat16_rz as short2bfloat16_rz,
68
+ __short2bfloat16_rd as short2bfloat16_rd,
69
+ __short2bfloat16_ru as short2bfloat16_ru,
70
+ __bfloat162short_rn as bfloat162short_rn,
71
+ __bfloat162short_rz as bfloat162short_rz,
72
+ __bfloat162short_rd as bfloat162short_rd,
73
+ __bfloat162short_ru as bfloat162short_ru,
74
+ # - ushort family (unsigned 16-bit)
75
+ __ushort2bfloat16_rn as ushort2bfloat16_rn,
76
+ __ushort2bfloat16_rz as ushort2bfloat16_rz,
77
+ __ushort2bfloat16_rd as ushort2bfloat16_rd,
78
+ __ushort2bfloat16_ru as ushort2bfloat16_ru,
79
+ __bfloat162ushort_rn as bfloat162ushort_rn,
80
+ __bfloat162ushort_rz as bfloat162ushort_rz,
81
+ __bfloat162ushort_rd as bfloat162ushort_rd,
82
+ __bfloat162ushort_ru as bfloat162ushort_ru,
83
+ # - uint family (unsigned 32-bit)
84
+ __uint2bfloat16_rn as uint2bfloat16_rn,
85
+ __uint2bfloat16_rz as uint2bfloat16_rz,
86
+ __uint2bfloat16_rd as uint2bfloat16_rd,
87
+ __uint2bfloat16_ru as uint2bfloat16_ru,
88
+ __bfloat162uint_rn as bfloat162uint_rn,
89
+ __bfloat162uint_rz as bfloat162uint_rz,
90
+ __bfloat162uint_rd as bfloat162uint_rd,
91
+ __bfloat162uint_ru as bfloat162uint_ru,
92
+ # - ll family (signed 64-bit)
93
+ __ll2bfloat16_rn as ll2bfloat16_rn,
94
+ __ll2bfloat16_rz as ll2bfloat16_rz,
95
+ __ll2bfloat16_rd as ll2bfloat16_rd,
96
+ __ll2bfloat16_ru as ll2bfloat16_ru,
97
+ __bfloat162ll_rn as bfloat162ll_rn,
98
+ __bfloat162ll_rz as bfloat162ll_rz,
99
+ __bfloat162ll_rd as bfloat162ll_rd,
100
+ __bfloat162ll_ru as bfloat162ll_ru,
101
+ # - ull family (unsigned 64-bit)
102
+ __ull2bfloat16_rn as ull2bfloat16_rn,
103
+ __ull2bfloat16_rz as ull2bfloat16_rz,
104
+ __ull2bfloat16_rd as ull2bfloat16_rd,
105
+ __ull2bfloat16_ru as ull2bfloat16_ru,
106
+ __bfloat162ull_rn as bfloat162ull_rn,
107
+ __bfloat162ull_rz as bfloat162ull_rz,
108
+ __bfloat162ull_rd as bfloat162ull_rd,
109
+ __bfloat162ull_ru as bfloat162ull_ru,
110
+ # - bit reinterpret casts
111
+ __bfloat16_as_short as bfloat16_as_short,
112
+ __bfloat16_as_ushort as bfloat16_as_ushort,
113
+ __short_as_bfloat16 as short_as_bfloat16,
114
+ __ushort_as_bfloat16 as ushort_as_bfloat16,
7
115
  htrunc,
8
116
  hceil,
9
117
  hfloor,
@@ -28,7 +136,7 @@ import math
28
136
 
29
137
 
30
138
  def _make_unary(a, func):
31
- if isinstance(a, _type_class___nv_bfloat16):
139
+ if a == bfloat16:
32
140
  return lambda a: func(a)
33
141
 
34
142
 
@@ -92,9 +200,184 @@ try:
92
200
  except ImportError:
93
201
  pass
94
202
 
203
+ ## Public aliases using Numba/Numpy-style type names
204
+ # Floating-point
205
+ float32_to_bfloat16 = float2bfloat16
206
+ float64_to_bfloat16 = double2bfloat16
207
+ bfloat16_to_float32 = bfloat162float
208
+ float32_to_bfloat16_rn = float2bfloat16_rn
209
+ float32_to_bfloat16_rz = float2bfloat16_rz
210
+ float32_to_bfloat16_rd = float2bfloat16_rd
211
+ float32_to_bfloat16_ru = float2bfloat16_ru
212
+
213
+ # Char (8-bit)
214
+ bfloat16_to_int8_rz = bfloat162char_rz
215
+ bfloat16_to_uint8_rz = bfloat162uchar_rz
216
+
217
+ # Int16 / UInt16
218
+ int16_to_bfloat16_rn = short2bfloat16_rn
219
+ int16_to_bfloat16_rz = short2bfloat16_rz
220
+ int16_to_bfloat16_rd = short2bfloat16_rd
221
+ int16_to_bfloat16_ru = short2bfloat16_ru
222
+ bfloat16_to_int16_rn = bfloat162short_rn
223
+ bfloat16_to_int16_rz = bfloat162short_rz
224
+ bfloat16_to_int16_rd = bfloat162short_rd
225
+ bfloat16_to_int16_ru = bfloat162short_ru
226
+
227
+ uint16_to_bfloat16_rn = ushort2bfloat16_rn
228
+ uint16_to_bfloat16_rz = ushort2bfloat16_rz
229
+ uint16_to_bfloat16_rd = ushort2bfloat16_rd
230
+ uint16_to_bfloat16_ru = ushort2bfloat16_ru
231
+ bfloat16_to_uint16_rn = bfloat162ushort_rn
232
+ bfloat16_to_uint16_rz = bfloat162ushort_rz
233
+ bfloat16_to_uint16_rd = bfloat162ushort_rd
234
+ bfloat16_to_uint16_ru = bfloat162ushort_ru
235
+
236
+ # Int32 / UInt32
237
+ int32_to_bfloat16_rn = int2bfloat16_rn
238
+ int32_to_bfloat16_rz = int2bfloat16_rz
239
+ int32_to_bfloat16_rd = int2bfloat16_rd
240
+ int32_to_bfloat16_ru = int2bfloat16_ru
241
+ bfloat16_to_int32_rn = bfloat162int_rn
242
+ bfloat16_to_int32_rz = bfloat162int_rz
243
+ bfloat16_to_int32_rd = bfloat162int_rd
244
+ bfloat16_to_int32_ru = bfloat162int_ru
245
+
246
+ uint32_to_bfloat16_rn = uint2bfloat16_rn
247
+ uint32_to_bfloat16_rz = uint2bfloat16_rz
248
+ uint32_to_bfloat16_rd = uint2bfloat16_rd
249
+ uint32_to_bfloat16_ru = uint2bfloat16_ru
250
+ bfloat16_to_uint32_rn = bfloat162uint_rn
251
+ bfloat16_to_uint32_rz = bfloat162uint_rz
252
+ bfloat16_to_uint32_rd = bfloat162uint_rd
253
+ bfloat16_to_uint32_ru = bfloat162uint_ru
254
+
255
+ # Int64 / UInt64
256
+ int64_to_bfloat16_rn = ll2bfloat16_rn
257
+ int64_to_bfloat16_rz = ll2bfloat16_rz
258
+ int64_to_bfloat16_rd = ll2bfloat16_rd
259
+ int64_to_bfloat16_ru = ll2bfloat16_ru
260
+ bfloat16_to_int64_rn = bfloat162ll_rn
261
+ bfloat16_to_int64_rz = bfloat162ll_rz
262
+ bfloat16_to_int64_rd = bfloat162ll_rd
263
+ bfloat16_to_int64_ru = bfloat162ll_ru
264
+
265
+ uint64_to_bfloat16_rn = ull2bfloat16_rn
266
+ uint64_to_bfloat16_rz = ull2bfloat16_rz
267
+ uint64_to_bfloat16_rd = ull2bfloat16_rd
268
+ uint64_to_bfloat16_ru = ull2bfloat16_ru
269
+ bfloat16_to_uint64_rn = bfloat162ull_rn
270
+ bfloat16_to_uint64_rz = bfloat162ull_rz
271
+ bfloat16_to_uint64_rd = bfloat162ull_rd
272
+ bfloat16_to_uint64_ru = bfloat162ull_ru
273
+
274
+ # Bit reinterpret casts
275
+ bfloat16_as_int16 = bfloat16_as_short
276
+ bfloat16_as_uint16 = bfloat16_as_ushort
277
+ int16_as_bfloat16 = short_as_bfloat16
278
+ uint16_as_bfloat16 = ushort_as_bfloat16
95
279
 
96
280
  __all__ = [
281
+ "typing_registry",
282
+ "target_registry",
97
283
  "bfloat16",
284
+ # Arithmetic intrinsics
285
+ "habs",
286
+ "hadd",
287
+ "hsub",
288
+ "hmul",
289
+ "hadd_rn",
290
+ "hsub_rn",
291
+ "hmul_rn",
292
+ "hdiv",
293
+ "hadd_sat",
294
+ "hsub_sat",
295
+ "hmul_sat",
296
+ "hfma",
297
+ "hfma_sat",
298
+ "hneg",
299
+ "hfma_relu",
300
+ # Comparison intrinsics
301
+ "heq",
302
+ "hne",
303
+ "hge",
304
+ "hgt",
305
+ "hle",
306
+ "hlt",
307
+ "hmax",
308
+ "hmin",
309
+ "hmax_nan",
310
+ "hmin_nan",
311
+ "hisinf",
312
+ "hisnan",
313
+ "hequ",
314
+ "hneu",
315
+ "hgeu",
316
+ "hgtu",
317
+ "hleu",
318
+ "hltu",
319
+ # Precision conversion and data movement
320
+ "float32_to_bfloat16",
321
+ "float64_to_bfloat16",
322
+ "bfloat16_to_float32",
323
+ "float32_to_bfloat16_rn",
324
+ "float32_to_bfloat16_rz",
325
+ "float32_to_bfloat16_rd",
326
+ "float32_to_bfloat16_ru",
327
+ "bfloat16_to_int8_rz",
328
+ "bfloat16_to_uint8_rz",
329
+ "int16_to_bfloat16_rn",
330
+ "int16_to_bfloat16_rz",
331
+ "int16_to_bfloat16_rd",
332
+ "int16_to_bfloat16_ru",
333
+ "bfloat16_to_int16_rn",
334
+ "bfloat16_to_int16_rz",
335
+ "bfloat16_to_int16_rd",
336
+ "bfloat16_to_int16_ru",
337
+ "uint16_to_bfloat16_rn",
338
+ "uint16_to_bfloat16_rz",
339
+ "uint16_to_bfloat16_rd",
340
+ "uint16_to_bfloat16_ru",
341
+ "bfloat16_to_uint16_rn",
342
+ "bfloat16_to_uint16_rz",
343
+ "bfloat16_to_uint16_rd",
344
+ "bfloat16_to_uint16_ru",
345
+ "int32_to_bfloat16_rn",
346
+ "int32_to_bfloat16_rz",
347
+ "int32_to_bfloat16_rd",
348
+ "int32_to_bfloat16_ru",
349
+ "bfloat16_to_int32_rn",
350
+ "bfloat16_to_int32_rz",
351
+ "bfloat16_to_int32_rd",
352
+ "bfloat16_to_int32_ru",
353
+ "uint32_to_bfloat16_rn",
354
+ "uint32_to_bfloat16_rz",
355
+ "uint32_to_bfloat16_rd",
356
+ "uint32_to_bfloat16_ru",
357
+ "bfloat16_to_uint32_rn",
358
+ "bfloat16_to_uint32_rz",
359
+ "bfloat16_to_uint32_rd",
360
+ "bfloat16_to_uint32_ru",
361
+ "int64_to_bfloat16_rn",
362
+ "int64_to_bfloat16_rz",
363
+ "int64_to_bfloat16_rd",
364
+ "int64_to_bfloat16_ru",
365
+ "bfloat16_to_int64_rn",
366
+ "bfloat16_to_int64_rz",
367
+ "bfloat16_to_int64_rd",
368
+ "bfloat16_to_int64_ru",
369
+ "uint64_to_bfloat16_rn",
370
+ "uint64_to_bfloat16_rz",
371
+ "uint64_to_bfloat16_rd",
372
+ "uint64_to_bfloat16_ru",
373
+ "bfloat16_to_uint64_rn",
374
+ "bfloat16_to_uint64_rz",
375
+ "bfloat16_to_uint64_rd",
376
+ "bfloat16_to_uint64_ru",
377
+ "bfloat16_as_int16",
378
+ "bfloat16_as_uint16",
379
+ "int16_as_bfloat16",
380
+ "uint16_as_bfloat16",
98
381
  "htrunc",
99
382
  "hceil",
100
383
  "hfloor",
numba_cuda/numba/cuda/cgutils.py +2 -2
@@ -11,8 +11,8 @@ import functools
11
11
 
12
12
  from llvmlite import ir
13
13
 
14
- from numba.core import types, debuginfo
15
- from numba.cuda import config, utils
14
+ from numba.core import types
15
+ from numba.cuda import config, utils, debuginfo
16
16
  import numba.core.datamodel
17
17
 
18
18
 
numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
@@ -0,0 +1,21 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from . import cloudpickle
5
+ from .cloudpickle import * # noqa
6
+
7
+ __doc__ = cloudpickle.__doc__
8
+
9
+ __version__ = "3.1.1"
10
+
11
+ __all__ = [ # noqa
12
+ "__version__",
13
+ "Pickler",
14
+ "CloudPickler",
15
+ "dumps",
16
+ "loads",
17
+ "dump",
18
+ "load",
19
+ "register_pickle_by_value",
20
+ "unregister_pickle_by_value",
21
+ ]