numba-cuda 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numba-cuda might be problematic. Click here for more details.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +1 -1
- numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
- numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
- numba_cuda/numba/cuda/api.py +6 -1
- numba_cuda/numba/cuda/bf16.py +285 -2
- numba_cuda/numba/cuda/cgutils.py +2 -2
- numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
- numba_cuda/numba/cuda/codegen.py +1 -1
- numba_cuda/numba/cuda/compiler.py +373 -30
- numba_cuda/numba/cuda/core/analysis.py +319 -0
- numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
- numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
- numba_cuda/numba/cuda/core/base.py +1289 -0
- numba_cuda/numba/cuda/core/bytecode.py +727 -0
- numba_cuda/numba/cuda/core/caching.py +2 -2
- numba_cuda/numba/cuda/core/compiler.py +6 -14
- numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
- numba_cuda/numba/cuda/core/config.py +747 -0
- numba_cuda/numba/cuda/core/consts.py +124 -0
- numba_cuda/numba/cuda/core/cpu.py +370 -0
- numba_cuda/numba/cuda/core/environment.py +68 -0
- numba_cuda/numba/cuda/core/event.py +511 -0
- numba_cuda/numba/cuda/core/funcdesc.py +330 -0
- numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
- numba_cuda/numba/cuda/core/interpreter.py +48 -26
- numba_cuda/numba/cuda/core/ir_utils.py +15 -26
- numba_cuda/numba/cuda/core/options.py +262 -0
- numba_cuda/numba/cuda/core/postproc.py +249 -0
- numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
- numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
- numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
- numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
- numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
- numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
- numba_cuda/numba/cuda/core/ssa.py +496 -0
- numba_cuda/numba/cuda/core/targetconfig.py +329 -0
- numba_cuda/numba/cuda/core/tracing.py +231 -0
- numba_cuda/numba/cuda/core/transforms.py +952 -0
- numba_cuda/numba/cuda/core/typed_passes.py +738 -7
- numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
- numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
- numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
- numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
- numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
- numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
- numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
- numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
- numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
- numba_cuda/numba/cuda/cuda_paths.py +422 -246
- numba_cuda/numba/cuda/cudadecl.py +1 -1
- numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
- numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
- numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
- numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
- numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
- numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
- numba_cuda/numba/cuda/cudaimpl.py +5 -1
- numba_cuda/numba/cuda/debuginfo.py +85 -2
- numba_cuda/numba/cuda/decorators.py +3 -3
- numba_cuda/numba/cuda/descriptor.py +3 -4
- numba_cuda/numba/cuda/deviceufunc.py +66 -2
- numba_cuda/numba/cuda/dispatcher.py +18 -39
- numba_cuda/numba/cuda/flags.py +141 -1
- numba_cuda/numba/cuda/fp16.py +0 -2
- numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/lowering.py +7 -144
- numba_cuda/numba/cuda/mathimpl.py +2 -1
- numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
- numba_cuda/numba/cuda/misc/findlib.py +75 -0
- numba_cuda/numba/cuda/models.py +9 -1
- numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
- numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
- numba_cuda/numba/cuda/np/numpy_support.py +553 -0
- numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
- numba_cuda/numba/cuda/nvvmutils.py +1 -1
- numba_cuda/numba/cuda/printimpl.py +12 -1
- numba_cuda/numba/cuda/random.py +1 -1
- numba_cuda/numba/cuda/serialize.py +1 -1
- numba_cuda/numba/cuda/simulator/__init__.py +1 -1
- numba_cuda/numba/cuda/simulator/api.py +1 -1
- numba_cuda/numba/cuda/simulator/compiler.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
- numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
- numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
- numba_cuda/numba/cuda/target.py +35 -17
- numba_cuda/numba/cuda/testing.py +4 -19
- numba_cuda/numba/cuda/tests/__init__.py +1 -1
- numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
- numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
- numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
- numba_cuda/numba/cuda/tests/support.py +55 -15
- numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
- numba_cuda/numba/cuda/types.py +56 -0
- numba_cuda/numba/cuda/typing/__init__.py +9 -1
- numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
- numba_cuda/numba/cuda/typing/context.py +751 -0
- numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
- numba_cuda/numba/cuda/typing/npydecl.py +658 -0
- numba_cuda/numba/cuda/typing/templates.py +7 -6
- numba_cuda/numba/cuda/ufuncs.py +3 -3
- numba_cuda/numba/cuda/utils.py +6 -112
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +2 -1
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/RECORD +170 -115
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.19.1.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/api.py
CHANGED
|
@@ -11,7 +11,7 @@ import os
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
|
|
13
13
|
from .cudadrv import devicearray, devices, driver
|
|
14
|
-
from numba.core import config
|
|
14
|
+
from numba.cuda.core import config
|
|
15
15
|
from numba.cuda.api_util import prepare_shape_strides_dtype
|
|
16
16
|
|
|
17
17
|
# NDarray device helper
|
|
@@ -508,6 +508,11 @@ def close():
|
|
|
508
508
|
Explicitly clears all contexts in the current thread, and destroys all
|
|
509
509
|
contexts if the current thread is the main thread.
|
|
510
510
|
"""
|
|
511
|
+
# Must clear memsys object in case it has been used already
|
|
512
|
+
from .memory_management import rtsys
|
|
513
|
+
|
|
514
|
+
rtsys.close()
|
|
515
|
+
|
|
511
516
|
devices.reset()
|
|
512
517
|
|
|
513
518
|
|
numba_cuda/numba/cuda/bf16.py
CHANGED
|
@@ -2,8 +2,116 @@
|
|
|
2
2
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
3
|
|
|
4
4
|
from numba.cuda._internal.cuda_bf16 import (
|
|
5
|
-
|
|
5
|
+
typing_registry,
|
|
6
|
+
target_registry,
|
|
6
7
|
nv_bfloat16 as bfloat16,
|
|
8
|
+
# Arithmetic intrinsics
|
|
9
|
+
__habs as habs,
|
|
10
|
+
__hadd as hadd,
|
|
11
|
+
__hsub as hsub,
|
|
12
|
+
__hmul as hmul,
|
|
13
|
+
__hadd_rn as hadd_rn,
|
|
14
|
+
__hsub_rn as hsub_rn,
|
|
15
|
+
__hmul_rn as hmul_rn,
|
|
16
|
+
__hdiv as hdiv,
|
|
17
|
+
__hadd_sat as hadd_sat,
|
|
18
|
+
__hsub_sat as hsub_sat,
|
|
19
|
+
__hmul_sat as hmul_sat,
|
|
20
|
+
__hfma as hfma,
|
|
21
|
+
__hfma_sat as hfma_sat,
|
|
22
|
+
__hneg as hneg,
|
|
23
|
+
__hfma_relu as hfma_relu,
|
|
24
|
+
# Comparison intrinsics
|
|
25
|
+
__heq as heq,
|
|
26
|
+
__hne as hne,
|
|
27
|
+
__hge as hge,
|
|
28
|
+
__hgt as hgt,
|
|
29
|
+
__hle as hle,
|
|
30
|
+
__hlt as hlt,
|
|
31
|
+
__hmax as hmax,
|
|
32
|
+
__hmin as hmin,
|
|
33
|
+
__hmax_nan as hmax_nan,
|
|
34
|
+
__hmin_nan as hmin_nan,
|
|
35
|
+
__hisinf as hisinf,
|
|
36
|
+
__hisnan as hisnan,
|
|
37
|
+
# Unordered comparison intrinsics
|
|
38
|
+
__hequ as hequ,
|
|
39
|
+
__hneu as hneu,
|
|
40
|
+
__hgeu as hgeu,
|
|
41
|
+
__hgtu as hgtu,
|
|
42
|
+
__hleu as hleu,
|
|
43
|
+
__hltu as hltu,
|
|
44
|
+
# Precision conversion and data movement
|
|
45
|
+
# - floating-point family
|
|
46
|
+
__bfloat162float as bfloat162float,
|
|
47
|
+
__float2bfloat16 as float2bfloat16,
|
|
48
|
+
__double2bfloat16 as double2bfloat16,
|
|
49
|
+
__float2bfloat16_rn as float2bfloat16_rn,
|
|
50
|
+
__float2bfloat16_rz as float2bfloat16_rz,
|
|
51
|
+
__float2bfloat16_rd as float2bfloat16_rd,
|
|
52
|
+
__float2bfloat16_ru as float2bfloat16_ru,
|
|
53
|
+
# - char family
|
|
54
|
+
__bfloat162char_rz as bfloat162char_rz,
|
|
55
|
+
__bfloat162uchar_rz as bfloat162uchar_rz,
|
|
56
|
+
# - int family (signed 32-bit)
|
|
57
|
+
__int2bfloat16_rn as int2bfloat16_rn,
|
|
58
|
+
__int2bfloat16_rz as int2bfloat16_rz,
|
|
59
|
+
__int2bfloat16_rd as int2bfloat16_rd,
|
|
60
|
+
__int2bfloat16_ru as int2bfloat16_ru,
|
|
61
|
+
__bfloat162int_rn as bfloat162int_rn,
|
|
62
|
+
__bfloat162int_rz as bfloat162int_rz,
|
|
63
|
+
__bfloat162int_rd as bfloat162int_rd,
|
|
64
|
+
__bfloat162int_ru as bfloat162int_ru,
|
|
65
|
+
# - short family (signed 16-bit)
|
|
66
|
+
__short2bfloat16_rn as short2bfloat16_rn,
|
|
67
|
+
__short2bfloat16_rz as short2bfloat16_rz,
|
|
68
|
+
__short2bfloat16_rd as short2bfloat16_rd,
|
|
69
|
+
__short2bfloat16_ru as short2bfloat16_ru,
|
|
70
|
+
__bfloat162short_rn as bfloat162short_rn,
|
|
71
|
+
__bfloat162short_rz as bfloat162short_rz,
|
|
72
|
+
__bfloat162short_rd as bfloat162short_rd,
|
|
73
|
+
__bfloat162short_ru as bfloat162short_ru,
|
|
74
|
+
# - ushort family (unsigned 16-bit)
|
|
75
|
+
__ushort2bfloat16_rn as ushort2bfloat16_rn,
|
|
76
|
+
__ushort2bfloat16_rz as ushort2bfloat16_rz,
|
|
77
|
+
__ushort2bfloat16_rd as ushort2bfloat16_rd,
|
|
78
|
+
__ushort2bfloat16_ru as ushort2bfloat16_ru,
|
|
79
|
+
__bfloat162ushort_rn as bfloat162ushort_rn,
|
|
80
|
+
__bfloat162ushort_rz as bfloat162ushort_rz,
|
|
81
|
+
__bfloat162ushort_rd as bfloat162ushort_rd,
|
|
82
|
+
__bfloat162ushort_ru as bfloat162ushort_ru,
|
|
83
|
+
# - uint family (unsigned 32-bit)
|
|
84
|
+
__uint2bfloat16_rn as uint2bfloat16_rn,
|
|
85
|
+
__uint2bfloat16_rz as uint2bfloat16_rz,
|
|
86
|
+
__uint2bfloat16_rd as uint2bfloat16_rd,
|
|
87
|
+
__uint2bfloat16_ru as uint2bfloat16_ru,
|
|
88
|
+
__bfloat162uint_rn as bfloat162uint_rn,
|
|
89
|
+
__bfloat162uint_rz as bfloat162uint_rz,
|
|
90
|
+
__bfloat162uint_rd as bfloat162uint_rd,
|
|
91
|
+
__bfloat162uint_ru as bfloat162uint_ru,
|
|
92
|
+
# - ll family (signed 64-bit)
|
|
93
|
+
__ll2bfloat16_rn as ll2bfloat16_rn,
|
|
94
|
+
__ll2bfloat16_rz as ll2bfloat16_rz,
|
|
95
|
+
__ll2bfloat16_rd as ll2bfloat16_rd,
|
|
96
|
+
__ll2bfloat16_ru as ll2bfloat16_ru,
|
|
97
|
+
__bfloat162ll_rn as bfloat162ll_rn,
|
|
98
|
+
__bfloat162ll_rz as bfloat162ll_rz,
|
|
99
|
+
__bfloat162ll_rd as bfloat162ll_rd,
|
|
100
|
+
__bfloat162ll_ru as bfloat162ll_ru,
|
|
101
|
+
# - ull family (unsigned 64-bit)
|
|
102
|
+
__ull2bfloat16_rn as ull2bfloat16_rn,
|
|
103
|
+
__ull2bfloat16_rz as ull2bfloat16_rz,
|
|
104
|
+
__ull2bfloat16_rd as ull2bfloat16_rd,
|
|
105
|
+
__ull2bfloat16_ru as ull2bfloat16_ru,
|
|
106
|
+
__bfloat162ull_rn as bfloat162ull_rn,
|
|
107
|
+
__bfloat162ull_rz as bfloat162ull_rz,
|
|
108
|
+
__bfloat162ull_rd as bfloat162ull_rd,
|
|
109
|
+
__bfloat162ull_ru as bfloat162ull_ru,
|
|
110
|
+
# - bit reinterpret casts
|
|
111
|
+
__bfloat16_as_short as bfloat16_as_short,
|
|
112
|
+
__bfloat16_as_ushort as bfloat16_as_ushort,
|
|
113
|
+
__short_as_bfloat16 as short_as_bfloat16,
|
|
114
|
+
__ushort_as_bfloat16 as ushort_as_bfloat16,
|
|
7
115
|
htrunc,
|
|
8
116
|
hceil,
|
|
9
117
|
hfloor,
|
|
@@ -28,7 +136,7 @@ import math
|
|
|
28
136
|
|
|
29
137
|
|
|
30
138
|
def _make_unary(a, func):
|
|
31
|
-
if
|
|
139
|
+
if a == bfloat16:
|
|
32
140
|
return lambda a: func(a)
|
|
33
141
|
|
|
34
142
|
|
|
@@ -92,9 +200,184 @@ try:
|
|
|
92
200
|
except ImportError:
|
|
93
201
|
pass
|
|
94
202
|
|
|
203
|
+
## Public aliases using Numba/Numpy-style type names
|
|
204
|
+
# Floating-point
|
|
205
|
+
float32_to_bfloat16 = float2bfloat16
|
|
206
|
+
float64_to_bfloat16 = double2bfloat16
|
|
207
|
+
bfloat16_to_float32 = bfloat162float
|
|
208
|
+
float32_to_bfloat16_rn = float2bfloat16_rn
|
|
209
|
+
float32_to_bfloat16_rz = float2bfloat16_rz
|
|
210
|
+
float32_to_bfloat16_rd = float2bfloat16_rd
|
|
211
|
+
float32_to_bfloat16_ru = float2bfloat16_ru
|
|
212
|
+
|
|
213
|
+
# Char (8-bit)
|
|
214
|
+
bfloat16_to_int8_rz = bfloat162char_rz
|
|
215
|
+
bfloat16_to_uint8_rz = bfloat162uchar_rz
|
|
216
|
+
|
|
217
|
+
# Int16 / UInt16
|
|
218
|
+
int16_to_bfloat16_rn = short2bfloat16_rn
|
|
219
|
+
int16_to_bfloat16_rz = short2bfloat16_rz
|
|
220
|
+
int16_to_bfloat16_rd = short2bfloat16_rd
|
|
221
|
+
int16_to_bfloat16_ru = short2bfloat16_ru
|
|
222
|
+
bfloat16_to_int16_rn = bfloat162short_rn
|
|
223
|
+
bfloat16_to_int16_rz = bfloat162short_rz
|
|
224
|
+
bfloat16_to_int16_rd = bfloat162short_rd
|
|
225
|
+
bfloat16_to_int16_ru = bfloat162short_ru
|
|
226
|
+
|
|
227
|
+
uint16_to_bfloat16_rn = ushort2bfloat16_rn
|
|
228
|
+
uint16_to_bfloat16_rz = ushort2bfloat16_rz
|
|
229
|
+
uint16_to_bfloat16_rd = ushort2bfloat16_rd
|
|
230
|
+
uint16_to_bfloat16_ru = ushort2bfloat16_ru
|
|
231
|
+
bfloat16_to_uint16_rn = bfloat162ushort_rn
|
|
232
|
+
bfloat16_to_uint16_rz = bfloat162ushort_rz
|
|
233
|
+
bfloat16_to_uint16_rd = bfloat162ushort_rd
|
|
234
|
+
bfloat16_to_uint16_ru = bfloat162ushort_ru
|
|
235
|
+
|
|
236
|
+
# Int32 / UInt32
|
|
237
|
+
int32_to_bfloat16_rn = int2bfloat16_rn
|
|
238
|
+
int32_to_bfloat16_rz = int2bfloat16_rz
|
|
239
|
+
int32_to_bfloat16_rd = int2bfloat16_rd
|
|
240
|
+
int32_to_bfloat16_ru = int2bfloat16_ru
|
|
241
|
+
bfloat16_to_int32_rn = bfloat162int_rn
|
|
242
|
+
bfloat16_to_int32_rz = bfloat162int_rz
|
|
243
|
+
bfloat16_to_int32_rd = bfloat162int_rd
|
|
244
|
+
bfloat16_to_int32_ru = bfloat162int_ru
|
|
245
|
+
|
|
246
|
+
uint32_to_bfloat16_rn = uint2bfloat16_rn
|
|
247
|
+
uint32_to_bfloat16_rz = uint2bfloat16_rz
|
|
248
|
+
uint32_to_bfloat16_rd = uint2bfloat16_rd
|
|
249
|
+
uint32_to_bfloat16_ru = uint2bfloat16_ru
|
|
250
|
+
bfloat16_to_uint32_rn = bfloat162uint_rn
|
|
251
|
+
bfloat16_to_uint32_rz = bfloat162uint_rz
|
|
252
|
+
bfloat16_to_uint32_rd = bfloat162uint_rd
|
|
253
|
+
bfloat16_to_uint32_ru = bfloat162uint_ru
|
|
254
|
+
|
|
255
|
+
# Int64 / UInt64
|
|
256
|
+
int64_to_bfloat16_rn = ll2bfloat16_rn
|
|
257
|
+
int64_to_bfloat16_rz = ll2bfloat16_rz
|
|
258
|
+
int64_to_bfloat16_rd = ll2bfloat16_rd
|
|
259
|
+
int64_to_bfloat16_ru = ll2bfloat16_ru
|
|
260
|
+
bfloat16_to_int64_rn = bfloat162ll_rn
|
|
261
|
+
bfloat16_to_int64_rz = bfloat162ll_rz
|
|
262
|
+
bfloat16_to_int64_rd = bfloat162ll_rd
|
|
263
|
+
bfloat16_to_int64_ru = bfloat162ll_ru
|
|
264
|
+
|
|
265
|
+
uint64_to_bfloat16_rn = ull2bfloat16_rn
|
|
266
|
+
uint64_to_bfloat16_rz = ull2bfloat16_rz
|
|
267
|
+
uint64_to_bfloat16_rd = ull2bfloat16_rd
|
|
268
|
+
uint64_to_bfloat16_ru = ull2bfloat16_ru
|
|
269
|
+
bfloat16_to_uint64_rn = bfloat162ull_rn
|
|
270
|
+
bfloat16_to_uint64_rz = bfloat162ull_rz
|
|
271
|
+
bfloat16_to_uint64_rd = bfloat162ull_rd
|
|
272
|
+
bfloat16_to_uint64_ru = bfloat162ull_ru
|
|
273
|
+
|
|
274
|
+
# Bit reinterpret casts
|
|
275
|
+
bfloat16_as_int16 = bfloat16_as_short
|
|
276
|
+
bfloat16_as_uint16 = bfloat16_as_ushort
|
|
277
|
+
int16_as_bfloat16 = short_as_bfloat16
|
|
278
|
+
uint16_as_bfloat16 = ushort_as_bfloat16
|
|
95
279
|
|
|
96
280
|
__all__ = [
|
|
281
|
+
"typing_registry",
|
|
282
|
+
"target_registry",
|
|
97
283
|
"bfloat16",
|
|
284
|
+
# Arithmetic intrinsics
|
|
285
|
+
"habs",
|
|
286
|
+
"hadd",
|
|
287
|
+
"hsub",
|
|
288
|
+
"hmul",
|
|
289
|
+
"hadd_rn",
|
|
290
|
+
"hsub_rn",
|
|
291
|
+
"hmul_rn",
|
|
292
|
+
"hdiv",
|
|
293
|
+
"hadd_sat",
|
|
294
|
+
"hsub_sat",
|
|
295
|
+
"hmul_sat",
|
|
296
|
+
"hfma",
|
|
297
|
+
"hfma_sat",
|
|
298
|
+
"hneg",
|
|
299
|
+
"hfma_relu",
|
|
300
|
+
# Comparison intrinsics
|
|
301
|
+
"heq",
|
|
302
|
+
"hne",
|
|
303
|
+
"hge",
|
|
304
|
+
"hgt",
|
|
305
|
+
"hle",
|
|
306
|
+
"hlt",
|
|
307
|
+
"hmax",
|
|
308
|
+
"hmin",
|
|
309
|
+
"hmax_nan",
|
|
310
|
+
"hmin_nan",
|
|
311
|
+
"hisinf",
|
|
312
|
+
"hisnan",
|
|
313
|
+
"hequ",
|
|
314
|
+
"hneu",
|
|
315
|
+
"hgeu",
|
|
316
|
+
"hgtu",
|
|
317
|
+
"hleu",
|
|
318
|
+
"hltu",
|
|
319
|
+
# Precision conversion and data movement
|
|
320
|
+
"float32_to_bfloat16",
|
|
321
|
+
"float64_to_bfloat16",
|
|
322
|
+
"bfloat16_to_float32",
|
|
323
|
+
"float32_to_bfloat16_rn",
|
|
324
|
+
"float32_to_bfloat16_rz",
|
|
325
|
+
"float32_to_bfloat16_rd",
|
|
326
|
+
"float32_to_bfloat16_ru",
|
|
327
|
+
"bfloat16_to_int8_rz",
|
|
328
|
+
"bfloat16_to_uint8_rz",
|
|
329
|
+
"int16_to_bfloat16_rn",
|
|
330
|
+
"int16_to_bfloat16_rz",
|
|
331
|
+
"int16_to_bfloat16_rd",
|
|
332
|
+
"int16_to_bfloat16_ru",
|
|
333
|
+
"bfloat16_to_int16_rn",
|
|
334
|
+
"bfloat16_to_int16_rz",
|
|
335
|
+
"bfloat16_to_int16_rd",
|
|
336
|
+
"bfloat16_to_int16_ru",
|
|
337
|
+
"uint16_to_bfloat16_rn",
|
|
338
|
+
"uint16_to_bfloat16_rz",
|
|
339
|
+
"uint16_to_bfloat16_rd",
|
|
340
|
+
"uint16_to_bfloat16_ru",
|
|
341
|
+
"bfloat16_to_uint16_rn",
|
|
342
|
+
"bfloat16_to_uint16_rz",
|
|
343
|
+
"bfloat16_to_uint16_rd",
|
|
344
|
+
"bfloat16_to_uint16_ru",
|
|
345
|
+
"int32_to_bfloat16_rn",
|
|
346
|
+
"int32_to_bfloat16_rz",
|
|
347
|
+
"int32_to_bfloat16_rd",
|
|
348
|
+
"int32_to_bfloat16_ru",
|
|
349
|
+
"bfloat16_to_int32_rn",
|
|
350
|
+
"bfloat16_to_int32_rz",
|
|
351
|
+
"bfloat16_to_int32_rd",
|
|
352
|
+
"bfloat16_to_int32_ru",
|
|
353
|
+
"uint32_to_bfloat16_rn",
|
|
354
|
+
"uint32_to_bfloat16_rz",
|
|
355
|
+
"uint32_to_bfloat16_rd",
|
|
356
|
+
"uint32_to_bfloat16_ru",
|
|
357
|
+
"bfloat16_to_uint32_rn",
|
|
358
|
+
"bfloat16_to_uint32_rz",
|
|
359
|
+
"bfloat16_to_uint32_rd",
|
|
360
|
+
"bfloat16_to_uint32_ru",
|
|
361
|
+
"int64_to_bfloat16_rn",
|
|
362
|
+
"int64_to_bfloat16_rz",
|
|
363
|
+
"int64_to_bfloat16_rd",
|
|
364
|
+
"int64_to_bfloat16_ru",
|
|
365
|
+
"bfloat16_to_int64_rn",
|
|
366
|
+
"bfloat16_to_int64_rz",
|
|
367
|
+
"bfloat16_to_int64_rd",
|
|
368
|
+
"bfloat16_to_int64_ru",
|
|
369
|
+
"uint64_to_bfloat16_rn",
|
|
370
|
+
"uint64_to_bfloat16_rz",
|
|
371
|
+
"uint64_to_bfloat16_rd",
|
|
372
|
+
"uint64_to_bfloat16_ru",
|
|
373
|
+
"bfloat16_to_uint64_rn",
|
|
374
|
+
"bfloat16_to_uint64_rz",
|
|
375
|
+
"bfloat16_to_uint64_rd",
|
|
376
|
+
"bfloat16_to_uint64_ru",
|
|
377
|
+
"bfloat16_as_int16",
|
|
378
|
+
"bfloat16_as_uint16",
|
|
379
|
+
"int16_as_bfloat16",
|
|
380
|
+
"uint16_as_bfloat16",
|
|
98
381
|
"htrunc",
|
|
99
382
|
"hceil",
|
|
100
383
|
"hfloor",
|
numba_cuda/numba/cuda/cgutils.py
CHANGED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
3
|
+
|
|
4
|
+
from . import cloudpickle
|
|
5
|
+
from .cloudpickle import * # noqa
|
|
6
|
+
|
|
7
|
+
__doc__ = cloudpickle.__doc__
|
|
8
|
+
|
|
9
|
+
__version__ = "3.1.1"
|
|
10
|
+
|
|
11
|
+
__all__ = [ # noqa
|
|
12
|
+
"__version__",
|
|
13
|
+
"Pickler",
|
|
14
|
+
"CloudPickler",
|
|
15
|
+
"dumps",
|
|
16
|
+
"loads",
|
|
17
|
+
"dump",
|
|
18
|
+
"load",
|
|
19
|
+
"register_pickle_by_value",
|
|
20
|
+
"unregister_pickle_by_value",
|
|
21
|
+
]
|