numba-cuda 0.23.0__cp313-cp313-win_amd64.whl → 0.24.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba_cuda/VERSION +1 -1
- numba_cuda/numba/cuda/__init__.py +4 -1
- numba_cuda/numba/cuda/_compat.py +47 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_dispatcher.cpp +8 -2
- numba_cuda/numba/cuda/cext/_hashtable.cpp +5 -0
- numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_pymodule.h +1 -1
- numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cext/_typeof.cpp +56 -8
- numba_cuda/numba/cuda/cext/mviewbuf.c +7 -1
- numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
- numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +4 -5
- numba_cuda/numba/cuda/codegen.py +4 -2
- numba_cuda/numba/cuda/compiler.py +5 -5
- numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +1 -1
- numba_cuda/numba/cuda/core/base.py +6 -10
- numba_cuda/numba/cuda/core/bytecode.py +21 -13
- numba_cuda/numba/cuda/core/byteflow.py +336 -90
- numba_cuda/numba/cuda/core/compiler.py +3 -4
- numba_cuda/numba/cuda/core/compiler_machinery.py +3 -3
- numba_cuda/numba/cuda/core/config.py +5 -7
- numba_cuda/numba/cuda/core/controlflow.py +17 -9
- numba_cuda/numba/cuda/core/inline_closurecall.py +11 -10
- numba_cuda/numba/cuda/core/interpreter.py +255 -96
- numba_cuda/numba/cuda/core/ir_utils.py +8 -17
- numba_cuda/numba/cuda/core/pythonapi.py +3 -0
- numba_cuda/numba/cuda/core/rewrites/static_binop.py +1 -1
- numba_cuda/numba/cuda/core/ssa.py +2 -2
- numba_cuda/numba/cuda/core/transforms.py +4 -6
- numba_cuda/numba/cuda/core/typed_passes.py +1 -1
- numba_cuda/numba/cuda/core/typeinfer.py +3 -3
- numba_cuda/numba/cuda/core/untyped_passes.py +11 -10
- numba_cuda/numba/cuda/cpython/unicode.py +2 -2
- numba_cuda/numba/cuda/cpython/unicode_support.py +1 -3
- numba_cuda/numba/cuda/cudadrv/devicearray.py +4 -4
- numba_cuda/numba/cuda/cudadrv/driver.py +13 -11
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +71 -32
- numba_cuda/numba/cuda/debuginfo.py +10 -79
- numba_cuda/numba/cuda/deviceufunc.py +3 -6
- numba_cuda/numba/cuda/dispatcher.py +5 -19
- numba_cuda/numba/cuda/libdeviceimpl.py +1 -2
- numba_cuda/numba/cuda/lowering.py +0 -28
- numba_cuda/numba/cuda/memory_management/nrt.py +1 -1
- numba_cuda/numba/cuda/np/arrayobj.py +7 -9
- numba_cuda/numba/cuda/np/numpy_support.py +7 -10
- numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +4 -3
- numba_cuda/numba/cuda/testing.py +4 -8
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +66 -4
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +26 -4
- numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +61 -9
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +6 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +12 -1
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +13 -0
- numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +12 -7
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +37 -35
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +8 -7
- numba_cuda/numba/cuda/tests/support.py +11 -0
- numba_cuda/numba/cuda/types/cuda_functions.py +1 -1
- numba_cuda/numba/cuda/typing/asnumbatype.py +37 -2
- numba_cuda/numba/cuda/typing/typeof.py +9 -16
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/METADATA +4 -13
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/RECORD +74 -73
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/WHEEL +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/licenses/LICENSE.numba +0 -0
- {numba_cuda-0.23.0.dist-info → numba_cuda-0.24.0.dist-info}/top_level.txt +0 -0
|
@@ -23,10 +23,9 @@ class _CompileStatus(object):
|
|
|
23
23
|
self.can_fallback = can_fallback
|
|
24
24
|
|
|
25
25
|
def __repr__(self):
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
return ", ".join(vals)
|
|
26
|
+
return ", ".join(
|
|
27
|
+
"{k}={v}".format(k=k, v=getattr(self, k)) for k in self.__slots__
|
|
28
|
+
)
|
|
30
29
|
|
|
31
30
|
|
|
32
31
|
class StateDict(dict):
|
|
@@ -386,7 +386,7 @@ class PassManager(object):
|
|
|
386
386
|
self._runPass(idx, pass_inst, state)
|
|
387
387
|
else:
|
|
388
388
|
raise BaseException("Legacy pass in use")
|
|
389
|
-
except _EarlyPipelineCompletion as e:
|
|
389
|
+
except _EarlyPipelineCompletion as e: # noqa: PERF203
|
|
390
390
|
raise e
|
|
391
391
|
except Exception as e:
|
|
392
392
|
if not isinstance(e, errors.NumbaError):
|
|
@@ -468,14 +468,14 @@ class PassRegistry(object):
|
|
|
468
468
|
return self._registry[clazz]
|
|
469
469
|
|
|
470
470
|
def _does_pass_name_alias(self, check):
|
|
471
|
-
for
|
|
471
|
+
for v in self._registry.values():
|
|
472
472
|
if v.pass_inst.name == check:
|
|
473
473
|
return True
|
|
474
474
|
return False
|
|
475
475
|
|
|
476
476
|
def find_by_name(self, class_name):
|
|
477
477
|
assert isinstance(class_name, str)
|
|
478
|
-
for
|
|
478
|
+
for v in self._registry.values():
|
|
479
479
|
if v.pass_inst.name == class_name:
|
|
480
480
|
return v
|
|
481
481
|
else:
|
|
@@ -153,9 +153,11 @@ class _EnvReloader(object):
|
|
|
153
153
|
new_environ["NUMBA_" + k.upper()] = v
|
|
154
154
|
|
|
155
155
|
# clobber file based config with any locally defined env vars
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
156
|
+
new_environ.update(
|
|
157
|
+
(name, value)
|
|
158
|
+
for name, value in os.environ.items()
|
|
159
|
+
if name.startswith("NUMBA_")
|
|
160
|
+
)
|
|
159
161
|
# We update the config variables if at least one NUMBA environment
|
|
160
162
|
# variable was modified. This lets the user modify values
|
|
161
163
|
# directly in the config module without having them when
|
|
@@ -494,10 +496,6 @@ class _EnvReloader(object):
|
|
|
494
496
|
"NUMBA_CUDA_PER_THREAD_DEFAULT_STREAM", int, 0
|
|
495
497
|
)
|
|
496
498
|
|
|
497
|
-
CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = _readenv(
|
|
498
|
-
"NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY", int, 0
|
|
499
|
-
)
|
|
500
|
-
|
|
501
499
|
# Location of the CUDA include files
|
|
502
500
|
if IS_WIN32:
|
|
503
501
|
cuda_path = os.environ.get("CUDA_PATH")
|
|
@@ -11,9 +11,16 @@ from numba.cuda.utils import PYVERSION
|
|
|
11
11
|
|
|
12
12
|
# List of bytecodes creating a new block in the control flow graph
|
|
13
13
|
# (in addition to explicit jump labels).
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
if PYVERSION in ((3, 14),):
|
|
15
|
+
NEW_BLOCKERS = frozenset(
|
|
16
|
+
["SETUP_LOOP", "FOR_ITER", "SETUP_WITH", "BEFORE_WITH", "LOAD_SPECIAL"]
|
|
17
|
+
)
|
|
18
|
+
elif PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
|
|
19
|
+
NEW_BLOCKERS = frozenset(
|
|
20
|
+
["SETUP_LOOP", "FOR_ITER", "SETUP_WITH", "BEFORE_WITH"]
|
|
21
|
+
)
|
|
22
|
+
else:
|
|
23
|
+
raise NotImplementedError(PYVERSION)
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
class CFBlock(object):
|
|
@@ -400,8 +407,7 @@ class CFGraph(object):
|
|
|
400
407
|
if node not in seen:
|
|
401
408
|
yield node
|
|
402
409
|
seen.add(node)
|
|
403
|
-
|
|
404
|
-
stack.append(succ)
|
|
410
|
+
stack.extend(self._succs[node])
|
|
405
411
|
|
|
406
412
|
def _eliminate_dead_blocks(self):
|
|
407
413
|
"""
|
|
@@ -440,9 +446,11 @@ class CFGraph(object):
|
|
|
440
446
|
if node not in seen:
|
|
441
447
|
seen.add(node)
|
|
442
448
|
stack.append((post_order.append, node))
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
449
|
+
stack.extend(
|
|
450
|
+
(dfs_rec, dest)
|
|
451
|
+
for dest in succs[node]
|
|
452
|
+
if (node, dest) not in back_edges
|
|
453
|
+
)
|
|
446
454
|
|
|
447
455
|
stack = [(dfs_rec, self._entry_point)]
|
|
448
456
|
while stack:
|
|
@@ -970,7 +978,7 @@ class ControlFlowAnalysis(object):
|
|
|
970
978
|
self._curblock.terminating = True
|
|
971
979
|
self._force_new_block = True
|
|
972
980
|
|
|
973
|
-
if PYVERSION in ((3, 12), (3, 13)):
|
|
981
|
+
if PYVERSION in ((3, 12), (3, 13), (3, 14)):
|
|
974
982
|
|
|
975
983
|
def op_RETURN_CONST(self, inst):
|
|
976
984
|
self._curblock.terminating = True
|
|
@@ -344,10 +344,10 @@ class InlineWorker(object):
|
|
|
344
344
|
# Always copy the callee IR, it gets mutated
|
|
345
345
|
def copy_ir(the_ir):
|
|
346
346
|
kernel_copy = the_ir.copy()
|
|
347
|
-
kernel_copy.blocks = {
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
347
|
+
kernel_copy.blocks = {
|
|
348
|
+
block_label: copy.deepcopy(block)
|
|
349
|
+
for block_label, block in the_ir.blocks.items()
|
|
350
|
+
}
|
|
351
351
|
return kernel_copy
|
|
352
352
|
|
|
353
353
|
callee_ir = copy_ir(callee_ir)
|
|
@@ -834,7 +834,7 @@ def _debug_dump(func_ir):
|
|
|
834
834
|
def _get_all_scopes(blocks):
|
|
835
835
|
"""Get all block-local scopes from an IR."""
|
|
836
836
|
all_scopes = []
|
|
837
|
-
for
|
|
837
|
+
for block in blocks.values():
|
|
838
838
|
if block.scope not in all_scopes:
|
|
839
839
|
all_scopes.append(block.scope)
|
|
840
840
|
return all_scopes
|
|
@@ -844,7 +844,7 @@ def _replace_args_with(blocks, args):
|
|
|
844
844
|
"""
|
|
845
845
|
Replace ir.Arg(...) with real arguments from call site
|
|
846
846
|
"""
|
|
847
|
-
for
|
|
847
|
+
for block in blocks.values():
|
|
848
848
|
assigns = block.find_insts(ir.assign_types)
|
|
849
849
|
for stmt in assigns:
|
|
850
850
|
if isinstance(stmt.value, ir.arg_types):
|
|
@@ -857,7 +857,7 @@ def _replace_freevars(blocks, args):
|
|
|
857
857
|
"""
|
|
858
858
|
Replace ir.FreeVar(...) with real variables from parent function
|
|
859
859
|
"""
|
|
860
|
-
for
|
|
860
|
+
for block in blocks.values():
|
|
861
861
|
assigns = block.find_insts(ir.assign_types)
|
|
862
862
|
for stmt in assigns:
|
|
863
863
|
if isinstance(stmt.value, ir.freevar_types):
|
|
@@ -873,7 +873,7 @@ def _replace_returns(blocks, target, return_label):
|
|
|
873
873
|
"""
|
|
874
874
|
Return return statement by assigning directly to target, and a jump.
|
|
875
875
|
"""
|
|
876
|
-
for
|
|
876
|
+
for block in blocks.values():
|
|
877
877
|
casts = []
|
|
878
878
|
for i in range(len(block.body)):
|
|
879
879
|
stmt = block.body[i]
|
|
@@ -1316,8 +1316,9 @@ def _inline_arraycall(
|
|
|
1316
1316
|
)
|
|
1317
1317
|
|
|
1318
1318
|
# Add back removed just in case they are used by something else
|
|
1319
|
-
|
|
1320
|
-
|
|
1319
|
+
stmts.extend(
|
|
1320
|
+
_new_definition(func_ir, var, array_var, loc) for var in removed
|
|
1321
|
+
)
|
|
1321
1322
|
|
|
1322
1323
|
# Add back terminator
|
|
1323
1324
|
stmts.append(terminator)
|