PyPI - numba-cuda - Versions diffs - 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl - Mend

numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of numba-cuda might be problematic. Click here for more details.

Files changed (172) hide show

numba_cuda/VERSION +1 -1
numba_cuda/numba/cuda/__init__.py +1 -1
numba_cuda/numba/cuda/_internal/cuda_bf16.py +12706 -1470
numba_cuda/numba/cuda/_internal/cuda_fp16.py +2653 -8769
numba_cuda/numba/cuda/api.py +6 -1
numba_cuda/numba/cuda/bf16.py +285 -2
numba_cuda/numba/cuda/cgutils.py +2 -2
numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
numba_cuda/numba/cuda/codegen.py +1 -1
numba_cuda/numba/cuda/compiler.py +373 -30
numba_cuda/numba/cuda/core/analysis.py +319 -0
numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
numba_cuda/numba/cuda/core/base.py +1289 -0
numba_cuda/numba/cuda/core/bytecode.py +727 -0
numba_cuda/numba/cuda/core/caching.py +2 -2
numba_cuda/numba/cuda/core/compiler.py +6 -14
numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
numba_cuda/numba/cuda/core/config.py +747 -0
numba_cuda/numba/cuda/core/consts.py +124 -0
numba_cuda/numba/cuda/core/cpu.py +370 -0
numba_cuda/numba/cuda/core/environment.py +68 -0
numba_cuda/numba/cuda/core/event.py +511 -0
numba_cuda/numba/cuda/core/funcdesc.py +330 -0
numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
numba_cuda/numba/cuda/core/interpreter.py +48 -26
numba_cuda/numba/cuda/core/ir_utils.py +15 -26
numba_cuda/numba/cuda/core/options.py +262 -0
numba_cuda/numba/cuda/core/postproc.py +249 -0
numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
numba_cuda/numba/cuda/core/ssa.py +496 -0
numba_cuda/numba/cuda/core/targetconfig.py +329 -0
numba_cuda/numba/cuda/core/tracing.py +231 -0
numba_cuda/numba/cuda/core/transforms.py +952 -0
numba_cuda/numba/cuda/core/typed_passes.py +738 -7
numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
numba_cuda/numba/cuda/cuda_paths.py +422 -246
numba_cuda/numba/cuda/cudadecl.py +1 -1
numba_cuda/numba/cuda/cudadrv/__init__.py +1 -1
numba_cuda/numba/cuda/cudadrv/devicearray.py +2 -1
numba_cuda/numba/cuda/cudadrv/driver.py +11 -140
numba_cuda/numba/cuda/cudadrv/dummyarray.py +111 -24
numba_cuda/numba/cuda/cudadrv/libs.py +5 -5
numba_cuda/numba/cuda/cudadrv/mappings.py +1 -1
numba_cuda/numba/cuda/cudadrv/nvrtc.py +19 -8
numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -4
numba_cuda/numba/cuda/cudadrv/runtime.py +1 -1
numba_cuda/numba/cuda/cudaimpl.py +5 -1
numba_cuda/numba/cuda/debuginfo.py +85 -2
numba_cuda/numba/cuda/decorators.py +3 -3
numba_cuda/numba/cuda/descriptor.py +3 -4
numba_cuda/numba/cuda/deviceufunc.py +66 -2
numba_cuda/numba/cuda/dispatcher.py +18 -39
numba_cuda/numba/cuda/flags.py +141 -1
numba_cuda/numba/cuda/fp16.py +0 -2
numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
numba_cuda/numba/cuda/lowering.py +7 -144
numba_cuda/numba/cuda/mathimpl.py +2 -1
numba_cuda/numba/cuda/memory_management/nrt.py +43 -17
numba_cuda/numba/cuda/misc/findlib.py +75 -0
numba_cuda/numba/cuda/models.py +9 -1
numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
numba_cuda/numba/cuda/np/numpy_support.py +553 -0
numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
numba_cuda/numba/cuda/nvvmutils.py +1 -1
numba_cuda/numba/cuda/printimpl.py +12 -1
numba_cuda/numba/cuda/random.py +1 -1
numba_cuda/numba/cuda/serialize.py +1 -1
numba_cuda/numba/cuda/simulator/__init__.py +1 -1
numba_cuda/numba/cuda/simulator/api.py +1 -1
numba_cuda/numba/cuda/simulator/compiler.py +4 -0
numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +1 -1
numba_cuda/numba/cuda/simulator/kernelapi.py +1 -1
numba_cuda/numba/cuda/simulator/memory_management/nrt.py +14 -2
numba_cuda/numba/cuda/target.py +35 -17
numba_cuda/numba/cuda/testing.py +7 -19
numba_cuda/numba/cuda/tests/__init__.py +1 -1
numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
numba_cuda/numba/cuda/tests/core/test_serialize.py +4 -4
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +6 -3
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +18 -2
numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +2 -1
numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +1 -1
numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_array.py +2 -1
numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +539 -2
numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +81 -1
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +1 -3
numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +2 -3
numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_debug.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +293 -4
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_errors.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_exception.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +2 -1
numba_cuda/numba/cuda/tests/cudapy/test_inline.py +18 -8
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +23 -21
numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +10 -37
numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_math.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_operator.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_print.py +20 -0
numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_sm.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
numba_cuda/numba/cuda/tests/cudapy/test_sync.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +263 -2
numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +112 -6
numba_cuda/numba/cuda/tests/cudapy/test_warning.py +1 -1
numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +1 -1
numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +0 -2
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +3 -2
numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +0 -2
numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +0 -2
numba_cuda/numba/cuda/tests/nocuda/test_import.py +3 -1
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +24 -12
numba_cuda/numba/cuda/tests/nrt/test_nrt.py +2 -1
numba_cuda/numba/cuda/tests/support.py +55 -15
numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
numba_cuda/numba/cuda/types.py +56 -0
numba_cuda/numba/cuda/typing/__init__.py +9 -1
numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
numba_cuda/numba/cuda/typing/context.py +751 -0
numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
numba_cuda/numba/cuda/typing/npydecl.py +658 -0
numba_cuda/numba/cuda/typing/templates.py +7 -6
numba_cuda/numba/cuda/ufuncs.py +3 -3
numba_cuda/numba/cuda/utils.py +6 -112
{numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/METADATA +4 -3
{numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/RECORD +171 -116
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -60
{numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/WHEEL +0 -0
{numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE +0 -0
{numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/licenses/LICENSE.numba +0 -0
{numba_cuda-0.19.1.dist-info → numba_cuda-0.20.1.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/core/interpreter.py CHANGED Viewed

@@ -8,28 +8,29 @@ import operator
 import logging
 import textwrap
-from numba.core import errors, ir, config
+from numba.core import errors, ir
+from numba.cuda.core import config
 from numba.cuda.errors import UnsupportedBytecodeError
 from numba.core.errors import (
     NotDefinedError,
     error_extras,
 )
 from numba.cuda.core import ir_utils
-from numba.core.utils import PYVERSION
 from numba.cuda.utils import (
+    PYVERSION,
     BINOPS_TO_OPERATORS,
     INPLACE_BINOPS_TO_OPERATORS,
-    _lazy_pformat,
 )
+from numba.cuda.utils import _lazy_pformat
 from numba.core.byteflow import Flow, AdaptDFA, AdaptCFA, BlockKind
-from numba.core.unsafe import eh
+from numba.cuda.core.unsafe import eh
 from numba.cpython.unsafe.tuple import unpack_single_tuple
 if PYVERSION in ((3, 12), (3, 13)):
     # Operands for CALL_INTRINSIC_1
     from numba.core.byteflow import CALL_INTRINSIC_1_Operand as ci1op
-elif PYVERSION in ((3, 10), (3, 11)):
+elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
     pass
 else:
     raise NotImplementedError(PYVERSION)
@@ -1395,7 +1396,7 @@ class Interpreter(object):
                     if entry.start < self.last_active_offset
                 ]
             )
-        elif PYVERSION in ((3, 10), (3, 11)):
+        elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
             pass
         else:
             raise NotImplementedError(PYVERSION)
@@ -1410,7 +1411,10 @@ class Interpreter(object):
         if PYVERSION in ((3, 11), (3, 12), (3, 13)):
             # Insert end of try markers
             self._end_try_blocks()
-        elif PYVERSION in ((3, 10),):
+        elif PYVERSION in (
+            (3, 9),
+            (3, 10),
+        ):
             pass
         else:
             raise NotImplementedError(PYVERSION)
@@ -1434,7 +1438,7 @@ class Interpreter(object):
         peepholes = []
         if PYVERSION in ((3, 11), (3, 12), (3, 13)):
             peepholes.append(peep_hole_split_at_pop_block)
-        if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
+        if PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12), (3, 13)):
             peepholes.append(peep_hole_list_to_tuple)
         peepholes.append(peep_hole_delete_with_exit)
         if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
@@ -1592,7 +1596,10 @@ class Interpreter(object):
             if newtryblk is not None:
                 if newtryblk is not tryblk:
                     self._insert_try_block_begin()
-        elif PYVERSION in ((3, 10),):
+        elif PYVERSION in (
+            (3, 9),
+            (3, 10),
+        ):
             while self.syntax_blocks:
                 if offset >= self.syntax_blocks[-1].exit:
                     self.syntax_blocks.pop()
@@ -1829,7 +1836,10 @@ class Interpreter(object):
                     if inst.offset >= top.exit:
                         self.current_block.append(ir.PopBlock(loc=self.loc))
                         self.syntax_blocks.pop()
-        elif PYVERSION in ((3, 10),):
+        elif PYVERSION in (
+            (3, 9),
+            (3, 10),
+        ):
             pass
         else:
             raise NotImplementedError(PYVERSION)
@@ -2026,7 +2036,7 @@ class Interpreter(object):
             target = self.get(container)
             expr = ir.Expr.getitem(target, index=index, loc=self.loc)
             self.store(expr, res)
-    elif PYVERSION in ((3, 10), (3, 11)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -2052,7 +2062,7 @@ class Interpreter(object):
                 target=target, index=index, value=value, loc=self.loc
             )
             self.current_block.append(stmt)
-    elif PYVERSION in ((3, 10), (3, 11)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -2345,7 +2355,7 @@ class Interpreter(object):
             dstname = self.code_locals[oparg2]
             self.store(value=self.get(value2), name=dstname)
-    elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -2363,7 +2373,7 @@ class Interpreter(object):
                 undef = ir.Expr.undef(loc=self.loc)
                 self.store(undef, name=res)
-    elif PYVERSION in ((3, 10), (3, 11)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -2403,7 +2413,7 @@ class Interpreter(object):
         item = self.get(item)
         if PYVERSION in ((3, 12), (3, 13)):
             attr = self.code_names[inst.arg >> 1]
-        elif PYVERSION in ((3, 10), (3, 11)):
+        elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
             attr = self.code_names[inst.arg]
         else:
             raise NotImplementedError(PYVERSION)
@@ -2439,7 +2449,10 @@ class Interpreter(object):
             value = self.get_global_value(name)
             gl = ir.Global(name, value, loc=self.loc)
             self.store(gl, res)
-    elif PYVERSION in ((3, 10),):
+    elif PYVERSION in (
+        (3, 9),
+        (3, 10),
+    ):
         def op_LOAD_GLOBAL(self, inst, res):
             name = self.code_names[inst.arg]
@@ -2467,7 +2480,10 @@ class Interpreter(object):
                 value = self.get_closure_value(idx)
                 gl = ir.FreeVar(idx, name, value, loc=self.loc)
             self.store(gl, res)
-    elif PYVERSION in ((3, 10),):
+    elif PYVERSION in (
+        (3, 9),
+        (3, 10),
+    ):
         def op_LOAD_DEREF(self, inst, res):
             n_cellvars = len(self.code_cellvars)
@@ -2494,7 +2510,10 @@ class Interpreter(object):
             name = self.func_id.func.__code__._varname_from_oparg(inst.arg)
             value = self.get(value)
             self.store(value=value, name=name)
-    elif PYVERSION in ((3, 10),):
+    elif PYVERSION in (
+        (3, 9),
+        (3, 10),
+    ):
         def op_STORE_DEREF(self, inst, value):
             n_cellvars = len(self.code_cellvars)
@@ -2546,7 +2565,7 @@ class Interpreter(object):
                         if ex.target == end
                     ]
                 )
-        elif PYVERSION in ((3, 10), (3, 11)):
+        elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
             pass
         else:
             raise NotImplementedError(PYVERSION)
@@ -3107,7 +3126,7 @@ class Interpreter(object):
             self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval)
             ret = ir.Return(self.get(castval), loc=self.loc)
             self.current_block.append(ret)
-    elif PYVERSION in ((3, 10), (3, 11)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -3117,7 +3136,7 @@ class Interpreter(object):
         def op_TO_BOOL(self, inst, val, res):
             self.store(self.get(val), res)  # TODO: just a lazy hack
-    elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -3128,7 +3147,7 @@ class Interpreter(object):
             # TODO: fifth lowest bit now indicates a forced version to bool.
         elif PYVERSION in ((3, 12),):
             op = dis.cmp_op[inst.arg >> 4]
-        elif PYVERSION in ((3, 10), (3, 11)):
+        elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
             op = dis.cmp_op[inst.arg]
         else:
             raise NotImplementedError(PYVERSION)
@@ -3259,7 +3278,7 @@ class Interpreter(object):
         def op_POP_JUMP_IF_NOT_NONE(self, inst, pred):
             self._jump_if_none(inst, pred, False)
-    elif PYVERSION in ((3, 10), (3, 11)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -3429,7 +3448,10 @@ class Interpreter(object):
                 assert 0, "unreachable"
             self.store(gl, res)
-    elif PYVERSION in ((3, 10),):
+    elif PYVERSION in (
+        (3, 9),
+        (3, 10),
+    ):
         def op_LOAD_CLOSURE(self, inst, res):
             n_cellvars = len(self.code_cellvars)
@@ -3579,7 +3601,7 @@ class Interpreter(object):
                 return
             else:
                 raise NotImplementedError(operand)
-    elif PYVERSION in ((3, 10), (3, 11)):
+    elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
         pass
     else:
         raise NotImplementedError(PYVERSION)
@@ -3589,7 +3611,7 @@ if PYVERSION in ((3, 12), (3, 13)):
     class INTRINSIC_STOPITERATION_ERROR(AssertionError):
         pass
-elif PYVERSION in ((3, 10), (3, 11)):
+elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
     pass
 else:
     raise NotImplementedError(PYVERSION)

numba_cuda/numba/cuda/core/ir_utils.py CHANGED Viewed

@@ -11,7 +11,9 @@ import warnings
 import numba
 from numba.core.extending import _Intrinsic
-from numba.core import types, typing, ir, analysis, postproc, rewrites, config
+from numba.core import types, ir, analysis
+from numba.cuda import typing
+from numba.cuda.core import postproc, rewrites, config
 from numba.core.typing.templates import signature
 from numba.core.analysis import (
     compute_live_map,
@@ -248,12 +250,7 @@ def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
     range_call_assign = ir.Assign(range_call, range_call_var, loc)
     # iter_var = getiter(range_call_var)
     iter_call = ir.Expr.getiter(range_call_var, loc)
-    if config.USE_LEGACY_TYPE_SYSTEM:
-        calltype_sig = signature(
-            types.range_iter64_type, types.range_state64_type
-        )
-    else:
-        calltype_sig = signature(types.range_iter_type, types.range_state_type)
+    calltype_sig = signature(types.range_iter64_type, types.range_state64_type)
     calltypes[iter_call] = calltype_sig
     iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc)
     typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp)
@@ -332,10 +329,7 @@ def mk_loop_header(typemap, phi_var, calltypes, scope, loc):
         types.intp, types.boolean
     )
     iternext_call = ir.Expr.iternext(phi_var, loc)
-    if config.USE_LEGACY_TYPE_SYSTEM:
-        range_iter_type = types.range_iter64_type
-    else:
-        range_iter_type = types.range_iter_type
+    range_iter_type = types.range_iter64_type
     calltypes[iternext_call] = signature(
         types.containers.Pair(types.intp, types.boolean), range_iter_type
     )
@@ -812,8 +806,6 @@ def has_no_side_effect(rhs, lives, call_table):
     """Returns True if this expression has no side effects that
     would prevent re-ordering.
     """
-    from numba.parfors import array_analysis, parfor
-    from numba.misc.special import prange
     if isinstance(rhs, ir.Expr) and rhs.op == "call":
         func_name = rhs.func.name
@@ -826,11 +818,7 @@ def has_no_side_effect(rhs, lives, call_table):
             or call_list == ["stencil", numba]
             or call_list == ["log", numpy]
             or call_list == ["dtype", numpy]
-            or call_list == [array_analysis.wrap_index]
-            or call_list == [prange]
-            or call_list == ["prange", numba]
             or call_list == ["pndindex", numba]
-            or call_list == [parfor.internal_prange]
             or call_list == ["ceil", math]
             or call_list == [max]
             or call_list == [int]
@@ -1893,7 +1881,7 @@ def compile_to_numba_ir(
     if typingctx and other typing inputs are available and update typemap and
     calltypes.
     """
-    from numba.core import typed_passes
+    from numba.cuda.core import typed_passes
     # mk_func can be actual function or make_function node, or a njit function
     if hasattr(mk_func, "code"):
@@ -1975,7 +1963,8 @@ def get_ir_of_code(glbls, fcode):
         fcode, func_env, func_arg, func_clo, glbls
     )
-    from numba.core import compiler
+    from numba.cuda import compiler
+    from numba.cuda.core.compiler import StateDict
     ir = compiler.run_frontend(f)
@@ -1984,7 +1973,7 @@ def get_ir_of_code(glbls, fcode):
     # for example, Raise nodes need to become StaticRaise before type inference
     class DummyPipeline(object):
         def __init__(self, f_ir):
-            self.state = compiler.StateDict()
+            self.state = StateDict()
             self.state.typingctx = None
             self.state.targetctx = None
             self.state.args = None
@@ -1997,10 +1986,10 @@ def get_ir_of_code(glbls, fcode):
     rewrites.rewrite_registry.apply("before-inference", state)
     # call inline pass to handle cases like stencils and comprehensions
     swapped = {}  # TODO: get this from diagnostics store
-    import numba.core.inline_closurecall
+    from numba.cuda.core.inline_closurecall import InlineClosureCallPass
-    inline_pass = numba.core.inline_closurecall.InlineClosureCallPass(
-        ir, numba.core.cpu.ParallelOptions(False), swapped
+    inline_pass = InlineClosureCallPass(
+        ir, numba.cuda.core.options.ParallelOptions(False), swapped
     )
     inline_pass.run()
@@ -2013,8 +2002,8 @@ def get_ir_of_code(glbls, fcode):
     # added to create valid IR.
     # rebuild IR in SSA form
-    from numba.core.untyped_passes import ReconstructSSA
-    from numba.core.typed_passes import PreLowerStripPhis
+    from numba.cuda.core.untyped_passes import ReconstructSSA
+    from numba.cuda.core.typed_passes import PreLowerStripPhis
     reconstruct_ssa = ReconstructSSA()
     phistrip = PreLowerStripPhis()
@@ -2494,7 +2483,7 @@ def legalize_single_scope(blocks):
     return len({blk.scope for blk in blocks.values()}) == 1
-def check_and_legalize_ir(func_ir, flags: "numba.core.compiler.Flags"):
+def check_and_legalize_ir(func_ir, flags: "numba.core.flags.Flags"):
     """
     This checks that the IR presented is legal
     """

numba_cuda/numba/cuda/core/options.py ADDED Viewed

@@ -0,0 +1,262 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+"""
+Defines CUDA Options for use in the CUDA target
+"""
+from abc import ABCMeta, abstractmethod
+class AbstractOptionValue(metaclass=ABCMeta):
+    """Abstract base class for custom option values.
+    Subclasses must implement ``encode``; ``__repr__`` is built from the
+    concrete class name and the string that ``encode`` returns.
+    """
+    @abstractmethod
+    def encode(self) -> str:
+        """Returns an encoding of the values"""
+        ...
+    def __repr__(self) -> str:
+        # Rendered as "<ClassName>(<encode() output>)".
+        return f"{self.__class__.__name__}({self.encode()})"
+class FastMathOptions(AbstractOptionValue):
+    """
+    Options for controlling fast math optimization.
+    The selected flag names are kept in ``self.flags`` as a set of strings.
+    """
+    def __init__(self, value):
+        """Normalize ``value`` into a set of fast-math flag names.
+        Accepted inputs:
+        - another ``FastMathOptions``: its flag set is copied;
+        - ``True``: becomes ``{"fast"}``;
+        - ``False``: becomes the empty set;
+        - a ``set`` of flag names: validated against the known flags;
+        - a ``dict`` mapping flag name to a truthy/falsy value: keys are
+          validated and only the names with a truthy value are kept.
+        Raises ``ValueError`` for unknown flag names or any other input type.
+        """
+        # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags
+        valid_flags = {
+            "fast",
+            "nnan",
+            "ninf",
+            "nsz",
+            "arcp",
+            "contract",
+            "afn",
+            "reassoc",
+        }
+        if isinstance(value, FastMathOptions):
+            self.flags = value.flags.copy()
+        # Identity checks: only the bool singletons match here, so ints such
+        # as 1 or 0 fall through to the final ``else`` and raise ValueError.
+        elif value is True:
+            self.flags = {"fast"}
+        elif value is False:
+            self.flags = set()
+        elif isinstance(value, set):
+            invalid = value - valid_flags
+            if invalid:
+                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
+            # NOTE(review): the caller's set is stored by reference (the
+            # FastMathOptions branch above copies instead) -- confirm that
+            # sharing the object with the caller is intended.
+            self.flags = value
+        elif isinstance(value, dict):
+            invalid = set(value.keys()) - valid_flags
+            if invalid:
+                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
+            self.flags = {v for v, enable in value.items() if enable}
+        else:
+            msg = "Expected fastmath option(s) to be either a bool, dict or set"
+            raise ValueError(msg)
+    def __bool__(self):
+        # Truthy when at least one flag is set.
+        return bool(self.flags)
+    # Same method exposed under a second name (the Python 2 truthiness hook).
+    __nonzero__ = __bool__
+    def encode(self) -> str:
+        # NOTE(review): this is str() of a set, so the order of the flag
+        # names in the result is not guaranteed -- verify no caller relies
+        # on a stable string.
+        return str(self.flags)
+    def __eq__(self, other):
+        # Equal only to an instance of exactly the same class with the same
+        # flag set; other types defer to Python via NotImplemented.
+        # NOTE(review): no __hash__ is defined alongside __eq__ in this
+        # class, which in Python makes instances unhashable -- confirm.
+        if type(other) is type(self):
+            return self.flags == other.flags
+        return NotImplemented
+class ParallelOptions(AbstractOptionValue):
+    """
+    Options for controlling auto parallelization.
+    Each name in ``__slots__`` is one switch; ``enabled`` is the overall
+    on/off value and the remaining names are per-feature switches.
+    """
+    __slots__ = (
+        "enabled",
+        "comprehension",
+        "reduction",
+        "inplace_binop",
+        "setitem",
+        "numpy",
+        "stencil",
+        "fusion",
+        "prange",
+    )
+    def __init__(self, value):
+        """Build the option set from ``value``.
+        Accepted inputs:
+        - ``bool``: every switch, including ``enabled``, takes that value;
+        - ``dict``: ``enabled`` is True and each per-feature switch is read
+          from the dict, defaulting to True when the key is absent;
+        - another ``ParallelOptions``: all switches are copied from it.
+        Raises ``NameError`` if a dict contains keys that are not known
+        switches, and ``ValueError`` for any other input type.
+        """
+        if isinstance(value, bool):
+            self.enabled = value
+            self.comprehension = value
+            self.reduction = value
+            self.inplace_binop = value
+            self.setitem = value
+            self.numpy = value
+            self.stencil = value
+            self.fusion = value
+            self.prange = value
+        elif isinstance(value, dict):
+            # NOTE(review): pop() removes the recognized keys from the dict
+            # object the caller passed in, so the caller's dict is modified
+            # by this constructor -- confirm this side effect is intended.
+            self.enabled = True
+            self.comprehension = value.pop("comprehension", True)
+            self.reduction = value.pop("reduction", True)
+            self.inplace_binop = value.pop("inplace_binop", True)
+            self.setitem = value.pop("setitem", True)
+            self.numpy = value.pop("numpy", True)
+            self.stencil = value.pop("stencil", True)
+            self.fusion = value.pop("fusion", True)
+            self.prange = value.pop("prange", True)
+            # Anything still left in the dict was not a recognized switch.
+            if value:
+                msg = "Unrecognized parallel options: %s" % value.keys()
+                raise NameError(msg)
+        elif isinstance(value, ParallelOptions):
+            self.enabled = value.enabled
+            self.comprehension = value.comprehension
+            self.reduction = value.reduction
+            self.inplace_binop = value.inplace_binop
+            self.setitem = value.setitem
+            self.numpy = value.numpy
+            self.stencil = value.stencil
+            self.fusion = value.fusion
+            self.prange = value.prange
+        else:
+            msg = "Expect parallel option to be either a bool or a dict"
+            raise ValueError(msg)
+    def _get_values(self):
+        """Get values as dictionary."""
+        # Keys follow the order of ``__slots__``.
+        return {k: getattr(self, k) for k in self.__slots__}
+    def __eq__(self, other):
+        # Equal only to an instance of exactly the same class whose switches
+        # all match; other types defer to Python via NotImplemented.
+        if type(other) is type(self):
+            return self._get_values() == other._get_values()
+        return NotImplemented
+    def encode(self) -> str:
+        # Produces "name=value" pairs joined by ", " in ``__slots__`` order.
+        return ", ".join(f"{k}={v}" for k, v in self._get_values().items())
+class InlineOptions(AbstractOptionValue):
+    """
+    Options for controlling inlining
+    The raw setting is stored in ``self._inline`` and is either the string
+    "always", the string "never", or a callable.
+    """
+    def __init__(self, value):
+        """Validate and store the inline setting.
+        ``value`` must be the string "always" or "never", or an object with
+        a ``__call__`` attribute. Any other string, or a non-callable
+        object, raises ``ValueError``.
+        """
+        ok = False
+        if isinstance(value, str):
+            if value in ("always", "never"):
+                ok = True
+        else:
+            # Non-strings are accepted only when they look callable.
+            ok = hasattr(value, "__call__")
+        if ok:
+            self._inline = value
+        else:
+            msg = (
+                "kwarg 'inline' must be one of the strings 'always' or "
+                "'never', or it can be a callable that returns True/False. "
+                "Found value %s" % value
+            )
+            raise ValueError(msg)
+    @property
+    def is_never_inline(self):
+        """
+        True if never inline
+        """
+        return self._inline == "never"
+    @property
+    def is_always_inline(self):
+        """
+        True if always inline
+        """
+        return self._inline == "always"
+    @property
+    def has_cost_model(self):
+        """
+        True if a cost model is provided
+        """
+        # Any stored value other than "always"/"never" counts as a cost
+        # model; given the constructor's checks that is a callable.
+        return not (self.is_always_inline or self.is_never_inline)
+    @property
+    def value(self):
+        """
+        The raw value
+        """
+        return self._inline
+    def __eq__(self, other):
+        # Equal only to an instance of exactly the same class holding an
+        # equal raw value; other types defer to Python via NotImplemented.
+        if type(other) is type(self):
+            return self.value == other.value
+        return NotImplemented
+    def encode(self) -> str:
+        # repr() of the raw setting (quoted string or the callable's repr).
+        return repr(self._inline)
+class TargetOptions:
+    """Target options maps user options from decorators to the
+    ``numba.core.compiler.Flags`` used by lowering and target context.
+    Subclasses declare supported options as class attributes that are
+    ``Mapping`` instances; the attribute name is the option name.
+    """
+    class Mapping:
+        # Describes one option: ``flag_name`` is the attribute set on the
+        # flags object and ``apply`` converts the raw option value first
+        # (identity by default).
+        def __init__(self, flag_name, apply=lambda x: x):
+            self.flag_name = flag_name
+            self.apply = apply
+    def finalize(self, flags, options):
+        """Subclasses can override this method to make target specific
+        customizations of default flags.
+        Parameters
+        ----------
+        flags : Flags
+        options : dict
+        """
+        pass
+    @classmethod
+    def parse_as_flags(cls, flags, options):
+        """Parse target options defined in ``options`` and set ``flags``
+        accordingly.
+        Parameters
+        ----------
+        flags : Flags
+        options : dict
+        Returns the same ``flags`` object after it has been updated.
+        """
+        # A fresh instance is created for each call; mapped options are
+        # applied first, then the subclass hook ``finalize`` runs.
+        opt = cls()
+        opt._apply(flags, options)
+        opt.finalize(flags, options)
+        return flags
+    def _apply(self, flags, options):
+        """Set every recognized option on ``flags``.
+        Raises ``KeyError`` if ``options`` contains a key that has no
+        matching ``Mapping`` attribute on the class.
+        """
+        # Find all Mapping instances in the class
+        mappings = {}
+        cls = type(self)
+        for k in dir(cls):
+            v = getattr(cls, k)
+            if isinstance(v, cls.Mapping):
+                mappings[k] = v
+        # Track which option keys were consumed so leftovers can be reported.
+        used = set()
+        for k, mapping in mappings.items():
+            if k in options:
+                v = mapping.apply(options[k])
+                setattr(flags, mapping.flag_name, v)
+                used.add(k)
+        unused = set(options) - used
+        if unused:
+            # Unread options?
+            m = (
+                f"Unrecognized options: {unused}. "
+                f"Known options are {mappings.keys()}"
+            )
+            raise KeyError(m)

numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl

Potentially problematic release.

numba-cuda 0.19.1__py3-none-any.whl → 0.20.1__py3-none-any.whl