numba-cuda 0.17.0__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. numba_cuda/VERSION +1 -1
  2. numba_cuda/numba/cuda/__init__.py +0 -8
  3. numba_cuda/numba/cuda/_internal/cuda_fp16.py +14225 -0
  4. numba_cuda/numba/cuda/api_util.py +6 -0
  5. numba_cuda/numba/cuda/cgutils.py +1291 -0
  6. numba_cuda/numba/cuda/codegen.py +32 -14
  7. numba_cuda/numba/cuda/compiler.py +113 -10
  8. numba_cuda/numba/cuda/core/caching.py +741 -0
  9. numba_cuda/numba/cuda/core/callconv.py +338 -0
  10. numba_cuda/numba/cuda/core/codegen.py +168 -0
  11. numba_cuda/numba/cuda/core/compiler.py +205 -0
  12. numba_cuda/numba/cuda/core/typed_passes.py +139 -0
  13. numba_cuda/numba/cuda/cudadecl.py +0 -268
  14. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  15. numba_cuda/numba/cuda/cudadrv/driver.py +2 -1
  16. numba_cuda/numba/cuda/cudadrv/nvvm.py +1 -1
  17. numba_cuda/numba/cuda/cudaimpl.py +4 -178
  18. numba_cuda/numba/cuda/debuginfo.py +469 -3
  19. numba_cuda/numba/cuda/device_init.py +0 -1
  20. numba_cuda/numba/cuda/dispatcher.py +309 -11
  21. numba_cuda/numba/cuda/extending.py +2 -1
  22. numba_cuda/numba/cuda/fp16.py +348 -0
  23. numba_cuda/numba/cuda/intrinsics.py +1 -1
  24. numba_cuda/numba/cuda/libdeviceimpl.py +2 -1
  25. numba_cuda/numba/cuda/lowering.py +1833 -8
  26. numba_cuda/numba/cuda/mathimpl.py +2 -90
  27. numba_cuda/numba/cuda/nvvmutils.py +2 -1
  28. numba_cuda/numba/cuda/printimpl.py +2 -1
  29. numba_cuda/numba/cuda/serialize.py +264 -0
  30. numba_cuda/numba/cuda/simulator/__init__.py +2 -0
  31. numba_cuda/numba/cuda/simulator/dispatcher.py +7 -0
  32. numba_cuda/numba/cuda/stubs.py +0 -308
  33. numba_cuda/numba/cuda/target.py +13 -5
  34. numba_cuda/numba/cuda/testing.py +156 -5
  35. numba_cuda/numba/cuda/tests/complex_usecases.py +113 -0
  36. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +110 -0
  37. numba_cuda/numba/cuda/tests/core/test_serialize.py +359 -0
  38. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +33 -0
  39. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +2 -2
  40. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +1 -0
  41. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +1 -1
  42. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +5 -10
  43. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +1 -1
  44. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +381 -0
  45. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +1 -1
  46. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1 -1
  47. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +94 -24
  48. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +37 -23
  49. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +43 -27
  50. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +26 -9
  51. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +27 -2
  52. numba_cuda/numba/cuda/tests/enum_usecases.py +56 -0
  53. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +1 -2
  54. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +1 -1
  55. numba_cuda/numba/cuda/utils.py +785 -0
  56. numba_cuda/numba/cuda/vector_types.py +1 -1
  57. {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/METADATA +18 -4
  58. {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/RECORD +61 -48
  59. numba_cuda/numba/cuda/cpp_function_wrappers.cu +0 -46
  60. {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/WHEEL +0 -0
  61. {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/licenses/LICENSE +0 -0
  62. {numba_cuda-0.17.0.dist-info → numba_cuda-0.18.0.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,478 @@
1
+ import os
2
+
1
3
  from llvmlite import ir
2
- from numba.core import types, cgutils
3
- from numba.core.debuginfo import DIBuilder
4
+ from numba.core import types, config
5
+ from numba.cuda import cgutils
6
+ from numba.core.datamodel.models import ComplexModel, UnionModel, UniTupleModel
7
+ from numba.core.debuginfo import AbstractDIBuilder
4
8
  from numba.cuda.types import GridGroup
5
- from numba.core.datamodel.models import UnionModel
6
9
 
7
10
  _BYTE_SIZE = 8
8
11
 
9
12
 
13
+ class DIBuilder(AbstractDIBuilder):
14
+ DWARF_VERSION = 4
15
+ DEBUG_INFO_VERSION = 3
16
+ DBG_CU_NAME = "llvm.dbg.cu"
17
+ _DEBUG = False
18
+
19
+ def __init__(self, module, filepath, cgctx, directives_only):
20
+ self.module = module
21
+ self.filepath = os.path.abspath(filepath)
22
+ self.difile = self._di_file()
23
+ self.subprograms = []
24
+ self.cgctx = cgctx
25
+
26
+ if directives_only:
27
+ self.emission_kind = "DebugDirectivesOnly"
28
+ else:
29
+ self.emission_kind = "FullDebug"
30
+
31
+ self.initialize()
32
+
33
+ def initialize(self):
34
+ # Create the compile unit now because it is referenced when
35
+ # constructing subprograms
36
+ self.dicompileunit = self._di_compile_unit()
37
+
38
+ def _var_type(self, lltype, size, datamodel=None):
39
+ if self._DEBUG:
40
+ print(
41
+ "-->",
42
+ lltype,
43
+ size,
44
+ datamodel,
45
+ getattr(datamodel, "fe_type", "NO FE TYPE"),
46
+ )
47
+ m = self.module
48
+ bitsize = _BYTE_SIZE * size
49
+
50
+ int_type = (ir.IntType,)
51
+ real_type = ir.FloatType, ir.DoubleType
52
+ # For simple numeric types, choose the closest encoding.
53
+ # We treat all integers as unsigned when there's no known datamodel.
54
+ if isinstance(lltype, int_type + real_type):
55
+ if datamodel is None:
56
+ # This is probably something like an `i8*` member of a struct
57
+ name = str(lltype)
58
+ if isinstance(lltype, int_type):
59
+ ditok = "DW_ATE_unsigned"
60
+ else:
61
+ ditok = "DW_ATE_float"
62
+ else:
63
+ # This is probably a known int/float scalar type
64
+ name = str(datamodel.fe_type)
65
+ if isinstance(datamodel.fe_type, types.Integer):
66
+ if datamodel.fe_type.signed:
67
+ ditok = "DW_ATE_signed"
68
+ else:
69
+ ditok = "DW_ATE_unsigned"
70
+ else:
71
+ ditok = "DW_ATE_float"
72
+ mdtype = m.add_debug_info(
73
+ "DIBasicType",
74
+ {
75
+ "name": name,
76
+ "size": bitsize,
77
+ "encoding": ir.DIToken(ditok),
78
+ },
79
+ )
80
+ elif isinstance(datamodel, ComplexModel):
81
+ # TODO: Is there a better way of determining "this is a complex
82
+ # number"?
83
+ #
84
+ # NOTE: Commented below is the way to generate the metadata for a
85
+ # C99 complex type that's directly supported by DWARF. Numba however
86
+ # generates a struct with real/imag cf. CPython to give a more
87
+ # pythonic feel to inspection.
88
+ #
89
+ # mdtype = m.add_debug_info('DIBasicType', {
90
+ # 'name': f"{datamodel.fe_type} ({str(lltype)})",
91
+ # 'size': bitsize,
92
+ # 'encoding': ir.DIToken('DW_ATE_complex_float'),
93
+ # })
94
+ meta = []
95
+ offset = 0
96
+ for ix, name in enumerate(("real", "imag")):
97
+ component = lltype.elements[ix]
98
+ component_size = self.cgctx.get_abi_sizeof(component)
99
+ component_basetype = m.add_debug_info(
100
+ "DIBasicType",
101
+ {
102
+ "name": str(component),
103
+ "size": _BYTE_SIZE * component_size, # bits
104
+ "encoding": ir.DIToken("DW_ATE_float"),
105
+ },
106
+ )
107
+ derived_type = m.add_debug_info(
108
+ "DIDerivedType",
109
+ {
110
+ "tag": ir.DIToken("DW_TAG_member"),
111
+ "name": name,
112
+ "baseType": component_basetype,
113
+ "size": _BYTE_SIZE
114
+ * component_size, # DW_TAG_member size is in bits
115
+ "offset": offset,
116
+ },
117
+ )
118
+ meta.append(derived_type)
119
+ offset += _BYTE_SIZE * component_size # offset is in bits
120
+ mdtype = m.add_debug_info(
121
+ "DICompositeType",
122
+ {
123
+ "tag": ir.DIToken("DW_TAG_structure_type"),
124
+ "name": f"{datamodel.fe_type} ({str(lltype)})",
125
+ "identifier": str(lltype),
126
+ "elements": m.add_metadata(meta),
127
+ "size": offset,
128
+ },
129
+ is_distinct=True,
130
+ )
131
+ elif isinstance(datamodel, UniTupleModel):
132
+ element = lltype.element
133
+ el_size = self.cgctx.get_abi_sizeof(element)
134
+ basetype = self._var_type(element, el_size)
135
+ name = f"{datamodel.fe_type} ({str(lltype)})"
136
+ count = size // el_size
137
+ mdrange = m.add_debug_info(
138
+ "DISubrange",
139
+ {
140
+ "count": count,
141
+ },
142
+ )
143
+ mdtype = m.add_debug_info(
144
+ "DICompositeType",
145
+ {
146
+ "tag": ir.DIToken("DW_TAG_array_type"),
147
+ "baseType": basetype,
148
+ "name": name,
149
+ "size": bitsize,
150
+ "identifier": str(lltype),
151
+ "elements": m.add_metadata([mdrange]),
152
+ },
153
+ )
154
+ elif isinstance(lltype, ir.PointerType):
155
+ model = getattr(datamodel, "_pointee_model", None)
156
+ basetype = self._var_type(
157
+ lltype.pointee, self.cgctx.get_abi_sizeof(lltype.pointee), model
158
+ )
159
+ mdtype = m.add_debug_info(
160
+ "DIDerivedType",
161
+ {
162
+ "tag": ir.DIToken("DW_TAG_pointer_type"),
163
+ "baseType": basetype,
164
+ "size": _BYTE_SIZE * self.cgctx.get_abi_sizeof(lltype),
165
+ },
166
+ )
167
+ elif isinstance(lltype, ir.LiteralStructType):
168
+ # Struct type
169
+ meta = []
170
+ offset = 0
171
+ if datamodel is None or not datamodel.inner_models():
172
+ name = f"Anonymous struct ({str(lltype)})"
173
+ for field_id, element in enumerate(lltype.elements):
174
+ size = self.cgctx.get_abi_sizeof(element)
175
+ basetype = self._var_type(element, size)
176
+ derived_type = m.add_debug_info(
177
+ "DIDerivedType",
178
+ {
179
+ "tag": ir.DIToken("DW_TAG_member"),
180
+ "name": f"<field {field_id}>",
181
+ "baseType": basetype,
182
+ "size": _BYTE_SIZE
183
+ * size, # DW_TAG_member size is in bits
184
+ "offset": offset,
185
+ },
186
+ )
187
+ meta.append(derived_type)
188
+ offset += _BYTE_SIZE * size # offset is in bits
189
+ else:
190
+ name = f"{datamodel.fe_type} ({str(lltype)})"
191
+ for element, field, model in zip(
192
+ lltype.elements, datamodel._fields, datamodel.inner_models()
193
+ ):
194
+ size = self.cgctx.get_abi_sizeof(element)
195
+ basetype = self._var_type(element, size, datamodel=model)
196
+ derived_type = m.add_debug_info(
197
+ "DIDerivedType",
198
+ {
199
+ "tag": ir.DIToken("DW_TAG_member"),
200
+ "name": field,
201
+ "baseType": basetype,
202
+ "size": _BYTE_SIZE
203
+ * size, # DW_TAG_member size is in bits
204
+ "offset": offset,
205
+ },
206
+ )
207
+ meta.append(derived_type)
208
+ offset += _BYTE_SIZE * size # offset is in bits
209
+
210
+ mdtype = m.add_debug_info(
211
+ "DICompositeType",
212
+ {
213
+ "tag": ir.DIToken("DW_TAG_structure_type"),
214
+ "name": name,
215
+ "identifier": str(lltype),
216
+ "elements": m.add_metadata(meta),
217
+ "size": offset,
218
+ },
219
+ is_distinct=True,
220
+ )
221
+ elif isinstance(lltype, ir.ArrayType):
222
+ element = lltype.element
223
+ el_size = self.cgctx.get_abi_sizeof(element)
224
+ basetype = self._var_type(element, el_size)
225
+ count = size // el_size
226
+ mdrange = m.add_debug_info(
227
+ "DISubrange",
228
+ {
229
+ "count": count,
230
+ },
231
+ )
232
+ mdtype = m.add_debug_info(
233
+ "DICompositeType",
234
+ {
235
+ "tag": ir.DIToken("DW_TAG_array_type"),
236
+ "baseType": basetype,
237
+ "name": str(lltype),
238
+ "size": bitsize,
239
+ "identifier": str(lltype),
240
+ "elements": m.add_metadata([mdrange]),
241
+ },
242
+ )
243
+ else:
244
+ # For all other types, describe it as sequence of bytes
245
+ count = size
246
+ mdrange = m.add_debug_info(
247
+ "DISubrange",
248
+ {
249
+ "count": count,
250
+ },
251
+ )
252
+ mdbase = m.add_debug_info(
253
+ "DIBasicType",
254
+ {
255
+ "name": "byte",
256
+ "size": _BYTE_SIZE,
257
+ "encoding": ir.DIToken("DW_ATE_unsigned_char"),
258
+ },
259
+ )
260
+ mdtype = m.add_debug_info(
261
+ "DICompositeType",
262
+ {
263
+ "tag": ir.DIToken("DW_TAG_array_type"),
264
+ "baseType": mdbase,
265
+ "name": str(lltype),
266
+ "size": bitsize,
267
+ "identifier": str(lltype),
268
+ "elements": m.add_metadata([mdrange]),
269
+ },
270
+ )
271
+
272
+ return mdtype
273
+
274
+ def mark_variable(
275
+ self,
276
+ builder,
277
+ allocavalue,
278
+ name,
279
+ lltype,
280
+ size,
281
+ line,
282
+ datamodel=None,
283
+ argidx=None,
284
+ ):
285
+ arg_index = 0 if argidx is None else argidx
286
+ m = self.module
287
+ fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3)
288
+ decl = cgutils.get_or_insert_function(m, fnty, "llvm.dbg.declare")
289
+
290
+ mdtype = self._var_type(lltype, size, datamodel=datamodel)
291
+ name = name.replace(".", "$") # for gdb to work correctly
292
+ mdlocalvar = m.add_debug_info(
293
+ "DILocalVariable",
294
+ {
295
+ "name": name,
296
+ "arg": arg_index,
297
+ "scope": self.subprograms[-1],
298
+ "file": self.difile,
299
+ "line": line,
300
+ "type": mdtype,
301
+ },
302
+ )
303
+ mdexpr = m.add_debug_info("DIExpression", {})
304
+
305
+ return builder.call(decl, [allocavalue, mdlocalvar, mdexpr])
306
+
307
+ def mark_location(self, builder, line):
308
+ builder.debug_metadata = self._add_location(line)
309
+
310
+ def mark_subprogram(self, function, qualname, argnames, argtypes, line):
311
+ name = qualname
312
+ argmap = dict(zip(argnames, argtypes))
313
+ di_subp = self._add_subprogram(
314
+ name=name,
315
+ linkagename=function.name,
316
+ line=line,
317
+ function=function,
318
+ argmap=argmap,
319
+ )
320
+ function.set_metadata("dbg", di_subp)
321
+
322
+ def finalize(self):
323
+ dbgcu = cgutils.get_or_insert_named_metadata(
324
+ self.module, self.DBG_CU_NAME
325
+ )
326
+ dbgcu.add(self.dicompileunit)
327
+ self._set_module_flags()
328
+
329
+ #
330
+ # Internal APIs
331
+ #
332
+
333
+ def _set_module_flags(self):
334
+ """Set the module flags metadata"""
335
+ module = self.module
336
+ mflags = cgutils.get_or_insert_named_metadata(
337
+ module, "llvm.module.flags"
338
+ )
339
+ # Set *require* behavior to warning
340
+ # See http://llvm.org/docs/LangRef.html#module-flags-metadata
341
+ require_warning_behavior = self._const_int(2)
342
+ if self.DWARF_VERSION is not None:
343
+ dwarf_version = module.add_metadata(
344
+ [
345
+ require_warning_behavior,
346
+ "Dwarf Version",
347
+ self._const_int(self.DWARF_VERSION),
348
+ ]
349
+ )
350
+ if dwarf_version not in mflags.operands:
351
+ mflags.add(dwarf_version)
352
+ debuginfo_version = module.add_metadata(
353
+ [
354
+ require_warning_behavior,
355
+ "Debug Info Version",
356
+ self._const_int(self.DEBUG_INFO_VERSION),
357
+ ]
358
+ )
359
+ if debuginfo_version not in mflags.operands:
360
+ mflags.add(debuginfo_version)
361
+
362
+ def _add_subprogram(self, name, linkagename, line, function, argmap):
363
+ """Emit subprogram metadata"""
364
+ subp = self._di_subprogram(name, linkagename, line, function, argmap)
365
+ self.subprograms.append(subp)
366
+ return subp
367
+
368
+ def _add_location(self, line):
369
+ """Emit location metadata"""
370
+ loc = self._di_location(line)
371
+ return loc
372
+
373
+ @classmethod
374
+ def _const_int(cls, num, bits=32):
375
+ """Util to create constant int in metadata"""
376
+ return ir.IntType(bits)(num)
377
+
378
+ @classmethod
379
+ def _const_bool(cls, boolean):
380
+ """Util to create constant boolean in metadata"""
381
+ return ir.IntType(1)(boolean)
382
+
383
+ #
384
+ # Helpers to emit the metadata nodes
385
+ #
386
+
387
+ def _di_file(self):
388
+ return self.module.add_debug_info(
389
+ "DIFile",
390
+ {
391
+ "directory": os.path.dirname(self.filepath),
392
+ "filename": os.path.basename(self.filepath),
393
+ },
394
+ )
395
+
396
+ def _di_compile_unit(self):
397
+ return self.module.add_debug_info(
398
+ "DICompileUnit",
399
+ {
400
+ "language": ir.DIToken("DW_LANG_C_plus_plus"),
401
+ "file": self.difile,
402
+ # Numba has to pretend to be clang to ensure the prologue is skipped
403
+ # correctly in gdb. See:
404
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/amd64-tdep.c;h=e563d369d8cb3eb3c2f732c2fa850ec70ba8d63b;hb=a4b0231e179607e47b1cdf1fe15c5dc25e482fad#l2521
405
+ # Note the "producer_is_llvm" call to specialise the prologue
406
+ # handling, this is defined here:
407
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=blob;f=gdb/producer.c;h=cdfd80d904c09394febd18749bb90359b2d128cc;hb=a4b0231e179607e47b1cdf1fe15c5dc25e482fad#l124
408
+ # and to get a match for this condition the 'producer' must start
409
+ # with "clang ", hence the following...
410
+ "producer": "clang (Numba)",
411
+ "runtimeVersion": 0,
412
+ "isOptimized": config.OPT != 0,
413
+ "emissionKind": ir.DIToken(self.emission_kind),
414
+ },
415
+ is_distinct=True,
416
+ )
417
+
418
+ def _di_subroutine_type(self, line, function, argmap):
419
+ # The function call conv needs encoding.
420
+ llfunc = function
421
+ md = []
422
+
423
+ for idx, llarg in enumerate(llfunc.args):
424
+ if not llarg.name.startswith("arg."):
425
+ name = llarg.name.replace(".", "$") # for gdb to work correctly
426
+ lltype = llarg.type
427
+ size = self.cgctx.get_abi_sizeof(lltype)
428
+ mdtype = self._var_type(lltype, size, datamodel=None)
429
+ md.append(mdtype)
430
+
431
+ for idx, (name, nbtype) in enumerate(argmap.items()):
432
+ name = name.replace(".", "$") # for gdb to work correctly
433
+ datamodel = self.cgctx.data_model_manager[nbtype]
434
+ lltype = self.cgctx.get_value_type(nbtype)
435
+ size = self.cgctx.get_abi_sizeof(lltype)
436
+ mdtype = self._var_type(lltype, size, datamodel=datamodel)
437
+ md.append(mdtype)
438
+
439
+ return self.module.add_debug_info(
440
+ "DISubroutineType",
441
+ {
442
+ "types": self.module.add_metadata(md),
443
+ },
444
+ )
445
+
446
+ def _di_subprogram(self, name, linkagename, line, function, argmap):
447
+ return self.module.add_debug_info(
448
+ "DISubprogram",
449
+ {
450
+ "name": name,
451
+ "linkageName": linkagename,
452
+ "scope": self.difile,
453
+ "file": self.difile,
454
+ "line": line,
455
+ "type": self._di_subroutine_type(line, function, argmap),
456
+ "isLocal": False,
457
+ "isDefinition": True,
458
+ "scopeLine": line,
459
+ "isOptimized": config.OPT != 0,
460
+ "unit": self.dicompileunit,
461
+ },
462
+ is_distinct=True,
463
+ )
464
+
465
+ def _di_location(self, line):
466
+ return self.module.add_debug_info(
467
+ "DILocation",
468
+ {
469
+ "line": line,
470
+ "column": 1,
471
+ "scope": self.subprograms[-1],
472
+ },
473
+ )
474
+
475
+
10
476
  class CUDADIBuilder(DIBuilder):
11
477
  def __init__(self, module, filepath, cgctx, directives_only):
12
478
  super().__init__(module, filepath, cgctx, directives_only)
@@ -29,7 +29,6 @@ from .stubs import (
29
29
  activemask,
30
30
  lanemask_lt,
31
31
  nanosleep,
32
- fp16,
33
32
  _vector_type_stubs,
34
33
  )
35
34
  from .intrinsics import (