pyomp-0.5.0-cp314-cp314-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba/openmp/__init__.py +106 -0
- numba/openmp/_version.py +34 -0
- numba/openmp/analysis.py +251 -0
- numba/openmp/compiler.py +402 -0
- numba/openmp/config.py +27 -0
- numba/openmp/decorators.py +27 -0
- numba/openmp/exceptions.py +26 -0
- numba/openmp/ir_utils.py +4 -0
- numba/openmp/libs/openmp/lib/libgomp.1.dylib +0 -0
- numba/openmp/libs/openmp/lib/libgomp.dylib +0 -0
- numba/openmp/libs/openmp/lib/libiomp5.dylib +0 -0
- numba/openmp/libs/openmp/lib/libomp.dylib +0 -0
- numba/openmp/libs/openmp/patches/14.0.6/0001-BACKPORT-Fix-for-CUDA-OpenMP-RTL.patch +39 -0
- numba/openmp/libs/openmp/patches/14.0.6/0002-Fix-missing-includes.patch +12 -0
- numba/openmp/libs/openmp/patches/14.0.6/0003-Link-static-LLVM-libs.patch +13 -0
- numba/openmp/libs/openmp/patches/15.0.7/0001-Fix-missing-includes.patch +14 -0
- numba/openmp/libs/openmp/patches/15.0.7/0002-Link-LLVM-statically.patch +101 -0
- numba/openmp/libs/openmp/patches/15.0.7/0003-Disable-opaque-pointers-DeviceRTL-bitcode.patch +12 -0
- numba/openmp/libs/openmp/patches/16.0.6/0001-Load-plugins-from-install-directory.patch +53 -0
- numba/openmp/libs/openmp/patches/16.0.6/0002-Link-LLVM-statically.patch +218 -0
- numba/openmp/libs/openmp/patches/20.1.8/0001-Enable-standalone-build.patch +13 -0
- numba/openmp/libs/openmp/patches/20.1.8/0002-Link-statically-LLVM.patch +24 -0
- numba/openmp/libs/openmp/patches/20.1.8/0003-Do-not-build-liboffload.patch +12 -0
- numba/openmp/libs/pass/CGIntrinsicsOpenMP.cpp +2939 -0
- numba/openmp/libs/pass/CGIntrinsicsOpenMP.h +606 -0
- numba/openmp/libs/pass/CMakeLists.txt +57 -0
- numba/openmp/libs/pass/DebugOpenMP.cpp +17 -0
- numba/openmp/libs/pass/DebugOpenMP.h +28 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP.cpp +837 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP.h +13 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP_CAPI.h +23 -0
- numba/openmp/libs/pass/libIntrinsicsOpenMP.dylib +0 -0
- numba/openmp/link_utils.py +126 -0
- numba/openmp/llvm_pass.py +48 -0
- numba/openmp/llvmlite_extensions.py +75 -0
- numba/openmp/omp_context.py +242 -0
- numba/openmp/omp_grammar.py +696 -0
- numba/openmp/omp_ir.py +2105 -0
- numba/openmp/omp_lower.py +3125 -0
- numba/openmp/omp_runtime.py +107 -0
- numba/openmp/overloads.py +53 -0
- numba/openmp/parser.py +6 -0
- numba/openmp/tags.py +532 -0
- numba/openmp/tests/test_openmp.py +5056 -0
- pyomp-0.5.0.dist-info/METADATA +193 -0
- pyomp-0.5.0.dist-info/RECORD +52 -0
- pyomp-0.5.0.dist-info/WHEEL +6 -0
- pyomp-0.5.0.dist-info/licenses/LICENSE +25 -0
- pyomp-0.5.0.dist-info/licenses/LICENSE-OPENMP.txt +361 -0
- pyomp-0.5.0.dist-info/top_level.txt +3 -0
- pyomp.dylibs/libc++.1.0.dylib +0 -0
- pyomp.dylibs/libzstd.1.5.7.dylib +0 -0
numba/openmp/compiler.py
ADDED
@@ -0,0 +1,402 @@
from numba.core import compiler, compiler_machinery, cpu, ir, types
from numba import cuda as numba_cuda
from numba.core.controlflow import CFGraph
from numba.cuda import descriptor as cuda_descriptor
from numba.cuda.target import CUDACallConv
from numba.core.lowering import Lower
from functools import cached_property
from numba.core.callconv import (
    RETCODE_OK,
)

from numba.core.codegen import AOTCodeLibrary, JITCodeLibrary
from numba.core.dispatcher import _FunctionCompiler
from numba.core.compiler_machinery import PassManager
from numba.core.compiler import DefaultPassBuilder
from numba.core.untyped_passes import (
    TranslateByteCode,
    FixupArgs,
    IRProcessing,
    InlineClosureLikes,
    RewriteSemanticConstants,
    DeadBranchPrune,
    GenericRewrites,
    RewriteDynamicRaises,
    MakeFunctionToJitFunction,
    InlineInlinables,
    FindLiterallyCalls,
    LiteralUnroll,
    LiteralPropagationSubPipelinePass,
    WithLifting,
)
import llvmlite.binding as ll
import llvmlite.ir as lir

from .config import DEBUG_OPENMP
from .llvm_pass import run_intrinsics_openmp_pass


class OnlyLower(compiler.CompilerBase):
    def __init__(self, typingctx, targetctx, library, args, restype, flags, locals):
        super().__init__(typingctx, targetctx, library, args, restype, flags, locals)
        self.state.typemap = targetctx.state_copy.typemap
        self.state.calltypes = targetctx.state_copy.calltypes

    def define_pipelines(self):
        pms = []
        if not self.state.flags.force_pyobject:
            pms.append(
                compiler.DefaultPassBuilder.define_nopython_lowering_pipeline(
                    self.state
                )
            )
        return pms


class OnlyLowerCUDA(numba_cuda.compiler.CUDACompiler):
    def __init__(self, typingctx, targetctx, library, args, restype, flags, locals):
        super().__init__(typingctx, targetctx, library, args, restype, flags, locals)
        self.state.typemap = targetctx.state_copy.typemap
        self.state.calltypes = targetctx.state_copy.calltypes

    def define_pipelines(self):
        pm = compiler_machinery.PassManager("cuda")
        # Numba <=0.57 implements CUDALegalization to support CUDA <11.2
        # versions. Numba >0.58 drops this support. We enclose in a try-except
        # block to avoid errors, delegating to Numba support.
        try:
            pm.add_pass(numba_cuda.compiler.CUDALegalization, "CUDA legalization")
        except AttributeError:
            pass
        lowering_passes = self.define_cuda_lowering_pipeline(self.state)
        pm.passes.extend(lowering_passes.passes)
        pm.finalize()
        return [pm]


def compute_cfg_from_llvm_blocks(blocks):
    cfg = CFGraph()
    name_to_index = {}
    for b in blocks:
        # print("b:", b.name, type(b.name))
        cfg.add_node(b.name)

    for bindex, b in enumerate(blocks):
        term = b.terminator
        # print("term:", b.name, term, type(term))
        if isinstance(term, lir.instructions.Branch):
            cfg.add_edge(b.name, term.operands[0].name)
            name_to_index[b.name] = (bindex, [term.operands[0].name])
        elif isinstance(term, lir.instructions.ConditionalBranch):
            cfg.add_edge(b.name, term.operands[1].name)
            cfg.add_edge(b.name, term.operands[2].name)
            name_to_index[b.name] = (
                bindex,
                [term.operands[1].name, term.operands[2].name],
            )
        elif isinstance(term, lir.instructions.Ret):
            name_to_index[b.name] = (bindex, [])
        elif isinstance(term, lir.instructions.SwitchInstr):
            cfg.add_edge(b.name, term.default.name)
            for _, blk in term.cases:
                cfg.add_edge(b.name, blk.name)
            out_blks = [x[1].name for x in term.cases]
            out_blks.append(term.default.name)
            name_to_index[b.name] = (bindex, out_blks)
        elif isinstance(term, lir.instructions.Unreachable):
            pass
        else:
            print("Unknown term:", term, type(term))
            assert False  # Should never get here.

    cfg.set_entry_point("entry")
    cfg.process()
    return cfg, name_to_index


def compute_llvm_topo_order(blocks):
    cfg, name_to_index = compute_cfg_from_llvm_blocks(blocks)
    post_order = []
    seen = set()

    def _dfs_rec(node):
        if node not in seen:
            seen.add(node)
            succs = cfg._succs[node]

            # If there are no successors then we are done.
            # This is the case for an unreachable.
            if not succs:
                return

            # This is needed so that the inside of loops are
            # handled first before their exits.
            nexts = name_to_index[node][1]
            if len(nexts) == 2:
                succs = [nexts[1], nexts[0]]

            for dest in succs:
                if (node, dest) not in cfg._back_edges:
                    _dfs_rec(dest)
            post_order.append(node)

    _dfs_rec(cfg.entry_point())
    post_order.reverse()
    return post_order, name_to_index


class CollectUnknownLLVMVarsPrivate(lir.transforms.Visitor):
    def __init__(self):
        self.active_openmp_directives = []
        self.start_num = 0

    # Override the default function visitor to go in topo order
    def visit_Function(self, func):
        self._function = func
        if len(func.blocks) == 0:
            return None
        if DEBUG_OPENMP >= 1:
            print("Collect visit_Function:", func.blocks, type(func.blocks))
        topo_order, name_to_index = compute_llvm_topo_order(func.blocks)
        topo_order = list(topo_order)
        if DEBUG_OPENMP >= 1:
            print("topo_order:", topo_order)

        for bbname in topo_order:
            if DEBUG_OPENMP >= 1:
                print("Visiting block:", bbname)
            self.visit_BasicBlock(func.blocks[name_to_index[bbname][0]])

        if DEBUG_OPENMP >= 1:
            print("Collect visit_Function done")

    def visit_Instruction(self, instr):
        if len(self.active_openmp_directives) > 0:
            if DEBUG_OPENMP >= 1:
                print("Collect instr:", instr, type(instr))
            for op in instr.operands:
                if isinstance(op, lir.AllocaInstr):
                    if DEBUG_OPENMP >= 1:
                        print("Collect AllocaInstr operand:", op, op.name)
                    for directive in self.active_openmp_directives:
                        directive.save_orig_numba_openmp.alloca(op, None)
                else:
                    if DEBUG_OPENMP >= 2:
                        print("non-alloca:", op, type(op))
                    pass

        if isinstance(instr, lir.CallInstr):
            if instr.callee.name == "llvm.directive.region.entry":
                if DEBUG_OPENMP >= 1:
                    print(
                        "Collect Found openmp region entry:",
                        instr,
                        type(instr),
                        "\n",
                        instr.tags,
                        type(instr.tags),
                        id(self),
                        len(self.active_openmp_directives),
                    )
                self.active_openmp_directives.append(instr)
                if DEBUG_OPENMP >= 1:
                    print("post append:", len(self.active_openmp_directives))
                assert hasattr(instr, "save_orig_numba_openmp")
            if instr.callee.name == "llvm.directive.region.exit":
                if DEBUG_OPENMP >= 1:
                    print(
                        "Collect Found openmp region exit:",
                        instr,
                        type(instr),
                        "\n",
                        instr.tags,
                        type(instr.tags),
                        id(self),
                        len(self.active_openmp_directives),
                    )
                enter_directive = self.active_openmp_directives.pop()
                enter_directive.save_orig_numba_openmp.post_lowering_process_alloca_queue(
                    enter_directive
                )


def post_lowering_openmp(mod):
    if DEBUG_OPENMP >= 1:
        print("post_lowering_openmp")

    # This will gather the information.
    collect_fixup = CollectUnknownLLVMVarsPrivate()
    collect_fixup.visit(mod)

    if DEBUG_OPENMP >= 1:
        print("post_lowering_openmp done")


class CustomContext(cpu.CPUContext):
    def post_lowering(self, mod, library):
        if hasattr(library, "openmp") and library.openmp:
            post_lowering_openmp(mod)
        super().post_lowering(mod, library)


class OpenmpCPUTargetContext(CustomContext):
    def __init__(self, name, typingctx, target="cpu"):
        super().__init__(typingctx, target)
        self.device_func_name = name


class OpenmpCUDATargetContext(cuda_descriptor.CUDATargetContext):
    def __init__(self, name, typingctx, target="cuda"):
        super().__init__(typingctx, target)
        self.device_func_name = name

    def post_lowering(self, mod, library):
        if hasattr(library, "openmp") and library.openmp:
            post_lowering_openmp(mod)
        super().post_lowering(mod, library)

    @cached_property
    def call_conv(self):
        return CUDACallConv(self)


class LowerNoSROA(Lower):
    @property
    def _disable_sroa_like_opt(self):
        # Always return True for this instance
        return True

    def lower_assign_inst(self, orig, inst):
        # This fixes assignments for Arg instructions when the target is a
        # CPointer. It sets the backing storage to the pointer of the argument
        # itself.
        if isinstance(self.context, OpenmpCPUTargetContext) or isinstance(
            self.context, OpenmpCUDATargetContext
        ):
            value = inst.value
            if isinstance(value, ir.Arg):
                argname = value.name
                argty = self.typeof("arg." + argname)
                if isinstance(argty, types.CPointer):
                    llty = self.context.get_value_type(argty)
                    ptr = lir.values.Argument(self.module, llty, "arg." + argname)
                    self.varmap[value.name] = ptr
                    return

        return orig(self, inst)

    def lower_return_inst(self, orig, inst):
        if isinstance(self.context, OpenmpCUDATargetContext):
            # This fixes Return instructions for CUDA device functions in an
            # OpenMP target region. It avoids setting a value to the return
            # value pointer argument, which otherwise breaks OpenMP code
            # generation (looks like an upstream miscompilation) by DCE any
            # memory effects (e.g., to other pointer arguments from a tofrom
            # mapping.)
            if self.fndesc.qualname == self.context.device_func_name:
                self.call_conv._return_errcode_raw(self.builder, RETCODE_OK)
                return
        return orig(self, inst)


class CustomCPUCodeLibrary(JITCodeLibrary):
    def add_llvm_module(self, ll_module):
        lowered_module = run_intrinsics_openmp_pass(ll_module)
        super().add_llvm_module(lowered_module)

    def _finalize_specific(self):
        super()._finalize_specific()
        # Run target offloading descriptor registration functions, if there are any.
        import ctypes

        ee = self._codegen._engine._ee
        for func in self.get_defined_functions():
            if not func.name.startswith(".omp_offloading.descriptor_reg"):
                continue
            addr = ee.get_function_address(func.name)
            reg = ctypes.CFUNCTYPE(None)(addr)
            try:
                reg()
            except Exception:
                raise RuntimeError("error registering OpenMP offloading descriptor")


class CustomFunctionCompiler(_FunctionCompiler):
    def _customize_flags(self, flags):
        # We need to disable SSA form for OpenMP analysis to detect variables
        # used within regions.
        flags.enable_ssa = False
        return flags


class CustomCompiler(compiler.CompilerBase):
    @staticmethod
    def custom_untyped_pipeline(state, name="untyped-openmp"):
        """Returns an untyped part of the nopython OpenMP pipeline"""
        pm = PassManager(name)
        if state.func_ir is None:
            pm.add_pass(TranslateByteCode, "analyzing bytecode")
            pm.add_pass(FixupArgs, "fix up args")
        pm.add_pass(IRProcessing, "processing IR")

        # inline closures early in case they are using nonlocal's
        # see issue #6585.
        pm.add_pass(InlineClosureLikes, "inline calls to locally defined closures")

        # pre typing
        if not state.flags.no_rewrites:
            pm.add_pass(RewriteSemanticConstants, "rewrite semantic constants")
            pm.add_pass(DeadBranchPrune, "dead branch pruning")
            pm.add_pass(GenericRewrites, "nopython rewrites")

        pm.add_pass(RewriteDynamicRaises, "rewrite dynamic raises")

        # convert any remaining closures into functions
        pm.add_pass(
            MakeFunctionToJitFunction, "convert make_function into JIT functions"
        )
        # inline functions that have been determined as inlinable and rerun
        # branch pruning, this needs to be run after closures are inlined as
        # the IR repr of a closure masks call sites if an inlinable is called
        # inside a closure
        pm.add_pass(InlineInlinables, "inline inlinable functions")
        if not state.flags.no_rewrites:
            pm.add_pass(DeadBranchPrune, "dead branch pruning")

        pm.add_pass(FindLiterallyCalls, "find literally calls")
        pm.add_pass(LiteralUnroll, "handles literal_unroll")

        if state.flags.enable_ssa:
            assert False, "SSA form is not supported in OpenMP"

        pm.add_pass(LiteralPropagationSubPipelinePass, "Literal propagation")
        # Run WithLifting late to for make_implicit_explicit to work. TODO: We
        # should create a pass that does this instead of replicating and hacking
        # the untyped pipeline. This handling may also negatively affect
        # optimizations.
        pm.add_pass(WithLifting, "Handle with contexts")

        pm.finalize()
        return pm

    def define_pipelines(self):
        # compose pipeline from untyped, typed and lowering parts
        dpb = DefaultPassBuilder
        pm = PassManager("omp")
        untyped_passes = self.custom_untyped_pipeline(self.state)
        pm.passes.extend(untyped_passes.passes)

        typed_passes = dpb.define_typed_pipeline(self.state)
        pm.passes.extend(typed_passes.passes)

        lowering_passes = dpb.define_nopython_lowering_pipeline(self.state)
        pm.passes.extend(lowering_passes.passes)

        pm.finalize()
        return [pm]


class CustomAOTCPUCodeLibrary(AOTCodeLibrary):
    def add_llvm_module(self, ll_module):
        lowered_module = run_intrinsics_openmp_pass(ll_module)
        super().add_llvm_module(lowered_module)
numba/openmp/config.py
ADDED
@@ -0,0 +1,27 @@
import os
import warnings
from numba.core import config
from pathlib import Path

libpath = Path(__file__).absolute().parent / "libs"


def _safe_readenv(name, ctor, default):
    value = os.environ.get(name, default)
    try:
        return ctor(value)
    except Exception:
        warnings.warn(
            "environ %s defined but failed to parse '%s'" % (name, value),
            RuntimeWarning,
        )
        return default


DEBUG_OPENMP = _safe_readenv("NUMBA_DEBUG_OPENMP", int, 0)
if DEBUG_OPENMP > 0 and config.DEBUG_ARRAY_OPT == 0:
    config.DEBUG_ARRAY_OPT = 1
DEBUG_OPENMP_LLVM_PASS = _safe_readenv("NUMBA_DEBUG_OPENMP_LLVM_PASS", int, 0)
OPENMP_DISABLED = _safe_readenv("NUMBA_OPENMP_DISABLED", int, 0)
# Use toolchain for device code compilation by default to avoid issues with libomptarget compatibility checks.
OPENMP_DEVICE_TOOLCHAIN = _safe_readenv("NUMBA_OPENMP_DEVICE_TOOLCHAIN", int, 1)
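These flags are read once from the environment when numba.openmp.config is first imported, so they have to be set beforehand. A minimal sketch of enabling the debug output (the variable names come straight from the listing above; driving them from Python like this is just one option and assumes numba.openmp has not been imported yet):

import os

# config.py calls _safe_readenv() at import time, so set these first.
os.environ["NUMBA_DEBUG_OPENMP"] = "1"             # verbose OpenMP compilation output
os.environ["NUMBA_OPENMP_DEVICE_TOOLCHAIN"] = "1"  # default: compile device code with the toolchain

import numba.openmp  # noqa: E402  # picks up the variables above on first import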

numba/openmp/decorators.py
ADDED

@@ -0,0 +1,27 @@
import warnings
import numba

from .compiler import (
    CustomCompiler,
    CustomFunctionCompiler,
)


def jit(*args, **kws):
    """
    Equivalent to jit(nopython=True, nogil=True)
    """
    if "nopython" in kws:
        warnings.warn("nopython is set for njit and is ignored", RuntimeWarning)
    if "forceobj" in kws:
        warnings.warn("forceobj is set for njit and is ignored", RuntimeWarning)
        del kws["forceobj"]
    kws.update({"nopython": True, "nogil": True})
    dispatcher = numba.jit(*args, **kws)
    dispatcher._compiler.__class__ = CustomFunctionCompiler
    dispatcher._compiler.pipeline_class = CustomCompiler
    return dispatcher


def njit(*args, **kws):
    return jit(*args, **kws)
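decorators.py is the user-facing entry point: jit/njit force nopython=True and nogil=True and swap the dispatcher's compiler for the OpenMP-aware CustomCompiler from compiler.py above. A rough usage sketch in the style of published PyOMP examples (the openmp_context re-export and the directive string are assumptions based on omp_context.py in the file list, not a verbatim excerpt from this wheel):

from numba.openmp import njit
from numba.openmp import openmp_context as openmp  # assumed re-export of the context manager in omp_context.py

@njit  # forces nopython=True, nogil=True and installs CustomCompiler
def sum_squares(n):
    total = 0.0
    # The directive string is parsed by the package's OpenMP grammar at compile time.
    with openmp("parallel for reduction(+:total)"):
        for i in range(n):
            total += i * i
    return total

print(sum_squares(1_000_000))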

numba/openmp/exceptions.py
ADDED

@@ -0,0 +1,26 @@
class UnspecifiedVarInDefaultNone(Exception):
    pass


class ParallelForExtraCode(Exception):
    pass


class ParallelForWrongLoopCount(Exception):
    pass


class ParallelForInvalidCollapseCount(Exception):
    pass


class NonconstantOpenmpSpecification(Exception):
    pass


class NonStringOpenmpSpecification(Exception):
    pass


class MultipleNumThreadsClauses(Exception):
    pass
numba/openmp/ir_utils.py
ADDED

numba/openmp/libs/openmp/lib/libgomp.1.dylib
ADDED
Binary file

numba/openmp/libs/openmp/lib/libgomp.dylib
ADDED
Binary file

numba/openmp/libs/openmp/lib/libiomp5.dylib
ADDED
Binary file

numba/openmp/libs/openmp/lib/libomp.dylib
ADDED
Binary file

numba/openmp/libs/openmp/patches/14.0.6/0001-BACKPORT-Fix-for-CUDA-OpenMP-RTL.patch
ADDED

@@ -0,0 +1,39 @@
diff --git a/libomptarget/plugins/cuda/src/rtl.cpp b/libomptarget/plugins/cuda/src/rtl.cpp
index 0ca05f0ec3a0..16da3f434bba 100644
--- a/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/libomptarget/plugins/cuda/src/rtl.cpp
@@ -234,6 +234,7 @@ template <typename T> class ResourcePoolTy {
   std::mutex Mutex;
   /// Pool of resources.
   std::vector<T> Resources;
+  std::vector<T> Pool;
   /// A reference to the corresponding allocator.
   AllocatorTy<T> Allocator;

@@ -243,11 +244,13 @@ template <typename T> class ResourcePoolTy {
     auto CurSize = Resources.size();
     assert(Size > CurSize && "Unexpected smaller size");
     Resources.reserve(Size);
+    Pool.reserve(Size);
     for (auto I = CurSize; I < Size; ++I) {
       T NewItem;
       int Ret = Allocator.create(NewItem);
       if (Ret != OFFLOAD_SUCCESS)
         return false;
+      Pool.push_back(NewItem);
       Resources.push_back(NewItem);
     }
     return true;
@@ -308,8 +311,9 @@ public:
   /// Released all stored resources and clear the pool.
   /// Note: This function is not thread safe. Be sure to guard it if necessary.
   void clear() noexcept {
-    for (auto &R : Resources)
+    for (auto &R : Pool)
       (void)Allocator.destroy(R);
+    Pool.clear();
     Resources.clear();
   }
 };
--
2.29.1

numba/openmp/libs/openmp/patches/14.0.6/0002-Fix-missing-includes.patch
ADDED

@@ -0,0 +1,12 @@
diff -Naur a/libomptarget/include/Debug.h b/libomptarget/include/Debug.h
--- a/libomptarget/include/Debug.h
+++ b/libomptarget/include/Debug.h
@@ -39,6 +39,8 @@

 #include <atomic>
 #include <mutex>
+#include <cstdlib>
+#include <string>

 /// 32-Bit field data attributes controlling information presented to the user.
 enum OpenMPInfoType : uint32_t {

numba/openmp/libs/openmp/patches/14.0.6/0003-Link-static-LLVM-libs.patch
ADDED

@@ -0,0 +1,13 @@
diff -Naur a/libomptarget/plugins/common/elf_common/CMakeLists.txt b/libomptarget/plugins/common/elf_common/CMakeLists.txt
--- a/libomptarget/plugins/common/elf_common/CMakeLists.txt
+++ b/libomptarget/plugins/common/elf_common/CMakeLists.txt
@@ -16,9 +16,6 @@
 set_property(TARGET elf_common PROPERTY POSITION_INDEPENDENT_CODE ON)
 llvm_update_compile_flags(elf_common)
 set(LINK_LLVM_LIBS LLVMBinaryFormat LLVMObject LLVMSupport)
-if (LLVM_LINK_LLVM_DYLIB)
-  set(LINK_LLVM_LIBS LLVM)
-endif()
 target_link_libraries(elf_common INTERFACE ${LINK_LLVM_LIBS})
 include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS})
 add_dependencies(elf_common ${LINK_LLVM_LIBS})

numba/openmp/libs/openmp/patches/15.0.7/0001-Fix-missing-includes.patch
ADDED

@@ -0,0 +1,14 @@
diff --git a/libomptarget/include/Debug.h b/libomptarget/include/Debug.h
index 8ff4695..d789551 100644
--- a/libomptarget/include/Debug.h
+++ b/libomptarget/include/Debug.h
@@ -38,7 +38,9 @@
 #define _OMPTARGET_DEBUG_H

 #include <atomic>
+#include <cstdlib>
 #include <mutex>
+#include <string>

 /// 32-Bit field data attributes controlling information presented to the user.
 enum OpenMPInfoType : uint32_t {

numba/openmp/libs/openmp/patches/15.0.7/0002-Link-LLVM-statically.patch
ADDED

@@ -0,0 +1,101 @@
diff --git a/libomptarget/plugins/CMakeLists.txt b/libomptarget/plugins/CMakeLists.txt
index 64c2539..6abc109 100644
--- a/libomptarget/plugins/CMakeLists.txt
+++ b/libomptarget/plugins/CMakeLists.txt
@@ -31,7 +31,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
   add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")

   add_llvm_library("omptarget.rtl.${tmachine_libname}"
-    SHARED
+    SHARED DISABLE_LLVM_LINK_LLVM_DYLIB

     ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp

@@ -97,4 +97,3 @@ add_subdirectory(remote)
 # Make sure the parent scope can see the plugins that will be created.
 set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
 set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
-
diff --git a/libomptarget/plugins/amdgpu/CMakeLists.txt b/libomptarget/plugins/amdgpu/CMakeLists.txt
index 66bf680..47935e5 100644
--- a/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -66,7 +66,7 @@ else()
   set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
 endif()

-add_llvm_library(omptarget.rtl.amdgpu SHARED
+add_llvm_library(omptarget.rtl.amdgpu SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
   impl/impl.cpp
   impl/interop_hsa.cpp
   impl/data.cpp
@@ -126,4 +126,3 @@ else()
   list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.amdgpu")
   set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
 endif()
-
diff --git a/libomptarget/plugins/common/elf_common/CMakeLists.txt b/libomptarget/plugins/common/elf_common/CMakeLists.txt
index 9ea2926..b3fb758 100644
--- a/libomptarget/plugins/common/elf_common/CMakeLists.txt
+++ b/libomptarget/plugins/common/elf_common/CMakeLists.txt
@@ -16,9 +16,7 @@ add_library(elf_common OBJECT elf_common.cpp)
 set_property(TARGET elf_common PROPERTY POSITION_INDEPENDENT_CODE ON)
 llvm_update_compile_flags(elf_common)
 set(LINK_LLVM_LIBS LLVMBinaryFormat LLVMObject LLVMSupport)
-if (LLVM_LINK_LLVM_DYLIB)
-  set(LINK_LLVM_LIBS LLVM)
-endif()
+# Link LLVM static libraries to avoid dependency on shared LLVM libraries.
 target_link_libraries(elf_common INTERFACE ${LINK_LLVM_LIBS})
 add_dependencies(elf_common ${LINK_LLVM_LIBS})

diff --git a/libomptarget/plugins/cuda/CMakeLists.txt b/libomptarget/plugins/cuda/CMakeLists.txt
index 46e04c3..825e273 100644
--- a/libomptarget/plugins/cuda/CMakeLists.txt
+++ b/libomptarget/plugins/cuda/CMakeLists.txt
@@ -40,7 +40,7 @@ endif()
 if (LIBOMPTARGET_CAN_LINK_LIBCUDA AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
   libomptarget_say("Building CUDA plugin linked against libcuda")
   include_directories(${LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS})
-  add_llvm_library(omptarget.rtl.cuda SHARED
+  add_llvm_library(omptarget.rtl.cuda SHARED DISABLE_LLVM_LINK_LLVM_DYLIB

     src/rtl.cpp

@@ -64,7 +64,7 @@ else()
   libomptarget_say("Building CUDA plugin for dlopened libcuda")
   include_directories(dynamic_cuda)
   add_llvm_library(omptarget.rtl.cuda
-    SHARED
+    SHARED DISABLE_LLVM_LINK_LLVM_DYLIB

     src/rtl.cpp
     dynamic_cuda/cuda.cpp
diff --git a/libomptarget/plugins/ve/CMakeLists.txt b/libomptarget/plugins/ve/CMakeLists.txt
index 5aded32..4a81583 100644
--- a/libomptarget/plugins/ve/CMakeLists.txt
+++ b/libomptarget/plugins/ve/CMakeLists.txt
@@ -24,7 +24,7 @@ if(${LIBOMPTARGET_DEP_VEO_FOUND})
   add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")

   add_llvm_library("omptarget.rtl.${tmachine_libname}"
-    SHARED
+    SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
     ${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.cpp

     ADDITIONAL_HEADER_DIRS
diff --git a/libomptarget/src/CMakeLists.txt b/libomptarget/src/CMakeLists.txt
index 071ec61..98b48ac 100644
--- a/libomptarget/src/CMakeLists.txt
+++ b/libomptarget/src/CMakeLists.txt
@@ -12,8 +12,9 @@

 libomptarget_say("Building offloading runtime library libomptarget.")

+# Link LLVM statically to avoid dependency on dynamic libLLVM.
 add_llvm_library(omptarget
-  SHARED
+  SHARED DISABLE_LLVM_LINK_LLVM_DYLIB

   api.cpp
   device.cpp