pyomp 0.5.0__cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. numba/openmp/__init__.py +106 -0
  2. numba/openmp/_version.py +34 -0
  3. numba/openmp/analysis.py +251 -0
  4. numba/openmp/compiler.py +402 -0
  5. numba/openmp/config.py +27 -0
  6. numba/openmp/decorators.py +27 -0
  7. numba/openmp/exceptions.py +26 -0
  8. numba/openmp/ir_utils.py +4 -0
  9. numba/openmp/libs/openmp/lib/libgomp.1.dylib +0 -0
  10. numba/openmp/libs/openmp/lib/libgomp.dylib +0 -0
  11. numba/openmp/libs/openmp/lib/libiomp5.dylib +0 -0
  12. numba/openmp/libs/openmp/lib/libomp.dylib +0 -0
  13. numba/openmp/libs/openmp/patches/14.0.6/0001-BACKPORT-Fix-for-CUDA-OpenMP-RTL.patch +39 -0
  14. numba/openmp/libs/openmp/patches/14.0.6/0002-Fix-missing-includes.patch +12 -0
  15. numba/openmp/libs/openmp/patches/14.0.6/0003-Link-static-LLVM-libs.patch +13 -0
  16. numba/openmp/libs/openmp/patches/15.0.7/0001-Fix-missing-includes.patch +14 -0
  17. numba/openmp/libs/openmp/patches/15.0.7/0002-Link-LLVM-statically.patch +101 -0
  18. numba/openmp/libs/openmp/patches/15.0.7/0003-Disable-opaque-pointers-DeviceRTL-bitcode.patch +12 -0
  19. numba/openmp/libs/openmp/patches/16.0.6/0001-Load-plugins-from-install-directory.patch +53 -0
  20. numba/openmp/libs/openmp/patches/16.0.6/0002-Link-LLVM-statically.patch +218 -0
  21. numba/openmp/libs/openmp/patches/20.1.8/0001-Enable-standalone-build.patch +13 -0
  22. numba/openmp/libs/openmp/patches/20.1.8/0002-Link-statically-LLVM.patch +24 -0
  23. numba/openmp/libs/openmp/patches/20.1.8/0003-Do-not-build-liboffload.patch +12 -0
  24. numba/openmp/libs/pass/CGIntrinsicsOpenMP.cpp +2939 -0
  25. numba/openmp/libs/pass/CGIntrinsicsOpenMP.h +606 -0
  26. numba/openmp/libs/pass/CMakeLists.txt +57 -0
  27. numba/openmp/libs/pass/DebugOpenMP.cpp +17 -0
  28. numba/openmp/libs/pass/DebugOpenMP.h +28 -0
  29. numba/openmp/libs/pass/IntrinsicsOpenMP.cpp +837 -0
  30. numba/openmp/libs/pass/IntrinsicsOpenMP.h +13 -0
  31. numba/openmp/libs/pass/IntrinsicsOpenMP_CAPI.h +23 -0
  32. numba/openmp/libs/pass/libIntrinsicsOpenMP.dylib +0 -0
  33. numba/openmp/link_utils.py +126 -0
  34. numba/openmp/llvm_pass.py +48 -0
  35. numba/openmp/llvmlite_extensions.py +75 -0
  36. numba/openmp/omp_context.py +242 -0
  37. numba/openmp/omp_grammar.py +696 -0
  38. numba/openmp/omp_ir.py +2105 -0
  39. numba/openmp/omp_lower.py +3125 -0
  40. numba/openmp/omp_runtime.py +107 -0
  41. numba/openmp/overloads.py +53 -0
  42. numba/openmp/parser.py +6 -0
  43. numba/openmp/tags.py +532 -0
  44. numba/openmp/tests/test_openmp.py +5056 -0
  45. pyomp-0.5.0.dist-info/METADATA +193 -0
  46. pyomp-0.5.0.dist-info/RECORD +52 -0
  47. pyomp-0.5.0.dist-info/WHEEL +6 -0
  48. pyomp-0.5.0.dist-info/licenses/LICENSE +25 -0
  49. pyomp-0.5.0.dist-info/licenses/LICENSE-OPENMP.txt +361 -0
  50. pyomp-0.5.0.dist-info/top_level.txt +3 -0
  51. pyomp.dylibs/libc++.1.0.dylib +0 -0
  52. pyomp.dylibs/libzstd.1.5.7.dylib +0 -0
@@ -0,0 +1,402 @@
1
+ from numba.core import compiler, compiler_machinery, cpu, ir, types
2
+ from numba import cuda as numba_cuda
3
+ from numba.core.controlflow import CFGraph
4
+ from numba.cuda import descriptor as cuda_descriptor
5
+ from numba.cuda.target import CUDACallConv
6
+ from numba.core.lowering import Lower
7
+ from functools import cached_property
8
+ from numba.core.callconv import (
9
+ RETCODE_OK,
10
+ )
11
+
12
+ from numba.core.codegen import AOTCodeLibrary, JITCodeLibrary
13
+ from numba.core.dispatcher import _FunctionCompiler
14
+ from numba.core.compiler_machinery import PassManager
15
+ from numba.core.compiler import DefaultPassBuilder
16
+ from numba.core.untyped_passes import (
17
+ TranslateByteCode,
18
+ FixupArgs,
19
+ IRProcessing,
20
+ InlineClosureLikes,
21
+ RewriteSemanticConstants,
22
+ DeadBranchPrune,
23
+ GenericRewrites,
24
+ RewriteDynamicRaises,
25
+ MakeFunctionToJitFunction,
26
+ InlineInlinables,
27
+ FindLiterallyCalls,
28
+ LiteralUnroll,
29
+ LiteralPropagationSubPipelinePass,
30
+ WithLifting,
31
+ )
32
+ import llvmlite.binding as ll
33
+ import llvmlite.ir as lir
34
+
35
+ from .config import DEBUG_OPENMP
36
+ from .llvm_pass import run_intrinsics_openmp_pass
37
+
38
+
39
class OnlyLower(compiler.CompilerBase):
    """Compiler whose only pipeline is the nopython lowering pipeline.

    The typemap and calltypes are taken from ``targetctx.state_copy`` and
    stored on this compiler's state at construction time.
    """

    def __init__(self, typingctx, targetctx, library, args, restype, flags, locals):
        super().__init__(typingctx, targetctx, library, args, restype, flags, locals)
        saved_state = targetctx.state_copy
        self.state.typemap = saved_state.typemap
        self.state.calltypes = saved_state.calltypes

    def define_pipelines(self):
        """Return the pass managers to run; empty when object mode is forced."""
        if self.state.flags.force_pyobject:
            return []
        lowering_pm = compiler.DefaultPassBuilder.define_nopython_lowering_pipeline(
            self.state
        )
        return [lowering_pm]
54
+
55
+
56
class OnlyLowerCUDA(numba_cuda.compiler.CUDACompiler):
    """CUDA compiler whose single pipeline holds only lowering passes.

    The typemap and calltypes are taken from ``targetctx.state_copy`` and
    stored on this compiler's state at construction time.
    """

    def __init__(self, typingctx, targetctx, library, args, restype, flags, locals):
        super().__init__(typingctx, targetctx, library, args, restype, flags, locals)
        saved_state = targetctx.state_copy
        self.state.typemap = saved_state.typemap
        self.state.calltypes = saved_state.calltypes

    def define_pipelines(self):
        """Return a one-element list holding the finalized "cuda" pass manager."""
        cuda_pm = compiler_machinery.PassManager("cuda")
        # Numba <=0.57 ships CUDALegalization (support for CUDA <11.2); newer
        # Numba releases dropped it. A missing attribute is tolerated so that
        # the decision is delegated to whatever the installed Numba supports.
        try:
            cuda_pm.add_pass(numba_cuda.compiler.CUDALegalization, "CUDA legalization")
        except AttributeError:
            pass
        cuda_lowering = self.define_cuda_lowering_pipeline(self.state)
        cuda_pm.passes.extend(cuda_lowering.passes)
        cuda_pm.finalize()
        return [cuda_pm]
75
+
76
+
77
def compute_cfg_from_llvm_blocks(blocks):
    """Build a ``CFGraph`` from a sequence of llvmlite IR basic blocks.

    Parameters
    ----------
    blocks
        Sequence of basic blocks; each must expose ``name`` and
        ``terminator``.

    Returns
    -------
    tuple
        ``(cfg, name_to_index)`` where ``cfg`` is the processed graph with
        ``"entry"`` as its entry point and ``name_to_index`` maps a block
        name to ``(index_in_blocks, [successor names])``. Blocks ending in
        ``unreachable`` get a graph node but no ``name_to_index`` entry.

    Raises
    ------
    AssertionError
        If a block's terminator is of a kind not handled here.
    """
    cfg = CFGraph()
    name_to_index = {}
    for b in blocks:
        cfg.add_node(b.name)

    for bindex, b in enumerate(blocks):
        term = b.terminator
        if isinstance(term, lir.instructions.Branch):
            cfg.add_edge(b.name, term.operands[0].name)
            name_to_index[b.name] = (bindex, [term.operands[0].name])
        elif isinstance(term, lir.instructions.ConditionalBranch):
            cfg.add_edge(b.name, term.operands[1].name)
            cfg.add_edge(b.name, term.operands[2].name)
            name_to_index[b.name] = (
                bindex,
                [term.operands[1].name, term.operands[2].name],
            )
        elif isinstance(term, lir.instructions.Ret):
            name_to_index[b.name] = (bindex, [])
        elif isinstance(term, lir.instructions.SwitchInstr):
            cfg.add_edge(b.name, term.default.name)
            for _, blk in term.cases:
                cfg.add_edge(b.name, blk.name)
            out_blks = [x[1].name for x in term.cases]
            out_blks.append(term.default.name)
            name_to_index[b.name] = (bindex, out_blks)
        elif isinstance(term, lir.instructions.Unreachable):
            pass
        else:
            # Raised explicitly (instead of ``assert False``) so the check is
            # not stripped when Python runs with -O; the exception type is
            # unchanged and the message now carries the diagnostic detail.
            raise AssertionError(
                "Unknown term: %s %s in block %s" % (term, type(term), b.name)
            )

    cfg.set_entry_point("entry")
    cfg.process()
    return cfg, name_to_index
115
+
116
+
117
def compute_llvm_topo_order(blocks):
    """Order block names by a depth-first walk of the blocks' control flow.

    Returns ``(order, name_to_index)``: ``order`` is the reversed DFS
    post-order of block names starting at the CFG entry point, and
    ``name_to_index`` is the mapping produced by
    ``compute_cfg_from_llvm_blocks``.
    """
    cfg, name_to_index = compute_cfg_from_llvm_blocks(blocks)
    visited = set()
    finished = []

    def _walk(name):
        if name in visited:
            return
        visited.add(name)
        successors = cfg._succs[name]

        # Nothing to descend into; this is the case for an unreachable.
        # NOTE(review): such nodes are marked visited but never added to the
        # resulting order - confirm this is intended for returning blocks too.
        if not successors:
            return

        # For two-way terminators visit the recorded targets in swapped
        # order, so that the inside of loops is handled before their exits.
        recorded = name_to_index[name][1]
        if len(recorded) == 2:
            successors = [recorded[1], recorded[0]]

        for target in successors:
            # Back edges are skipped.
            if (name, target) not in cfg._back_edges:
                _walk(target)
        finished.append(name)

    _walk(cfg.entry_point())
    finished.reverse()
    return finished, name_to_index
146
+
147
+
148
class CollectUnknownLLVMVarsPrivate(lir.transforms.Visitor):
    """Visitor that reports alloca operands used inside OpenMP regions.

    A stack of currently open ``llvm.directive.region.entry`` calls is kept
    while instructions are visited. Every alloca operand seen while the
    stack is non-empty is passed to each open directive's
    ``save_orig_numba_openmp.alloca``; when the matching region exit is
    reached, the directive's alloca queue is post-processed.
    """

    def __init__(self):
        self.active_openmp_directives = []
        self.start_num = 0

    # Override the default function visitor to go in topo order
    def visit_Function(self, func):
        self._function = func
        if not func.blocks:
            return None

        verbose = DEBUG_OPENMP >= 1
        if verbose:
            print("Collect visit_Function:", func.blocks, type(func.blocks))
        ordered_names, name_to_index = compute_llvm_topo_order(func.blocks)
        ordered_names = list(ordered_names)
        if verbose:
            print("topo_order:", ordered_names)

        for block_name in ordered_names:
            if verbose:
                print("Visiting block:", block_name)
            block_position = name_to_index[block_name][0]
            self.visit_BasicBlock(func.blocks[block_position])

        if verbose:
            print("Collect visit_Function done")

    def visit_Instruction(self, instr):
        # Operands only matter while at least one OpenMP region is open.
        if self.active_openmp_directives:
            if DEBUG_OPENMP >= 1:
                print("Collect instr:", instr, type(instr))
            for operand in instr.operands:
                if not isinstance(operand, lir.AllocaInstr):
                    if DEBUG_OPENMP >= 2:
                        print("non-alloca:", operand, type(operand))
                    continue
                if DEBUG_OPENMP >= 1:
                    print("Collect AllocaInstr operand:", operand, operand.name)
                for open_region in self.active_openmp_directives:
                    open_region.save_orig_numba_openmp.alloca(operand, None)

        if not isinstance(instr, lir.CallInstr):
            return

        callee_name = instr.callee.name
        if callee_name == "llvm.directive.region.entry":
            if DEBUG_OPENMP >= 1:
                print(
                    "Collect Found openmp region entry:",
                    instr,
                    type(instr),
                    "\n",
                    instr.tags,
                    type(instr.tags),
                    id(self),
                    len(self.active_openmp_directives),
                )
            self.active_openmp_directives.append(instr)
            if DEBUG_OPENMP >= 1:
                print("post append:", len(self.active_openmp_directives))
            assert hasattr(instr, "save_orig_numba_openmp")
        elif callee_name == "llvm.directive.region.exit":
            if DEBUG_OPENMP >= 1:
                print(
                    "Collect Found openmp region exit:",
                    instr,
                    type(instr),
                    "\n",
                    instr.tags,
                    type(instr.tags),
                    id(self),
                    len(self.active_openmp_directives),
                )
            # The innermost open region is the one being closed.
            closed_region = self.active_openmp_directives.pop()
            region_info = closed_region.save_orig_numba_openmp
            region_info.post_lowering_process_alloca_queue(closed_region)
221
+
222
+
223
def post_lowering_openmp(mod):
    """Run the OpenMP alloca-collection visitor over ``mod``."""
    verbose = DEBUG_OPENMP >= 1
    if verbose:
        print("post_lowering_openmp")

    # Visiting the module gathers the information as a side effect.
    CollectUnknownLLVMVarsPrivate().visit(mod)

    if verbose:
        print("post_lowering_openmp done")
233
+
234
+
235
class CustomContext(cpu.CPUContext):
    """CPU target context that adds an OpenMP step to post-lowering."""

    def post_lowering(self, mod, library):
        # Only libraries carrying a truthy ``openmp`` attribute get the
        # OpenMP processing; the regular post-lowering always runs after it.
        if getattr(library, "openmp", False):
            post_lowering_openmp(mod)
        super().post_lowering(mod, library)
240
+
241
+
242
class OpenmpCPUTargetContext(CustomContext):
    """``CustomContext`` that also stores a function name.

    ``name`` is kept as ``device_func_name``.
    """

    def __init__(self, name, typingctx, target="cpu"):
        # Initialise the base context first, then attach the name to it.
        super().__init__(typingctx, target)
        self.device_func_name = name
246
+
247
+
248
class OpenmpCUDATargetContext(cuda_descriptor.CUDATargetContext):
    """CUDA target context with an OpenMP post-lowering step.

    ``name`` is kept as ``device_func_name``; ``LowerNoSROA`` compares it
    with the qualified name of the function being lowered.
    """

    def __init__(self, name, typingctx, target="cuda"):
        super().__init__(typingctx, target)
        self.device_func_name = name

    def post_lowering(self, mod, library):
        # Only libraries carrying a truthy ``openmp`` attribute get the
        # OpenMP processing; the regular post-lowering always runs after it.
        if getattr(library, "openmp", False):
            post_lowering_openmp(mod)
        super().post_lowering(mod, library)

    @cached_property
    def call_conv(self):
        """Calling convention object, created once per context."""
        return CUDACallConv(self)
261
+
262
+
263
class LowerNoSROA(Lower):
    """``Lower`` variant with SROA-like optimization always disabled.

    The two ``lower_*_inst`` hooks receive the original implementation as
    ``orig`` and fall back to it whenever their special case does not apply.
    """

    @property
    def _disable_sroa_like_opt(self):
        # Unconditionally on for every instance of this class.
        return True

    def lower_assign_inst(self, orig, inst):
        # Special-case assignments from Arg nodes whose type is a CPointer
        # under the OpenMP contexts: the variable's backing storage becomes
        # the pointer argument itself.
        openmp_contexts = (OpenmpCPUTargetContext, OpenmpCUDATargetContext)
        if isinstance(self.context, openmp_contexts):
            value = inst.value
            if isinstance(value, ir.Arg):
                arg_symbol = "arg." + value.name
                argty = self.typeof(arg_symbol)
                if isinstance(argty, types.CPointer):
                    llty = self.context.get_value_type(argty)
                    self.varmap[value.name] = lir.values.Argument(
                        self.module, llty, arg_symbol
                    )
                    return

        return orig(self, inst)

    def lower_return_inst(self, orig, inst):
        # For the CUDA device function of an OpenMP target region, only the
        # OK return code is emitted and nothing is stored through the
        # return-value pointer argument. Storing there otherwise breaks
        # OpenMP code generation (looks like an upstream miscompilation) by
        # DCE of memory effects, e.g. to other pointer arguments from a
        # tofrom mapping.
        is_target_function = (
            isinstance(self.context, OpenmpCUDATargetContext)
            and self.fndesc.qualname == self.context.device_func_name
        )
        if not is_target_function:
            return orig(self, inst)
        self.call_conv._return_errcode_raw(self.builder, RETCODE_OK)
        return
300
+
301
+
302
class CustomCPUCodeLibrary(JITCodeLibrary):
    """JIT code library that runs the OpenMP intrinsics pass on each module.

    After finalization, every defined function whose name starts with
    ``.omp_offloading.descriptor_reg`` is called once.
    """

    def add_llvm_module(self, ll_module):
        """Add the result of the OpenMP intrinsics pass over ``ll_module``."""
        lowered_module = run_intrinsics_openmp_pass(ll_module)
        super().add_llvm_module(lowered_module)

    def _finalize_specific(self):
        """Finalize, then run offloading descriptor registration functions.

        Raises
        ------
        RuntimeError
            If calling a registration function raises; the original
            exception is attached as the cause.
        """
        super()._finalize_specific()
        # Run target offloading descriptor registration functions, if there are any.
        import ctypes

        ee = self._codegen._engine._ee
        # Prototype is built once: the registration functions are called with
        # no arguments and their result is ignored.
        registration_proto = ctypes.CFUNCTYPE(None)
        for func in self.get_defined_functions():
            if not func.name.startswith(".omp_offloading.descriptor_reg"):
                continue
            addr = ee.get_function_address(func.name)
            reg = registration_proto(addr)
            try:
                reg()
            except Exception as err:
                # Chain explicitly so the underlying failure stays visible.
                raise RuntimeError(
                    "error registering OpenMP offloading descriptor"
                ) from err
322
+
323
+
324
class CustomFunctionCompiler(_FunctionCompiler):
    """Function compiler that always switches SSA form off."""

    def _customize_flags(self, flags):
        # OpenMP analysis needs non-SSA IR to detect the variables used
        # within regions, so SSA is disabled on the given flags object.
        flags.enable_ssa = False
        return flags
330
+
331
+
332
class CustomCompiler(compiler.CompilerBase):
    """Compiler pipeline for OpenMP-enabled functions.

    The pipeline is the custom untyped stage defined here followed by
    Numba's default typed and nopython-lowering stages.
    """

    @staticmethod
    def custom_untyped_pipeline(state, name="untyped-openmp"):
        """Return the untyped part of the nopython OpenMP pipeline.

        Parameters
        ----------
        state
            Compiler state; ``state.func_ir`` and ``state.flags`` are read.
        name
            Name given to the returned pass manager.

        Raises
        ------
        AssertionError
            If ``state.flags.enable_ssa`` is set.
        """
        pm = PassManager(name)
        if state.func_ir is None:
            pm.add_pass(TranslateByteCode, "analyzing bytecode")
            pm.add_pass(FixupArgs, "fix up args")
        pm.add_pass(IRProcessing, "processing IR")

        # inline closures early in case they are using nonlocal's
        # see issue #6585.
        pm.add_pass(InlineClosureLikes, "inline calls to locally defined closures")

        # pre typing
        if not state.flags.no_rewrites:
            pm.add_pass(RewriteSemanticConstants, "rewrite semantic constants")
            pm.add_pass(DeadBranchPrune, "dead branch pruning")
            pm.add_pass(GenericRewrites, "nopython rewrites")

        pm.add_pass(RewriteDynamicRaises, "rewrite dynamic raises")

        # convert any remaining closures into functions
        pm.add_pass(
            MakeFunctionToJitFunction, "convert make_function into JIT functions"
        )
        # inline functions that have been determined as inlinable and rerun
        # branch pruning, this needs to be run after closures are inlined as
        # the IR repr of a closure masks call sites if an inlinable is called
        # inside a closure
        pm.add_pass(InlineInlinables, "inline inlinable functions")
        if not state.flags.no_rewrites:
            pm.add_pass(DeadBranchPrune, "dead branch pruning")

        pm.add_pass(FindLiterallyCalls, "find literally calls")
        pm.add_pass(LiteralUnroll, "handles literal_unroll")

        if state.flags.enable_ssa:
            # Raised explicitly rather than via ``assert False`` so the check
            # still fires when Python runs with -O (which strips asserts).
            raise AssertionError("SSA form is not supported in OpenMP")

        pm.add_pass(LiteralPropagationSubPipelinePass, "Literal propagation")
        # Run WithLifting late for make_implicit_explicit to work. TODO: We
        # should create a pass that does this instead of replicating and hacking
        # the untyped pipeline. This handling may also negatively affect
        # optimizations.
        pm.add_pass(WithLifting, "Handle with contexts")

        pm.finalize()
        return pm

    def define_pipelines(self):
        """Return a one-element list holding the finalized "omp" pass manager."""
        # compose pipeline from untyped, typed and lowering parts
        dpb = DefaultPassBuilder
        pm = PassManager("omp")
        untyped_passes = self.custom_untyped_pipeline(self.state)
        pm.passes.extend(untyped_passes.passes)

        typed_passes = dpb.define_typed_pipeline(self.state)
        pm.passes.extend(typed_passes.passes)

        lowering_passes = dpb.define_nopython_lowering_pipeline(self.state)
        pm.passes.extend(lowering_passes.passes)

        pm.finalize()
        return [pm]
397
+
398
+
399
class CustomAOTCPUCodeLibrary(AOTCodeLibrary):
    """AOT code library that runs the OpenMP intrinsics pass on each module."""

    def add_llvm_module(self, ll_module):
        # The module handed to the base class is the pass result, not the input.
        super().add_llvm_module(run_intrinsics_openmp_pass(ll_module))
numba/openmp/config.py ADDED
@@ -0,0 +1,27 @@
1
+ import os
2
+ import warnings
3
+ from numba.core import config
4
+ from pathlib import Path
5
+
6
+ libpath = Path(__file__).absolute().parent / "libs"
7
+
8
+
9
def _safe_readenv(name, ctor, default):
    """Read environment variable ``name`` and convert it with ``ctor``.

    ``default`` stands in for the raw value when the variable is unset, so
    it is passed through ``ctor`` as well. If ``ctor`` raises, a
    ``RuntimeWarning`` is issued and ``default`` is returned unconverted.
    """
    raw = os.environ.get(name, default)
    try:
        parsed = ctor(raw)
    except Exception:
        warnings.warn(
            "environ %s defined but failed to parse '%s'" % (name, raw),
            RuntimeWarning,
        )
        return default
    return parsed
19
+
20
+
21
# OpenMP debug level, read from NUMBA_DEBUG_OPENMP as an int (default 0).
DEBUG_OPENMP = _safe_readenv("NUMBA_DEBUG_OPENMP", int, 0)
if DEBUG_OPENMP > 0:
    # Also switch on Numba's DEBUG_ARRAY_OPT, unless it already has a
    # non-zero value.
    if config.DEBUG_ARRAY_OPT == 0:
        config.DEBUG_ARRAY_OPT = 1
# Read from NUMBA_DEBUG_OPENMP_LLVM_PASS as an int (default 0).
DEBUG_OPENMP_LLVM_PASS = _safe_readenv("NUMBA_DEBUG_OPENMP_LLVM_PASS", int, 0)
# Read from NUMBA_OPENMP_DISABLED as an int (default 0).
OPENMP_DISABLED = _safe_readenv("NUMBA_OPENMP_DISABLED", int, 0)
# Use toolchain for device code compilation by default to avoid issues with
# libomptarget compatibility checks.
OPENMP_DEVICE_TOOLCHAIN = _safe_readenv("NUMBA_OPENMP_DEVICE_TOOLCHAIN", int, 1)
@@ -0,0 +1,27 @@
1
+ import warnings
2
+ import numba
3
+
4
+ from .compiler import (
5
+ CustomCompiler,
6
+ CustomFunctionCompiler,
7
+ )
8
+
9
+
10
def jit(*args, **kws):
    """
    Equivalent to jit(nopython=True, nogil=True), compiled with the OpenMP
    function compiler and pipeline.

    Works both as a bare decorator (``@jit``) and as a decorator factory
    (``@jit()``, ``@jit(fastmath=True)``, ``@jit(signature)``). A
    ``nopython`` or ``forceobj`` keyword is ignored with a RuntimeWarning.
    """
    if "nopython" in kws:
        warnings.warn("nopython is set for njit and is ignored", RuntimeWarning)
    if "forceobj" in kws:
        warnings.warn("forceobj is set for njit and is ignored", RuntimeWarning)
        del kws["forceobj"]
    kws.update({"nopython": True, "nogil": True})

    def _use_openmp_pipeline(dispatcher):
        # Swap in the OpenMP-specific function compiler and pipeline class.
        dispatcher._compiler.__class__ = CustomFunctionCompiler
        dispatcher._compiler.pipeline_class = CustomCompiler
        return dispatcher

    jitted = numba.jit(*args, **kws)
    if hasattr(jitted, "_compiler"):
        # A function was passed directly: numba.jit returned the dispatcher.
        return _use_openmp_pipeline(jitted)

    # Otherwise numba.jit returned a decorator (no function given yet), which
    # has no ``_compiler``; customize the dispatcher once it is created.
    def decorator(func):
        return _use_openmp_pipeline(jitted(func))

    return decorator
24
+
25
+
26
def njit(*args, **kws):
    """Alias of ``jit``: forwards all arguments and returns its result."""
    result = jit(*args, **kws)
    return result
@@ -0,0 +1,26 @@
1
# NOTE(review): the raise sites live in other modules; the one-line
# descriptions below are inferred from the class names and should be
# confirmed against those sites.


class UnspecifiedVarInDefaultNone(Exception):
    """Presumably: a variable has no explicit clause under default(none)."""


class ParallelForExtraCode(Exception):
    """Presumably: unexpected extra code around a parallel-for loop."""


class ParallelForWrongLoopCount(Exception):
    """Presumably: a parallel-for region has the wrong number of loops."""


class ParallelForInvalidCollapseCount(Exception):
    """Presumably: a collapse count does not fit the loop nest."""


class NonconstantOpenmpSpecification(Exception):
    """Presumably: an OpenMP specification is not a compile-time constant."""


class NonStringOpenmpSpecification(Exception):
    """Presumably: an OpenMP specification is not a string."""


class MultipleNumThreadsClauses(Exception):
    """Presumably: more than one num_threads clause was given."""
@@ -0,0 +1,4 @@
1
def dump_block(label, block):
    """Print ``label`` as a header, then each statement in ``block.body``."""
    print(label, ":")
    for statement in block.body:
        print(" ", statement)
@@ -0,0 +1,39 @@
1
+ diff --git a/libomptarget/plugins/cuda/src/rtl.cpp b/libomptarget/plugins/cuda/src/rtl.cpp
2
+ index 0ca05f0ec3a0..16da3f434bba 100644
3
+ --- a/libomptarget/plugins/cuda/src/rtl.cpp
4
+ +++ b/libomptarget/plugins/cuda/src/rtl.cpp
5
+ @@ -234,6 +234,7 @@ template <typename T> class ResourcePoolTy {
6
+ std::mutex Mutex;
7
+ /// Pool of resources.
8
+ std::vector<T> Resources;
9
+ + std::vector<T> Pool;
10
+ /// A reference to the corresponding allocator.
11
+ AllocatorTy<T> Allocator;
12
+
13
+ @@ -243,11 +244,13 @@ template <typename T> class ResourcePoolTy {
14
+ auto CurSize = Resources.size();
15
+ assert(Size > CurSize && "Unexpected smaller size");
16
+ Resources.reserve(Size);
17
+ + Pool.reserve(Size);
18
+ for (auto I = CurSize; I < Size; ++I) {
19
+ T NewItem;
20
+ int Ret = Allocator.create(NewItem);
21
+ if (Ret != OFFLOAD_SUCCESS)
22
+ return false;
23
+ + Pool.push_back(NewItem);
24
+ Resources.push_back(NewItem);
25
+ }
26
+ return true;
27
+ @@ -308,8 +311,9 @@ public:
28
+ /// Released all stored resources and clear the pool.
29
+ /// Note: This function is not thread safe. Be sure to guard it if necessary.
30
+ void clear() noexcept {
31
+ - for (auto &R : Resources)
32
+ + for (auto &R : Pool)
33
+ (void)Allocator.destroy(R);
34
+ + Pool.clear();
35
+ Resources.clear();
36
+ }
37
+ };
38
+ --
39
+ 2.29.1
@@ -0,0 +1,12 @@
1
+ diff -Naur a/libomptarget/include/Debug.h b/libomptarget/include/Debug.h
2
+ --- a/libomptarget/include/Debug.h
3
+ +++ b/libomptarget/include/Debug.h
4
+ @@ -39,6 +39,8 @@
5
+
6
+ #include <atomic>
7
+ #include <mutex>
8
+ +#include <cstdlib>
9
+ +#include <string>
10
+
11
+ /// 32-Bit field data attributes controlling information presented to the user.
12
+ enum OpenMPInfoType : uint32_t {
@@ -0,0 +1,13 @@
1
+ diff -Naur a/libomptarget/plugins/common/elf_common/CMakeLists.txt b/libomptarget/plugins/common/elf_common/CMakeLists.txt
2
+ --- a/libomptarget/plugins/common/elf_common/CMakeLists.txt
3
+ +++ b/libomptarget/plugins/common/elf_common/CMakeLists.txt
4
+ @@ -16,9 +16,6 @@
5
+ set_property(TARGET elf_common PROPERTY POSITION_INDEPENDENT_CODE ON)
6
+ llvm_update_compile_flags(elf_common)
7
+ set(LINK_LLVM_LIBS LLVMBinaryFormat LLVMObject LLVMSupport)
8
+ -if (LLVM_LINK_LLVM_DYLIB)
9
+ - set(LINK_LLVM_LIBS LLVM)
10
+ -endif()
11
+ target_link_libraries(elf_common INTERFACE ${LINK_LLVM_LIBS})
12
+ include_directories(${LIBOMPTARGET_LLVM_INCLUDE_DIRS})
13
+ add_dependencies(elf_common ${LINK_LLVM_LIBS})
@@ -0,0 +1,14 @@
1
+ diff --git a/libomptarget/include/Debug.h b/libomptarget/include/Debug.h
2
+ index 8ff4695..d789551 100644
3
+ --- a/libomptarget/include/Debug.h
4
+ +++ b/libomptarget/include/Debug.h
5
+ @@ -38,7 +38,9 @@
6
+ #define _OMPTARGET_DEBUG_H
7
+
8
+ #include <atomic>
9
+ +#include <cstdlib>
10
+ #include <mutex>
11
+ +#include <string>
12
+
13
+ /// 32-Bit field data attributes controlling information presented to the user.
14
+ enum OpenMPInfoType : uint32_t {
@@ -0,0 +1,101 @@
1
+ diff --git a/libomptarget/plugins/CMakeLists.txt b/libomptarget/plugins/CMakeLists.txt
2
+ index 64c2539..6abc109 100644
3
+ --- a/libomptarget/plugins/CMakeLists.txt
4
+ +++ b/libomptarget/plugins/CMakeLists.txt
5
+ @@ -31,7 +31,7 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "${tmachine}$")
6
+ add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
7
+
8
+ add_llvm_library("omptarget.rtl.${tmachine_libname}"
9
+ - SHARED
10
+ + SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
11
+
12
+ ${CMAKE_CURRENT_SOURCE_DIR}/../generic-elf-64bit/src/rtl.cpp
13
+
14
+ @@ -97,4 +97,3 @@ add_subdirectory(remote)
15
+ # Make sure the parent scope can see the plugins that will be created.
16
+ set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}" PARENT_SCOPE)
17
+ set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
18
+ -
19
+ diff --git a/libomptarget/plugins/amdgpu/CMakeLists.txt b/libomptarget/plugins/amdgpu/CMakeLists.txt
20
+ index 66bf680..47935e5 100644
21
+ --- a/libomptarget/plugins/amdgpu/CMakeLists.txt
22
+ +++ b/libomptarget/plugins/amdgpu/CMakeLists.txt
23
+ @@ -66,7 +66,7 @@ else()
24
+ set(LDFLAGS_UNDEFINED "-Wl,-z,defs")
25
+ endif()
26
+
27
+ -add_llvm_library(omptarget.rtl.amdgpu SHARED
28
+ +add_llvm_library(omptarget.rtl.amdgpu SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
29
+ impl/impl.cpp
30
+ impl/interop_hsa.cpp
31
+ impl/data.cpp
32
+ @@ -126,4 +126,3 @@ else()
33
+ list(APPEND LIBOMPTARGET_TESTED_PLUGINS "omptarget.rtl.amdgpu")
34
+ set(LIBOMPTARGET_TESTED_PLUGINS "${LIBOMPTARGET_TESTED_PLUGINS}" PARENT_SCOPE)
35
+ endif()
36
+ -
37
+ diff --git a/libomptarget/plugins/common/elf_common/CMakeLists.txt b/libomptarget/plugins/common/elf_common/CMakeLists.txt
38
+ index 9ea2926..b3fb758 100644
39
+ --- a/libomptarget/plugins/common/elf_common/CMakeLists.txt
40
+ +++ b/libomptarget/plugins/common/elf_common/CMakeLists.txt
41
+ @@ -16,9 +16,7 @@ add_library(elf_common OBJECT elf_common.cpp)
42
+ set_property(TARGET elf_common PROPERTY POSITION_INDEPENDENT_CODE ON)
43
+ llvm_update_compile_flags(elf_common)
44
+ set(LINK_LLVM_LIBS LLVMBinaryFormat LLVMObject LLVMSupport)
45
+ -if (LLVM_LINK_LLVM_DYLIB)
46
+ - set(LINK_LLVM_LIBS LLVM)
47
+ -endif()
48
+ +# Link LLVM static libraries to avoid dependency on shared LLVM libraries.
49
+ target_link_libraries(elf_common INTERFACE ${LINK_LLVM_LIBS})
50
+ add_dependencies(elf_common ${LINK_LLVM_LIBS})
51
+
52
+ diff --git a/libomptarget/plugins/cuda/CMakeLists.txt b/libomptarget/plugins/cuda/CMakeLists.txt
53
+ index 46e04c3..825e273 100644
54
+ --- a/libomptarget/plugins/cuda/CMakeLists.txt
55
+ +++ b/libomptarget/plugins/cuda/CMakeLists.txt
56
+ @@ -40,7 +40,7 @@ endif()
57
+ if (LIBOMPTARGET_CAN_LINK_LIBCUDA AND NOT LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
58
+ libomptarget_say("Building CUDA plugin linked against libcuda")
59
+ include_directories(${LIBOMPTARGET_DEP_CUDA_INCLUDE_DIRS})
60
+ - add_llvm_library(omptarget.rtl.cuda SHARED
61
+ + add_llvm_library(omptarget.rtl.cuda SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
62
+
63
+ src/rtl.cpp
64
+
65
+ @@ -64,7 +64,7 @@ else()
66
+ libomptarget_say("Building CUDA plugin for dlopened libcuda")
67
+ include_directories(dynamic_cuda)
68
+ add_llvm_library(omptarget.rtl.cuda
69
+ - SHARED
70
+ + SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
71
+
72
+ src/rtl.cpp
73
+ dynamic_cuda/cuda.cpp
74
+ diff --git a/libomptarget/plugins/ve/CMakeLists.txt b/libomptarget/plugins/ve/CMakeLists.txt
75
+ index 5aded32..4a81583 100644
76
+ --- a/libomptarget/plugins/ve/CMakeLists.txt
77
+ +++ b/libomptarget/plugins/ve/CMakeLists.txt
78
+ @@ -24,7 +24,7 @@ if(${LIBOMPTARGET_DEP_VEO_FOUND})
79
+ add_definitions("-DTARGET_ELF_ID=${elf_machine_id}")
80
+
81
+ add_llvm_library("omptarget.rtl.${tmachine_libname}"
82
+ - SHARED
83
+ + SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
84
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.cpp
85
+
86
+ ADDITIONAL_HEADER_DIRS
87
+ diff --git a/libomptarget/src/CMakeLists.txt b/libomptarget/src/CMakeLists.txt
88
+ index 071ec61..98b48ac 100644
89
+ --- a/libomptarget/src/CMakeLists.txt
90
+ +++ b/libomptarget/src/CMakeLists.txt
91
+ @@ -12,8 +12,9 @@
92
+
93
+ libomptarget_say("Building offloading runtime library libomptarget.")
94
+
95
+ +# Link LLVM statically to avoid dependency on dynamic libLLVM.
96
+ add_llvm_library(omptarget
97
+ - SHARED
98
+ + SHARED DISABLE_LLVM_LINK_LLVM_DYLIB
99
+
100
+ api.cpp
101
+ device.cpp