pyomp 0.5.0__cp314-cp314t-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba/openmp/__init__.py +106 -0
- numba/openmp/_version.py +34 -0
- numba/openmp/analysis.py +251 -0
- numba/openmp/compiler.py +402 -0
- numba/openmp/config.py +27 -0
- numba/openmp/decorators.py +27 -0
- numba/openmp/exceptions.py +26 -0
- numba/openmp/ir_utils.py +4 -0
- numba/openmp/libs/openmp/lib/libgomp.1.dylib +0 -0
- numba/openmp/libs/openmp/lib/libgomp.dylib +0 -0
- numba/openmp/libs/openmp/lib/libiomp5.dylib +0 -0
- numba/openmp/libs/openmp/lib/libomp.dylib +0 -0
- numba/openmp/libs/openmp/patches/14.0.6/0001-BACKPORT-Fix-for-CUDA-OpenMP-RTL.patch +39 -0
- numba/openmp/libs/openmp/patches/14.0.6/0002-Fix-missing-includes.patch +12 -0
- numba/openmp/libs/openmp/patches/14.0.6/0003-Link-static-LLVM-libs.patch +13 -0
- numba/openmp/libs/openmp/patches/15.0.7/0001-Fix-missing-includes.patch +14 -0
- numba/openmp/libs/openmp/patches/15.0.7/0002-Link-LLVM-statically.patch +101 -0
- numba/openmp/libs/openmp/patches/15.0.7/0003-Disable-opaque-pointers-DeviceRTL-bitcode.patch +12 -0
- numba/openmp/libs/openmp/patches/16.0.6/0001-Load-plugins-from-install-directory.patch +53 -0
- numba/openmp/libs/openmp/patches/16.0.6/0002-Link-LLVM-statically.patch +218 -0
- numba/openmp/libs/openmp/patches/20.1.8/0001-Enable-standalone-build.patch +13 -0
- numba/openmp/libs/openmp/patches/20.1.8/0002-Link-statically-LLVM.patch +24 -0
- numba/openmp/libs/openmp/patches/20.1.8/0003-Do-not-build-liboffload.patch +12 -0
- numba/openmp/libs/pass/CGIntrinsicsOpenMP.cpp +2939 -0
- numba/openmp/libs/pass/CGIntrinsicsOpenMP.h +606 -0
- numba/openmp/libs/pass/CMakeLists.txt +57 -0
- numba/openmp/libs/pass/DebugOpenMP.cpp +17 -0
- numba/openmp/libs/pass/DebugOpenMP.h +28 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP.cpp +837 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP.h +13 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP_CAPI.h +23 -0
- numba/openmp/libs/pass/libIntrinsicsOpenMP.dylib +0 -0
- numba/openmp/link_utils.py +126 -0
- numba/openmp/llvm_pass.py +48 -0
- numba/openmp/llvmlite_extensions.py +75 -0
- numba/openmp/omp_context.py +242 -0
- numba/openmp/omp_grammar.py +696 -0
- numba/openmp/omp_ir.py +2105 -0
- numba/openmp/omp_lower.py +3125 -0
- numba/openmp/omp_runtime.py +107 -0
- numba/openmp/overloads.py +53 -0
- numba/openmp/parser.py +6 -0
- numba/openmp/tags.py +532 -0
- numba/openmp/tests/test_openmp.py +5056 -0
- pyomp-0.5.0.dist-info/METADATA +193 -0
- pyomp-0.5.0.dist-info/RECORD +52 -0
- pyomp-0.5.0.dist-info/WHEEL +6 -0
- pyomp-0.5.0.dist-info/licenses/LICENSE +25 -0
- pyomp-0.5.0.dist-info/licenses/LICENSE-OPENMP.txt +361 -0
- pyomp-0.5.0.dist-info/top_level.txt +3 -0
- pyomp.dylibs/libc++.1.0.dylib +0 -0
- pyomp.dylibs/libzstd.1.5.7.dylib +0 -0
|
@@ -0,0 +1,606 @@
|
|
|
1
|
+
#ifndef LLVM_TRANSFORMS_INTRINSICS_OPENMP_CODEGEN_H
|
|
2
|
+
#define LLVM_TRANSFORMS_INTRINSICS_OPENMP_CODEGEN_H
|
|
3
|
+
|
|
4
|
+
#include "DebugOpenMP.h"
|
|
5
|
+
|
|
6
|
+
#include <llvm/ADT/DenseMap.h>
|
|
7
|
+
#include <llvm/Frontend/OpenMP/OMP.h.inc>
|
|
8
|
+
#include <llvm/Frontend/OpenMP/OMPConstants.h>
|
|
9
|
+
#include <llvm/Frontend/OpenMP/OMPIRBuilder.h>
|
|
10
|
+
#include <llvm/IR/BasicBlock.h>
|
|
11
|
+
#include <llvm/IR/IRBuilder.h>
|
|
12
|
+
#include <llvm/IR/Instructions.h>
|
|
13
|
+
#include <llvm/IR/LLVMContext.h>
|
|
14
|
+
#include <llvm/IR/Value.h>
|
|
15
|
+
#include <llvm/Support/AtomicOrdering.h>
|
|
16
|
+
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
|
|
17
|
+
#include <llvm/Transforms/Utils/ValueMapper.h>
|
|
18
|
+
|
|
19
|
+
using namespace llvm;
|
|
20
|
+
using namespace omp;
|
|
21
|
+
|
|
22
|
+
namespace iomp {
|
|
23
|
+
|
|
24
|
+
// Convenience aliases for the OpenMPIRBuilder insert-point and callback
// types used throughout this pass.
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
using BodyGenCallbackTy = OpenMPIRBuilder::BodyGenCallbackTy;
using FinalizeCallbackTy = OpenMPIRBuilder::FinalizeCallbackTy;
|
|
27
|
+
|
|
28
|
+
// TODO: expose clauses through namespace omp?
// Data-sharing / mapping attribute attached to each value referenced by an
// OpenMP region: DSA_PRIVATE..DSA_SHARED mirror data-sharing clauses,
// DSA_REDUCTION_* the supported reduction operators, and DSA_MAP_* the
// target mapping clauses (the *_STRUCT variants tag struct-field mappings,
// see StructMapTy below).
enum DSAType {
  DSA_NONE,
  DSA_PRIVATE,
  DSA_FIRSTPRIVATE,
  DSA_LASTPRIVATE,
  DSA_SHARED,
  DSA_REDUCTION_ADD,
  DSA_REDUCTION_SUB,
  DSA_REDUCTION_MUL,
  DSA_MAP_ALLOC,
  DSA_MAP_TO,
  DSA_MAP_FROM,
  DSA_MAP_TOFROM,
  DSA_MAP_ALLOC_STRUCT,
  DSA_MAP_TO_STRUCT,
  DSA_MAP_FROM_STRUCT,
  DSA_MAP_TOFROM_STRUCT,
  DSA_MAP_STRUCT
};
|
|
48
|
+
|
|
49
|
+
// Per-value data-sharing record: the DSA kind, an optional copy-constructor
// callee (presumably invoked when privatizing the value — confirm usage in
// CGIntrinsicsOpenMP.cpp), and the pointee type for pointer-typed values
// (needed since opaque pointers no longer carry it).
struct DSATypeInfo {
  DSAType Type = DSA_NONE;
  FunctionCallee CopyConstructor = nullptr;
  llvm::Type *PointeeType = nullptr;

  DSATypeInfo() = default;
  DSATypeInfo(DSAType Type) : Type(Type) {}
  DSATypeInfo(DSAType Type, llvm::Type *PointeeType)
      : Type(Type), PointeeType(PointeeType) {}
  DSATypeInfo(DSAType Type, FunctionCallee InCopyConstructor)
      : Type(Type), CopyConstructor(InCopyConstructor) {}

  DSATypeInfo(const DSATypeInfo &DTI) = default;

  DSATypeInfo &operator=(const DSATypeInfo &DTI) = default;
};
// Value -> DSA info for one region. MapVector keeps insertion order, so
// codegen iterates values deterministically.
using DSAValueMapTy = MapVector<Value *, DSATypeInfo>;
|
|
66
|
+
|
|
67
|
+
// Translation table from the "DIR.OMP.*" directive tags embedded in the IR
// (emitted by the Numba frontend) to LLVM's omp::Directive enumerators.
static const DenseMap<StringRef, Directive> StringToDir = {
    {"DIR.OMP.PARALLEL", OMPD_parallel},
    {"DIR.OMP.SINGLE", OMPD_single},
    {"DIR.OMP.CRITICAL", OMPD_critical},
    {"DIR.OMP.BARRIER", OMPD_barrier},
    {"DIR.OMP.LOOP", OMPD_for},
    {"DIR.OMP.PARALLEL.LOOP", OMPD_parallel_for},
    {"DIR.OMP.TASK", OMPD_task},
    {"DIR.OMP.TASKWAIT", OMPD_taskwait},
    {"DIR.OMP.TARGET", OMPD_target},
    {"DIR.OMP.TEAMS", OMPD_teams},
    {"DIR.OMP.DISTRIBUTE", OMPD_distribute},
    {"DIR.OMP.TEAMS.DISTRIBUTE", OMPD_teams_distribute},
    {"DIR.OMP.TEAMS.DISTRIBUTE.PARALLEL.LOOP",
     OMPD_teams_distribute_parallel_for},
    {"DIR.OMP.TARGET.TEAMS", OMPD_target_teams},
    {"DIR.OMP.TARGET.DATA", OMPD_target_data},
    {"DIR.OMP.TARGET.ENTER.DATA", OMPD_target_enter_data},
    {"DIR.OMP.TARGET.EXIT.DATA", OMPD_target_exit_data},
    {"DIR.OMP.TARGET.UPDATE", OMPD_target_update},
    {"DIR.OMP.TARGET.TEAMS.DISTRIBUTE", OMPD_target_teams_distribute},
    {"DIR.OMP.DISTRIBUTE.PARALLEL.LOOP", OMPD_distribute_parallel_for},
    {"DIR.OMP.TARGET.TEAMS.DISTRIBUTE.PARALLEL.LOOP",
     OMPD_target_teams_distribute_parallel_for}};
|
|
91
|
+
|
|
92
|
+
// TODO: add more reduction operators.
// Translation table from "QUAL.OMP.*" clause tags embedded in the IR to the
// DSAType enumerators above. Note DSA_MAP_STRUCT has no tag here; it is
// presumably synthesized internally for struct parents — confirm in the .cpp.
static const DenseMap<StringRef, DSAType> StringToDSA = {
    {"QUAL.OMP.PRIVATE", DSA_PRIVATE},
    {"QUAL.OMP.FIRSTPRIVATE", DSA_FIRSTPRIVATE},
    {"QUAL.OMP.LASTPRIVATE", DSA_LASTPRIVATE},
    {"QUAL.OMP.SHARED", DSA_SHARED},
    {"QUAL.OMP.REDUCTION.ADD", DSA_REDUCTION_ADD},
    {"QUAL.OMP.REDUCTION.SUB", DSA_REDUCTION_SUB},
    {"QUAL.OMP.REDUCTION.MUL", DSA_REDUCTION_MUL},
    {"QUAL.OMP.MAP.ALLOC", DSA_MAP_ALLOC},
    {"QUAL.OMP.MAP.TO", DSA_MAP_TO},
    {"QUAL.OMP.MAP.FROM", DSA_MAP_FROM},
    {"QUAL.OMP.MAP.TOFROM", DSA_MAP_TOFROM},
    {"QUAL.OMP.MAP.ALLOC.STRUCT", DSA_MAP_ALLOC_STRUCT},
    {"QUAL.OMP.MAP.TO.STRUCT", DSA_MAP_TO_STRUCT},
    {"QUAL.OMP.MAP.FROM.STRUCT", DSA_MAP_FROM_STRUCT},
    {"QUAL.OMP.MAP.TOFROM.STRUCT", DSA_MAP_TOFROM_STRUCT}};
|
|
109
|
+
|
|
110
|
+
namespace helpers {
// Returns the pointee type associated with pointer value V, looked up via
// DSAValueMap (definition lives in CGIntrinsicsOpenMP.cpp).
Type *getPointeeType(DSAValueMapTy &DSAValueMap, Value *V);
}
|
|
113
|
+
|
|
114
|
+
inline std::string toString(const DSAType &DSA) {
|
|
115
|
+
switch (DSA) {
|
|
116
|
+
case DSA_NONE:
|
|
117
|
+
return "DSA_NONE";
|
|
118
|
+
case DSA_PRIVATE:
|
|
119
|
+
return "DSA_PRIVATE";
|
|
120
|
+
case DSA_FIRSTPRIVATE:
|
|
121
|
+
return "DSA_FIRSTPRIVATE";
|
|
122
|
+
case DSA_LASTPRIVATE:
|
|
123
|
+
return "DSA_LASTPRIVATE";
|
|
124
|
+
case DSA_SHARED:
|
|
125
|
+
return "DSA_SHARED";
|
|
126
|
+
case DSA_REDUCTION_ADD:
|
|
127
|
+
return "DSA_REDUCTION_ADD";
|
|
128
|
+
case DSA_REDUCTION_SUB:
|
|
129
|
+
return "DSA_REDUCTION_SUB";
|
|
130
|
+
case DSA_REDUCTION_MUL:
|
|
131
|
+
return "DSA_REDUCTION_MUL";
|
|
132
|
+
case DSA_MAP_ALLOC:
|
|
133
|
+
return "DSA_MAP_ALLOC";
|
|
134
|
+
case DSA_MAP_TO:
|
|
135
|
+
return "DSA_MAP_TO";
|
|
136
|
+
case DSA_MAP_FROM:
|
|
137
|
+
return "DSA_MAP_FROM";
|
|
138
|
+
case DSA_MAP_TOFROM:
|
|
139
|
+
return "DSA_MAP_TOFROM";
|
|
140
|
+
case DSA_MAP_ALLOC_STRUCT:
|
|
141
|
+
return "DSA_MAP_ALLOC_STRUCT";
|
|
142
|
+
case DSA_MAP_TO_STRUCT:
|
|
143
|
+
return "DSA_MAP_TO_STRUCT";
|
|
144
|
+
case DSA_MAP_FROM_STRUCT:
|
|
145
|
+
return "DSA_MAP_FROM_STRUCT";
|
|
146
|
+
case DSA_MAP_TOFROM_STRUCT:
|
|
147
|
+
return "DSA_MAP_TOFROM_STRUCT";
|
|
148
|
+
case DSA_MAP_STRUCT:
|
|
149
|
+
return "DSA_MAP_STRUCT";
|
|
150
|
+
default:
|
|
151
|
+
FATAL_ERROR("Unknown DSA: " + std::to_string(DSA));
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/// Data attributes for each data reference used in an OpenMP target region.
/// Bit flags combined into the map-type argument of libomptarget mapping
/// calls; values mirror the runtime's tgt_map_type (keep in sync with
/// LLVM's openmp/libomptarget omptarget.h).
enum tgt_map_type {
  // No flags
  OMP_TGT_MAPTYPE_NONE = 0x000,
  // copy data from host to device
  OMP_TGT_MAPTYPE_TO = 0x001,
  // copy data from device to host
  OMP_TGT_MAPTYPE_FROM = 0x002,
  // copy regardless of the reference count
  OMP_TGT_MAPTYPE_ALWAYS = 0x004,
  // force unmapping of data
  OMP_TGT_MAPTYPE_DELETE = 0x008,
  // map the pointer as well as the pointee
  OMP_TGT_MAPTYPE_PTR_AND_OBJ = 0x010,
  // pass device base address to kernel
  OMP_TGT_MAPTYPE_TARGET_PARAM = 0x020,
  // return base device address of mapped data
  OMP_TGT_MAPTYPE_RETURN_PARAM = 0x040,
  // private variable - not mapped
  OMP_TGT_MAPTYPE_PRIVATE = 0x080,
  // copy by value - not mapped
  OMP_TGT_MAPTYPE_LITERAL = 0x100,
  // mapping is implicit
  OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
  // NOTE(review): upstream LLVM documents CLOSE as "map close to the
  // device" (the close modifier), not "copy data to device" — confirm
  // against omptarget.h.
  OMP_TGT_MAPTYPE_CLOSE = 0x400,
  // runtime error if not already allocated
  OMP_TGT_MAPTYPE_PRESENT = 0x1000,
  // descriptor for non-contiguous target-update
  OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
  // member of struct, member given by [16 MSBs] - 1
  OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
};
|
|
188
|
+
|
|
189
|
+
// Bundle of the argument arrays passed to libomptarget mapping entry points
// (__tgt_target_* calls): per-entry sizes, map-type flags, debug names,
// base pointers and section pointers, plus the number of entries.
struct OffloadingMappingArgsTy {
  Value *Sizes;
  Value *MapTypes;
  Value *MapNames;
  Value *BasePtrs;
  Value *Ptrs;
  size_t Size;
};
|
|
197
|
+
|
|
198
|
+
// Mapping description for a single struct field: its pointee type, the
// field index, the byte offset, the number of elements to map, and the
// map kind (one of the DSA_MAP_* values).
struct FieldMappingInfo {
  Type *PointeeType;
  Value *Index;
  Value *Offset;
  Value *NumElements;
  DSAType MapType;
};

// Struct value -> list of its mapped fields (ordered, deterministic).
using StructMapTy = MapVector<Value *, SmallVector<FieldMappingInfo, 4>>;
|
|
207
|
+
|
|
208
|
+
// Loop description for worksharing/distribute codegen: induction variable,
// start value, lower/upper bounds, schedule kinds and chunk size.
struct OMPLoopInfoStruct {
  Value *IV = nullptr;
  Value *Start = nullptr;
  Value *LB = nullptr;
  Value *UB = nullptr;
  // 0 is invalid, schedule will be set by the user or to reasonable defaults
  // by the pass.
  OMPScheduleType DistSched = static_cast<OMPScheduleType>(0);
  OMPScheduleType Sched = static_cast<OMPScheduleType>(0);
  Value *Chunk = nullptr;
};
|
|
219
|
+
|
|
220
|
+
// Bounds produced by distribute codegen, consumed by a nested parallel-for
// in combined distribute-parallel-for constructs.
struct OMPDistributeInfoStruct {
  Value *UB = nullptr;
  Value *LB = nullptr;
};
|
|
224
|
+
|
|
225
|
+
// Information for lowering a target region: name of the device function,
// the embedded device image (ELF), num_teams/thread_limit clause values,
// execution mode (generic vs. SPMD) and the nowait clause.
struct TargetInfoStruct {
  StringRef DevFuncName;
  ConstantDataArray *ELF = nullptr;
  Value *NumTeams = nullptr;
  Value *ThreadLimit = nullptr;
  OMPTgtExecModeFlags ExecMode = OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;
  bool NoWait = false;
};
|
|
233
|
+
|
|
234
|
+
// Clause values for a parallel region: num_threads and the if-condition
// (either may be null when the clause is absent).
struct ParRegionInfoStruct {
  Value *NumThreads = nullptr;
  Value *IfCondition = nullptr;
};
|
|
238
|
+
|
|
239
|
+
// Clause values for a teams region: num_teams, thread_limit and the
// execution mode (generic vs. SPMD) used on device.
struct TeamsInfoStruct {
  Value *NumTeams = nullptr;
  Value *ThreadLimit = nullptr;
  OMPTgtExecModeFlags ExecMode = OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;
};
|
|
244
|
+
|
|
245
|
+
// Result of outlining a region: the outlined function, its entry/exit
// blocks, and any reduction descriptors collected while outlining.
struct OutlinedInfoStruct {
  Function *Fn;
  BasicBlock *EntryBB;
  BasicBlock *ExitBB;
  SmallVector<OpenMPIRBuilder::ReductionInfo> ReductionInfos;
};
|
|
251
|
+
|
|
252
|
+
// IR-emission helpers for OpenMP reduction clauses. The template parameter
// selects the operator (DSA_REDUCTION_ADD/SUB/MUL); emitOperation and
// emitAtomicOperationRMW are declared here and defined per-operator
// out-of-line (presumably in CGIntrinsicsOpenMP.cpp — confirm).
struct CGReduction {
  // Emit the non-atomic combine "LHS op RHS" and return the result.
  template <DSAType ReductionOperator>
  static Value *emitOperation(IRBuilderBase &IRB, Value *LHS, Value *RHS);

  // Non-atomic reduction callback matching OpenMPIRBuilder::ReductionInfo:
  // combines LHS and RHS at IP, reporting the combined value via Result.
  template <DSAType ReductionOperator>
  static OpenMPIRBuilder::InsertPointTy
  reductionNonAtomic(OpenMPIRBuilder::InsertPointTy IP, Value *LHS, Value *RHS,
                     Value *&Result) {
    IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
    Result = emitOperation<ReductionOperator>(Builder, LHS, RHS);
    return Builder.saveIP();
  }

  // Atomic combine via an atomicrmw instruction (integer add/sub only).
  template <DSAType ReductionOperator>
  static InsertPointTy emitAtomicOperationRMW(IRBuilderBase &IRB, Value *LHS,
                                              Value *Partial);

  // Atomic combine via a cmpxchg retry loop: atomically load *LHS, apply
  // the operator with Partial, and try to publish the result; on failure,
  // redo the operation on the freshly observed value until the exchange
  // succeeds. Operands are bitcast to a same-width integer because cmpxchg
  // does not accept floating-point operands.
  template <DSAType ReductionOperator>
  static InsertPointTy emitAtomicOperationCmpxchg(IRBuilderBase &IRB,
                                                  InsertPointTy IP, Type *VTy,
                                                  Value *LHS, Value *Partial) {
    LLVMContext &Ctx = IRB.getContext();
    unsigned int Bitwidth = VTy->getScalarSizeInBits();
    // Integer type of the same width as the value type; only 32/64-bit
    // scalars are expected here.
    auto *IntTy =
        (Bitwidth == 64 ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx));
#if LLVM_VERSION_MAJOR <= 15
    // Typed pointers (pre-opaque-pointer LLVM).
    auto *IntPtrTy =
        (Bitwidth == 64 ? Type::getInt64PtrTy(Ctx) : Type::getInt32PtrTy(Ctx));
#else
    auto *IntPtrTy = PointerType::getUnqual(IntTy);
#endif

    auto SaveIP = IRB.saveIP();
    // TODO: move alloca to function entry point, may be outlined later, e.g.,
    // for nested under parallel.
    // Scratch slot carrying the last-observed value into the retry block.
    Value *AllocaTemp = IRB.CreateAlloca(IntTy, nullptr, "atomic.alloca.tmp");
    IRB.restoreIP(SaveIP);

    // First attempt: load current value, combine, try to exchange.
    Value *CastLHS =
        IRB.CreateBitCast(LHS, IntPtrTy, LHS->getName() + ".cast.int");
    auto *LoadAtomic =
        IRB.CreateLoad(IntTy, CastLHS, LHS->getName() + ".load.atomic");
    LoadAtomic->setAtomic(AtomicOrdering::Monotonic);

    Value *CastFP = IRB.CreateBitCast(LoadAtomic, VTy, "cast.fp");
    Value *RedOp = emitOperation<ReductionOperator>(IRB, CastFP, Partial);
    Value *CastFAdd =
        IRB.CreateBitCast(RedOp, IntTy, RedOp->getName() + ".cast.int");

    auto *CmpXchg = IRB.CreateAtomicCmpXchg(
        CastLHS, LoadAtomic, CastFAdd, MaybeAlign(), AtomicOrdering::Monotonic,
        AtomicOrdering::Monotonic);

    // cmpxchg returns {observed value, success flag}.
    auto *Returned = IRB.CreateExtractValue(CmpXchg, 0);
    auto *StoreTemp = IRB.CreateStore(Returned, AllocaTemp);
    auto *Cond = IRB.CreateExtractValue(CmpXchg, 1);
    // Add unreachable as placeholder for splitting.
    auto *Unreachable = IRB.CreateUnreachable();
    auto *IfTrueTerm = SplitBlockAndInsertIfThen(Cond, Unreachable, false);
    auto *ExitBlock = IfTrueTerm->getParent();
    auto *Retry = ExitBlock->getSingleSuccessor();
    assert(Retry && "Expected single successor tail block");
    // Erase the fall-through branch.
    IfTrueTerm->eraseFromParent();

    // Retry block: recombine with the freshly observed value and loop until
    // the exchange succeeds.
    SaveIP = IRB.saveIP();
    IRB.SetInsertPoint(Retry, Retry->getFirstInsertionPt());
    auto *LoadReturned = IRB.CreateLoad(IntTy, AllocaTemp);
    auto *CastLoad = IRB.CreateBitCast(LoadReturned, VTy);
    // FAdd = IRB.CreateFAdd(CastLoad, Partial, "retry.add");
    RedOp = emitOperation<ReductionOperator>(IRB, CastLoad, Partial);
    CastFAdd = IRB.CreateBitCast(RedOp, IntTy, RedOp->getName() + ".cast.int");
    CmpXchg = IRB.CreateAtomicCmpXchg(CastLHS, LoadReturned, CastFAdd,
                                      MaybeAlign(), AtomicOrdering::Monotonic,
                                      AtomicOrdering::Monotonic);
    Returned = IRB.CreateExtractValue(CmpXchg, 0);
    StoreTemp = IRB.CreateStore(Returned, AllocaTemp);
    Cond = IRB.CreateExtractValue(CmpXchg, 1);
    IRB.CreateCondBr(Cond, ExitBlock, Retry);
    // Remove unreachable placeholder.
    Unreachable->eraseFromParent();
    IRB.restoreIP(SaveIP);

    return InsertPointTy(ExitBlock, ExitBlock->getFirstInsertionPt());
  }

  // Atomic reduction callback matching OpenMPIRBuilder::ReductionInfo:
  // loads the thread-partial value from RHS and combines it into *LHS
  // atomically, choosing atomicrmw or the cmpxchg loop based on type and
  // operator support.
  template <DSAType ReductionOperator>
  static OpenMPIRBuilder::InsertPointTy
  reductionAtomic(OpenMPIRBuilder::InsertPointTy IP, Type *VTy, Value *LHS,
                  Value *RHS) {
    IRBuilder<> Builder(IP.getBlock(), IP.getPoint());
    Value *Partial = Builder.CreateLoad(VTy, RHS, "red.partial");
    if (VTy->isIntegerTy())
      switch (ReductionOperator) {
      case DSA_REDUCTION_ADD:
      case DSA_REDUCTION_SUB:
        return emitAtomicOperationRMW<ReductionOperator>(Builder, LHS, Partial);
        break;
      case DSA_REDUCTION_MUL:
        // RMW does not support mul.
        return emitAtomicOperationCmpxchg<ReductionOperator>(Builder, IP, VTy,
                                                             LHS, Partial);
      default:
        FATAL_ERROR("Unsupported reduction operation");
      }
    else if (VTy->isFloatTy() || VTy->isDoubleTy()) {
      // NOTE: Using atomicrmw for floats is buggy for aarch64, fallback to
      // cmpxchg codegen for now similarly to Clang. Revisit with newer LLVM
      // versions.
      // Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, LHS, Partial, None,
      // AtomicOrdering::Monotonic);
      return emitAtomicOperationCmpxchg<ReductionOperator>(Builder, IP, VTy,
                                                           LHS, Partial);
    } else
      FATAL_ERROR("Unsupported type for reductionAtomic");
  }

  // Allocates the private reduction storage for Orig (a shared-memory
  // global in address space 3 for GPU teams reductions, otherwise a stack
  // alloca at AllocaIP), initializes it with the operator's identity value,
  // and appends a matching ReductionInfo entry. Returns the private storage.
  template <DSAType ReductionOperator>
  static Value *emitInitAndAppendInfo(
      IRBuilderBase &IRB, InsertPointTy AllocaIP, Value *Orig,
      Type *ReductionTy,
      SmallVectorImpl<OpenMPIRBuilder::ReductionInfo> &ReductionInfos,
      bool IsGPUTeamsReduction) {
    // Identity element: 0 for add/sub, 1 for mul.
    auto GetIdentityValue = []() {
      switch (ReductionOperator) {
      case DSA_REDUCTION_ADD:
      case DSA_REDUCTION_SUB:
        return 0;
      case DSA_REDUCTION_MUL:
        return 1;
      default:
        FATAL_ERROR("Unknown reduction type");
      }
    };

    auto SaveIP = IRB.saveIP();
    IRB.restoreIP(AllocaIP);
    Value *Priv = nullptr;

    if (IsGPUTeamsReduction) {
      // Address space 3 is GPU shared/LDS memory; cast back to the generic
      // address space of Orig for uniform use by later codegen.
      Module *M = IRB.GetInsertBlock()->getModule();
      GlobalVariable *ShmemGV = new GlobalVariable(
          *M, ReductionTy, false, GlobalValue::InternalLinkage,
          UndefValue::get(ReductionTy), Orig->getName() + ".red.priv.shmem",
          nullptr, llvm::GlobalValue::NotThreadLocal, 3, false);
      Value *AddrCast = IRB.CreateAddrSpaceCast(ShmemGV, Orig->getType());
      Priv = AddrCast;
    } else {
      Priv = IRB.CreateAlloca(ReductionTy, /* ArraySize */ nullptr,
                              Orig->getName() + ".red.priv");
    }
    IRB.restoreIP(SaveIP);

    // Store identity value based on operation and type.
    if (ReductionTy->isIntegerTy()) {
      IRB.CreateStore(ConstantInt::get(ReductionTy, GetIdentityValue()), Priv);
    } else if (ReductionTy->isFloatTy() || ReductionTy->isDoubleTy()) {
      IRB.CreateStore(ConstantFP::get(ReductionTy, GetIdentityValue()), Priv);
    } else
      FATAL_ERROR("Unsupported type to init with identity reduction value");

#if LLVM_VERSION_MAJOR <= 16
    ReductionInfos.push_back(
        {ReductionTy, Orig, Priv,
         CGReduction::reductionNonAtomic<ReductionOperator>,
         CGReduction::reductionAtomic<ReductionOperator>});
#else
    // TODO: Support more evaluation kinds besides scalar.
    ReductionInfos.push_back(
        {ReductionTy, Orig, Priv, OpenMPIRBuilder::EvalKind::Scalar,
         CGReduction::reductionNonAtomic<ReductionOperator>,
         /* ReductionGenClang */ nullptr,
         CGReduction::reductionAtomic<ReductionOperator>});
#endif

    return Priv;
  }
};
|
|
430
|
+
|
|
431
|
+
// Code-generation driver that lowers intrinsic-tagged OpenMP regions of a
// module to LLVM OpenMP runtime calls via OpenMPIRBuilder. One instance per
// module; the emitOMP* entry points are invoked per directive by the
// IntrinsicsOpenMP pass (definitions in CGIntrinsicsOpenMP.cpp).
class CGIntrinsicsOpenMP {
public:
  CGIntrinsicsOpenMP(Module &M);

  // Shared OpenMP IR builder used for most codegen.
  OpenMPIRBuilder OMPBuilder;
  // Module being transformed.
  Module &M;
  // Cached __tgt_offload_entry descriptor struct type.
  StructType *TgtOffloadEntryTy;

  StructType *getTgtOffloadEntryTy() { return TgtOffloadEntryTy; }

  // Lower a parallel region spanning [StartBB, EndBB], continuing at
  // AfterBB; dispatches to host- or device-runtime codegen.
  void emitOMPParallel(DSAValueMapTy &DSAValueMap, ValueToValueMapTy *VMap,
                       const DebugLoc &DL, Function *Fn, BasicBlock *BBEntry,
                       BasicBlock *StartBB, BasicBlock *EndBB,
                       BasicBlock *AfterBB, FinalizeCallbackTy FiniCB,
                       ParRegionInfoStruct &ParRegionInfo);

  // Lower a worksharing loop (omp for).
  void emitOMPFor(DSAValueMapTy &DSAValueMap, OMPLoopInfoStruct &OMPLoopInfo,
                  BasicBlock *StartBB, BasicBlock *ExitBB, bool IsStandalone,
                  bool IsDistributeParallelFor);

  // Lower a task region.
  void emitOMPTask(DSAValueMapTy &DSAValueMap, Function *Fn,
                   BasicBlock *BBEntry, BasicBlock *StartBB, BasicBlock *EndBB,
                   BasicBlock *AfterBB);

  // Emit the offloading entry descriptor for a device function.
  void emitOMPOffloadingEntry(const Twine &DevFuncName, Value *EntryPtr,
                              Constant *&OMPOffloadEntry);

  // Build the base-pointer/pointer/size/map-type argument arrays consumed by
  // libomptarget mapping calls.
  void emitOMPOffloadingMappings(InsertPointTy AllocaIP,
                                 DSAValueMapTy &DSAValueMap,
                                 StructMapTy &StructMappingInfoMap,
                                 OffloadingMappingArgsTy &OffloadingMappingArgs,
                                 bool IsTargetRegion);

  // Lower a single region.
  void emitOMPSingle(Function *Fn, BasicBlock *BBEntry, BasicBlock *AfterBB,
                     BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB);

  // Lower a critical region.
  void emitOMPCritical(Function *Fn, BasicBlock *BBEntry, BasicBlock *AfterBB,
                       BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB);

  // Emit a barrier for directive kind DK.
  void emitOMPBarrier(Function *Fn, BasicBlock *BBEntry, Directive DK);

  // Emit a taskwait call.
  void emitOMPTaskwait(BasicBlock *BBEntry);

  // Lower a target region; dispatches to host/device variants below.
  void emitOMPTarget(Function *Fn, BasicBlock *BBEntry, BasicBlock *StartBB,
                     BasicBlock *EndBB, DSAValueMapTy &DSAValueMap,
                     StructMapTy &StructMappingInfoMap,
                     TargetInfoStruct &TargetInfo,
                     OMPLoopInfoStruct *OMPLoopInfo, bool IsDeviceTargetRegion);

  // Lower a teams region.
  void emitOMPTeams(DSAValueMapTy &DSAValueMap, ValueToValueMapTy *VMap,
                    const DebugLoc &DL, Function *Fn, BasicBlock *BBEntry,
                    BasicBlock *StartBB, BasicBlock *EndBB, BasicBlock *AfterBB,
                    TeamsInfoStruct &TeamsInfo);

  // Lower target data / enter data / exit data / update constructs.
  void emitOMPTargetData(Function *Fn, BasicBlock *BBEntry, BasicBlock *BBExit,
                         DSAValueMapTy &DSAValueMap,
                         StructMapTy &StructMappingInfoMap);

  void emitOMPTargetEnterData(Function *Fn, BasicBlock *BBEntry,
                              DSAValueMapTy &DSAValueMap,
                              StructMapTy &StructMappingInfoMap);

  void emitOMPTargetExitData(Function *Fn, BasicBlock *BBEntry,
                             DSAValueMapTy &DSAValueMap,
                             StructMapTy &StructMappingInfoMap);

  void emitOMPTargetUpdate(Function *Fn, BasicBlock *BBEntry,
                           DSAValueMapTy &DSAValueMap,
                           StructMapTy &StructMappingInfoMap);

  // Lower a distribute loop; optionally reports bounds through
  // DistributeInfo for combined constructs.
  void emitOMPDistribute(DSAValueMapTy &DSAValueMap,
                         OMPLoopInfoStruct &OMPLoopInfo, BasicBlock *StartBB,
                         BasicBlock *ExitBB, bool IsStandalone,
                         bool IsDistributeParallelFor,
                         OMPDistributeInfoStruct *DistributeInfo = nullptr);

  // Lower a combined distribute parallel for.
  void emitOMPDistributeParallelFor(DSAValueMapTy &DSAValueMap,
                                    BasicBlock *StartBB, BasicBlock *ExitBB,
                                    OMPLoopInfoStruct &OMPLoopInfo,
                                    ParRegionInfoStruct &ParRegionInfo,
                                    bool IsStandalone);

  // Lower a combined target teams region.
  void emitOMPTargetTeams(DSAValueMapTy &DSAValueMap, ValueToValueMapTy *VMap,
                          const DebugLoc &DL, Function *Fn, BasicBlock *EntryBB,
                          BasicBlock *StartBB, BasicBlock *EndBB,
                          BasicBlock *AfterBB, TargetInfoStruct &TargetInfo,
                          TeamsInfoStruct &TeamsInfo,
                          OMPLoopInfoStruct *OMPLoopInfo,
                          StructMapTy &StructMappingInfoMap,
                          bool IsDeviceTargetRegion);

  // Embed the device image (ELF) and registration globals for the wrapper.
  GlobalVariable *emitOffloadingGlobals(StringRef DevWrapperFuncName,
                                        ConstantDataArray *ELF);

  // Naming prefix for generated device wrapper functions.
  Twine getDevWrapperFuncPrefix() { return "__omp_offload_numba_"; }

  // Outline [StartBB, EndBB] of OuterFn into a new function, collecting the
  // captured variables; Suffix and Kind shape naming/ABI of the outlined fn.
  OutlinedInfoStruct
  createOutlinedFunction(DSAValueMapTy &DSAValueMap, ValueToValueMapTy *VMap,
                         Function *OuterFn, BasicBlock *StartBB,
                         BasicBlock *EndBB,
                         SmallVectorImpl<llvm::Value *> &CapturedVars,
                         StringRef Suffix, omp::Directive Kind);

  // Record values globalized for device execution (see
  // DeviceGlobalizedValues below).
  void setDeviceGlobalizedValues(const ArrayRef<Value *> GlobalizedValues);

private:
  // Host/device-specific lowering backends for parallel regions.
  void emitOMPParallelDeviceRuntime(DSAValueMapTy &DSAValueMap,
                                    ValueToValueMapTy *VMap, const DebugLoc &DL,
                                    Function *Fn, BasicBlock *BBEntry,
                                    BasicBlock *StartBB, BasicBlock *EndBB,
                                    BasicBlock *AfterBB,
                                    FinalizeCallbackTy FiniCB,
                                    ParRegionInfoStruct &ParRegionInfo);

  void emitOMPParallelHostRuntime(DSAValueMapTy &DSAValueMap,
                                  ValueToValueMapTy *VMap, const DebugLoc &DL,
                                  Function *Fn, BasicBlock *BBEntry,
                                  BasicBlock *StartBB, BasicBlock *EndBB,
                                  BasicBlock *AfterBB,
                                  FinalizeCallbackTy FiniCB,
                                  ParRegionInfoStruct &ParRegionInfo);
  void emitOMPParallelHostRuntimeOMPIRBuilder(
      DSAValueMapTy &DSAValueMap, ValueToValueMapTy *VMap, const DebugLoc &DL,
      Function *Fn, BasicBlock *BBEntry, BasicBlock *StartBB, BasicBlock *EndBB,
      BasicBlock *AfterBB, FinalizeCallbackTy FiniCB,
      ParRegionInfoStruct &ParRegionInfo);

  // Host/device-specific lowering backends for teams regions.
  void emitOMPTeamsDeviceRuntime(DSAValueMapTy &DSAValueMap,
                                 ValueToValueMapTy *VMap, const DebugLoc &DL,
                                 Function *Fn, BasicBlock *BBEntry,
                                 BasicBlock *StartBB, BasicBlock *EndBB,
                                 BasicBlock *AfterBB,
                                 TeamsInfoStruct &TeamsInfo);
  void emitOMPTeamsHostRuntime(DSAValueMapTy &DSAValueMap,
                               ValueToValueMapTy *VMap, const DebugLoc &DL,
                               Function *Fn, BasicBlock *BBEntry,
                               BasicBlock *StartBB, BasicBlock *EndBB,
                               BasicBlock *AfterBB, TeamsInfoStruct &TeamsInfo);

  // Host/device-specific lowering backends for target regions.
  void emitOMPTargetHost(Function *Fn, BasicBlock *BBEntry, BasicBlock *StartBB,
                         BasicBlock *EndBB, DSAValueMapTy &DSAValueMap,
                         StructMapTy &StructMappingInfoMap,
                         TargetInfoStruct &TargetInfo,
                         OMPLoopInfoStruct *OMPLoopInfo);

  void emitOMPTargetDevice(Function *Fn, BasicBlock *BBEntry,
                           BasicBlock *StartBB, BasicBlock *EndBB,
                           DSAValueMapTy &DSAValueMap,
                           StructMapTy &StructMappingInfoMap,
                           TargetInfoStruct &TargetInfo);

  // Common loop lowering shared by for/distribute codegen.
  void emitLoop(DSAValueMapTy &DSAValueMap, OMPLoopInfoStruct &OMPLoopInfo,
                BasicBlock *StartBB, BasicBlock *ExitBB, bool IsStandalone,
                bool IsDistribute, bool IsDistributeParallelFor,
                OMPDistributeInfoStruct *OMPDistributeInfo = nullptr);

  // Emit the reduction epilogue for host / device execution.
  InsertPointTy
  emitReductionsHost(const OpenMPIRBuilder::LocationDescription &Loc,
                     InsertPointTy AllocaIP,
                     ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos);

  InsertPointTy emitReductionsDevice(
      const OpenMPIRBuilder::LocationDescription &Loc, InsertPointTy AllocaIP,
      ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos, bool IsTeamSPMD);

  // Runtime-callee getters, selected by induction-variable width.
  FunctionCallee getKmpcForStaticInit(Type *Ty);
  FunctionCallee getKmpcDistributeStaticInit(Type *Ty);
  // Cast V to DestTy for runtime-call ABI purposes.
  Value *createScalarCast(Value *V, Type *DestTy);
  // True when generating code against the device OpenMP runtime.
  bool isOpenMPDeviceRuntime();

  // Values promoted to device-global storage; consulted during outlining.
  SmallPtrSet<Value *, 32> DeviceGlobalizedValues;
};
|
|
603
|
+
|
|
604
|
+
} // namespace iomp
|
|
605
|
+
|
|
606
|
+
#endif
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.20)
project(pyomp-pass)

# An explicit LLVM install prefix is mandatory; there is deliberately no
# fallback system search.
if(NOT DEFINED LLVM_DIR OR LLVM_DIR STREQUAL "")
  message(FATAL_ERROR
    "LLVM_DIR is required but not defined. "
    "Please specify it with: cmake -DLLVM_DIR=/path/to/llvm/install ...")
endif()

message(STATUS "LLVM_DIR ${LLVM_DIR}")

# NO_DEFAULT_PATH pins the build to exactly the LLVM pointed at by LLVM_DIR.
find_package(LLVM REQUIRED CONFIG NO_DEFAULT_PATH PATHS
    ${LLVM_DIR}
)

message(STATUS "Found LLVM version ${LLVM_VERSION}")

include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})

# Use the same C++ standard as LLVM does
set(CMAKE_CXX_STANDARD 17 CACHE STRING "")

# LLVM is normally built without RTTI. Be consistent with that.
if(NOT LLVM_ENABLE_RTTI)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif()

# The pass library loaded at runtime by the Python side (llvm_pass.py,
# presumably — confirm) to lower OpenMP intrinsics.
add_library(IntrinsicsOpenMP SHARED
  CGIntrinsicsOpenMP.cpp
  DebugOpenMP.cpp
  IntrinsicsOpenMP.cpp)

# Allow matching the C++ ABI of the LLVM build (old vs. new libstdc++ ABI).
if(DEFINED ENV{USE_CXX11_ABI})
  target_compile_definitions(IntrinsicsOpenMP PRIVATE _GLIBCXX_USE_CXX11_ABI=$ENV{USE_CXX11_ABI})
endif()

# Use static library components to avoid issues with shared library dependencies.
set(llvm_libs LLVMPasses)

# Fail the link on unresolved symbols (GNU ld only; not supported on macOS).
if(NOT APPLE)
  target_link_options(IntrinsicsOpenMP PRIVATE "-Wl,--no-undefined")
endif()

target_link_libraries(IntrinsicsOpenMP
  PRIVATE ${llvm_libs}
)

# Export only the single C entry point so statically linked LLVM symbols do
# not leak and clash with the host process's own LLVM.
if(APPLE)
  set_property(TARGET IntrinsicsOpenMP APPEND_STRING PROPERTY LINK_FLAGS "-Wl,-exported_symbol,_runIntrinsicsOpenMPPass")
else()
  set_property(TARGET IntrinsicsOpenMP APPEND_STRING PROPERTY LINK_FLAGS "-Wl,--exclude-libs,ALL")
endif()

# NOTE(review): DESTINATION is the absolute install prefix root (not lib/),
# presumably to match the wheel layout — confirm before changing.
install(TARGETS IntrinsicsOpenMP
  EXPORT IntrinsicsOpenMP
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}
)
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#include <cstdlib>
|
|
2
|
+
#include <iostream>
|
|
3
|
+
#include <string>
|
|
4
|
+
|
|
5
|
+
// Global debug toggle consulted by the pass's debug macros (declared in
// DebugOpenMP.h).
bool DebugOpenMPFlag;

// Initialize DebugOpenMPFlag from the NUMBA_DEBUG_OPENMP_LLVM_PASS
// environment variable: any integer value >= 1 enables debugging; unset,
// zero, negative, or non-numeric values leave it disabled.
void DebugOpenMPInit() {
  char *DebugStr = getenv("NUMBA_DEBUG_OPENMP_LLVM_PASS");
  DebugOpenMPFlag = false;
  if (DebugStr) {
    // Parse leniently with strtol: std::stoi throws std::invalid_argument /
    // std::out_of_range on malformed values (e.g. "yes"), which would
    // terminate the whole host compiler process. strtol reports failure by
    // leaving End at the start of the string instead.
    char *End = nullptr;
    long Val = std::strtol(DebugStr, &End, 10);
    if (End != DebugStr)
      DebugOpenMPFlag = (Val >= 1);
  }
}
|
|
12
|
+
|
|
13
|
+
/// Report an unrecoverable error with its source location on stderr, then
/// abort the process. Used via the FATAL_ERROR macro in DebugOpenMP.h.
[[noreturn]] void fatalError(const std::string &msg, const char *file,
                             int line) {
  // Compose the full diagnostic up front so it reaches the stream in a
  // single insertion.
  const std::string diagnostic = "Fatal error @ " + std::string(file) + ":" +
                                 std::to_string(line) + " :: " + msg + "\n";
  std::cerr << diagnostic;
  std::abort();
}
|