pyomp 0.5.0__cp314-cp314t-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numba/openmp/__init__.py +106 -0
- numba/openmp/_version.py +34 -0
- numba/openmp/analysis.py +251 -0
- numba/openmp/compiler.py +402 -0
- numba/openmp/config.py +27 -0
- numba/openmp/decorators.py +27 -0
- numba/openmp/exceptions.py +26 -0
- numba/openmp/ir_utils.py +4 -0
- numba/openmp/libs/openmp/lib/libgomp.1.dylib +0 -0
- numba/openmp/libs/openmp/lib/libgomp.dylib +0 -0
- numba/openmp/libs/openmp/lib/libiomp5.dylib +0 -0
- numba/openmp/libs/openmp/lib/libomp.dylib +0 -0
- numba/openmp/libs/openmp/patches/14.0.6/0001-BACKPORT-Fix-for-CUDA-OpenMP-RTL.patch +39 -0
- numba/openmp/libs/openmp/patches/14.0.6/0002-Fix-missing-includes.patch +12 -0
- numba/openmp/libs/openmp/patches/14.0.6/0003-Link-static-LLVM-libs.patch +13 -0
- numba/openmp/libs/openmp/patches/15.0.7/0001-Fix-missing-includes.patch +14 -0
- numba/openmp/libs/openmp/patches/15.0.7/0002-Link-LLVM-statically.patch +101 -0
- numba/openmp/libs/openmp/patches/15.0.7/0003-Disable-opaque-pointers-DeviceRTL-bitcode.patch +12 -0
- numba/openmp/libs/openmp/patches/16.0.6/0001-Load-plugins-from-install-directory.patch +53 -0
- numba/openmp/libs/openmp/patches/16.0.6/0002-Link-LLVM-statically.patch +218 -0
- numba/openmp/libs/openmp/patches/20.1.8/0001-Enable-standalone-build.patch +13 -0
- numba/openmp/libs/openmp/patches/20.1.8/0002-Link-statically-LLVM.patch +24 -0
- numba/openmp/libs/openmp/patches/20.1.8/0003-Do-not-build-liboffload.patch +12 -0
- numba/openmp/libs/pass/CGIntrinsicsOpenMP.cpp +2939 -0
- numba/openmp/libs/pass/CGIntrinsicsOpenMP.h +606 -0
- numba/openmp/libs/pass/CMakeLists.txt +57 -0
- numba/openmp/libs/pass/DebugOpenMP.cpp +17 -0
- numba/openmp/libs/pass/DebugOpenMP.h +28 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP.cpp +837 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP.h +13 -0
- numba/openmp/libs/pass/IntrinsicsOpenMP_CAPI.h +23 -0
- numba/openmp/libs/pass/libIntrinsicsOpenMP.dylib +0 -0
- numba/openmp/link_utils.py +126 -0
- numba/openmp/llvm_pass.py +48 -0
- numba/openmp/llvmlite_extensions.py +75 -0
- numba/openmp/omp_context.py +242 -0
- numba/openmp/omp_grammar.py +696 -0
- numba/openmp/omp_ir.py +2105 -0
- numba/openmp/omp_lower.py +3125 -0
- numba/openmp/omp_runtime.py +107 -0
- numba/openmp/overloads.py +53 -0
- numba/openmp/parser.py +6 -0
- numba/openmp/tags.py +532 -0
- numba/openmp/tests/test_openmp.py +5056 -0
- pyomp-0.5.0.dist-info/METADATA +193 -0
- pyomp-0.5.0.dist-info/RECORD +52 -0
- pyomp-0.5.0.dist-info/WHEEL +6 -0
- pyomp-0.5.0.dist-info/licenses/LICENSE +25 -0
- pyomp-0.5.0.dist-info/licenses/LICENSE-OPENMP.txt +361 -0
- pyomp-0.5.0.dist-info/top_level.txt +3 -0
- pyomp.dylibs/libc++.1.0.dylib +0 -0
- pyomp.dylibs/libzstd.1.5.7.dylib +0 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#ifndef LLVM_TRANSFORMS_INTRINSICS_OPENMP_H
|
|
2
|
+
#define LLVM_TRANSFORMS_INTRINSICS_OPENMP_H
|
|
3
|
+
|
|
4
|
+
#include <llvm/IR/PassManager.h>
|
|
5
|
+
#include <llvm/Pass.h>
|
|
6
|
+
|
|
7
|
+
namespace llvm {

/// Factory for the IntrinsicsOpenMP pass, returned as a ModulePass pointer.
/// Only the declaration is visible in this header; the definition lives in
/// another translation unit.
/// NOTE(review): ownership of the returned pointer is not stated here --
/// presumably the pass manager it is added to takes ownership; confirm.
ModulePass *createIntrinsicsOpenMPPass();

} // namespace llvm
|
|
12
|
+
|
|
13
|
+
#endif // LLVM_TRANSFORMS_INTRINSICS_OPENMP_H
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#ifndef LLVM_C_TRANSFORMS_INTRINSICS_OPENMP_H
|
|
2
|
+
#define LLVM_C_TRANSFORMS_INTRINSICS_OPENMP_H
|
|
3
|
+
|
|
4
|
+
#include <llvm-c/ExternC.h>
|
|
5
|
+
#include <llvm-c/Types.h>
|
|
6
|
+
|
|
7
|
+
LLVM_C_EXTERN_C_BEGIN

/**
 * @defgroup LLVMCTransformsIntrinsicsOpenMP IntrinsicsOpenMP transformations
 * @ingroup LLVMCTransforms
 *
 * @{
 */

/**
 * C entry point for the IntrinsicsOpenMP pass.
 * See llvm::createIntrinsicsOpenMPPass function.
 *
 * @param PM Pass manager handle. NOTE(review): judging by the name, the pass
 *           is presumably appended to this manager; the definition is not
 *           visible in this header -- confirm.
 */
void LLVMAddIntrinsicsOpenMPPass(LLVMPassManagerRef PM);

/**
 * @}
 */
LLVM_C_EXTERN_C_END
|
|
23
|
+
#endif
|
|
Binary file
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Python 3.12+ removed distutils; use the shim in setuptools.
|
|
7
|
+
try:
|
|
8
|
+
from setuptools._distutils import ccompiler, sysconfig
|
|
9
|
+
except Exception: # Python <3.12, or older setuptools
|
|
10
|
+
from distutils import ccompiler, sysconfig # type: ignore
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@lru_cache
def _generate_trampolines():
    """Return C source that defines trampolines for numba/NRT helper symbols.

    Each trampoline is an exported C function that forwards its arguments to
    the absolute address numba publishes for the helper of the same name.
    We use trampolines to link the absolute symbol addresses from numba to the
    self-contained shared library for the OpenMP target CPU module.

    The result is cached at module level so the source is generated only once
    per process, however many libraries are linked.
    """
    # TODO: ask numba upstream to provide a static library with these symbols.
    from numba import _helperlib
    from numba.core.runtime import _nrt_python as _nrt

    # Signature mapping for numba/NRT functions. Add more as needed.
    signatures = {
        # GIL management
        "numba_gil_ensure": ("void", []),
        "numba_gil_release": ("void", []),
        # Memory allocation
        "NRT_MemInfo_alloc": ("void*", ["size_t"]),
        "NRT_MemInfo_alloc_safe": ("void*", ["size_t"]),
        "NRT_MemInfo_alloc_aligned": ("void*", ["size_t", "size_t"]),
        "NRT_MemInfo_alloc_safe_aligned": ("void*", ["size_t", "size_t"]),
        "NRT_MemInfo_free": ("void", ["void*"]),
        # Helperlib
        "numba_unpickle": ("void*", ["void*", "int", "void*"]),
    }

    symbols = []

    # _helperlib symbols are exported to C with a "numba_" prefix.
    for py_name, c_address in _helperlib.c_helpers.items():
        c_name = "numba_" + py_name
        if c_name in signatures:
            ret_type, params = signatures[c_name]
            symbols.append((c_name, c_address, ret_type, params))

    # _nrt symbols get an "NRT_" prefix unless they already start with "_".
    for py_name, c_address in _nrt.c_helpers.items():
        c_name = py_name if py_name.startswith("_") else "NRT_" + py_name
        if c_name in signatures:
            ret_type, params = signatures[c_name]
            symbols.append((c_name, c_address, ret_type, params))

    chunks = ["#include <stddef.h>"]
    for c_name, c_address, ret_type, params in sorted(symbols):
        if params:
            param_list = ", ".join(
                f"{ptype} arg{i}" for i, ptype in enumerate(params)
            )
            arg_list = ", ".join(f"arg{i}" for i in range(len(params)))
            pointer_params = ", ".join(params)
        else:
            param_list = "void"
            arg_list = ""
            pointer_params = "void"

        func_ptr_type = f"{ret_type} (*)({pointer_params})"
        # A void trampoline must not "return" the forwarded call's value.
        return_kw = "" if ret_type == "void" else "return "

        chunks.append(
            f"""
__attribute__((visibility("default")))
{ret_type} {c_name}({param_list}) {{
    {return_kw}(({func_ptr_type})0x{c_address:x})({arg_list});
}}
"""
        )

    return "".join(chunks)


def link_shared_library(obj_path, out_path):
    """Produce a shared library from a single object file and link numba C symbols.

    Uses distutils' compiler. The generated trampoline source and its object
    file live in a private temporary directory that is removed on exit, so
    nothing is written to the current working directory and no intermediate
    file outlives the call, whether it succeeds or fails.

    Args:
        obj_path: Path of the object file to link.
        out_path: Path of the shared library to create.

    Raises:
        RuntimeError: If compiling the trampolines or linking fails; the
            underlying error is chained as ``__cause__``.
    """
    obj_path = str(Path(obj_path))
    out_path = str(Path(out_path))

    cc = ccompiler.new_compiler()
    sysconfig.customize_compiler(cc)
    extra_pre = []
    extra_post = []

    with tempfile.TemporaryDirectory() as build_dir:
        trampoline_c = os.path.join(build_dir, "trampolines.c")
        with open(trampoline_c, "w") as f:
            f.write(_generate_trampolines())

        try:
            # output_dir keeps the object file inside build_dir; without it
            # distutils derives a path relative to the current directory.
            trampoline_o = cc.compile([trampoline_c], output_dir=build_dir)
        except Exception as e:
            raise RuntimeError(
                f"Compilation failed for trampolines in {trampoline_c}"
            ) from e

        try:
            cc.link_shared_object(
                objects=[obj_path] + trampoline_o,
                output_filename=out_path,
                extra_preargs=extra_pre,
                extra_postargs=extra_post,
            )
        except Exception as e:
            raise RuntimeError(f"Link failed for {out_path}") from e
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import ctypes
|
|
2
|
+
import sys
|
|
3
|
+
import llvmlite.binding as ll
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from .config import libpath, DEBUG_OPENMP_LLVM_PASS
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def run_intrinsics_openmp_pass(ll_module):
    """Roundtrip an LLVM module through the intrinsics OpenMP pass library.

    The module is serialized to bitcode and handed to the
    ``runIntrinsicsOpenMPPass`` entry point of the pass shared library together
    with a callback. Bytes delivered through the callback are collected and
    parsed back into a module.

    Args:
        ll_module: Module providing ``as_bitcode()`` and ``name``.

    Returns:
        The module parsed from the bitcode the pass wrote out.

    Raises:
        RuntimeError: If the pass entry point returns a non-zero code.
    """
    suffix = "dylib" if sys.platform == "darwin" else "so"
    pass_library = libpath / "pass" / f"libIntrinsicsOpenMP.{suffix}"

    # Callback signature: (data pointer, byte count) -> None.
    callback_type = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_size_t)
    collected = bytearray()

    def _collect(data_ptr, nbytes):
        collected.extend(ctypes.string_at(data_ptr, nbytes))

    # Keep a named reference so the callback object stays alive for the call.
    collect_cb = callback_type(_collect)

    pass_lib = ctypes.CDLL(str(pass_library))
    run_pass = pass_lib.runIntrinsicsOpenMPPass
    run_pass.argtypes = [ctypes.c_void_p, ctypes.c_size_t, callback_type]
    run_pass.restype = ctypes.c_int

    bitcode = ll_module.as_bitcode()
    in_buffer = ctypes.create_string_buffer(bitcode)
    status = run_pass(
        ctypes.cast(in_buffer, ctypes.c_void_p), len(bitcode), collect_cb
    )
    if status != 0:
        raise RuntimeError(
            f"Running IntrinsicsOpenMPPass failed with return code {status}"
        )

    lowered_module = ll.parse_bitcode(bytes(collected))

    if DEBUG_OPENMP_LLVM_PASS >= 1:
        # Dump the lowered IR next to the current working directory.
        with open(f"{ll_module.name}-intrinsics-omp.ll", "w") as f:
            f.write(str(lowered_module))

    return lowered_module
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import llvmlite.ir as lir
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def get_decl(alloca):
    """Return the ``<type> %"<name>"`` declaration text for an alloca.

    Raises:
        TypeError: If *alloca* is not an ``AllocaInstr``.
    """
    if isinstance(alloca, lir.instructions.AllocaInstr):
        return f'{alloca.type} %"{alloca._get_name()}"'
    raise TypeError(f"Expected AllocaInstr, got {type(alloca)}")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# TODO: Upstream to llvmlite, it's part of the langref.
|
|
11
|
+
class TokenType(lir.Type):
    """
    IR type that prints as ``token``. Quoting the LLVM Language Reference:

    'The token type is used when a value is associated with an
    instruction but all uses of the value must not attempt to
    introspect or obscure it. As such, it is not appropriate
    to have a phi or select of type token.'

    Any two instances compare equal and share a single hash value.
    """

    def _to_string(self):
        # Textual spelling used when the type is printed.
        return "token"

    def __hash__(self):
        # Hash on the class so that it agrees with __eq__ below.
        return hash(TokenType)

    def __eq__(self, other):
        return isinstance(other, TokenType)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CallInstrWithOperandBundle(lir.instructions.CallInstr):
    """Call instruction that prints an optional ``tags`` string.

    The tags text is emitted after the call attributes and before the
    metadata. When no tags were set the output has no tags section.
    """

    # Default so descr() works even if set_tags() was never called; without
    # it, reading self.tags would raise AttributeError.
    tags = None

    def set_tags(self, tags):
        """Store the tags string that descr() appends to the call text."""
        self.tags = tags

    # TODO: This is ugly duplication, we should upstream to llvmlite.
    def descr(self, buf, add_metadata=True):
        """Append the textual form of this call, including tags, to *buf*.

        Args:
            buf: List-like object; one string is appended to it.
            add_metadata: When true, the instruction metadata is appended
                with a leading comma.
        """

        def descr_arg(i, a):
            # Render one argument as "<type> [<attrs> ]<reference>".
            if i in self.arg_attributes:
                attrs = " ".join(self.arg_attributes[i]._to_list()) + " "
            else:
                attrs = ""
            return "{0} {1}{2}".format(a.type, attrs, a.get_reference())

        args = ", ".join([descr_arg(i, a) for i, a in enumerate(self.args)])

        fnty = self.callee.function_type
        # Only print function type if variable-argument
        if fnty.var_arg:
            ty = fnty
        # Otherwise, just print the return type.
        else:
            # Fastmath flag work only in this case
            ty = fnty.return_type
        callee_ref = "{0} {1}".format(ty, self.callee.get_reference())
        if self.cconv:
            callee_ref = "{0} {1}".format(self.cconv, callee_ref)

        tail_marker = "{0} ".format(self.tail) if self.tail else ""
        tags_text = " " + self.tags if self.tags is not None else ""
        meta_text = (
            self._stringify_metadata(leading_comma=True) if add_metadata else ""
        )

        buf.append(
            "{tail}{op}{fastmath} {callee}({args}){attr}{tags}{meta}\n".format(
                tail=tail_marker,
                op=self.opname,
                fastmath="".join([" " + attr for attr in self.fastmath]),
                callee=callee_ref,
                args=args,
                attr="".join([" " + attr for attr in self.attributes]),
                tags=tags_text,
                meta=meta_text,
            )
        )
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
from numba.core import ir
|
|
2
|
+
from numba.core.ir_utils import (
|
|
3
|
+
build_definitions,
|
|
4
|
+
get_definition,
|
|
5
|
+
dump_blocks,
|
|
6
|
+
dprint_func_ir,
|
|
7
|
+
compute_cfg_from_blocks,
|
|
8
|
+
compute_use_defs,
|
|
9
|
+
compute_live_map,
|
|
10
|
+
)
|
|
11
|
+
from numba.core.withcontexts import WithContext
|
|
12
|
+
import sys
|
|
13
|
+
import os
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from .parser import var_collector_parser
|
|
17
|
+
from .analysis import get_name_var_table
|
|
18
|
+
from .config import DEBUG_OPENMP, OPENMP_DISABLED
|
|
19
|
+
from .compiler import LowerNoSROA
|
|
20
|
+
from .omp_ir import (
|
|
21
|
+
openmp_region_start,
|
|
22
|
+
openmp_region_end,
|
|
23
|
+
_lower_openmp_region_start,
|
|
24
|
+
_lower_openmp_region_end,
|
|
25
|
+
)
|
|
26
|
+
from .analysis import in_openmp_region
|
|
27
|
+
from .omp_lower import VarCollector, remove_ssa_from_func_ir, _add_openmp_ir_nodes
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class PythonOpenmp:
    """Do-nothing context manager that only remembers its arguments.

    The positional arguments given at construction are stored as the tuple
    ``args``. Entering yields ``None`` and leaving never suppresses an
    exception.
    """

    def __init__(self, *args):
        self.args = args

    def __enter__(self):
        # Nothing to set up; the ``as`` target is bound to None.
        return None

    def __exit__(self, typ, val, tb):
        # Returning None lets any in-flight exception propagate.
        return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def iscall(x):
    """Return True if *x* is a call expression or an assignment of one.

    Anything that is neither an ``ir.Assign`` nor an ``ir.Expr`` yields False.
    """
    # For an assignment, inspect the right-hand side instead.
    candidate = x.value if isinstance(x, ir.Assign) else x
    return isinstance(candidate, ir.Expr) and candidate.op == "call"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def extract_args_from_openmp(func_ir):
    """Append variables named in openmp clause strings to the context calls.

    Every call whose callee resolves to a global ``_OpenmpContextType`` value
    and whose first argument is a constant string is parsed with
    ``var_collector_parser``. The names ``VarCollector`` extracts are looked
    up in the function's name-to-variable table and appended to the call's
    argument list. This keeps variables alive in Numba's usedef analysis when
    they are referenced only inside openmp clauses.

    ``func_ir._definitions`` is rebuilt as a side effect. If collecting or
    looking up the variables raises, the error is printed and the process
    exits with status -2.
    """
    func_ir._definitions = build_definitions(func_ir.blocks)
    name_to_var = get_name_var_table(func_ir.blocks)

    for blk in func_ir.blocks.values():
        for stmt in blk.body:
            if not iscall(stmt):
                continue

            callee = get_definition(func_ir, stmt.value.func)
            if not (
                isinstance(callee, ir.Global)
                and isinstance(callee.value, _OpenmpContextType)
            ):
                continue

            clause_def = get_definition(func_ir, stmt.value.args[0])
            if not (
                isinstance(clause_def, ir.Const)
                and isinstance(clause_def.value, str)
            ):
                # The non-const openmp string error is handled later.
                continue

            parsed = var_collector_parser.parse(clause_def.value)
            collector = VarCollector()
            try:
                names = collector.transform(parsed)
                stmt.value.args.extend([name_to_var[x] for x in names])
            except Exception as e:
                print(f"generic transform exception: {e}")
                exc_type, _, exc_tb = sys.exc_info()
                fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
                print(exc_type, fname, exc_tb.tb_lineno)
                sys.exit(-2)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def remove_empty_blocks(blocks):
    """Delete blocks that consist of a single jump and retarget their users.

    A block counts as empty when its body holds exactly one statement, which
    must be an ``ir.Jump``. Such a block is removed from *blocks* (in place)
    and every jump or branch that targeted it is rewritten to target the
    removed block's successor. This repeats until no empty block remains.

    Args:
        blocks: Mapping of label to block; each block exposes a ``body`` list
            whose last statement is its terminator.

    Raises:
        AssertionError: If a one-statement block does not end in a jump, or a
            block ends in a terminator other than Jump, Branch or Return.
    """
    while True:
        # First block whose body is nothing but its terminator, if any.
        empty_label = next(
            (label for label, blk in blocks.items() if len(blk.body) == 1),
            None,
        )
        if empty_label is None:
            return

        terminator = blocks[empty_label].body[-1]
        assert isinstance(terminator, ir.Jump)
        successor = terminator.target
        del blocks[empty_label]

        for blk in blocks.values():
            last_stmt = blk.body[-1]
            if isinstance(last_stmt, ir.Jump):
                if last_stmt.target == empty_label:
                    blk.body[-1] = ir.Jump(successor, last_stmt.loc)
            elif isinstance(last_stmt, ir.Branch):
                # Retarget each arm independently: both arms may point at the
                # removed block, and neither may be left dangling.
                truebr = last_stmt.truebr
                falsebr = last_stmt.falsebr
                if truebr == empty_label:
                    truebr = successor
                if falsebr == empty_label:
                    falsebr = successor
                if truebr != last_stmt.truebr or falsebr != last_stmt.falsebr:
                    blk.body[-1] = ir.Branch(
                        last_stmt.cond, truebr, falsebr, last_stmt.loc
                    )
            elif isinstance(last_stmt, ir.Return):
                # Intentionally do nothing.
                pass
            else:
                raise AssertionError(
                    f"unexpected block terminator: {type(last_stmt)}"
                )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class _OpenmpContextType(WithContext):
    """With-context that rewrites the IR of an openmp ``with`` region.

    ``mutate_with_body`` inserts the openmp IR nodes around the region (or
    only strips the with-marker when openmp is disabled) and records which
    variables are live at the region's end block. On first use,
    ``do_numba_fixups`` replaces ``Lower.lower_inst`` in numba so the openmp
    region nodes and ``Del`` instructions get special treatment.
    """

    is_callable = True
    # Class-wide flag: the numba monkey patching is applied only once.
    first_time = True
    # Live variables at the region's end block; replaced per instance in
    # mutate_with_body.
    blk_end_live_map = set()

    def do_numba_fixups(self):
        """Monkey patch ``numba.core.lowering.Lower`` for openmp lowering.

        The original ``lower_inst`` and ``lower`` are stashed on the Lower
        class as ``orig_lower_inst`` and ``orig_lower``, then ``lower_inst``
        is replaced by ``new_lower`` below.
        """
        from numba import core

        orig_lower_inst = core.lowering.Lower.lower_inst
        core.lowering.Lower.orig_lower_inst = orig_lower_inst

        orig_lower = core.lowering.Lower.lower
        core.lowering.Lower.orig_lower = orig_lower

        # Use method to retrieve the outside region live map, which is updated
        # during the with-context mutation.
        def get_blk_end_live_map():
            # ``self`` here is the context object, captured from the
            # enclosing method (not the Lower instance seen by new_lower).
            return self.blk_end_live_map

        def new_lower(self, inst):
            # Installed as Lower.lower_inst, so ``self`` is the Lower
            # instance and shadows the outer context ``self``.
            if not isinstance(self, LowerNoSROA):
                # Rebind the instance's class so the LowerNoSROA methods used
                # below are available.
                # NOTE(review): lower_assign_inst / lower_return_inst are
                # presumably defined on LowerNoSROA -- confirm.
                self.__class__ = LowerNoSROA
            if isinstance(inst, openmp_region_start):
                return _lower_openmp_region_start(self, inst)
            elif isinstance(inst, openmp_region_end):
                return _lower_openmp_region_end(self, inst)
            # TODO: instead of monkey patching for Del instructions outside the
            # openmp region do: (1) either outline to create a function scope
            # that will decouple the lifetime of variables inside the OpenMP
            # region, (2) or subclass the PostProcessor to extend use-def
            # analysis with OpenMP lifetime information.
            elif isinstance(inst, ir.Del):
                # Lower Del normally in the openmp region.
                if in_openmp_region(self.builder):
                    return self.orig_lower_inst(inst)

                # Lower the Del instruction ONLY if the variable is not live
                # after the openmp region.
                if inst.value not in get_blk_end_live_map():
                    return self.orig_lower_inst(inst)
                # Otherwise fall through: the Del is skipped and None returned.
            elif isinstance(inst, ir.Assign):
                return self.lower_assign_inst(orig_lower_inst, inst)
            elif isinstance(inst, ir.Return):
                return self.lower_return_inst(orig_lower_inst, inst)
            else:
                return self.orig_lower_inst(inst)

        core.lowering.Lower.lower_inst = new_lower

    def mutate_with_body(
        self,
        func_ir,
        blocks,
        blk_start,
        blk_end,
        body_blocks,
        dispatcher_factory,
        extra,
    ):
        """Rewrite the with-region in *blocks* and return a dispatcher.

        Steps, in order: apply the one-time numba fixups; on the first region
        seen for *func_ir* (and only when openmp is enabled) extend the
        context calls with clause variables and remove SSA; then either drop
        the first statement of the start block (openmp disabled) or add the
        openmp IR nodes and rebuild the definitions. Finally a dispatcher is
        created from *func_ir*, marked cacheable, and the live variables at
        *blk_end* are stored on ``self.blk_end_live_map``.

        *extra* must not be None when openmp is enabled.
        """
        if _OpenmpContextType.first_time:
            _OpenmpContextType.first_time = False
            self.do_numba_fixups()

        if DEBUG_OPENMP >= 1:
            print("pre-dead-code")
            dump_blocks(blocks)
        # has_openmp_region marks a func_ir that was already prepared, so the
        # preparation below runs once per function.
        if not OPENMP_DISABLED and not hasattr(func_ir, "has_openmp_region"):
            # We can't do dead code elimination at this point because if an argument
            # is used only in an openmp clause then it is detected as dead and is
            # eliminated. We'd have to run through the IR and find all the
            # openmp regions and extract the vars used there and then modify the
            # IR with something fake just to take the var alive. The other approach
            # would be to modify dead code elimination to find the vars referenced
            # in openmp context strings.
            extract_args_from_openmp(func_ir)
            # dead_code_elimination(func_ir)
            remove_ssa_from_func_ir(func_ir)
            # remove_empty_blocks(blocks)
            func_ir.has_openmp_region = True
        if DEBUG_OPENMP >= 1:
            print("pre-with-removal")
            dump_blocks(blocks)
        if OPENMP_DISABLED:
            # If OpenMP disabled, do nothing except remove the enter_with marker.
            sblk = blocks[blk_start]
            sblk.body = sblk.body[1:]
        else:
            if DEBUG_OPENMP >= 1:
                print("openmp:mutate_with_body")
                dprint_func_ir(func_ir, "func_ir")
                print("blocks:", blocks, type(blocks))
                print("blk_start:", blk_start, type(blk_start))
                print("blk_end:", blk_end, type(blk_end))
                print("body_blocks:", body_blocks, type(body_blocks))
                print("extra:", extra, type(extra))
            assert extra is not None
            _add_openmp_ir_nodes(
                func_ir, blocks, blk_start, blk_end, body_blocks, extra
            )
            func_ir._definitions = build_definitions(blocks)
            if DEBUG_OPENMP >= 1:
                print("post-with-removal")
                dump_blocks(blocks)
        dispatcher = dispatcher_factory(func_ir)
        dispatcher.can_cache = True

        # Find live variables after the region to make sure we don't Del
        # them if they are defined in the openmp region.
        cfg = compute_cfg_from_blocks(blocks)
        usedefs = compute_use_defs(blocks)
        live_map = compute_live_map(cfg, blocks, usedefs.usemap, usedefs.defmap)
        self.blk_end_live_map = live_map[blk_end]
        return dispatcher

    def __call__(self, args):
        """Return a ``PythonOpenmp`` no-op context wrapping *args*."""
        return PythonOpenmp(args)
|