gstaichi 0.0.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gstaichi/CHANGELOG.md +4 -0
- gstaichi/__init__.py +51 -0
- gstaichi/_funcs.py +706 -0
- gstaichi/_kernels.py +420 -0
- gstaichi/_lib/__init__.py +5 -0
- gstaichi/_lib/core/__init__.py +0 -0
- gstaichi/_lib/core/gstaichi_python.cpython-312-darwin.so +0 -0
- gstaichi/_lib/core/gstaichi_python.pyi +2917 -0
- gstaichi/_lib/core/py.typed +0 -0
- gstaichi/_lib/runtime/libMoltenVK.dylib +0 -0
- gstaichi/_lib/runtime/runtime_arm64.bc +0 -0
- gstaichi/_lib/utils.py +243 -0
- gstaichi/_logging.py +131 -0
- gstaichi/_snode/__init__.py +5 -0
- gstaichi/_snode/fields_builder.py +187 -0
- gstaichi/_snode/snode_tree.py +34 -0
- gstaichi/_test_tools/__init__.py +18 -0
- gstaichi/_test_tools/dataclass_test_tools.py +36 -0
- gstaichi/_test_tools/load_kernel_string.py +30 -0
- gstaichi/_test_tools/textwrap2.py +6 -0
- gstaichi/_version_check.py +100 -0
- gstaichi/ad/__init__.py +3 -0
- gstaichi/ad/_ad.py +530 -0
- gstaichi/algorithms/__init__.py +3 -0
- gstaichi/algorithms/_algorithms.py +117 -0
- gstaichi/assets/.git +1 -0
- gstaichi/assets/Go-Regular.ttf +0 -0
- gstaichi/assets/static/imgs/ti_gallery.png +0 -0
- gstaichi/examples/lcg_python.py +26 -0
- gstaichi/examples/lcg_taichi.py +34 -0
- gstaichi/examples/minimal.py +28 -0
- gstaichi/experimental.py +16 -0
- gstaichi/lang/__init__.py +50 -0
- gstaichi/lang/_dataclass_util.py +31 -0
- gstaichi/lang/_fast_caching/__init__.py +3 -0
- gstaichi/lang/_fast_caching/args_hasher.py +122 -0
- gstaichi/lang/_fast_caching/config_hasher.py +30 -0
- gstaichi/lang/_fast_caching/fast_caching_types.py +21 -0
- gstaichi/lang/_fast_caching/function_hasher.py +57 -0
- gstaichi/lang/_fast_caching/hash_utils.py +11 -0
- gstaichi/lang/_fast_caching/python_side_cache.py +52 -0
- gstaichi/lang/_fast_caching/src_hasher.py +83 -0
- gstaichi/lang/_kernel_impl_dataclass.py +212 -0
- gstaichi/lang/_ndarray.py +366 -0
- gstaichi/lang/_ndrange.py +152 -0
- gstaichi/lang/_template_mapper.py +195 -0
- gstaichi/lang/_texture.py +172 -0
- gstaichi/lang/_wrap_inspect.py +215 -0
- gstaichi/lang/any_array.py +99 -0
- gstaichi/lang/ast/__init__.py +7 -0
- gstaichi/lang/ast/ast_transformer.py +1351 -0
- gstaichi/lang/ast/ast_transformer_utils.py +346 -0
- gstaichi/lang/ast/ast_transformers/__init__.py +0 -0
- gstaichi/lang/ast/ast_transformers/call_transformer.py +327 -0
- gstaichi/lang/ast/ast_transformers/function_def_transformer.py +304 -0
- gstaichi/lang/ast/checkers.py +106 -0
- gstaichi/lang/ast/symbol_resolver.py +57 -0
- gstaichi/lang/ast/transform.py +9 -0
- gstaichi/lang/common_ops.py +310 -0
- gstaichi/lang/exception.py +80 -0
- gstaichi/lang/expr.py +180 -0
- gstaichi/lang/field.py +428 -0
- gstaichi/lang/impl.py +1259 -0
- gstaichi/lang/kernel_arguments.py +155 -0
- gstaichi/lang/kernel_impl.py +1386 -0
- gstaichi/lang/matrix.py +1835 -0
- gstaichi/lang/matrix_ops.py +341 -0
- gstaichi/lang/matrix_ops_utils.py +190 -0
- gstaichi/lang/mesh.py +687 -0
- gstaichi/lang/misc.py +784 -0
- gstaichi/lang/ops.py +1494 -0
- gstaichi/lang/runtime_ops.py +13 -0
- gstaichi/lang/shell.py +35 -0
- gstaichi/lang/simt/__init__.py +5 -0
- gstaichi/lang/simt/block.py +94 -0
- gstaichi/lang/simt/grid.py +7 -0
- gstaichi/lang/simt/subgroup.py +191 -0
- gstaichi/lang/simt/warp.py +96 -0
- gstaichi/lang/snode.py +489 -0
- gstaichi/lang/source_builder.py +150 -0
- gstaichi/lang/struct.py +810 -0
- gstaichi/lang/util.py +312 -0
- gstaichi/linalg/__init__.py +10 -0
- gstaichi/linalg/matrixfree_cg.py +310 -0
- gstaichi/linalg/sparse_cg.py +59 -0
- gstaichi/linalg/sparse_matrix.py +303 -0
- gstaichi/linalg/sparse_solver.py +123 -0
- gstaichi/math/__init__.py +11 -0
- gstaichi/math/_complex.py +205 -0
- gstaichi/math/mathimpl.py +886 -0
- gstaichi/profiler/__init__.py +6 -0
- gstaichi/profiler/kernel_metrics.py +260 -0
- gstaichi/profiler/kernel_profiler.py +586 -0
- gstaichi/profiler/memory_profiler.py +15 -0
- gstaichi/profiler/scoped_profiler.py +36 -0
- gstaichi/sparse/__init__.py +3 -0
- gstaichi/sparse/_sparse_grid.py +77 -0
- gstaichi/tools/__init__.py +12 -0
- gstaichi/tools/diagnose.py +117 -0
- gstaichi/tools/np2ply.py +364 -0
- gstaichi/tools/vtk.py +38 -0
- gstaichi/types/__init__.py +21 -0
- gstaichi/types/annotations.py +52 -0
- gstaichi/types/compound_types.py +71 -0
- gstaichi/types/enums.py +49 -0
- gstaichi/types/ndarray_type.py +169 -0
- gstaichi/types/primitive_types.py +206 -0
- gstaichi/types/quant.py +88 -0
- gstaichi/types/texture_type.py +85 -0
- gstaichi/types/utils.py +11 -0
- gstaichi-0.0.0.data/data/include/GLFW/glfw3.h +6389 -0
- gstaichi-0.0.0.data/data/include/GLFW/glfw3native.h +594 -0
- gstaichi-0.0.0.data/data/include/spirv-tools/instrument.hpp +268 -0
- gstaichi-0.0.0.data/data/include/spirv-tools/libspirv.h +907 -0
- gstaichi-0.0.0.data/data/include/spirv-tools/libspirv.hpp +375 -0
- gstaichi-0.0.0.data/data/include/spirv-tools/linker.hpp +97 -0
- gstaichi-0.0.0.data/data/include/spirv-tools/optimizer.hpp +970 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/GLSL.std.450.h +114 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv.h +2568 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv.hpp +2579 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cfg.hpp +168 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_common.hpp +1920 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cpp.hpp +93 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cross.hpp +1171 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cross_c.h +1074 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cross_containers.hpp +754 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cross_error_handling.hpp +94 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cross_parsed_ir.hpp +256 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_cross_util.hpp +37 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_glsl.hpp +1001 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_hlsl.hpp +406 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_msl.hpp +1273 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_parser.hpp +103 -0
- gstaichi-0.0.0.data/data/include/spirv_cross/spirv_reflect.hpp +91 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools/SPIRV-ToolsConfig.cmake +5 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools/SPIRV-ToolsTarget-release.cmake +29 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools/SPIRV-ToolsTarget.cmake +114 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-diff/SPIRV-Tools-diffConfig.cmake +5 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-diff/SPIRV-Tools-diffTargets-release.cmake +19 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-diff/SPIRV-Tools-diffTargets.cmake +123 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-link/SPIRV-Tools-linkConfig.cmake +5 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-link/SPIRV-Tools-linkTargets-release.cmake +19 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-link/SPIRV-Tools-linkTargets.cmake +123 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-lint/SPIRV-Tools-lintConfig.cmake +5 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-lint/SPIRV-Tools-lintTargets-release.cmake +19 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-lint/SPIRV-Tools-lintTargets.cmake +123 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-opt/SPIRV-Tools-optConfig.cmake +5 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-opt/SPIRV-Tools-optTargets-release.cmake +19 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-opt/SPIRV-Tools-optTargets.cmake +123 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-reduce/SPIRV-Tools-reduceConfig.cmake +5 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-reduce/SPIRV-Tools-reduceTarget-release.cmake +19 -0
- gstaichi-0.0.0.data/data/lib/cmake/SPIRV-Tools-reduce/SPIRV-Tools-reduceTarget.cmake +123 -0
- gstaichi-0.0.0.data/data/lib/cmake/glfw3/glfw3Config.cmake +3 -0
- gstaichi-0.0.0.data/data/lib/cmake/glfw3/glfw3ConfigVersion.cmake +65 -0
- gstaichi-0.0.0.data/data/lib/cmake/glfw3/glfw3Targets-release.cmake +19 -0
- gstaichi-0.0.0.data/data/lib/cmake/glfw3/glfw3Targets.cmake +107 -0
- gstaichi-0.0.0.data/data/lib/libSPIRV-Tools-shared.dylib +0 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_c/cmake/spirv_cross_cConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_c/cmake/spirv_cross_cConfig.cmake +123 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_core/cmake/spirv_cross_coreConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_core/cmake/spirv_cross_coreConfig.cmake +106 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_cpp/cmake/spirv_cross_cppConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_cpp/cmake/spirv_cross_cppConfig.cmake +123 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_glsl/cmake/spirv_cross_glslConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_glsl/cmake/spirv_cross_glslConfig.cmake +123 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_hlsl/cmake/spirv_cross_hlslConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_hlsl/cmake/spirv_cross_hlslConfig.cmake +123 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_msl/cmake/spirv_cross_mslConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_msl/cmake/spirv_cross_mslConfig.cmake +123 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_reflect/cmake/spirv_cross_reflectConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_reflect/cmake/spirv_cross_reflectConfig.cmake +106 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_util/cmake/spirv_cross_utilConfig-release.cmake +19 -0
- gstaichi-0.0.0.data/data/share/spirv_cross_util/cmake/spirv_cross_utilConfig.cmake +123 -0
- gstaichi-0.0.0.dist-info/METADATA +97 -0
- gstaichi-0.0.0.dist-info/RECORD +178 -0
- gstaichi-0.0.0.dist-info/WHEEL +5 -0
- gstaichi-0.0.0.dist-info/licenses/LICENSE +201 -0
- gstaichi-0.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,586 @@
|
|
1
|
+
# type: ignore
|
2
|
+
|
3
|
+
from contextlib import contextmanager
|
4
|
+
|
5
|
+
from gstaichi._lib import core as _ti_core
|
6
|
+
from gstaichi.lang import impl
|
7
|
+
from gstaichi.profiler.kernel_metrics import default_cupti_metrics
|
8
|
+
|
9
|
+
|
10
|
+
class StatisticalResult:
    """Aggregated timing statistics for all records that share a kernel name.

    Profiling records with the same kernel name are folded into a single
    ``StatisticalResult`` instance via :meth:`insert_record`.
    Currently, only the kernel elapsed time is counted; other statistics
    related to the kernel may be added in the future.

    Args:
        name: the kernel name this result aggregates.
    """

    def __init__(self, name):
        self.name = name
        self.counter = 0  # number of records inserted so far
        self.min_time = 0.0
        self.max_time = 0.0
        self.total_time = 0.0

    def __lt__(self, other):
        """Order results by accumulated time so they can be used with ``sorted()``.

        Returns ``NotImplemented`` when ``other`` carries no ``total_time``,
        letting Python raise its normal ``TypeError`` for unsupported operands
        instead of leaking an ``AttributeError``.
        """
        other_total = getattr(other, "total_time", None)
        if other_total is None:
            return NotImplemented
        return self.total_time < other_total

    def insert_record(self, time):
        """Fold one elapsed-time sample into this result.

        Currently, only the kernel elapsed time is counted.

        Args:
            time: elapsed time of one kernel launch.
        """
        if self.counter == 0:
            # The first sample defines both extremes.
            self.min_time = time
            self.max_time = time
        else:
            self.min_time = min(self.min_time, time)
            self.max_time = max(self.max_time, time)
        self.counter += 1
        self.total_time += time
|
40
|
+
|
41
|
+
|
42
|
+
class KernelProfiler:
    """Kernel profiler of GsTaichi.

    Kernel profiler acquires kernel profiling records from backend, counts records in Python scope,
    and prints the results to the console by :func:`~gstaichi.profiler.kernel_profiler.KernelProfiler.print_info`.

    ``KernelProfiler`` now supports detailed low-level performance metrics (such as memory bandwidth consumption) in its advanced mode.
    This mode is only available for the CUDA backend with CUPTI toolkit, i.e. you need ``ti.init(kernel_profiler=True, arch=ti.cuda)``.

    Note:
        For details about using CUPTI in GsTaichi, please visit https://docs.taichi-lang.org/docs/profiler#advanced-mode.
    """

    def __init__(self):
        self._profiling_mode = False
        self._profiling_toolkit = "default"
        # Keep a flat copy of the default metrics: ``_print_kernel_info`` indexes
        # this list and reads ``.header`` / ``.val_format`` / ``.scale`` on each
        # element, so it must hold metric objects, not a nested list.
        self._metric_list = list(default_cupti_metrics)
        self._total_time_ms = 0.0
        self._traced_records = []
        self._statistical_results = {}

    # public methods

    def set_kernel_profiler_mode(self, mode=False):
        """Turn on or off :class:`~gstaichi.profiler.kernel_profiler.KernelProfiler`.

        Args:
            mode (bool): ``True`` to enable profiling, ``False`` to disable it.

        Raises:
            TypeError: if ``mode`` is not a ``bool``.
        """
        if not isinstance(mode, bool):
            raise TypeError(f"Arg `mode` must be of type boolean. Type {type(mode)} is not supported.")
        self._profiling_mode = mode

    def get_kernel_profiler_mode(self):
        """Get status of :class:`~gstaichi.profiler.kernel_profiler.KernelProfiler`."""
        return self._profiling_mode

    def set_toolkit(self, toolkit_name="default"):
        """Ask the backend to switch the profiling toolkit.

        Args:
            toolkit_name (str): name of the toolkit to use.

        Returns:
            ``False`` when the profiler is turned off; otherwise the status
            reported by the backend. The remembered toolkit name is only
            updated when that status is ``True``.
        """
        if self._check_not_turned_on_with_warning_message():
            return False
        status = impl.get_runtime().prog.set_kernel_profiler_toolkit(toolkit_name)
        if status is True:
            self._profiling_toolkit = toolkit_name
        else:
            _ti_core.warn(
                f"Failed to set kernel profiler toolkit ({toolkit_name}) , keep using ({self._profiling_toolkit})."
            )
        return status

    def get_total_time(self):
        """Get elapsed time of all kernels recorded in KernelProfiler.

        Returns:
            time (float): total time in seconds (``0.0`` when the profiler is off).
        """
        if self._check_not_turned_on_with_warning_message():
            return 0.0
        self._update_records()  # kernel records
        self._count_statistics()  # _total_time_ms is counted here
        return self._total_time_ms / 1000  # ms to s

    def clear_info(self):
        """Clear all records both in front-end :class:`~gstaichi.profiler.kernel_profiler.KernelProfiler` and back-end instance ``KernelProfilerBase``.

        Note:
            The values of ``self._profiling_mode`` and ``self._metric_list`` will not be cleared.
        """
        if self._check_not_turned_on_with_warning_message():
            return None
        # sync first
        impl.get_runtime().prog.sync_kernel_profiler()
        # then clear backend & frontend info
        impl.get_runtime().prog.clear_kernel_profiler()
        self._clear_frontend()

        return None

    def query_info(self, name):
        """For docstring of this function, see :func:`~gstaichi.profiler.query_kernel_profiler_info`."""
        if self._check_not_turned_on_with_warning_message():
            return None
        self._update_records()  # kernel records
        self._count_statistics()  # statistics results
        # TODO : query self.StatisticalResult in python scope
        return impl.get_runtime().prog.query_kernel_profile_info(name)

    def set_metrics(self, metric_list=default_cupti_metrics):
        """For docstring of this function, see :func:`~gstaichi.profiler.set_kernel_profiler_metrics`."""
        if self._check_not_turned_on_with_warning_message():
            return None
        self._metric_list = metric_list
        metric_name_list = [metric.name for metric in metric_list]
        # Existing records are dropped before the backend is re-initialised
        # with the new metric names.
        self.clear_info()
        impl.get_runtime().prog.reinit_kernel_profiler_with_metrics(metric_name_list)

        return None

    @contextmanager
    def collect_metrics_in_context(self, metric_list=default_cupti_metrics):
        """This function is not exposed to user now.

        For usage of this function, see :func:`~gstaichi.profiler.collect_kernel_profiler_metrics`.
        """
        if self._check_not_turned_on_with_warning_message():
            # A ``@contextmanager`` generator has to yield exactly once;
            # returning before the yield makes the ``with`` statement raise
            # ``RuntimeError("generator didn't yield")``. Yield without
            # touching the metrics so the body still runs after the warning.
            yield self
            return
        self.set_metrics(metric_list)
        try:
            yield self
        finally:
            # Restore the default metric list even if the body raised.
            self.set_metrics()

    # mode of print_info
    COUNT = "count"  # print the statistical results (min,max,avg time) of GsTaichi kernels.
    TRACE = "trace"  # print the records of launched GsTaichi kernels with specific profiling metrics (time, memory load/store and core utilization etc.)

    def print_info(self, mode=COUNT):
        """Print the profiling results of GsTaichi kernels.

        For usage of this function, see :func:`~gstaichi.profiler.print_kernel_profiler_info`.

        Args:
            mode (str): the way to print profiling results.

        Raises:
            ValueError: if ``mode`` is neither ``'count'`` nor ``'trace'``.
        """
        if self._check_not_turned_on_with_warning_message():
            return None
        self._update_records()  # kernel records
        self._count_statistics()  # statistics results

        # COUNT mode (default) : print statistics of all kernel
        if mode == self.COUNT:
            self._print_statistics_info()
        # TRACE mode : print records of launched kernel
        elif mode == self.TRACE:
            self._print_kernel_info()
        else:
            raise ValueError("Arg `mode` must be of type 'str', and has the value 'count' or 'trace'.")

        return None

    # private methods
    def _check_not_turned_on_with_warning_message(self):
        """Return ``True`` (after emitting a warning) when profiling is disabled."""
        if self._profiling_mode is False:
            _ti_core.warn("use 'ti.init(kernel_profiler = True)' to turn on KernelProfiler.")
            return True
        return False

    def _clear_frontend(self):
        """Clear member variables in :class:`~gstaichi.profiler.kernel_profiler.KernelProfiler`.

        Note:
            The values of ``self._profiling_mode`` and ``self._metric_list`` will not be cleared.
        """
        self._total_time_ms = 0.0
        self._traced_records.clear()
        self._statistical_results.clear()

    def _update_records(self):
        """Acquires kernel records from a backend."""
        impl.get_runtime().prog.sync_kernel_profiler()
        impl.get_runtime().prog.update_kernel_profiler()
        # Reset the front-end totals before taking the fresh record list so
        # the following ``_count_statistics`` call does not double count.
        self._clear_frontend()
        self._traced_records = impl.get_runtime().prog.get_kernel_profiler_records()

    def _count_statistics(self):
        """Counts the statistics of launched kernels during the profiling period.

        The profiling records with the same kernel name are counted as a profiling result.
        Results end up ordered by descending total time.
        """
        for record in self._traced_records:
            result = self._statistical_results.get(record.name)
            if result is None:
                result = StatisticalResult(record.name)
                self._statistical_results[record.name] = result
            result.insert_record(record.kernel_time)
            self._total_time_ms += record.kernel_time
        self._statistical_results = dict(
            sorted(
                self._statistical_results.items(),
                key=lambda item: item[1].total_time,
                reverse=True,
            )
        )

    def _make_table_header(self, mode):
        """Build the table title: mode, toolkit, arch and (if known) device name."""
        header_str = f"Kernel Profiler({mode}, {self._profiling_toolkit})"
        arch_name = f" @ {_ti_core.arch_name(impl.current_cfg().arch).upper()}"
        device_name = impl.get_runtime().prog.get_kernel_profiler_device_name()
        if len(device_name) > 1:  # default device_name = ' '
            device_name = " on " + device_name
        return header_str + arch_name + device_name

    def _print_statistics_info(self):
        """Print statistics of launched kernels during the profiling period."""

        # headers
        table_header = self._make_table_header("count")
        # NOTE(review): the spacing inside these column/row literals may not
        # line up as intended - confirm the alignment against real output.
        column_header = "[ % total count | min avg max ] Kernel name"
        # partition line
        line_length = max(len(column_header), len(table_header))
        outer_partition_line = "=" * line_length
        inner_partition_line = "-" * line_length

        # message in one line
        row_format = "[{:6.2f}% {:7.3f} s {:6d}x |{:9.3f} {:9.3f} {:9.3f} ms] {}"
        rows = []
        for result in self._statistical_results.values():
            # All recorded kernel times can be zero; avoid ZeroDivisionError.
            if self._total_time_ms > 0:
                fraction = result.total_time / self._total_time_ms * 100.0
            else:
                fraction = 0.0
            rows.append(
                row_format.format(
                    fraction,
                    result.total_time / 1000.0,
                    result.counter,
                    result.min_time,
                    result.total_time / result.counter,  # avg_time
                    result.max_time,
                    result.name,
                )
            )

        # summary
        summary_line = "[100.00%] Total execution time: "
        summary_line += f"{self._total_time_ms/1000:7.3f} s "
        summary_line += f"number of results: {len(self._statistical_results)}"

        # print
        print(outer_partition_line)
        print(table_header)
        print(outer_partition_line)
        print(column_header)
        print(inner_partition_line)
        for row in rows:
            print(row)
        print(inner_partition_line)
        print(summary_line)
        print(outer_partition_line)

    def _print_kernel_info(self):
        """Print a list of launched kernels during the profiling period."""
        metric_list = self._metric_list
        records = self._traced_records
        # With no recorded launches there is no first record to inspect;
        # fall back to an empty table instead of raising IndexError.
        has_records = len(records) > 0
        values_num = len(records[0].metric_values) if has_records else 0

        # We currently get kernel attributes through CUDA Driver API,
        # there is no corresponding implementation in other backends yet.
        # Profiler does not print invalid kernel attributes info for now.
        kernel_attribute_state = has_records and records[0].register_per_thread > 0

        # headers
        table_header = self._make_table_header("trace")
        column_header = "[ start.time | kernel.time |"  # default
        if kernel_attribute_state:
            column_header += " regs | shared mem | grid size | block size | occupancy |"  # kernel_attributes
        for idx in range(values_num):
            column_header += metric_list[idx].header + "|"
        column_header = (column_header + "] Kernel name").replace("|]", "]")

        # partition line
        line_length = max(len(column_header), len(table_header))
        outer_partition_line = "=" * line_length
        inner_partition_line = "-" * line_length

        # message in one line: formatted_str.format(*values)
        # The start time is synthesised by accumulating kernel times.
        fake_timestamp = 0.0
        string_list = []
        values_list = []
        for record in records:
            formatted_str = "[{:9.3f} ms |{:9.3f} ms |"  # default
            values = [fake_timestamp, record.kernel_time]  # default
            if kernel_attribute_state:
                formatted_str += " {:4d} | {:6d} bytes | {:6d} | {:6d} | {:2d} blocks |"
                values += [
                    record.register_per_thread,
                    record.shared_mem_per_block,
                    record.grid_size,
                    record.block_size,
                    record.active_blocks_per_multiprocessor,
                ]
            for idx in range(values_num):
                formatted_str += metric_list[idx].val_format + "|"
                values += [record.metric_values[idx] * metric_list[idx].scale]
            formatted_str = formatted_str + "] " + record.name
            string_list.append(formatted_str.replace("|]", "]"))
            values_list.append(values)
            fake_timestamp += record.kernel_time

        # print
        print(outer_partition_line)
        print(table_header)
        print(outer_partition_line)
        print(column_header)
        print(inner_partition_line)
        for formatted_str, values in zip(string_list, values_list):
            print(formatted_str.format(*values))
        print(inner_partition_line)
        print(f"Number of records: {len(records)}")
        print(outer_partition_line)
|
337
|
+
|
338
|
+
|
339
|
+
# Module-level profiler instance; this is the object returned by get_default_kernel_profiler().
_ti_kernel_profiler = KernelProfiler()
|
340
|
+
|
341
|
+
|
342
|
+
def get_default_kernel_profiler():
    """Return the module-level :class:`~gstaichi.profiler.kernel_profiler.KernelProfiler` instance.

    We have only one ``KernelProfiler`` instance (i.e. ``_ti_kernel_profiler``) now.

    For ``KernelProfiler`` using ``CuptiToolkit``, GPU devices can only work in a certain configuration.
    Profiling mode and metrics are configured by the host (CPU) via CUPTI APIs, and the device (GPU) will use
    its counter registers to collect specific metrics.
    So if there are multiple instances of ``KernelProfiler``, the device will work in the latest configuration,
    and the profiling configuration of other instances will be changed as a result.
    For data retention purposes, multiple instances will be considered in the future.

    Returns:
        The shared ``KernelProfiler`` instance.
    """
    return _ti_kernel_profiler
|
353
|
+
|
354
|
+
|
355
|
+
def print_kernel_profiler_info(mode="count"):
    """Print the profiling results of GsTaichi kernels.

    The profiler has to be enabled with ``kernel_profiler=True`` in ``ti.init()``.
    Two modes are available, ``'count'`` being the default:

    * ``'count'``: print the statistics (min, max, avg time) of launched kernels.
    * ``'trace'``: print the records of launched kernels with specific profiling
      metrics (time, memory load/store and core utilization etc.).

    Args:
        mode (str): the way to print profiling results.

    Example::

        >>> import gstaichi as ti

        >>> ti.init(ti.cpu, kernel_profiler=True)
        >>> var = ti.field(ti.f32, shape=1)

        >>> @ti.kernel
        >>> def compute():
        >>>     var[0] = 1.0

        >>> compute()
        >>> ti.profiler.print_kernel_profiler_info()
        >>> # equivalent calls :
        >>> # ti.profiler.print_kernel_profiler_info('count')

        >>> ti.profiler.print_kernel_profiler_info('trace')

    Note:
        For advanced mode of `KernelProfiler`, please visit https://docs.taichi-lang.org/docs/profiler#advanced-mode.
    """
    profiler = get_default_kernel_profiler()
    profiler.print_info(mode)
|
388
|
+
|
389
|
+
|
390
|
+
def query_kernel_profiler_info(name):
    """Look up the elapsed time (min, avg, max) of a kernel on devices by its name.

    The profiler has to be enabled with `kernel_profiler=True` in `ti.init`.

    Args:
        name (str): kernel name.

    Returns:
        KernelProfilerQueryResult (class): with member variables(counter, min, max, avg)

    Example::

        >>> import gstaichi as ti

        >>> ti.init(ti.cpu, kernel_profiler=True)
        >>> n = 1024*1024
        >>> var = ti.field(ti.f32, shape=n)

        >>> @ti.kernel
        >>> def fill():
        >>>     for i in range(n):
        >>>         var[i] = 0.1

        >>> fill()
        >>> ti.profiler.clear_kernel_profiler_info() #[1]
        >>> for i in range(100):
        >>>     fill()
        >>> query_result = ti.profiler.query_kernel_profiler_info(fill.__name__) #[2]
        >>> print("kernel executed times =",query_result.counter)
        >>> print("kernel elapsed time(min_in_ms) =",query_result.min)
        >>> print("kernel elapsed time(max_in_ms) =",query_result.max)
        >>> print("kernel elapsed time(avg_in_ms) =",query_result.avg)

    Note:
        [1] To get the correct result, query_kernel_profiler_info() must be used in conjunction with
        clear_kernel_profiler_info().
    """
    profiler = get_default_kernel_profiler()
    return profiler.query_info(name)
|
429
|
+
|
430
|
+
|
431
|
+
def clear_kernel_profiler_info():
    """Drop every record held by the default KernelProfiler."""
    profiler = get_default_kernel_profiler()
    profiler.clear_info()
|
434
|
+
|
435
|
+
|
436
|
+
def get_kernel_profiler_total_time():
    """Return the summed elapsed time of all kernels recorded in KernelProfiler.

    Returns:
        time (float): total time in seconds.
    """
    profiler = get_default_kernel_profiler()
    return profiler.get_total_time()
|
443
|
+
|
444
|
+
|
445
|
+
def set_kernel_profiler_toolkit(toolkit_name="default"):
    """Choose the toolkit that KernelProfiler uses.

    Currently, we only support toolkits: ``'default'`` and ``'cupti'``.

    Args:
        toolkit_name (str): string of toolkit name.

    Returns:
        status (bool): whether the setting is successful or not.

    Example::

        >>> import gstaichi as ti

        >>> ti.init(arch=ti.cuda, kernel_profiler=True)
        >>> x = ti.field(ti.f32, shape=1024*1024)

        >>> @ti.kernel
        >>> def fill():
        >>>     for i in x:
        >>>         x[i] = i

        >>> ti.profiler.set_kernel_profiler_toolkit('cupti')
        >>> for i in range(100):
        >>>     fill()
        >>> ti.profiler.print_kernel_profiler_info()

        >>> ti.profiler.set_kernel_profiler_toolkit('default')
        >>> for i in range(100):
        >>>     fill()
        >>> ti.profiler.print_kernel_profiler_info()
    """
    profiler = get_default_kernel_profiler()
    return profiler.set_toolkit(toolkit_name)
|
479
|
+
|
480
|
+
|
481
|
+
def set_kernel_profiler_metrics(metric_list=default_cupti_metrics):
    """Configure the metrics that the CUPTI toolkit will collect.

    Args:
        metric_list (list): a list of :class:`~gstaichi.profiler.CuptiMetric()` instances, default value: :data:`~gstaichi.profiler.kernel_metrics.default_cupti_metrics`.

    Example::

        >>> import gstaichi as ti

        >>> ti.init(kernel_profiler=True, arch=ti.cuda)
        >>> ti.profiler.set_kernel_profiler_toolkit('cupti')
        >>> num_elements = 128*1024*1024

        >>> x = ti.field(ti.f32, shape=num_elements)
        >>> y = ti.field(ti.f32, shape=())
        >>> y[None] = 0

        >>> @ti.kernel
        >>> def reduction():
        >>>     for i in x:
        >>>         y[None] += x[i]

        >>> # When called without a parameter, GsTaichi will print its pre-defined metrics list
        >>> ti.profiler.get_predefined_cupti_metrics()
        >>> # get GsTaichi pre-defined metrics
        >>> profiling_metrics = ti.profiler.get_predefined_cupti_metrics('shared_access')

        >>> global_op_atom = ti.profiler.CuptiMetric(
        >>>     name='l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum',
        >>>     header=' global.atom ',
        >>>     format=' {:8.0f} ')
        >>> # add user defined metrics
        >>> profiling_metrics += [global_op_atom]

        >>> # metrics setting will be retained until the next configuration
        >>> ti.profiler.set_kernel_profiler_metrics(profiling_metrics)
        >>> for i in range(16):
        >>>     reduction()
        >>> ti.profiler.print_kernel_profiler_info('trace')

    Note:
        Metrics setting will be retained until the next configuration.
    """
    profiler = get_default_kernel_profiler()
    profiler.set_metrics(metric_list)
|
526
|
+
|
527
|
+
|
528
|
+
@contextmanager
def collect_kernel_profiler_metrics(metric_list=default_cupti_metrics):
    """Temporarily set the metrics collected by the CUPTI toolkit within this context.

    On entry, ``metric_list`` is passed to ``set_metrics`` of the default
    kernel profiler. On exit, ``set_metrics()`` is called again without
    arguments to clear the temporary configuration. The reset also runs when
    the body of the ``with`` statement raises.

    Args:
        metric_list (list): a list of :class:`~gstaichi.profiler.CuptiMetric()`
            instances, default value:
            :data:`~gstaichi.profiler.kernel_metrics.default_cupti_metrics`.

    Yields:
        The object returned by ``get_default_kernel_profiler()``.

    Example::

        >>> import gstaichi as ti

        >>> ti.init(kernel_profiler=True, arch=ti.cuda)
        >>> ti.profiler.set_kernel_profiler_toolkit('cupti')
        >>> num_elements = 128*1024*1024

        >>> x = ti.field(ti.f32, shape=num_elements)
        >>> y = ti.field(ti.f32, shape=())
        >>> y[None] = 0

        >>> @ti.kernel
        ... def reduction():
        ...     for i in x:
        ...         y[None] += x[i]

        >>> # Called without a parameter, GsTaichi prints its pre-defined metrics list
        >>> ti.profiler.get_predefined_cupti_metrics()
        >>> # get GsTaichi pre-defined metrics
        >>> profiling_metrics = ti.profiler.get_predefined_cupti_metrics('device_utilization')

        >>> global_op_atom = ti.profiler.CuptiMetric(
        ...     name='l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum',
        ...     header=' global.atom ',
        ...     format=' {:8.0f} ')
        >>> # add user defined metrics
        >>> profiling_metrics += [global_op_atom]

        >>> # the metrics setting is temporary and is cleared on exit from this context
        >>> with ti.profiler.collect_kernel_profiler_metrics(profiling_metrics):
        ...     for i in range(16):
        ...         reduction()
        ...     ti.profiler.print_kernel_profiler_info('trace')

    Note:
        The configuration of the ``metric_list`` is cleared on exit from this context.
    """
    get_default_kernel_profiler().set_metrics(metric_list)
    try:
        yield get_default_kernel_profiler()
    finally:
        # Without try/finally an exception raised inside the ``with`` body
        # would skip this reset and leave the temporary metrics installed.
        get_default_kernel_profiler().set_metrics()
|
576
|
+
|
577
|
+
|
578
|
+
# Public names of the kernel profiler module (what ``import *`` exports).
__all__ = [
|
579
|
+
"clear_kernel_profiler_info",
|
580
|
+
"collect_kernel_profiler_metrics",
|
581
|
+
"get_kernel_profiler_total_time",
|
582
|
+
"print_kernel_profiler_info",
|
583
|
+
"query_kernel_profiler_info",
|
584
|
+
"set_kernel_profiler_metrics",
|
585
|
+
"set_kernel_profiler_toolkit",
|
586
|
+
]
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# type: ignore
|
2
|
+
|
3
|
+
from gstaichi.lang.impl import get_runtime
|
4
|
+
|
5
|
+
|
6
|
+
def print_memory_profiler_info():
    """Print the memory profiler report of the current program.

    This is the memory profiling tool for LLVM backends with full sparse
    support. The profiler is automatically on, so nothing has to be enabled
    beforehand.
    """
    # Materialize the runtime before asking the program for its report.
    get_runtime().materialize()
    program = get_runtime().prog
    program.print_memory_profiler_info()
|
13
|
+
|
14
|
+
|
15
|
+
# Public names of the memory profiler module (what ``import *`` exports).
__all__ = ["print_memory_profiler_info"]
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# type: ignore
|
2
|
+
|
3
|
+
from gstaichi._lib import core as _ti_core
|
4
|
+
|
5
|
+
|
6
|
+
def print_scoped_profiler_info():
    """Print, in a hierarchical format, the time elapsed on host tasks.

    The scoped profiler is automatically on. This function simply calls
    the function imported from C++: ``_ti_core.print_profile_info()``.

    Example::

        >>> import gstaichi as ti
        >>> ti.init(arch=ti.cpu)
        >>> var = ti.field(ti.f32, shape=1)
        >>> @ti.kernel
        ... def compute():
        ...     var[0] = 1.0
        ...     print("Setting var[0] =", var[0])
        >>> compute()
        >>> ti.profiler.print_scoped_profiler_info()
    """
    _ti_core.print_profile_info()
|
26
|
+
|
27
|
+
|
28
|
+
def clear_scoped_profiler_info():
    """Discard the profiler's records of time elapsed on host tasks.

    This function simply calls the function imported from C++:
    ``_ti_core.clear_profile_info()``.
    """
    _ti_core.clear_profile_info()
|
34
|
+
|
35
|
+
|
36
|
+
# Public names of the scoped profiler module (what ``import *`` exports).
__all__ = ["print_scoped_profiler_info", "clear_scoped_profiler_info"]
|