l0n0lacl 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.1
2
+ Name: l0n0lacl
3
+ Version: 0.0.1
4
+ Summary: 用于调用ascendc编写的算子
5
+ Author: l0n0l
6
+ Author-email: 1038352856@qq.com
7
+ Keywords: acl,ascendc,算子,算子开发
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Software Development :: Build Tools
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.7
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Requires-Python: >=3.7, <4
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: colorama
21
+ Requires-Dist: numpy
22
+
23
+ # 1 功能描述
24
+ 由于在ascendc算子开发过程中运行算子比较复杂,为了简化算子的运行,将运行算子变成可以用python直接调用的函数。所以编写了此代码。
25
+
26
+ # 2 安装
27
+ ```
28
+ pip install l0n0lacl
29
+ ```
30
+
31
+ # 3 运行算子实例
32
+ ## 3.1 先切换到cann环境,比如我的环境是:
33
+ ```
34
+ source /home/HwHiAiUser/Ascend/ascend-toolkit/set_env.sh
35
+ ```
36
+ ## 3.2 先安装我们编写的算子
37
+ ```
38
+ bash custom_opp_xxx_aarch64.run
39
+ ```
40
+ ## 3.3 创建算子运行器
41
+ ```python
42
+ from l0n0lacl import *
43
+ ascendc_gelu = OpRunner("Gelu", op_path_prefix='customize')
44
+ ```
45
+
46
+ ## 3.4 调用算子
47
+ ### 3.4.1 先看调用传参顺序
48
+ 在算子工程编译后,会有代码生成,在算子工程目录:
49
+ `${算子目录}/build_out/autogen/aclnn_xxx.h`中可以找到`aclnnXXXGetWorkspaceSize`函数。以Gelu为例:
50
+ ```c++
51
+ __attribute__((visibility("default")))
52
+ aclnnStatus aclnnGeluGetWorkspaceSize(
53
+ const aclTensor *x,
54
+ const aclTensor *out,
55
+ uint64_t *workspaceSize,
56
+ aclOpExecutor **executor);
57
+ ```
58
+ 可以看到参数为 `x`, `out`, `workspaceSize`, `executor`。其中 `workspaceSize`, `executor`不需要管。
59
+ * `aclTensor*`对应`numpy.ndarray`
60
+ * 其他参考: <a href = "https://docs.python.org/zh-cn/3/library/ctypes.html#fundamental-data-types">ctypes类型</a>
61
+ ### 3.4.2 调用算子
62
+ ```python
63
+ import torch
64
+ from l0n0lacl import *
65
+ ascendc_gelu = OpRunner("Gelu", op_path_prefix='customize')
66
+ target_dtype = torch.float
67
+ x = torch.empty(shape, dtype=target_dtype).uniform_(-1, 1)
68
+ y = torch.empty(shape, dtype=target_dtype).zero_()
69
+ out = ascendc_gelu(x.numpy(), y.numpy()).to_cpu()
70
+ print(out)
71
+ ```
72
+
73
+ # 4. api参考
74
+ ## 4.1 AclNDTensor
75
+ ```python
76
+ class AclNDTensor:
77
+ def __init__(self, np_array: np.ndarray):
78
+ pass
79
+ def to_cpu(self):
80
+ pass
81
+ ```
82
+ numpy ndarray与ascend nd tensor间的桥梁
83
+ ### 4.1.1 `__init__`
84
+ * `np_array`: numpy的tensor
85
+ ### 4.1.2 `to_cpu`
86
+ 将运算结果从npu拷贝到cpu
87
+ ## 4.2 OpRunner
88
+ ```python
89
+ class OpRunner:
90
+ def __init__(self, name, op_path_prefix='customize', op_path=None, device_id=0) -> None:
91
+ pass
92
+ def __call__(self, *args, outCout=1, argtypes=None, stream=None) -> Union[AclNDTensor, List[AclNDTensor]]:
93
+ pass
94
+ def sync_stream(self)->None:
95
+ pass
96
+ ```
97
+ ### 4.2.1 `__init__`
98
+ * `name`:算子名称,
99
+ * `op_path_prefix`: 算子工程中**CMakePresets.json**文件中**vendor_name**的值。默认是`customize`,可以不传
100
+ ```json
101
+ "vendor_name": {
102
+ "type": "STRING",
103
+ "value": "customize"
104
+ },
105
+ ```
106
+ * `op_path`: 算子`libcust_opapi.so`库的绝对路径。默认不传;不传时根据`op_path_prefix`自动推导。
107
+ * `device_id`: 设备ID。默认`0`
108
+
109
+ ### 4.2.2 `__call__`
110
+ * `args`: 表示传给`aclnnXXXGetWorkspaceSize`除了`workspaceSize`, `executor`的参数
111
+ * `outCout` : 表示算子的输出个数。如果输出个数为`1`,返回一个`AclNDTensor`。如果输出个数大于1,返回`List[AclNDTensor]`
112
+ * `argtypes`: 表示`aclnnXXXGetWorkspaceSize`的参数`ctypes`参数类型,对于特别复杂的算子,如果发现调用异常,可以手动指定类型。
113
+ 比如(**仅用于举例,其实可以不传,自动推导就可运行。但是当发现运行异常的情况下,可以自己指定**),对于:
114
+ ```c++
115
+ __attribute__((visibility("default")))
116
+ aclnnStatus aclnnCumsumGetWorkspaceSize(
117
+ const aclTensor *x,
118
+ const aclTensor *axis,
119
+ bool exclusiveOptional,
120
+ bool reverseOptional,
121
+ const aclTensor *out,
122
+ uint64_t *workspaceSize,
123
+ aclOpExecutor **executor);
124
+ ```
125
+
126
+ ```python
127
+ import ctypes
128
+ from l0n0lacl import *
129
+ ascendc_cumsum = OpRunner("Cumsum")
130
+ target_dtype = np.float32
131
+ data_range = (-10, 10)
132
+ shape = [100, 3, 2304]
133
+ axis_py = 1
134
+ exclusive = True
135
+ reverse = False
136
+ x = np.random.uniform(*data_range, shape).astype(target_dtype)
137
+ axis = np.array([axis_py]).astype(np.int32)
138
+ golden: np.ndarray = tf.cumsum(x, axis_py, exclusive, reverse, argtypes=[
139
+ ctypes.c_void_p, # x
140
+ ctypes.c_void_p, # axis
141
+ ctypes.c_bool, # exclusiveOptional
142
+ ctypes.c_bool, # reverseOptional
143
+ ctypes.c_void_p, # out
144
+ ctypes.c_void_p, # workspaceSize
145
+ ctypes.c_void_p, # executor
146
+ ]).numpy()
147
+ y = np.ones_like(golden, golden.dtype) * 123
148
+ ascendc_cumsum(x, axis, exclusive, reverse, y).to_cpu()
149
+ print(y)
150
+ ```
151
+ * `stream` 如果是多stream的情况下,可以自己指定stream:
152
+ 例如:
153
+ ```python
154
+ import ctypes
155
+ import tensorflow as tf
156
+ from l0n0lacl import *
157
+ ascendc_cumsum = OpRunner("Cumsum")
158
+ target_dtype = np.float32
159
+ data_range = (-10, 10)
160
+ shape = [100, 3, 2304]
161
+ axis_py = 1
162
+ exclusive = True
163
+ reverse = False
164
+ x = np.random.uniform(*data_range, shape).astype(target_dtype)
165
+ axis = np.array([axis_py]).astype(np.int32)
166
+ golden: np.ndarray = tf.cumsum(x, axis_py, exclusive, reverse).numpy()
167
+ y = np.ones_like(golden, golden.dtype) * 123
168
+ ascendc_cumsum(x, axis, exclusive, reverse, y, argtypes=[
169
+ ctypes.c_void_p, # x
170
+ ctypes.c_void_p, # axis
171
+ ctypes.c_bool, # exclusiveOptional
172
+ ctypes.c_bool, # reverseOptional
173
+ ctypes.c_void_p, # out
174
+ ctypes.c_void_p, # workspaceSize
175
+ ctypes.c_void_p, # executor
176
+ ]).to_cpu()
177
+ verify_result(y, golden)
178
+ print(y)
179
+ ```
180
+
181
+ ### 4.2.3 `sync_stream`
182
+ 用于同步stream
183
+
184
+ ## 4.3 verify_result
185
+ 参考自:https://gitee.com/ascend/samples/blob/master/operator/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py
186
+ ```python
187
+ def verify_result(real_result:numpy.ndarray, golden:numpy.ndarray):
188
+ pass
189
+ ```
190
+ 判断精度是否符合
191
+ float16: 千分之一
192
+ float32: 万分之一
193
+ int16,int32,int8: 0
@@ -0,0 +1,171 @@
1
+ # 1 功能描述
2
+ 由于在ascendc算子开发过程中运行算子比较复杂,为了简化算子的运行,将运行算子变成可以用python直接调用的函数。所以编写了此代码。
3
+
4
+ # 2 安装
5
+ ```
6
+ pip install l0n0lacl
7
+ ```
8
+
9
+ # 3 运行算子实例
10
+ ## 3.1 先切换到cann环境,比如我的环境是:
11
+ ```
12
+ source /home/HwHiAiUser/Ascend/ascend-toolkit/set_env.sh
13
+ ```
14
+ ## 3.2 先安装我们编写的算子
15
+ ```
16
+ bash custom_opp_xxx_aarch64.run
17
+ ```
18
+ ## 3.3 创建算子运行器
19
+ ```python
20
+ from l0n0lacl import *
21
+ ascendc_gelu = OpRunner("Gelu", op_path_prefix='customize')
22
+ ```
23
+
24
+ ## 3.4 调用算子
25
+ ### 3.4.1 先看调用传参顺序
26
+ 在算子工程编译后,会有代码生成,在算子工程目录:
27
+ `${算子目录}/build_out/autogen/aclnn_xxx.h`中可以找到`aclnnXXXGetWorkspaceSize`函数。以Gelu为例:
28
+ ```c++
29
+ __attribute__((visibility("default")))
30
+ aclnnStatus aclnnGeluGetWorkspaceSize(
31
+ const aclTensor *x,
32
+ const aclTensor *out,
33
+ uint64_t *workspaceSize,
34
+ aclOpExecutor **executor);
35
+ ```
36
+ 可以看到参数为 `x`, `out`, `workspaceSize`, `executor`。其中 `workspaceSize`, `executor`不需要管。
37
+ * `aclTensor*`对应`numpy.ndarray`
38
+ * 其他参考: <a href = "https://docs.python.org/zh-cn/3/library/ctypes.html#fundamental-data-types">ctypes类型</a>
39
+ ### 3.4.2 调用算子
40
+ ```python
41
+ import torch
42
+ from l0n0lacl import *
43
+ ascendc_gelu = OpRunner("Gelu", op_path_prefix='customize')
44
+ target_dtype = torch.float
45
+ x = torch.empty(shape, dtype=target_dtype).uniform_(-1, 1)
46
+ y = torch.empty(shape, dtype=target_dtype).zero_()
47
+ out = ascendc_gelu(x.numpy(), y.numpy()).to_cpu()
48
+ print(out)
49
+ ```
50
+
51
+ # 4. api参考
52
+ ## 4.1 AclNDTensor
53
+ ```python
54
+ class AclNDTensor:
55
+ def __init__(self, np_array: np.ndarray):
56
+ pass
57
+ def to_cpu(self):
58
+ pass
59
+ ```
60
+ numpy ndarray与ascend nd tensor间的桥梁
61
+ ### 4.1.1 `__init__`
62
+ * `np_array`: numpy的tensor
63
+ ### 4.1.2 `to_cpu`
64
+ 将运算结果从npu拷贝到cpu
65
+ ## 4.2 OpRunner
66
+ ```python
67
+ class OpRunner:
68
+ def __init__(self, name, op_path_prefix='customize', op_path=None, device_id=0) -> None:
69
+ pass
70
+ def __call__(self, *args, outCout=1, argtypes=None, stream=None) -> Union[AclNDTensor, List[AclNDTensor]]:
71
+ pass
72
+ def sync_stream(self)->None:
73
+ pass
74
+ ```
75
+ ### 4.2.1 `__init__`
76
+ * `name`:算子名称,
77
+ * `op_path_prefix`: 算子工程中**CMakePresets.json**文件中**vendor_name**的值。默认是`customize`,可以不传
78
+ ```json
79
+ "vendor_name": {
80
+ "type": "STRING",
81
+ "value": "customize"
82
+ },
83
+ ```
84
+ * `op_path`: 算子`libcust_opapi.so`库的绝对路径。默认不传;不传时根据`op_path_prefix`自动推导。
85
+ * `device_id`: 设备ID。默认`0`
86
+
87
+ ### 4.2.2 `__call__`
88
+ * `args`: 表示传给`aclnnXXXGetWorkspaceSize`除了`workspaceSize`, `executor`的参数
89
+ * `outCout` : 表示算子的输出个数。如果输出个数为`1`,返回一个`AclNDTensor`。如果输出个数大于1,返回`List[AclNDTensor]`
90
+ * `argtypes`: 表示`aclnnXXXGetWorkspaceSize`的参数`ctypes`参数类型,对于特别复杂的算子,如果发现调用异常,可以手动指定类型。
91
+ 比如(**仅用于举例,其实可以不传,自动推导就可运行。但是当发现运行异常的情况下,可以自己指定**),对于:
92
+ ```c++
93
+ __attribute__((visibility("default")))
94
+ aclnnStatus aclnnCumsumGetWorkspaceSize(
95
+ const aclTensor *x,
96
+ const aclTensor *axis,
97
+ bool exclusiveOptional,
98
+ bool reverseOptional,
99
+ const aclTensor *out,
100
+ uint64_t *workspaceSize,
101
+ aclOpExecutor **executor);
102
+ ```
103
+
104
+ ```python
105
+ import ctypes
106
+ from l0n0lacl import *
107
+ ascendc_cumsum = OpRunner("Cumsum")
108
+ target_dtype = np.float32
109
+ data_range = (-10, 10)
110
+ shape = [100, 3, 2304]
111
+ axis_py = 1
112
+ exclusive = True
113
+ reverse = False
114
+ x = np.random.uniform(*data_range, shape).astype(target_dtype)
115
+ axis = np.array([axis_py]).astype(np.int32)
116
+ golden: np.ndarray = tf.cumsum(x, axis_py, exclusive, reverse, argtypes=[
117
+ ctypes.c_void_p, # x
118
+ ctypes.c_void_p, # axis
119
+ ctypes.c_bool, # exclusiveOptional
120
+ ctypes.c_bool, # reverseOptional
121
+ ctypes.c_void_p, # out
122
+ ctypes.c_void_p, # workspaceSize
123
+ ctypes.c_void_p, # executor
124
+ ]).numpy()
125
+ y = np.ones_like(golden, golden.dtype) * 123
126
+ ascendc_cumsum(x, axis, exclusive, reverse, y).to_cpu()
127
+ print(y)
128
+ ```
129
+ * `stream` 如果是多stream的情况下,可以自己指定stream:
130
+ 例如:
131
+ ```python
132
+ import ctypes
133
+ import tensorflow as tf
134
+ from l0n0lacl import *
135
+ ascendc_cumsum = OpRunner("Cumsum")
136
+ target_dtype = np.float32
137
+ data_range = (-10, 10)
138
+ shape = [100, 3, 2304]
139
+ axis_py = 1
140
+ exclusive = True
141
+ reverse = False
142
+ x = np.random.uniform(*data_range, shape).astype(target_dtype)
143
+ axis = np.array([axis_py]).astype(np.int32)
144
+ golden: np.ndarray = tf.cumsum(x, axis_py, exclusive, reverse).numpy()
145
+ y = np.ones_like(golden, golden.dtype) * 123
146
+ ascendc_cumsum(x, axis, exclusive, reverse, y, argtypes=[
147
+ ctypes.c_void_p, # x
148
+ ctypes.c_void_p, # axis
149
+ ctypes.c_bool, # exclusiveOptional
150
+ ctypes.c_bool, # reverseOptional
151
+ ctypes.c_void_p, # out
152
+ ctypes.c_void_p, # workspaceSize
153
+ ctypes.c_void_p, # executor
154
+ ]).to_cpu()
155
+ verify_result(y, golden)
156
+ print(y)
157
+ ```
158
+
159
+ ### 4.2.3 `sync_stream`
160
+ 用于同步stream
161
+
162
+ ## 4.3 verify_result
163
+ 参考自:https://gitee.com/ascend/samples/blob/master/operator/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py
164
+ ```python
165
+ def verify_result(real_result:numpy.ndarray, golden:numpy.ndarray):
166
+ pass
167
+ ```
168
+ 判断精度是否符合
169
+ float16: 千分之一
170
+ float32: 万分之一
171
+ int16,int32,int8: 0
@@ -0,0 +1,93 @@
1
+ from .utils import *
2
+
3
+
4
def build_acl_libnnopbase_path(arch=None):
    """Return the expected location of ``libnnopbase.so`` in the CANN toolkit.

    Args:
        arch: CPU architecture name used for the ``<arch>-linux`` toolkit
            sub-directory, e.g. ``"aarch64"``. When omitted, the machine type
            reported by ``platform.machine()`` is used, falling back to
            ``"aarch64"`` if that is empty.

    Returns:
        The library path as a string. The file is not checked for existence.

    Raises:
        KeyError: If the ``ASCEND_HOME_PATH`` environment variable is not set.
    """
    import platform  # local import: not part of this module's top-level imports

    if arch is None:
        # The directory used to be hard-coded to "aarch64-linux".
        # NOTE(review): assumes the toolkit names its per-architecture
        # directory "<machine>-linux" (e.g. "x86_64-linux") — confirm against
        # the installed CANN layout.
        arch = platform.machine() or "aarch64"
    ascend_home_path = os.environ["ASCEND_HOME_PATH"]
    return f"{ascend_home_path}/{arch}-linux/lib64/libnnopbase.so"
8
+
9
+
10
# Load the aclnn base library once at import time and declare the ctypes
# signatures of the two functions this module calls.
libnnopbase = ctypes.CDLL(build_acl_libnnopbase_path())

# C prototype being described:
#   ACL_FUNC_VISIBILITY aclTensor *aclCreateTensor(
#       const int64_t *viewDims,
#       uint64_t viewDimsNum,
#       aclDataType dataType,
#       const int64_t *stride,
#       int64_t offset,
#       aclFormat format,
#       const int64_t *storageDims,
#       uint64_t storageDimsNum,
#       void *tensorData)
# The result is declared as a pointer so it is not truncated to a C int.
libnnopbase.aclCreateTensor.restype = ctypes.c_void_p
libnnopbase.aclCreateTensor.argtypes = [
    ctypes.c_void_p,  # viewDims
    ctypes.c_uint64,  # viewDimsNum
    ctypes.c_int,  # dataType
    ctypes.c_void_p,  # stride
    ctypes.c_int64,  # offset
    ctypes.c_int,  # format
    ctypes.c_void_p,  # storageDims
    ctypes.c_uint64,  # storageDimsNum
    ctypes.c_void_p,  # tensorData
]

# aclDestroyTensor takes the tensor handle returned by aclCreateTensor.
libnnopbase.aclDestroyTensor.argtypes = [ctypes.c_void_p]
34
+
35
+
36
class AclNDTensor:
    """Bridge between a host ``numpy.ndarray`` and an ACL tensor on the device.

    Construction allocates device memory, copies the array's bytes into it and
    creates an ``aclTensor`` descriptor for that buffer. ``to_cpu`` copies the
    device buffer back into the same numpy array. The descriptor and the
    device buffer are released when the object is garbage collected.
    """

    def __init__(self, np_array: np.ndarray):
        """Upload ``np_array`` to the device and create its tensor descriptor.

        Args:
            np_array: Host array whose raw buffer is copied to the device.
                NOTE(review): the copy reads ``data_bytes_size`` bytes starting
                at ``np_array.ctypes.data``, which presumably requires a
                C-contiguous array — confirm with callers.

        Raises:
            AssertionError: If allocation, the copy, or tensor creation fails.
        """
        # Handles are zeroed first so __del__ stays safe when construction
        # fails part-way (previously __del__ hit a missing ``ptr`` attribute).
        self.ptr = 0
        self.device_ptr = 0
        self.op_runner = None  # set by OpRunner when the tensor is used in a call
        self.need_copy_to_cpu = False  # True once the device copy may be newer
        self.np_array = np_array
        self.data_bytes_size = np_array.size * np_array.itemsize
        # Round the allocation up to a multiple of 256 bytes (integer maths,
        # same result as the former ceil-based expression).
        self.mem_size = (self.data_bytes_size + 255) // 256 * 256
        if self.mem_size > 0:
            device_ptr, ret = acl.rt.malloc(
                self.mem_size, AclAllocPolicy.ACL_MEM_MALLOC_HUGE_FIRST)
            assert ret == 0, f"acl.rt.malloc failed with code {ret}"
            self.device_ptr = device_ptr
            ret = acl.rt.memcpy(
                self.device_ptr,
                self.mem_size,
                np_array.ctypes.data,
                self.data_bytes_size,
                AclMemcopyKind.ACL_MEMCPY_HOST_TO_DEVICE,
            )
            assert ret == 0, f"acl.rt.memcpy (host to device) failed with code {ret}"
        # Kept as attributes so the shape buffer outlives the C call below.
        self.shape = np.array(np_array.shape, dtype=np.int64)
        self.shape_size = len(np_array.shape)
        self.acl_dtype = numpy_dtype_2_acl_dtype(np_array.dtype)
        tensor_ptr = libnnopbase.aclCreateTensor(
            self.shape.ctypes.data,  # viewDims
            self.shape_size,  # viewDimsNum
            self.acl_dtype,  # dataType
            0,  # stride: NULL pointer
            0,  # offset
            2,  # format — NOTE(review): presumably ACL_FORMAT_ND, confirm
            self.shape.ctypes.data,  # storageDims
            self.shape_size,  # storageDimsNum
            self.device_ptr,  # tensorData
        )
        # A c_void_p restype yields None (not 0) for NULL, so test truthiness;
        # the old ``!= 0`` comparison could never detect a NULL result.
        assert tensor_ptr, "aclCreateTensor returned NULL"
        self.ptr = tensor_ptr

    def __str__(self) -> str:
        """Return the string form of the host array after syncing from the device."""
        return str(self.to_cpu())

    def __del__(self):
        """Release the tensor descriptor and the device buffer, if any."""
        tensor_ptr = getattr(self, "ptr", 0)
        device_ptr = getattr(self, "device_ptr", 0)
        if device_ptr:
            # Wait for the operator that may still be using the buffer before
            # the memory is handed back to the runtime.
            runner = getattr(self, "op_runner", None)
            if runner is not None:
                runner.sync_stream()
        if tensor_ptr:
            libnnopbase.aclDestroyTensor(tensor_ptr)
            self.ptr = 0
        if device_ptr:
            # The device allocation used to be leaked; free it here.
            acl.rt.free(device_ptr)
            self.device_ptr = 0

    def to_cpu(self):
        """Copy the device buffer back into the host array and return the array.

        The owning runner's stream is synchronised first. The copy is only
        performed while ``need_copy_to_cpu`` is set, and clears that flag.

        Raises:
            AssertionError: If the device-to-host copy fails.
        """
        if self.op_runner is not None:
            self.op_runner.sync_stream()
        if self.need_copy_to_cpu:
            ret = acl.rt.memcpy(
                self.np_array.ctypes.data,
                self.data_bytes_size,
                self.device_ptr,
                self.data_bytes_size,
                AclMemcopyKind.ACL_MEMCPY_DEVICE_TO_HOST,
            )
            assert ret == 0, f"acl.rt.memcpy (device to host) failed with code {ret}"
            self.need_copy_to_cpu = False
        return self.np_array
@@ -0,0 +1,24 @@
1
+ from .utils import *
2
+
3
class AclStream:
    """Context manager that creates an ACL stream on entry and destroys it on exit.

    The value bound by ``with AclStream(...) as stream`` is the raw stream
    handle returned by ``acl.rt.create_stream``.
    """

    def __init__(self, device_id=0):
        """Remember ``device_id`` and select it as the current device."""
        self.stream = None
        self.device_id = device_id
        self.set_device(device_id)

    def set_device(self, device_id):
        """Select ``device_id`` as the current device.

        A non-zero status from the runtime is reported through ``print_ret``
        (it used to be discarded silently).
        """
        ret = acl.rt.set_device(device_id)
        print_ret("设置device失败", ret)
        self.device_id = device_id

    def __enter__(self):
        """Create the stream, store it on the instance and return its handle."""
        stream, ret = acl.rt.create_stream()
        print_ret("创建stream失败", ret)
        self.stream = stream
        return stream

    def __exit__(self, *args, **kwargs):
        """Synchronise and destroy the stream created by ``__enter__``."""
        if self.stream is None or self.stream == 0:
            return
        try_sync_stream(self.stream)
        ret = acl.rt.destroy_stream(self.stream)
        # Forget the handle so a second __exit__ cannot destroy it again.
        self.stream = None
        print_ret("销毁stream错误!", ret)
24
+
@@ -0,0 +1,97 @@
1
+ from .AclNDTensor import *
2
+
3
def build_op_api_lib_path(prefix='customize'):
    """Return the path of the custom-operator API library for vendor ``prefix``.

    The path is rooted at the ``ASCEND_HOME_PATH`` environment variable; a
    missing variable raises ``KeyError``. The file is not checked for existence.
    """
    toolkit_root = os.environ["ASCEND_HOME_PATH"]
    return f"{toolkit_root}/opp/vendors/{prefix}/op_api/lib/libcust_opapi.so"
7
+
8
class OpRunner:
    """Callable wrapper around one custom aclnn operator.

    Loads ``aclnn<name>GetWorkspaceSize`` and ``aclnn<name>`` from the operator
    API shared library and exposes the two-step C call as one Python call.
    """

    # Stream used when a call does not pass ``stream``; assigned from outside
    # the class.
    default_stream = None

    def __init__(self, name, op_path_prefix='customize', op_path=None, device_id=0) -> None:
        """Load the operator library and look up the operator's entry points.

        Args:
            name: Operator name, inserted into the ``aclnn<name>...`` symbols.
            op_path_prefix: Vendor directory name used to locate the library
                when ``op_path`` is not given.
            op_path: Explicit path of the operator API shared library.
            device_id: Accepted for interface compatibility; not used here.
        """
        op_path = op_path or build_op_api_lib_path(op_path_prefix)
        self.name = name
        self.op_lib = ctypes.CDLL(op_path)
        self.get_workspace_size = getattr(
            self.op_lib, f"aclnn{name}GetWorkspaceSize")
        self.run = getattr(self.op_lib, f"aclnn{name}")
        self.run.argtypes = [
            ctypes.c_void_p,  # workspace
            ctypes.c_uint64,  # workspace size
            ctypes.c_void_p,  # executor
            ctypes.c_void_p,  # stream
        ]
        # One-element buffer that GetWorkspaceSize fills with the executor handle.
        self.executor = np.array([0], dtype=np.uint64)
        # Initialised here so sync_stream() is safe before the first call.
        self.stream = None
        self.workspace_ptr = 0
        self.workspace_size = 0
        # Device workspaces that are released on the next successful sync.
        self._pending_workspaces = []

    @staticmethod
    def _infer_ctype(arg):
        """Return the ctypes type inferred for a Python scalar, or None if unsupported."""
        if isinstance(arg, bool):  # must precede int: bool is an int subclass
            return ctypes.c_bool
        if isinstance(arg, int):
            int32_info = np.iinfo(np.int32)
            if arg > int32_info.max or arg < int32_info.min:
                return ctypes.c_int64
            return ctypes.c_int32
        if isinstance(arg, float):
            float32_info = np.finfo(np.float32)
            if arg > float32_info.max or arg < float32_info.min:
                return ctypes.c_double
            return ctypes.c_float
        if isinstance(arg, bytes):
            return ctypes.c_char_p
        return None

    def __call__(self, *args, outCout=1, argtypes=None, stream=None) -> Union[AclNDTensor, List[AclNDTensor]]:
        """Run the operator on ``args``.

        Args:
            *args: Arguments of ``aclnn<name>GetWorkspaceSize`` except the
                trailing ``workspaceSize`` and ``executor``. ``numpy.ndarray``
                values are uploaded as ``AclNDTensor``; ``bool``, ``int``,
                ``float``, ``bytes`` and numpy scalars are passed by value.
            outCout: Number of trailing arguments that are outputs.
            argtypes: Optional explicit ctypes signature for
                ``aclnn<name>GetWorkspaceSize`` (including the two trailing
                pointers). When omitted the types are inferred from ``args``.
            stream: Stream to launch on; defaults to ``OpRunner.default_stream``.

        Returns:
            The last argument as an ``AclNDTensor`` when ``outCout`` is 1,
            otherwise the list of the last ``outCout`` arguments.

        Raises:
            TypeError: If an argument's C type cannot be inferred and no
                ``argtypes`` were supplied (such arguments used to be dropped
                silently, which shifted every following argument).
            RuntimeError: If computing the workspace size or allocating the
                workspace fails.
        """
        stream = stream or OpRunner.default_stream
        if self._pending_workspaces and stream != self.stream:
            # Pending workspaces belong to work queued on the previous stream;
            # finish that stream before switching.
            self.sync_stream()
        self.stream = stream

        temp_args = []
        for arg in args:
            if isinstance(arg, np.ndarray):
                arg = AclNDTensor(arg)
            if isinstance(arg, AclNDTensor):
                arg.op_runner = self
                arg.need_copy_to_cpu = True
            temp_args.append(arg)
        workspace = np.zeros([1], dtype=np.uint64)
        temp_args.append(workspace.ctypes.data)
        temp_args.append(self.executor.ctypes.data)

        infer_types = argtypes is None
        run_argtypes = [] if infer_types else list(argtypes)
        real_args = []
        first_out_pointer = len(temp_args) - 2
        for index, arg in enumerate(temp_args):
            if index >= first_out_pointer:
                # workspaceSize / executor are addresses: always pass them as
                # pointers instead of guessing from the integer's magnitude.
                value, ctype = arg, ctypes.c_void_p
            elif isinstance(arg, AclNDTensor):
                value, ctype = arg.ptr, ctypes.c_void_p
            else:
                if isinstance(arg, np.generic):
                    arg = arg.item()  # numpy scalar -> plain Python scalar
                value, ctype = arg, self._infer_ctype(arg)
                if ctype is None and infer_types:
                    raise TypeError(
                        f"cannot infer a ctypes type for argument {index} of type "
                        f"{type(arg).__name__}; pass argtypes explicitly")
            real_args.append(value)
            if infer_types:
                run_argtypes.append(ctype)

        self.get_workspace_size.argtypes = run_argtypes
        ret = self.get_workspace_size(*real_args)
        if ret != 0:
            print_ret(f"aclnn{self.name}GetWorkspaceSize失败", ret)
            # Without a valid executor the launch below cannot succeed.
            raise RuntimeError(
                f"aclnn{self.name}GetWorkspaceSize failed with code {ret}")

        workspace_ptr = 0
        workspace_size = int(workspace[0])
        if workspace_size > 0:
            print("需要workspace大小为:", workspace_size, flush=True)
            workspace_ptr, ret = acl.rt.malloc(workspace_size, 0)
            print_ret("分配workspace失败", ret)
            if ret != 0:
                raise RuntimeError(
                    f"allocating {workspace_size} bytes of workspace failed with code {ret}")
            self._pending_workspaces.append(workspace_ptr)
        self.workspace_ptr = workspace_ptr
        self.workspace_size = workspace_size
        ret = self.run(workspace_ptr, workspace_size,
                       int(self.executor[0]), stream)
        print_ret(f"aclnn{self.name}执行失败", ret)
        if outCout == 1:
            return temp_args[-3]
        return temp_args[-(outCout + 2):-2]

    def sync_stream(self):
        """Wait for the last used stream and release finished workspaces.

        Does nothing when no stream has been used yet. Workspaces are kept if
        the stream could not be synchronised, since the device may still be
        using them.
        """
        if self.stream is None:
            return
        if stream_need_sync(self.stream) and not try_sync_stream(self.stream):
            return
        # Swap the list out first so a re-entrant call sees nothing to free.
        pending, self._pending_workspaces = self._pending_workspaces, []
        for workspace_ptr in pending:
            acl.rt.free(workspace_ptr)
        self.workspace_ptr = 0
        self.workspace_size = 0
@@ -0,0 +1,20 @@
1
+ from .utils import *
2
+ from .AclNDTensor import AclNDTensor
3
+ from .OpRunner import OpRunner
4
+ from .AclStream import AclStream
5
+
6
# Initialise ACL, select device 0 and create the stream that OpRunner uses
# whenever a call does not pass its own stream.
acl.init()
acl.rt.set_device(0)
OpRunner.default_stream, ret = acl.rt.create_stream()
print_ret("创建_defualt_stream失败", ret)


@atexit.register
def finalize():
    """Destroy the shared default stream and finalize ACL at interpreter exit."""
    stream = OpRunner.default_stream
    if stream is not None and stream != 0:
        try_sync_stream(stream)
        # Report the status of destroy_stream itself; previously the stale
        # module-level ``ret`` left over from create_stream was printed.
        destroy_ret = acl.rt.destroy_stream(stream)
        OpRunner.default_stream = None
        print_ret("销毁stream错误!", destroy_ret)
    acl.finalize()
@@ -0,0 +1,222 @@
1
+ import os
2
+ import acl
3
+ import time
4
+ import math
5
+ import ctypes
6
+ import colorama
7
+ import numpy as np
8
+ import colorama
9
+ import atexit
10
+ from typing import List, Union
11
+
12
+
13
def print_ret(msg, ret):
    """Print ``msg`` and ``ret`` in red when ``ret`` is non-zero; otherwise do nothing."""
    if ret != 0:
        print(colorama.Fore.RED, "[错误]", msg, ret, flush=True)
        # Restore the terminal colour after the error line.
        print(colorama.Style.RESET_ALL, flush=True)
18
+
19
+
20
class AclStreamStatus:
    """Stream status codes compared against the result of a stream query."""

    ACL_STREAM_STATUS_COMPLETE = 0  # every task on the stream has finished
    ACL_STREAM_STATUS_NOT_READY = 1  # at least one task is still unfinished
    ACL_STREAM_STATUS_RESERVED = 65535  # reserved (0xFFFF)
27
+
28
+
29
def stream_need_sync(stream: int):
    """Return True when ``stream`` reports unfinished work.

    A failed status query is reported through ``print_ret`` and treated as
    "no synchronisation needed" (False).
    """
    status, ret = acl.rt.stream_query(stream)
    if ret == 0:
        return status == AclStreamStatus.ACL_STREAM_STATUS_NOT_READY
    print_ret("获取stream状态错误", ret)
    return False
35
+
36
+
37
def try_sync_stream(stream: int):
    """Synchronise ``stream``; return True on success, False after reporting the error."""
    ret = acl.rt.synchronize_stream(stream)
    if ret == 0:
        return True
    print_ret("同步stream错误", ret)
    return False
43
+
44
+
45
def get_loss_by_type(dtype):
    """Return the comparison tolerance for ``dtype``.

    float16 gets 1/1000, float32 gets 1/10000 and every other dtype gets 0.
    """
    if dtype == np.float16:
        return 1 / 1000
    if dtype == np.float32:
        return 1 / 10000
    return 0
52
+
53
+
54
def _compare(v1: np.ndarray, v2: np.ndarray):
    """Return an element-wise mask of where ``v1`` and ``v2`` agree.

    Two elements agree when their absolute difference does not exceed the
    tolerance that ``get_loss_by_type`` gives for ``v1``'s dtype.
    """
    tolerance = get_loss_by_type(v1.dtype)
    difference = np.abs(v1 - v2)
    return difference <= tolerance
57
+
58
+
59
def compare(v1: np.ndarray, v2: np.ndarray):
    """Return True when every element of ``v1`` and ``v2`` agrees per ``_compare``."""
    within_tolerance = _compare(v1, v2)
    return within_tolerance.all()
61
+
62
+
63
def right_rate(v1: np.ndarray, v2: np.ndarray):
    """Return the fraction of ``v1``'s elements that agree with ``v2`` per ``_compare``."""
    matches = _compare(v1, v2).astype(np.int32)
    return matches.sum() / v1.size
66
+
67
# Adapted from: https://gitee.com/ascend/samples/blob/master/operator/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py
def verify_result(real_result, golden):
    """Check ``real_result`` against ``golden`` and print a coloured verdict.

    The tolerance comes from ``get_loss_by_type(real_result.dtype)``. The check
    fails only when both the absolute-error test and the relative-error test
    have more failing elements than ``real_result.size * tolerance``.

    Returns:
        True when the result is accepted, False otherwise.
    """
    loss = get_loss_by_type(real_result.dtype)
    minimum = 10e-10  # keeps the relative-error denominator away from zero
    abs_error = np.abs(real_result - golden)
    magnitude = np.maximum(np.abs(real_result), np.abs(golden))
    atol_ok = np.less_equal(abs_error, loss)  # absolute-error test
    rtol_ok = np.less_equal(abs_error / np.add(magnitude, minimum), loss)  # relative-error test
    if not rtol_ok.all() and not atol_ok.all():
        allowed_failures = real_result.size * loss
        rtol_failures = np.sum(rtol_ok == False)
        atol_failures = np.sum(atol_ok == False)
        if rtol_failures > allowed_failures and atol_failures > allowed_failures:
            # Too many elements are out of tolerance under both criteria.
            print(
                colorama.Fore.RED, real_result.dtype, "[ERROR] result error", flush=True
            )
            print(colorama.Style.RESET_ALL, flush=True)
            return False
    print(colorama.Fore.GREEN, real_result.dtype, "test pass", flush=True)
    print(colorama.Style.RESET_ALL, flush=True)
    return True
88
+
89
+
90
class AclDtype:
    """Integer codes for ACL data types, as passed to ``aclCreateTensor``."""

    # Floating point
    ACL_FLOAT = 0
    ACL_FLOAT16 = 1
    ACL_DOUBLE = 11
    ACL_BF16 = 27

    # Signed integers
    ACL_INT8 = 2
    ACL_INT32 = 3
    ACL_INT16 = 6
    ACL_INT64 = 9
    ACL_INT4 = 29

    # Unsigned integers
    ACL_UINT8 = 4
    ACL_UINT16 = 7
    ACL_UINT32 = 8
    ACL_UINT64 = 10
    ACL_UINT1 = 30

    # Boolean and string
    ACL_BOOL = 12
    ACL_STRING = 13

    # Complex
    ACL_COMPLEX64 = 16
    ACL_COMPLEX128 = 17
    ACL_COMPLEX32 = 33
110
+
111
+
112
class AclRunMode:
    """Integer codes for the ACL run mode (device or host)."""

    ACL_DEVICE, ACL_HOST = 0, 1
115
+
116
+
117
class AclAllocPolicy:
    """Memory allocation policies accepted by the device allocator.

    0: ACL_MEM_MALLOC_HUGE_FIRST - requests of at most 1M use normal pages even
       with this policy; larger requests prefer huge pages and fall back to
       normal pages when huge pages are insufficient.
    1: ACL_MEM_MALLOC_HUGE_ONLY - huge pages only; an error is returned when
       huge pages are insufficient.
    2: ACL_MEM_MALLOC_NORMAL_ONLY - normal pages only.
    3: ACL_MEM_MALLOC_HUGE_FIRST_P2P - only for device-to-device copy
       scenarios; prefers huge pages and falls back to normal pages.
       Reserved option.
    4: ACL_MEM_MALLOC_HUGE_ONLY_P2P - only for device-to-device copy
       scenarios; huge pages only, error when insufficient. Reserved option.
    5: ACL_MEM_MALLOC_NORMAL_ONLY_P2P - only for device-to-device copy
       scenarios; normal pages only. Reserved option.
    """

    # General-purpose policies
    ACL_MEM_MALLOC_HUGE_FIRST = 0
    ACL_MEM_MALLOC_HUGE_ONLY = 1
    ACL_MEM_MALLOC_NORMAL_ONLY = 2

    # Device-to-device (P2P) variants
    ACL_MEM_MALLOC_HUGE_FIRST_P2P = 3
    ACL_MEM_MALLOC_HUGE_ONLY_P2P = 4
    ACL_MEM_MALLOC_NORMAL_ONLY_P2P = 5
133
+
134
+
135
class AclMemcopyKind:
    """Direction codes for memory copies.

    0: ACL_MEMCPY_HOST_TO_HOST - copy within host memory.
    1: ACL_MEMCPY_HOST_TO_DEVICE - copy from host to device.
    2: ACL_MEMCPY_DEVICE_TO_HOST - copy from device to host.
    3: ACL_MEMCPY_DEVICE_TO_DEVICE - copy within device memory.
    """

    ACL_MEMCPY_HOST_TO_HOST = 0
    ACL_MEMCPY_HOST_TO_DEVICE = 1
    ACL_MEMCPY_DEVICE_TO_HOST = 2
    ACL_MEMCPY_DEVICE_TO_DEVICE = 3
147
+
148
+
149
def numpy_dtype_2_acl_dtype(numpy_dtype):
    """Map a numpy dtype to the matching ``AclDtype`` code.

    Args:
        numpy_dtype: A numpy dtype object or scalar type.

    Returns:
        The ``AclDtype`` integer code, or None when no mapping is defined.
    """
    # ``np.bytes_`` stands in for the former ``np.string_`` alias. The
    # ``np.complex_`` entry is gone: it aliased complex128, so its
    # ACL_COMPLEX32 branch could never be reached. Both aliases were removed
    # in NumPy 2.0, where merely evaluating them raised AttributeError.
    candidates = (
        (np.float32, AclDtype.ACL_FLOAT),
        (np.float16, AclDtype.ACL_FLOAT16),
        (np.int8, AclDtype.ACL_INT8),
        (np.int32, AclDtype.ACL_INT32),
        (np.uint8, AclDtype.ACL_UINT8),
        (np.int16, AclDtype.ACL_INT16),
        (np.uint16, AclDtype.ACL_UINT16),
        (np.uint32, AclDtype.ACL_UINT32),
        (np.int64, AclDtype.ACL_INT64),
        (np.uint64, AclDtype.ACL_UINT64),
        (np.double, AclDtype.ACL_DOUBLE),
        (np.bool_, AclDtype.ACL_BOOL),
        (np.bytes_, AclDtype.ACL_STRING),
        (np.complex64, AclDtype.ACL_COMPLEX64),
        (np.complex128, AclDtype.ACL_COMPLEX128),
    )
    for numpy_type, acl_code in candidates:
        if numpy_dtype == numpy_type:
            return acl_code
    # TODO: bf16 is not mapped yet.
    return None
183
+
184
+
185
def numpy_dtype_2_torch_dtype(numpy_dtype):
    """Map a numpy dtype to a torch dtype.

    Unsigned 16/32/64-bit integers map to the signed torch dtype of the same
    width.

    Args:
        numpy_dtype: A numpy dtype object or scalar type.

    Returns:
        The torch dtype, or None when no mapping is defined.
    """
    import torch  # imported lazily so torch stays an optional dependency

    # The former ``np.complex_`` entry is gone: it aliased complex128, so its
    # complex32 branch could never be reached, and the alias was removed in
    # NumPy 2.0, where evaluating it raised AttributeError.
    candidates = (
        (np.float32, torch.float32),
        (np.float16, torch.float16),
        (np.int8, torch.int8),
        (np.int32, torch.int32),
        (np.uint8, torch.uint8),
        (np.int16, torch.int16),
        (np.uint16, torch.int16),
        (np.uint32, torch.int32),
        (np.int64, torch.int64),
        (np.uint64, torch.int64),
        (np.double, torch.double),
        (np.bool_, torch.bool),
        (np.complex64, torch.complex64),
        (np.complex128, torch.complex128),
    )
    for numpy_type, torch_dtype in candidates:
        if numpy_dtype == numpy_type:
            return torch_dtype
    return None
217
+
218
+
219
if __name__ == "__main__":
    # Manual smoke test: upload a small array to the device.
    # ``AclNDTensor`` is not defined in this module, so it is imported here;
    # the former ``AclEnv(0)`` line referenced a name that does not appear
    # anywhere in this module and raised NameError, so it was dropped.
    # NOTE(review): assumes the installed ``l0n0lacl`` package is importable
    # when this file is run as a script — confirm.
    from l0n0lacl import AclNDTensor

    a = np.zeros((3, 3), dtype=np.float16)
    nd_tensor = AclNDTensor(a)
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.1
2
+ Name: l0n0lacl
3
+ Version: 0.0.1
4
+ Summary: 用于调用ascendc编写的算子
5
+ Author: l0n0l
6
+ Author-email: 1038352856@qq.com
7
+ Keywords: acl,ascendc,算子,算子开发
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Software Development :: Build Tools
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.7
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3 :: Only
18
+ Requires-Python: >=3.7, <4
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: colorama
21
+ Requires-Dist: numpy
22
+
23
+ # 1 功能描述
24
+ 由于在ascendc算子开发过程中运行算子比较复杂,为了简化算子的运行,将运行算子变成可以用python直接调用的函数。所以编写了此代码。
25
+
26
+ # 2 安装
27
+ ```
28
+ pip install l0n0lacl
29
+ ```
30
+
31
+ # 3 运行算子实例
32
+ ## 3.1 先切换到cann环境,比如我的环境是:
33
+ ```
34
+ source /home/HwHiAiUser/Ascend/ascend-toolkit/set_env.sh
35
+ ```
36
+ ## 3.2 先安装我们编写的算子
37
+ ```
38
+ bash custom_opp_xxx_aarch64.run
39
+ ```
40
+ ## 3.3 创建算子运行器
41
+ ```python
42
+ from l0n0lacl import *
43
+ ascendc_gelu = OpRunner("Gelu", op_path_prefix='customize')
44
+ ```
45
+
46
+ ## 3.4 调用算子
47
+ ### 3.4.1 先看调用传参顺序
48
+ 在算子工程编译后,会有代码生成,在算子工程目录:
49
+ `${算子目录}/build_out/autogen/aclnn_xxx.h`中可以找到`aclnnXXXGetWorkspaceSize`函数。以Gelu为例:
50
+ ```c++
51
+ __attribute__((visibility("default")))
52
+ aclnnStatus aclnnGeluGetWorkspaceSize(
53
+ const aclTensor *x,
54
+ const aclTensor *out,
55
+ uint64_t *workspaceSize,
56
+ aclOpExecutor **executor);
57
+ ```
58
+ 可以看到参数为 `x`, `out`, `workspaceSize`, `executor`。其中 `workspaceSize`, `executor`不需要管。
59
+ * `aclTensor*`对应`numpy.ndarray`
60
+ * 其他参考: <a href = "https://docs.python.org/zh-cn/3/library/ctypes.html#fundamental-data-types">ctypes类型</a>
61
+ ### 3.4.2 调用算子
62
+ ```python
63
+ import torch
64
+ from l0n0lacl import *
65
+ ascendc_gelu = OpRunner("Gelu", op_path_prefix='customize')
66
+ target_dtype = torch.float
67
+ x = torch.empty(shape, dtype=target_dtype).uniform_(-1, 1)
68
+ y = torch.empty(shape, dtype=target_dtype).zero_()
69
+ out = ascendc_gelu(x.numpy(), y.numpy()).to_cpu()
70
+ print(out)
71
+ ```
72
+
73
+ # 4. api参考
74
+ ## 4.1 AclNDTensor
75
+ ```python
76
+ class AclNDTensor:
77
+ def __init__(self, np_array: np.ndarray):
78
+ pass
79
+ def to_cpu(self):
80
+ pass
81
+ ```
82
+ numpy ndarray与ascend nd tensor间的桥梁
83
+ ### 4.1.1 `__init__`
84
+ * `np_array`: numpy的tensor
85
+ ### 4.1.2 `to_cpu`
86
+ 将运算结果从npu拷贝到cpu
87
+ ## 4.2 OpRunner
88
+ ```python
89
+ class OpRunner:
90
+ def __init__(self, name, op_path_prefix='customize', op_path=None, device_id=0) -> None:
91
+ pass
92
+ def __call__(self, *args, outCout=1, argtypes=None, stream=None) -> Union[AclNDTensor, List[AclNDTensor]]:
93
+ pass
94
+ def sync_stream(self)->None:
95
+ pass
96
+ ```
97
+ ### 4.2.1 `__init__`
98
+ * `name`:算子名称,
99
+ * `op_path_prefix`: 算子工程中**CMakePresets.json**文件中**vendor_name**的值。默认是`customize`,可以不传
100
+ ```json
101
+ "vendor_name": {
102
+ "type": "STRING",
103
+ "value": "customize"
104
+ },
105
+ ```
106
+ * `op_path`: 算子`libcust_opapi.so`库的绝对路径。默认不传;不传时根据`op_path_prefix`自动推导。
107
+ * `device_id`: 设备ID。默认`0`
108
+
109
+ ### 4.2.2 `__call__`
110
+ * `args`: 表示传给`aclnnXXXGetWorkspaceSize`除了`workspaceSize`, `executor`的参数
111
+ * `outCout` : 表示算子的输出个数。如果输出个数为`1`,返回一个`AclNDTensor`。如果输出个数大于1,返回`List[AclNDTensor]`
112
+ * `argtypes`: 表示`aclnnXXXGetWorkspaceSize`的参数`ctypes`参数类型,对于特别复杂的算子,如果发现调用异常,可以手动指定类型。
113
+ 比如(**仅用于举例,其实可以不传,自动推导就可运行。但是当发现运行异常的情况下,可以自己指定**),对于:
114
+ ```c++
115
+ __attribute__((visibility("default")))
116
+ aclnnStatus aclnnCumsumGetWorkspaceSize(
117
+ const aclTensor *x,
118
+ const aclTensor *axis,
119
+ bool exclusiveOptional,
120
+ bool reverseOptional,
121
+ const aclTensor *out,
122
+ uint64_t *workspaceSize,
123
+ aclOpExecutor **executor);
124
+ ```
125
+
126
+ ```python
127
+ import ctypes
128
+ from l0n0lacl import *
129
+ ascendc_cumsum = OpRunner("Cumsum")
130
+ target_dtype = np.float32
131
+ data_range = (-10, 10)
132
+ shape = [100, 3, 2304]
133
+ axis_py = 1
134
+ exclusive = True
135
+ reverse = False
136
+ x = np.random.uniform(*data_range, shape).astype(target_dtype)
137
+ axis = np.array([axis_py]).astype(np.int32)
138
+ golden: np.ndarray = tf.cumsum(x, axis_py, exclusive, reverse, argtypes=[
139
+ ctypes.c_void_p, # x
140
+ ctypes.c_void_p, # axis
141
+ ctypes.c_bool, # exclusiveOptional
142
+ ctypes.c_bool, # reverseOptional
143
+ ctypes.c_void_p, # out
144
+ ctypes.c_void_p, # workspaceSize
145
+ ctypes.c_void_p, # executor
146
+ ]).numpy()
147
+ y = np.ones_like(golden, golden.dtype) * 123
148
+ ascendc_cumsum(x, axis, exclusive, reverse, y).to_cpu()
149
+ print(y)
150
+ ```
151
+ * `stream` 如果是多stream的情况下,可以自己指定stream:
152
+ 例如:
153
+ ```python
154
+ import ctypes
155
+ import tensorflow as tf
156
+ from l0n0lacl import *
157
+ ascendc_cumsum = OpRunner("Cumsum")
158
+ target_dtype = np.float32
159
+ data_range = (-10, 10)
160
+ shape = [100, 3, 2304]
161
+ axis_py = 1
162
+ exclusive = True
163
+ reverse = False
164
+ x = np.random.uniform(*data_range, shape).astype(target_dtype)
165
+ axis = np.array([axis_py]).astype(np.int32)
166
+ golden: np.ndarray = tf.cumsum(x, axis_py, exclusive, reverse).numpy()
167
+ y = np.ones_like(golden, golden.dtype) * 123
168
+ ascendc_cumsum(x, axis, exclusive, reverse, y, argtypes=[
169
+ ctypes.c_void_p, # x
170
+ ctypes.c_void_p, # axis
171
+ ctypes.c_bool, # exclusiveOptional
172
+ ctypes.c_bool, # reverseOptional
173
+ ctypes.c_void_p, # out
174
+ ctypes.c_void_p, # workspaceSize
175
+ ctypes.c_void_p, # executor
176
+ ]).to_cpu()
177
+ verify_result(y, golden)
178
+ print(y)
179
+ ```
180
+
181
+ ### 4.2.3 `sync_stream`
182
+ 用于同步stream
183
+
184
+ ## 4.3 verify_result
185
+ 参考自:https://gitee.com/ascend/samples/blob/master/operator/AddCustomSample/KernelLaunch/AddKernelInvocationNeo/scripts/verify_result.py
186
+ ```python
187
+ def verify_result(real_result:numpy.ndarray, golden:numpy.ndarray):
188
+ pass
189
+ ```
190
+ 判断精度是否符合
191
+ float16: 千分之一
192
+ float32: 万分之一
193
+ int16,int32,int8: 0
@@ -0,0 +1,12 @@
1
+ README.md
2
+ setup.py
3
+ ./l0n0lacl/AclNDTensor.py
4
+ ./l0n0lacl/AclStream.py
5
+ ./l0n0lacl/OpRunner.py
6
+ ./l0n0lacl/__init__.py
7
+ ./l0n0lacl/utils.py
8
+ ./l0n0lacl.egg-info/PKG-INFO
9
+ ./l0n0lacl.egg-info/SOURCES.txt
10
+ ./l0n0lacl.egg-info/dependency_links.txt
11
+ ./l0n0lacl.egg-info/requires.txt
12
+ ./l0n0lacl.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ colorama
2
+ numpy
@@ -0,0 +1 @@
1
+ l0n0lacl
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,166 @@
1
+ """A setuptools based setup module.
2
+
3
+ See:
4
+ https://packaging.python.org/guides/distributing-packages-using-setuptools/
5
+ https://github.com/pypa/sampleproject
6
+ """
7
+
8
+ # Always prefer setuptools over distutils
9
+ from setuptools import setup, find_packages
10
+ import pathlib
11
+
12
+ here = pathlib.Path(__file__).parent.resolve()
13
+
14
+ # Get the long description from the README file
15
+ long_description = (here / "README.md").read_text(encoding="utf-8")
16
+ # Arguments marked as "Required" below must be included for upload to PyPI.
17
+ # Fields marked as "Optional" may be commented out.
18
+
19
+ setup(
20
+ # This is the name of your project. The first time you publish this
21
+ # package, this name will be registered for you. It will determine how
22
+ # users can install this project, e.g.:
23
+ #
24
+ # $ pip install l0n0lacl
25
+ #
26
+ # And where it will live on PyPI: https://pypi.org/project/l0n0lacl/
27
+ #
28
+ # There are some restrictions on what makes a valid project name
29
+ # specification here:
30
+ # https://packaging.python.org/specifications/core-metadata/#name
31
+ name="l0n0lacl", # Required
32
+ # Versions should comply with PEP 440:
33
+ # https://www.python.org/dev/peps/pep-0440/
34
+ #
35
+ # For a discussion on single-sourcing the version across setup.py and the
36
+ # project code, see
37
+ # https://packaging.python.org/guides/single-sourcing-package-version/
38
+ version="0.0.1", # Required
39
+ # This is a one-line description or tagline of what your project does. This
40
+ # corresponds to the "Summary" metadata field:
41
+ # https://packaging.python.org/specifications/core-metadata/#summary
42
+ description="用于调用ascendc编写的算子", # Optional
43
+ # This is an optional longer description of your project that represents
44
+ # the body of text which users will see when they visit PyPI.
45
+ #
46
+ # Often, this is the same as your README, so you can just read it in from
47
+ # that file directly (as we have already done above)
48
+ #
49
+ # This field corresponds to the "Description" metadata field:
50
+ # https://packaging.python.org/specifications/core-metadata/#description-optional
51
+ long_description=long_description, # Optional
52
+ # Denotes that our long_description is in Markdown; valid values are
53
+ # text/plain, text/x-rst, and text/markdown
54
+ #
55
+ # Optional if long_description is written in reStructuredText (rst) but
56
+ # required for plain-text or Markdown; if unspecified, "applications should
57
+ # attempt to render [the long_description] as text/x-rst; charset=UTF-8 and
58
+ # fall back to text/plain if it is not valid rst" (see link below)
59
+ #
60
+ # This field corresponds to the "Description-Content-Type" metadata field:
61
+ # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
62
+ long_description_content_type="text/markdown", # Optional (see note above)
63
+ # This should be a valid link to your project's main homepage.
64
+ #
65
+ # This field corresponds to the "Home-Page" metadata field:
66
+ # https://packaging.python.org/specifications/core-metadata/#home-page-optional
67
+ # url="https://github.com/pypa/sampleproject", # Optional
68
+ # This should be your name or the name of the organization which owns the
69
+ # project.
70
+ author="l0n0l", # Optional
71
+ # This should be a valid email address corresponding to the author listed
72
+ # above.
73
+ author_email="1038352856@qq.com", # Optional
74
+ # Classifiers help users find your project by categorizing it.
75
+ #
76
+ # For a list of valid classifiers, see https://pypi.org/classifiers/
77
+ classifiers=[ # Optional
78
+ # How mature is this project? Common values are
79
+ # 3 - Alpha
80
+ # 4 - Beta
81
+ # 5 - Production/Stable
82
+ "Development Status :: 3 - Alpha",
83
+ # Indicate who your project is intended for
84
+ "Intended Audience :: Developers",
85
+ "Topic :: Software Development :: Build Tools",
86
+ # Pick your license as you wish
87
+ "License :: OSI Approved :: MIT License",
88
+ # Specify the Python versions you support here. In particular, ensure
89
+ # that you indicate you support Python 3. These classifiers are *not*
90
+ # checked by 'pip install'. See instead 'python_requires' below.
91
+ "Programming Language :: Python :: 3",
92
+ "Programming Language :: Python :: 3.7",
93
+ "Programming Language :: Python :: 3.8",
94
+ "Programming Language :: Python :: 3.9",
95
+ "Programming Language :: Python :: 3.10",
96
+ "Programming Language :: Python :: 3 :: Only",
97
+ ],
98
+ # This field adds keywords for your project which will appear on the
99
+ # project page. What does your project relate to?
100
+ #
101
+ # Note that this is a list of additional keywords, separated
102
+ # by commas, to be used to assist searching for the distribution in a
103
+ # larger catalog.
104
+ keywords="acl, ascendc, 算子, 算子开发", # Optional
105
+ # When your source code is in a subdirectory under the project root, e.g.
106
+ # `src/`, it is necessary to specify the `package_dir` argument.
107
+ package_dir={"": "./"}, # Optional
108
+ # You can just specify package directories manually here if your project is
109
+ # simple. Or you can use find_packages().
110
+ #
111
+ # Alternatively, if you just want to distribute a single Python file, use
112
+ # the `py_modules` argument instead as follows, which will expect a file
113
+ # called `my_module.py` to exist:
114
+ #
115
+ # py_modules=["my_module"],
116
+ #
117
+ packages=find_packages(where="./"), # Required
118
+ # Specify which Python versions you support. In contrast to the
119
+ # 'Programming Language' classifiers above, 'pip install' will check this
120
+ # and refuse to install the project if the version does not match. See
121
+ # https://packaging.python.org/guides/distributing-packages-using-setuptools/#python-requires
122
+ python_requires=">=3.7, <4",
123
+ # This field lists other packages that your project depends on to run.
124
+ # Any package you put here will be installed by pip when your project is
125
+ # installed, so they must be valid existing projects.
126
+ #
127
+ # For an analysis of "install_requires" vs pip's requirements files see:
128
+ # https://packaging.python.org/discussions/install-requires-vs-requirements/
129
+ install_requires=["colorama", "numpy"], # Optional
130
+ # List additional groups of dependencies here (e.g. development
131
+ # dependencies). Users will be able to install these using the "extras"
132
+ # syntax, for example:
133
+ #
134
+ # $ pip install sampleproject[dev]
135
+ #
136
+ # Similar to `install_requires` above, these must be valid existing
137
+ # projects.
138
+ extras_require={ # Optional
139
+ },
140
+ # If there are data files included in your packages that need to be
141
+ # installed, specify them here.
142
+ package_data={ # Optional
143
+ },
144
+ # Entry points. The following would provide a command called `sample` which
145
+ # executes the function `main` from this package when invoked:
146
+ entry_points={ # Optional
147
+ # "console_scripts": [
148
+ # "sample=sample:main",
149
+ # ],
150
+ },
151
+ # List additional URLs that are relevant to your project as a dict.
152
+ #
153
+ # This field corresponds to the "Project-URL" metadata fields:
154
+ # https://packaging.python.org/specifications/core-metadata/#project-url-multiple-use
155
+ #
156
+ # Examples listed include a pattern for specifying where the package tracks
157
+ # issues, where the source is hosted, where to say thanks to the package
158
+ # maintainers, and where to support the project financially. The key is
159
+ # what's used to render the link text on PyPI.
160
+ project_urls={ # Optional
161
+ # "Bug Reports": "https://github.com/pypa/sampleproject/issues",
162
+ # "Funding": "https://donate.pypi.org",
163
+ # "Say Thanks!": "http://saythanks.io/to/example",
164
+ # "Source": "https://github.com/pypa/sampleproject/",
165
+ },
166
+ )