mindstudio-probe 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/LICENSE +201 -201
  2. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/METADATA +36 -34
  3. mindstudio_probe-1.0.4.dist-info/RECORD +276 -0
  4. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/WHEEL +1 -1
  5. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/entry_points.txt +1 -0
  6. msprobe/README.md +101 -237
  7. msprobe/{config/config.json → config.json} +49 -49
  8. msprobe/core/advisor/advisor.py +124 -124
  9. msprobe/core/advisor/advisor_const.py +59 -59
  10. msprobe/core/advisor/advisor_result.py +58 -58
  11. msprobe/core/common/const.py +341 -318
  12. msprobe/core/common/exceptions.py +99 -99
  13. msprobe/core/common/{file_check.py → file_utils.py} +478 -283
  14. msprobe/core/common/log.py +76 -69
  15. msprobe/core/common/utils.py +385 -616
  16. msprobe/core/common_config.py +85 -71
  17. msprobe/core/compare/acc_compare.py +299 -298
  18. msprobe/core/compare/check.py +95 -95
  19. msprobe/core/compare/compare_cli.py +49 -49
  20. msprobe/core/compare/highlight.py +223 -222
  21. msprobe/core/compare/multiprocessing_compute.py +149 -149
  22. msprobe/core/compare/npy_compare.py +295 -295
  23. msprobe/core/compare/utils.py +430 -429
  24. msprobe/core/data_dump/data_collector.py +154 -144
  25. msprobe/core/data_dump/data_processor/base.py +314 -293
  26. msprobe/core/data_dump/data_processor/factory.py +59 -59
  27. msprobe/core/data_dump/data_processor/mindspore_processor.py +186 -198
  28. msprobe/core/data_dump/data_processor/pytorch_processor.py +366 -389
  29. msprobe/core/data_dump/json_writer.py +96 -116
  30. msprobe/core/data_dump/scope.py +178 -178
  31. msprobe/core/grad_probe/constant.py +70 -70
  32. msprobe/core/grad_probe/grad_compare.py +171 -175
  33. msprobe/core/grad_probe/utils.py +64 -52
  34. msprobe/docs/01.installation.md +89 -0
  35. msprobe/docs/02.config_introduction.md +165 -0
  36. msprobe/docs/03.config_examples.md +247 -0
  37. msprobe/docs/04.acl_config_examples.md +76 -0
  38. msprobe/docs/05.data_dump_PyTorch.md +198 -0
  39. msprobe/docs/06.data_dump_MindSpore.md +243 -0
  40. msprobe/docs/07.accuracy_checker_PyTorch.md +274 -0
  41. msprobe/docs/08.accuracy_checker_online_PyTorch.md +198 -0
  42. msprobe/docs/09.accuracy_checker_MindSpore.md +68 -0
  43. msprobe/docs/10.accuracy_compare_PyTorch.md +245 -0
  44. msprobe/docs/11.accuracy_compare_MindSpore.md +202 -0
  45. msprobe/docs/12.overflow_check_PyTorch.md +79 -0
  46. msprobe/docs/13.overflow_check_MindSpore.md +31 -0
  47. msprobe/{pytorch/doc/parse_tool.md → docs/14.data_parse_PyTorch.md} +283 -286
  48. msprobe/docs/15.free_benchmarking_PyTorch.md +164 -0
  49. msprobe/{doc/grad_probe/grad_probe.md → docs/17.grad_probe.md} +207 -207
  50. msprobe/docs/FAQ_PyTorch.md +177 -0
  51. msprobe/docs/S02.report_free_benchmarking_validation_performance_baseline.md +146 -0
  52. msprobe/docs/img/free_benchmark_framework.png +0 -0
  53. msprobe/mindspore/__init__.py +1 -1
  54. msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +254 -245
  55. msprobe/mindspore/api_accuracy_checker/api_info.py +69 -69
  56. msprobe/mindspore/api_accuracy_checker/api_runner.py +155 -151
  57. msprobe/mindspore/api_accuracy_checker/base_compare_algorithm.py +196 -196
  58. msprobe/mindspore/api_accuracy_checker/cmd_parser.py +6 -0
  59. msprobe/mindspore/api_accuracy_checker/compute_element.py +238 -223
  60. msprobe/mindspore/api_accuracy_checker/main.py +8 -15
  61. msprobe/mindspore/api_accuracy_checker/type_mapping.py +113 -113
  62. msprobe/mindspore/api_accuracy_checker/utils.py +79 -62
  63. msprobe/mindspore/cell_processor.py +34 -34
  64. msprobe/mindspore/common/const.py +106 -87
  65. msprobe/mindspore/common/log.py +37 -37
  66. msprobe/mindspore/common/utils.py +81 -57
  67. msprobe/mindspore/compare/distributed_compare.py +75 -75
  68. msprobe/mindspore/compare/ms_compare.py +219 -117
  69. msprobe/mindspore/compare/ms_graph_compare.py +348 -317
  70. msprobe/mindspore/compare/ms_to_pt_api.yaml +399 -399
  71. msprobe/mindspore/debugger/debugger_config.py +66 -74
  72. msprobe/mindspore/debugger/precision_debugger.py +126 -107
  73. msprobe/mindspore/dump/dump_tool_factory.py +35 -35
  74. msprobe/mindspore/dump/hook_cell/api_registry.py +118 -104
  75. msprobe/mindspore/dump/hook_cell/hook_cell.py +55 -53
  76. msprobe/mindspore/dump/hook_cell/support_wrap_ops.yaml +922 -925
  77. msprobe/mindspore/dump/hook_cell/wrap_api.py +113 -0
  78. msprobe/mindspore/dump/jit_dump.py +72 -56
  79. msprobe/mindspore/dump/kernel_graph_dump.py +59 -60
  80. msprobe/mindspore/dump/kernel_kbyk_dump.py +64 -65
  81. msprobe/mindspore/free_benchmark/api_pynative_self_check.py +116 -116
  82. msprobe/mindspore/free_benchmark/common/config.py +12 -12
  83. msprobe/mindspore/free_benchmark/common/handler_params.py +17 -17
  84. msprobe/mindspore/free_benchmark/common/utils.py +71 -71
  85. msprobe/mindspore/free_benchmark/data/support_wrap_ops.yaml +842 -842
  86. msprobe/mindspore/free_benchmark/decorator/dec_forward.py +43 -42
  87. msprobe/mindspore/free_benchmark/decorator/decorator_factory.py +107 -107
  88. msprobe/mindspore/free_benchmark/handler/base_handler.py +90 -90
  89. msprobe/mindspore/free_benchmark/handler/check_handler.py +41 -41
  90. msprobe/mindspore/free_benchmark/handler/fix_handler.py +36 -36
  91. msprobe/mindspore/free_benchmark/handler/handler_factory.py +21 -21
  92. msprobe/mindspore/free_benchmark/perturbation/add_noise.py +67 -67
  93. msprobe/mindspore/free_benchmark/perturbation/base_perturbation.py +21 -21
  94. msprobe/mindspore/free_benchmark/perturbation/bit_noise.py +63 -63
  95. msprobe/mindspore/free_benchmark/perturbation/exchange_value.py +51 -0
  96. msprobe/mindspore/free_benchmark/perturbation/improve_precision.py +35 -34
  97. msprobe/mindspore/free_benchmark/perturbation/no_change.py +12 -12
  98. msprobe/mindspore/free_benchmark/perturbation/perturbation_factory.py +29 -27
  99. msprobe/mindspore/free_benchmark/self_check_tool_factory.py +33 -33
  100. msprobe/mindspore/grad_probe/global_context.py +90 -91
  101. msprobe/mindspore/grad_probe/grad_analyzer.py +231 -231
  102. msprobe/mindspore/grad_probe/grad_monitor.py +27 -27
  103. msprobe/mindspore/grad_probe/grad_stat_csv.py +131 -131
  104. msprobe/mindspore/grad_probe/hook.py +94 -92
  105. msprobe/mindspore/grad_probe/utils.py +29 -28
  106. msprobe/mindspore/ms_config.py +128 -126
  107. msprobe/mindspore/overflow_check/kernel_graph_overflow_check.py +44 -45
  108. msprobe/mindspore/overflow_check/overflow_check_tool_factory.py +34 -34
  109. msprobe/mindspore/runtime.py +4 -4
  110. msprobe/mindspore/service.py +378 -354
  111. msprobe/mindspore/task_handler_factory.py +24 -24
  112. msprobe/msprobe.py +105 -107
  113. msprobe/pytorch/__init__.py +3 -3
  114. msprobe/pytorch/api_accuracy_checker/common/config.py +53 -55
  115. msprobe/pytorch/api_accuracy_checker/common/utils.py +214 -165
  116. msprobe/pytorch/api_accuracy_checker/compare/algorithm.py +213 -213
  117. msprobe/pytorch/api_accuracy_checker/compare/api_precision_compare.py +606 -581
  118. msprobe/pytorch/api_accuracy_checker/compare/api_precision_standard.yaml +132 -132
  119. msprobe/pytorch/api_accuracy_checker/compare/api_precision_threshold.yaml +390 -390
  120. msprobe/pytorch/api_accuracy_checker/compare/compare.py +386 -381
  121. msprobe/pytorch/api_accuracy_checker/compare/compare_column.py +73 -73
  122. msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +245 -244
  123. msprobe/pytorch/api_accuracy_checker/config.yaml +10 -10
  124. msprobe/pytorch/api_accuracy_checker/run_ut/data_generate.py +335 -332
  125. msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +200 -199
  126. msprobe/pytorch/api_accuracy_checker/run_ut/run_overflow_check.py +133 -134
  127. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut.py +592 -581
  128. msprobe/pytorch/api_accuracy_checker/run_ut/run_ut_utils.py +70 -74
  129. msprobe/pytorch/api_accuracy_checker/run_ut/torch_ut_setting.json +7 -4
  130. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/attl.py +197 -202
  131. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/client.py +325 -324
  132. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/device_dispatch.py +204 -204
  133. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/server.py +219 -218
  134. msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/ssl_config.py +10 -10
  135. msprobe/pytorch/bench_functions/__init__.py +15 -15
  136. msprobe/pytorch/bench_functions/apply_adam_w.py +28 -28
  137. msprobe/pytorch/bench_functions/confusion_transpose.py +19 -19
  138. msprobe/pytorch/bench_functions/fast_gelu.py +55 -55
  139. msprobe/pytorch/bench_functions/layer_norm_eval.py +6 -6
  140. msprobe/pytorch/bench_functions/linear.py +12 -12
  141. msprobe/pytorch/bench_functions/matmul_backward.py +48 -48
  142. msprobe/pytorch/bench_functions/npu_fusion_attention.py +509 -421
  143. msprobe/pytorch/bench_functions/rms_norm.py +15 -15
  144. msprobe/pytorch/bench_functions/rotary_mul.py +52 -52
  145. msprobe/pytorch/bench_functions/scaled_mask_softmax.py +26 -26
  146. msprobe/pytorch/bench_functions/swiglu.py +55 -55
  147. msprobe/pytorch/common/__init__.py +2 -2
  148. msprobe/pytorch/common/compare_script.template +14 -14
  149. msprobe/pytorch/common/log.py +20 -31
  150. msprobe/pytorch/common/parse_json.py +39 -39
  151. msprobe/pytorch/common/utils.py +305 -300
  152. msprobe/pytorch/compare/distributed_compare.py +66 -66
  153. msprobe/pytorch/compare/mapping.yaml +607 -607
  154. msprobe/pytorch/compare/match.py +34 -33
  155. msprobe/pytorch/compare/pt_compare.py +50 -40
  156. msprobe/pytorch/debugger/debugger_config.py +95 -95
  157. msprobe/pytorch/debugger/precision_debugger.py +125 -125
  158. msprobe/pytorch/free_benchmark/__init__.py +8 -8
  159. msprobe/pytorch/free_benchmark/common/constant.py +70 -70
  160. msprobe/pytorch/free_benchmark/common/counter.py +71 -71
  161. msprobe/pytorch/free_benchmark/common/enums.py +37 -37
  162. msprobe/pytorch/free_benchmark/common/params.py +129 -129
  163. msprobe/pytorch/free_benchmark/common/utils.py +102 -102
  164. msprobe/pytorch/free_benchmark/compare/grad_saver.py +179 -179
  165. msprobe/pytorch/free_benchmark/compare/single_benchmark.py +104 -104
  166. msprobe/pytorch/free_benchmark/main.py +105 -105
  167. msprobe/pytorch/free_benchmark/perturbed_layers/base_layer.py +13 -13
  168. msprobe/pytorch/free_benchmark/perturbed_layers/layer_factory.py +41 -41
  169. msprobe/pytorch/free_benchmark/perturbed_layers/npu/add_noise.py +90 -90
  170. msprobe/pytorch/free_benchmark/perturbed_layers/npu/bit_noise.py +104 -104
  171. msprobe/pytorch/free_benchmark/perturbed_layers/npu/change_value.py +63 -63
  172. msprobe/pytorch/free_benchmark/perturbed_layers/npu/improve_precision.py +68 -68
  173. msprobe/pytorch/free_benchmark/perturbed_layers/npu/no_change.py +28 -28
  174. msprobe/pytorch/free_benchmark/perturbed_layers/npu/npu_base_layser.py +45 -45
  175. msprobe/pytorch/free_benchmark/perturbed_layers/run_cpu.py +19 -19
  176. msprobe/pytorch/free_benchmark/result_handlers/base_handler.py +217 -217
  177. msprobe/pytorch/free_benchmark/result_handlers/check_handler.py +39 -39
  178. msprobe/pytorch/free_benchmark/result_handlers/fix_handler.py +23 -23
  179. msprobe/pytorch/free_benchmark/result_handlers/handler_factory.py +30 -30
  180. msprobe/pytorch/free_benchmark/result_handlers/preheat_handler.py +170 -170
  181. msprobe/pytorch/function_factory.py +76 -75
  182. msprobe/pytorch/functional/dump_module.py +39 -39
  183. msprobe/pytorch/grad_probe/grad_monitor.py +91 -90
  184. msprobe/pytorch/grad_probe/grad_stat_csv.py +128 -128
  185. msprobe/pytorch/hook_module/api_registry.py +161 -161
  186. msprobe/pytorch/hook_module/hook_module.py +120 -120
  187. msprobe/pytorch/hook_module/support_wrap_ops.yaml +1879 -1877
  188. msprobe/pytorch/hook_module/utils.py +30 -29
  189. msprobe/pytorch/hook_module/wrap_aten.py +110 -110
  190. msprobe/pytorch/hook_module/wrap_distributed.py +78 -78
  191. msprobe/pytorch/hook_module/wrap_functional.py +105 -105
  192. msprobe/pytorch/hook_module/wrap_npu_custom.py +93 -84
  193. msprobe/pytorch/hook_module/wrap_tensor.py +71 -71
  194. msprobe/pytorch/hook_module/wrap_torch.py +86 -86
  195. msprobe/pytorch/hook_module/wrap_vf.py +62 -62
  196. msprobe/pytorch/module_processer.py +138 -138
  197. msprobe/pytorch/online_dispatch/__init__.py +20 -20
  198. msprobe/pytorch/online_dispatch/compare.py +236 -236
  199. msprobe/pytorch/online_dispatch/dispatch.py +271 -271
  200. msprobe/pytorch/online_dispatch/dump_compare.py +155 -156
  201. msprobe/pytorch/online_dispatch/single_compare.py +391 -391
  202. msprobe/pytorch/online_dispatch/torch_ops_config.yaml +49 -49
  203. msprobe/pytorch/online_dispatch/utils.py +130 -146
  204. msprobe/pytorch/parse.py +4 -4
  205. msprobe/pytorch/parse_tool/cli.py +32 -32
  206. msprobe/pytorch/parse_tool/lib/compare.py +260 -271
  207. msprobe/pytorch/parse_tool/lib/config.py +52 -52
  208. msprobe/pytorch/parse_tool/lib/file_desc.py +31 -31
  209. msprobe/pytorch/parse_tool/lib/interactive_cli.py +102 -102
  210. msprobe/pytorch/parse_tool/lib/parse_exception.py +54 -54
  211. msprobe/pytorch/parse_tool/lib/parse_tool.py +158 -158
  212. msprobe/pytorch/parse_tool/lib/utils.py +316 -321
  213. msprobe/pytorch/parse_tool/lib/visualization.py +85 -91
  214. msprobe/pytorch/pt_config.py +188 -187
  215. msprobe/pytorch/service.py +246 -252
  216. mindstudio_probe-1.0.3.dist-info/RECORD +0 -272
  217. msprobe/config/README.md +0 -539
  218. msprobe/mindspore/doc/compare.md +0 -58
  219. msprobe/mindspore/doc/dump.md +0 -217
  220. msprobe/mindspore/dump/hook_cell/wrap_functional.py +0 -91
  221. msprobe/mindspore/dump/hook_cell/wrap_tensor.py +0 -63
  222. msprobe/pytorch/doc/FAQ.md +0 -193
  223. msprobe/pytorch/doc/api_accuracy_checker.md +0 -313
  224. msprobe/pytorch/doc/api_accuracy_checker_online.md +0 -187
  225. msprobe/pytorch/doc/dump.md +0 -260
  226. msprobe/pytorch/doc/msprobe精度工具数据dump标准性能基线报告.md +0 -182
  227. msprobe/pytorch/doc/ptdbg_ascend_compare.md +0 -240
  228. msprobe/pytorch/doc/ptdbg_ascend_overview.md +0 -68
  229. msprobe/pytorch/doc/ptdbg_ascend_quickstart.md +0 -381
  230. msprobe/pytorch/doc/run_overflow_check.md +0 -25
  231. msprobe/pytorch/doc/在线精度比对.md +0 -90
  232. msprobe/pytorch/doc/无标杆工具场景验证和性能基线报告.md +0 -151
  233. {mindstudio_probe-1.0.3.dist-info → mindstudio_probe-1.0.4.dist-info}/top_level.txt +0 -0
  234. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_1.png +0 -0
  235. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_2.png +0 -0
  236. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_3.png +0 -0
  237. /msprobe/{pytorch/doc → docs}/img/BLOOM-7B_4.png +0 -0
  238. /msprobe/{pytorch/doc → docs}/img/GPT-3_1.png +0 -0
  239. /msprobe/{pytorch/doc → docs}/img/GPT-3_2.png +0 -0
  240. /msprobe/{pytorch/doc → docs}/img/GPT-3_3.png +0 -0
  241. /msprobe/{pytorch/doc → docs}/img/GPT-3_4.png +0 -0
  242. /msprobe/{pytorch/doc → docs}/img/GPT-3_5.png +0 -0
  243. /msprobe/{pytorch/doc → docs}/img/GPT-3_6.png +0 -0
  244. /msprobe/{pytorch/doc → docs}/img/GPT-3_7.png +0 -0
  245. /msprobe/{pytorch/doc → docs}/img/GPT-3_8.png +0 -0
  246. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_1.png +0 -0
  247. /msprobe/{pytorch/doc → docs}/img/YOLOV5S_2.png +0 -0
  248. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_details.png +0 -0
  249. /msprobe/{pytorch/doc → docs}/img/accuracy_checking_result.png +0 -0
  250. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_details.png +0 -0
  251. /msprobe/{pytorch/doc → docs}/img/api_precision_compare_result.png +0 -0
  252. /msprobe/{pytorch/doc → docs}/img/auto_analyze_log.png +0 -0
  253. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl.png +0 -0
  254. /msprobe/{pytorch/doc → docs}/img/compare_result_pkl_md5.png.png +0 -0
  255. /msprobe/{pytorch/doc → docs}/img/cpu_info.png +0 -0
  256. /msprobe/{config → docs}/img/free_benchmark.png +0 -0
  257. /msprobe/{doc/grad_probe/img/image-1.png → docs/img/grad_probe_image-1.png} +0 -0
  258. /msprobe/{doc/grad_probe/img/image-2.png → docs/img/grad_probe_image-2.png} +0 -0
  259. /msprobe/{doc/grad_probe/img/image-3.png → docs/img/grad_probe_image-3.png} +0 -0
  260. /msprobe/{doc/grad_probe/img/image-4.png → docs/img/grad_probe_image-4.png} +0 -0
  261. /msprobe/{doc/grad_probe/img/image.png → docs/img/grad_probe_image.png} +0 -0
  262. /msprobe/{pytorch/doc → docs}/img/module_compare.png +0 -0
@@ -1,324 +1,325 @@
1
- import hashlib
2
- import io
3
- import struct
4
- import time
5
- import os
6
- import signal
7
- import sys
8
- from queue import Queue
9
- from threading import Thread
10
- from typing import Union
11
-
12
- from OpenSSL import SSL
13
- from twisted.internet import ssl, reactor, protocol, endpoints
14
- from twisted.protocols.basic import FileSender
15
-
16
- from msprobe.pytorch.common.utils import logger
17
- from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.ssl_config import cipher_list
18
-
19
-
20
- class TCPDataItem:
21
- def __init__(self, data,
22
- sequence_number: int,
23
- rank: int = 0,
24
- step: int = 0):
25
- self.raw_data = data
26
- self.sequence_number = sequence_number
27
- self.rank = rank
28
- self.step = step
29
- self.retry_times = 0
30
- self.pending_time = 0
31
- self.busy_time = 0
32
-
33
-
34
- class TCPClient:
35
- MAX_SENDING_QUEUE_SIZE = 20
36
- ACK_SUCCESS = b"OK___"
37
- ACK_ERROR = b"ERROR"
38
- ACK_BUSY = b"BUSY_"
39
- ACK_STOP = b"STOP_"
40
- ACK_STOP_CONFIRM = b"OVER_"
41
- ACK_KILL_PROCESS = b"KILL_"
42
-
43
- QUEUE_PENDING_TIME = 600 # 队列10分钟都处于阻塞状态,则终止sending进程
44
- RESEND_RETRY_TIMES = 2 # 最大重传数
45
- RESEND_TIMER_TIME = 5 # 接收ACK超时定时器
46
- RESEND_PENDING_TIME = 60 # 连续pending时间超过1分钟则放弃该数据
47
-
48
- def __init__(self, host="localhost", port=8000, check_sum=False, tls_path=None):
49
- self.send_queue = Queue(self.MAX_SENDING_QUEUE_SIZE)
50
- self.resend_dict = dict()
51
- self.host = host
52
- self.port = port
53
- self.tls_path = tls_path
54
- self.factory = None
55
- self.sequence_number = 0
56
- self.signal_exit = False
57
- self.tcp_manager = ClientProtocol(ack_queue_size=100,
58
- chunk_size=655360,
59
- check_sum=check_sum)
60
- self.send_thread = Thread(target=self._sending_queue_data)
61
- self.send_thread.setDaemon(True)
62
- self.send_thread.start()
63
- self.destroy_thread = Thread(target=self._destroy_queue_data)
64
- self.destroy_thread.setDaemon(True)
65
- self.destroy_thread.start()
66
-
67
- @staticmethod
68
- def run_reactor():
69
- reactor.run(installSignalHandlers=False)
70
-
71
- def start(self):
72
- def conn_callback(cur_protocol):
73
- if cur_protocol.transport and cur_protocol.transport.getPeer().host == self.host:
74
- logger.debug(f"Process: {os.getpid()} connects to server successfully.")
75
- else:
76
- logger.warning(f"Process: {os.getpid()} fails to connect to server. ")
77
- raise ConnectionError(f"Failed to connect to {self.host}.")
78
-
79
- def conn_err_callback(failure):
80
- self.signal_exit = True
81
- time.sleep(1)
82
- reactor.stop()
83
- logger.error(f"Failed to connected {self.host} {self.port}. Reason is {failure.getErrorMessage()}")
84
- os.kill(os.getpid(), signal.SIGKILL)
85
- os.kill(os.getppid(), signal.SIGKILL)
86
-
87
- def cur_protocol():
88
- return self.tcp_manager
89
-
90
- self.factory = MessageClientFactory()
91
- self.factory.protocol = cur_protocol
92
- if self.tls_path:
93
- client_key = os.path.join(self.tls_path, "client.key")
94
- client_crt = os.path.join(self.tls_path, "client.crt")
95
- client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt, SSL.TLSv1_2_METHOD)
96
- client_context_ = client_context_factory.getContext()
97
- client_context_.set_cipher_list(cipher_list)
98
- client_context_.set_options(SSL.OP_NO_RENEGOTIATION)
99
- endpoint = endpoints.SSL4ClientEndpoint(reactor, self.host, self.port, client_context_factory)
100
- else:
101
- endpoint = endpoints.TCP4ClientEndpoint(reactor, self.host, self.port)
102
- d = endpoint.connect(self.factory)
103
- d.addCallback(conn_callback)
104
- d.addErrback(conn_err_callback)
105
-
106
- reactor_thread = Thread(target=self.run_reactor, daemon=True)
107
- reactor_thread.start()
108
-
109
- def send_after_queue_empty(self, data):
110
- while not self._ready_to_exit():
111
- self.add_to_sending_queue(data)
112
- time.sleep(2)
113
-
114
- def check_client_alive(self):
115
- return self.factory.num_connections > 0
116
-
117
- def stop(self):
118
- self.tcp_manager.connection_timeout()
119
-
120
- def send_stop_signal(self):
121
- self.send_after_queue_empty(self.ACK_STOP)
122
- while not self._ready_to_exit():
123
- if not self.check_client_alive():
124
- break
125
- time.sleep(1)
126
- while not self.tcp_manager.kill_process:
127
- time.sleep(1)
128
-
129
- def add_to_sending_queue(self, data: Union[bytes, TCPDataItem], rank: int = 0, step: int = 0):
130
- if self._ready_to_exit():
131
- return
132
-
133
- send_data = data
134
- if not isinstance(data, TCPDataItem):
135
- send_data = TCPDataItem(data=data,
136
- sequence_number=self.sequence_number,
137
- rank=rank,
138
- step=step)
139
- self.sequence_number += 1
140
- try:
141
- self.send_queue.put(send_data, block=True, timeout=self.QUEUE_PENDING_TIME)
142
- except Exception as e:
143
- logger.error(f"send_queue put send_data timeout, rank: {send_data.rank}, step: {send_data.step},"
144
- f"sequence_number: {send_data.sequence_number}, {str(e)}")
145
-
146
- def _send_data(self, data: TCPDataItem):
147
- self.tcp_manager.send_wrapped_data(data.raw_data,
148
- sequence_number=data.sequence_number,
149
- rank=data.rank,
150
- step=data.step
151
- )
152
-
153
- def _sending_queue_data(self):
154
- while True:
155
- if not self.tcp_manager.is_connected:
156
- continue
157
-
158
- while self.send_queue.qsize() > 0:
159
- if self._ready_to_exit():
160
- break
161
- if len(self.resend_dict) < self.MAX_SENDING_QUEUE_SIZE:
162
- data_obj = self.send_queue.get()
163
- self._send_data(data_obj)
164
- resend_key = str(data_obj.sequence_number) + "_" + str(data_obj.rank) + "_" + str(data_obj.step)
165
- if resend_key not in self.resend_dict.keys():
166
- # Send data for the first time
167
- self.resend_dict[resend_key] = data_obj
168
- else:
169
- time.sleep(0.1)
170
-
171
- if self._ready_to_exit():
172
- logger.debug("Successfully close sending process.")
173
- break
174
- time.sleep(0.1)
175
-
176
- def _destroy_queue_data(self):
177
- while True:
178
- if self._ready_to_exit():
179
- break
180
-
181
- while len(self.resend_dict) > 0 and self.tcp_manager.ack_queue.qsize() > 0:
182
- ack_info, seq_number, rank, step = self.tcp_manager.ack_queue.get()
183
- obj_key = str(seq_number) + "_" + str(rank) + "_" + str(step)
184
- current_item = self.resend_dict.get(obj_key)
185
-
186
- if current_item is None:
187
- continue
188
-
189
- if ack_info == self.ACK_SUCCESS:
190
- self.resend_dict.pop(obj_key)
191
- elif ack_info == self.ACK_BUSY:
192
- logger.debug("RECV BUSY ACK")
193
- if current_item.busy_time > 5:
194
- self._resend_data(current_item)
195
- else:
196
- current_item.busy_time += 1
197
- elif ack_info == self.ACK_ERROR:
198
- logger.debug("RECV ERROR ACK")
199
- self._resend_data(current_item)
200
- elif ack_info == self.ACK_STOP_CONFIRM:
201
- logger.debug("RECV STOP ACK")
202
- self.factory.num_connections -= 1
203
-
204
- break
205
-
206
- time.sleep(0.1)
207
-
208
- def _resend_data(self, data: TCPDataItem):
209
- if data.retry_times < self.RESEND_RETRY_TIMES:
210
- data.retry_times += 1
211
- logger.debug(f"Resend data seq number: {data.sequence_number}")
212
- self.add_to_sending_queue(data)
213
- else:
214
- self.resend_dict.pop(data.sequence_number)
215
- logger.debug(f"SKIP send sequence number {data.sequence_number} after retry {data.retry_times} times!")
216
-
217
- def _pending_data(self, data: TCPDataItem):
218
- if data.pending_time >= self.RESEND_PENDING_TIME:
219
- self.resend_dict.pop(data.sequence_number)
220
- logger.debug(f"SKIP send sequence number {data.sequence_number} after pending {data.pending_time} times!")
221
- return
222
-
223
- # wait time is 100MB per second
224
- pending_time = max(1, len(data.raw_data) // (2 ** 20 * 50))
225
- data.pending_time += pending_time
226
- time.sleep(pending_time)
227
-
228
- def _ready_to_exit(self):
229
- return self.signal_exit or self.tcp_manager.signal_exit
230
-
231
-
232
- class ClientProtocol(protocol.Protocol):
233
- TIMEOUT = 60 * 10
234
-
235
- def __init__(self, ack_queue_size=100, chunk_size=65536, check_sum=False):
236
- self.buffer = io.BytesIO()
237
- self.is_connected = False
238
- self.check_sum = check_sum
239
- self.tell = 0
240
- self.ack_queue = Queue(maxsize=ack_queue_size)
241
- self.file_sender = FileSender()
242
- self.file_sender.CHUNK_SIZE = chunk_size
243
- self.signal_exit = False
244
- self.defer = None
245
- self.kill_process = False
246
-
247
- def dataReceived(self, data):
248
- if self.timeout_call.active():
249
- self.timeout_call.reset(self.TIMEOUT)
250
-
251
- self.buffer.seek(0, 2)
252
- self.buffer.write(data)
253
- self.buffer.seek(self.tell)
254
- while True:
255
- if len(self.buffer.getvalue()) >= 29: # 5 + 8 * 3
256
- ack = self.buffer.read(5)
257
- seq_number = struct.unpack('!Q', self.buffer.read(8))[0]
258
- rank = struct.unpack('!Q', self.buffer.read(8))[0]
259
- step = struct.unpack('!Q', self.buffer.read(8))[0]
260
- if ack == b"KILL_":
261
- self.kill_process = True
262
- logger.debug(f"接收到KILL信号, PID {os.getpid()}")
263
- if ack == b"OVER_":
264
- self.factory.num_connections -= 1
265
- self.tell += 29
266
- if not self.ack_queue.full():
267
- self.ack_queue.put((ack, seq_number, rank, step))
268
- self.buffer = io.BytesIO(self.buffer.getvalue()[self.tell:])
269
- self.tell = 0
270
- else:
271
- time.sleep(0.1)
272
- else:
273
- break
274
-
275
- def send_wrapped_data(self, data, sequence_number: int = 0, rank: int = 0, step: int = 0):
276
- length = len(data)
277
- md5_hash = hashlib.md5(data).hexdigest() if self.check_sum else ""
278
- while True:
279
- if self.defer is None or self.defer.called:
280
- self.defer = self.send_large_data(
281
- length.to_bytes(8, byteorder='big') +
282
- sequence_number.to_bytes(8, byteorder='big') +
283
- rank.to_bytes(8, byteorder='big') +
284
- step.to_bytes(8, byteorder='big') +
285
- md5_hash.encode() +
286
- data)
287
- break
288
- time.sleep(0.01)
289
-
290
- def send_large_data(self, data):
291
- d = self.file_sender.beginFileTransfer(io.BytesIO(data), self.transport)
292
- return d
293
-
294
- def connection_timeout(self):
295
- if self.factory.num_connections <= 0:
296
- return
297
-
298
- self.factory.num_connections -= 1
299
- logger.debug(f"超时退出{self.transport.addr}, PID {os.getpid()}")
300
- self.transport.loseConnection()
301
-
302
- def connectionMade(self):
303
- self.timeout_call = reactor.callLater(self.TIMEOUT, self.connection_timeout)
304
- self.is_connected = True
305
- self.factory.num_connections += 1
306
- logger.info("successfully connect server")
307
-
308
- def connectionLost(self, reason):
309
- self.signal_exit = True
310
- self.factory.num_connections -= 1
311
- logger.info(f"Lost connection with server, reason is : {reason}")
312
-
313
-
314
- class MessageClientFactory(protocol.ClientFactory):
315
- def __init__(self):
316
- self.num_connections = 0
317
-
318
- def clientConnectionFailed(self, connector, reason):
319
- logger.info(f"Fail to connection with server: {reason.getErrorMessage()}")
320
- reactor.stop()
321
-
322
- def clientConnectionLost(self, connector, reason):
323
- logger.info(f"Client lost connection with server: {reason.getErrorMessage()}")
324
- reactor.stop()
1
+ import hashlib
2
+ import io
3
+ import struct
4
+ import time
5
+ import os
6
+ import signal
7
+ import sys
8
+ from queue import Queue
9
+ from threading import Thread
10
+ from typing import Union
11
+
12
+ from twisted.internet import reactor, protocol, endpoints
13
+ from twisted.protocols.basic import FileSender
14
+
15
+ from msprobe.pytorch.common.utils import logger
16
+ from msprobe.pytorch.api_accuracy_checker.tensor_transport_layer.ssl_config import cipher_list
17
+
18
+
19
+ class TCPDataItem:
20
+ def __init__(self, data,
21
+ sequence_number: int,
22
+ rank: int = 0,
23
+ step: int = 0):
24
+ self.raw_data = data
25
+ self.sequence_number = sequence_number
26
+ self.rank = rank
27
+ self.step = step
28
+ self.retry_times = 0
29
+ self.pending_time = 0
30
+ self.busy_time = 0
31
+
32
+
33
class TCPClient:
    """Queue-based client that sends data items to a server and tracks their ACKs.

    Two daemon threads are started by the constructor:

    * the sending thread drains ``send_queue`` and hands each item to the
      protocol object (``tcp_manager``), remembering it in ``resend_dict``;
    * the destroy thread consumes ACK records from ``tcp_manager.ack_queue``
      and either forgets the acknowledged item or schedules a resend.

    ``resend_dict`` is keyed by the string ``"<sequence_number>_<rank>_<step>"``.
    """

    MAX_SENDING_QUEUE_SIZE = 20
    ACK_SUCCESS = b"OK___"
    ACK_ERROR = b"ERROR"
    ACK_BUSY = b"BUSY_"
    ACK_STOP = b"STOP_"
    ACK_STOP_CONFIRM = b"OVER_"
    ACK_KILL_PROCESS = b"KILL_"

    QUEUE_PENDING_TIME = 600  # if the queue stays blocked for 10 minutes, give up on the put
    RESEND_RETRY_TIMES = 2  # maximum number of retransmissions
    RESEND_TIMER_TIME = 5  # timeout timer for receiving an ACK
    RESEND_PENDING_TIME = 60  # drop the data once it has been pending for more than 1 minute

    def __init__(self, host="localhost", port=8000, check_sum=False, tls_path=None):
        """Create the queues and protocol object and start both worker threads.

        Args:
            host: Server host to connect to.
            port: Server port to connect to.
            check_sum: Forwarded to ``ClientProtocol``; enables the MD5 field.
            tls_path: Directory containing ``client.key`` / ``client.crt``;
                when falsy a plain TCP endpoint is used.
        """
        self.send_queue = Queue(self.MAX_SENDING_QUEUE_SIZE)
        self.resend_dict = dict()
        self.host = host
        self.port = port
        self.tls_path = tls_path
        self.factory = None
        self.sequence_number = 0
        self.signal_exit = False
        self.tcp_manager = ClientProtocol(ack_queue_size=100,
                                          chunk_size=655360,
                                          check_sum=check_sum)
        # ``daemon=True`` replaces the deprecated ``Thread.setDaemon(True)``.
        self.send_thread = Thread(target=self._sending_queue_data, daemon=True)
        self.send_thread.start()
        self.destroy_thread = Thread(target=self._destroy_queue_data, daemon=True)
        self.destroy_thread.start()

    @staticmethod
    def run_reactor():
        """Run the Twisted reactor without installing signal handlers."""
        reactor.run(installSignalHandlers=False)

    @staticmethod
    def _resend_key(data):
        """Return the ``resend_dict`` key for an item: ``"<seq>_<rank>_<step>"``."""
        return str(data.sequence_number) + "_" + str(data.rank) + "_" + str(data.step)

    def start(self):
        """Connect to the server and run the reactor in a daemon thread."""

        def conn_callback(cur_protocol):
            # NOTE(review): getPeer().host is compared verbatim with self.host;
            # presumably callers pass an IP address rather than a host name - confirm.
            if cur_protocol.transport and cur_protocol.transport.getPeer().host == self.host:
                logger.debug(f"Process: {os.getpid()} connects to server successfully.")
            else:
                logger.warning(f"Process: {os.getpid()} fails to connect to server. ")
                # Raising here routes control to conn_err_callback (added after this callback).
                raise ConnectionError(f"Failed to connect to {self.host}.")

        def conn_err_callback(failure):
            self.signal_exit = True
            time.sleep(1)
            reactor.stop()
            logger.error(f"Failed to connected {self.host} {self.port}. Reason is {failure.getErrorMessage()}")
            # NOTE(review): SIGKILL to the current process is sent first, so the
            # parent kill below looks unreachable in practice - confirm intent.
            os.kill(os.getpid(), signal.SIGKILL)
            os.kill(os.getppid(), signal.SIGKILL)

        def cur_protocol():
            # Every connection reuses the single protocol instance owned by this client.
            return self.tcp_manager

        self.factory = MessageClientFactory()
        self.factory.protocol = cur_protocol
        if self.tls_path:
            from OpenSSL import SSL
            from twisted.internet import ssl
            client_key = os.path.join(self.tls_path, "client.key")
            client_crt = os.path.join(self.tls_path, "client.crt")
            client_context_factory = ssl.DefaultOpenSSLContextFactory(client_key, client_crt, SSL.TLSv1_2_METHOD)
            client_context_ = client_context_factory.getContext()
            client_context_.set_cipher_list(cipher_list)
            client_context_.set_options(SSL.OP_NO_RENEGOTIATION)
            endpoint = endpoints.SSL4ClientEndpoint(reactor, self.host, self.port, client_context_factory)
        else:
            endpoint = endpoints.TCP4ClientEndpoint(reactor, self.host, self.port)
        d = endpoint.connect(self.factory)
        d.addCallback(conn_callback)
        d.addErrback(conn_err_callback)

        reactor_thread = Thread(target=self.run_reactor, daemon=True)
        reactor_thread.start()

    def send_after_queue_empty(self, data):
        """Enqueue ``data`` every 2 seconds until the client is ready to exit."""
        while not self._ready_to_exit():
            self.add_to_sending_queue(data)
            time.sleep(2)

    def check_client_alive(self):
        """Return True when the factory reports at least one open connection."""
        # ``factory`` stays None until start() is called; report "not alive" then.
        return self.factory is not None and self.factory.num_connections > 0

    def stop(self):
        """Close the connection through the protocol's timeout handler."""
        self.tcp_manager.connection_timeout()

    def send_stop_signal(self):
        """Send the STOP marker and block until the protocol sets ``kill_process``."""
        self.send_after_queue_empty(self.ACK_STOP)
        while not self._ready_to_exit():
            if not self.check_client_alive():
                break
            time.sleep(1)
        while not self.tcp_manager.kill_process:
            time.sleep(1)

    def add_to_sending_queue(self, data: "Union[bytes, TCPDataItem]", rank: int = 0, step: int = 0):
        """Put ``data`` on the sending queue, wrapping raw bytes in a ``TCPDataItem``.

        Raw bytes receive the next sequence number; an existing ``TCPDataItem``
        (a resend) is queued unchanged. Nothing is queued once the client is
        ready to exit. A failed put is logged, not raised.
        """
        if self._ready_to_exit():
            return

        send_data = data
        if not isinstance(data, TCPDataItem):
            send_data = TCPDataItem(data=data,
                                    sequence_number=self.sequence_number,
                                    rank=rank,
                                    step=step)
            self.sequence_number += 1
        try:
            self.send_queue.put(send_data, block=True, timeout=self.QUEUE_PENDING_TIME)
        except Exception as e:
            logger.error(f"send_queue put send_data timeout, rank: {send_data.rank}, step: {send_data.step},"
                         f"sequence_number: {send_data.sequence_number}, {str(e)}")

    def _discard_resend_item(self, data):
        """Forget ``data`` in ``resend_dict``; a missing entry is ignored."""
        self.resend_dict.pop(self._resend_key(data), None)

    def _send_data(self, data: "TCPDataItem"):
        """Hand one item to the protocol object for transmission."""
        self.tcp_manager.send_wrapped_data(data.raw_data,
                                           sequence_number=data.sequence_number,
                                           rank=data.rank,
                                           step=data.step
                                           )

    def _sending_queue_data(self):
        """Sending-thread loop: transmit queued items while the resend window has room."""
        while True:
            if not self.tcp_manager.is_connected:
                # Sleep instead of spinning so waiting for the connection does not burn a core.
                time.sleep(0.1)
                continue

            while self.send_queue.qsize() > 0:
                if self._ready_to_exit():
                    break
                if len(self.resend_dict) < self.MAX_SENDING_QUEUE_SIZE:
                    data_obj = self.send_queue.get()
                    self._send_data(data_obj)
                    # Only the first transmission registers the item; resends keep the entry.
                    self.resend_dict.setdefault(self._resend_key(data_obj), data_obj)
                else:
                    # Too many unacknowledged items: wait for ACKs to free a slot.
                    time.sleep(0.1)

            if self._ready_to_exit():
                logger.debug("Successfully close sending process.")
                break
            time.sleep(0.1)

    def _destroy_queue_data(self):
        """Destroy-thread loop: apply received ACK records to ``resend_dict``."""
        while True:
            if self._ready_to_exit():
                break

            while len(self.resend_dict) > 0 and self.tcp_manager.ack_queue.qsize() > 0:
                ack_info, seq_number, rank, step = self.tcp_manager.ack_queue.get()
                obj_key = str(seq_number) + "_" + str(rank) + "_" + str(step)
                current_item = self.resend_dict.get(obj_key)

                if current_item is None:
                    # ACK for an item that is not (or no longer) tracked.
                    continue

                if ack_info == self.ACK_SUCCESS:
                    self.resend_dict.pop(obj_key)
                elif ack_info == self.ACK_BUSY:
                    logger.debug("RECV BUSY ACK")
                    # Tolerate a few BUSY replies before actually resending.
                    if current_item.busy_time > 5:
                        self._resend_data(current_item)
                    else:
                        current_item.busy_time += 1
                elif ack_info == self.ACK_ERROR:
                    logger.debug("RECV ERROR ACK")
                    self._resend_data(current_item)
                elif ack_info == self.ACK_STOP_CONFIRM:
                    logger.debug("RECV STOP ACK")
                    self.factory.num_connections -= 1

                    break

            time.sleep(0.1)

    def _resend_data(self, data: "TCPDataItem"):
        """Re-queue ``data`` until ``RESEND_RETRY_TIMES`` is reached, then drop it."""
        if data.retry_times < self.RESEND_RETRY_TIMES:
            data.retry_times += 1
            logger.debug(f"Resend data seq number: {data.sequence_number}")
            self.add_to_sending_queue(data)
        else:
            # resend_dict is keyed by "<seq>_<rank>_<step>", not by the bare
            # sequence number, so the composite key must be used to drop the item.
            self._discard_resend_item(data)
            logger.debug(f"SKIP send sequence number {data.sequence_number} after retry {data.retry_times} times!")

    def _pending_data(self, data: "TCPDataItem"):
        """Sleep proportionally to the payload size, or drop ``data`` after too long."""
        if data.pending_time >= self.RESEND_PENDING_TIME:
            self._discard_resend_item(data)
            logger.debug(f"SKIP send sequence number {data.sequence_number} after pending {data.pending_time} times!")
            return

        # Wait one second per 50 MiB of payload, with a minimum of one second.
        pending_time = max(1, len(data.raw_data) // (2 ** 20 * 50))
        data.pending_time += pending_time
        time.sleep(pending_time)

    def _ready_to_exit(self):
        """Return True once either the client or the protocol has signalled exit."""
        return self.signal_exit or self.tcp_manager.signal_exit
231
+
232
+
233
class ClientProtocol(protocol.Protocol):
    """Twisted protocol that sends framed payloads and parses fixed-size ACK records.

    Each ACK record received from the server is 29 bytes: a 5-byte tag followed
    by three unsigned 64-bit network-order integers (sequence number, rank, step).
    Parsed records are published on ``ack_queue``.
    """

    TIMEOUT = 60 * 10
    ACK_RECORD_FORMAT = '!5sQQQ'
    ACK_RECORD_SIZE = 29  # 5 + 8 * 3

    def __init__(self, ack_queue_size=100, chunk_size=65536, check_sum=False):
        """Initialise buffers and flags.

        Args:
            ack_queue_size: Capacity of ``ack_queue``.
            chunk_size: Assigned to the ``FileSender`` instance's ``CHUNK_SIZE``.
            check_sum: When true, an MD5 hex digest is added to every frame.
        """
        self.buffer = io.BytesIO()
        self.is_connected = False
        self.check_sum = check_sum
        self.tell = 0  # offset of the first unparsed byte in ``buffer``
        self.ack_queue = Queue(maxsize=ack_queue_size)
        self.file_sender = FileSender()
        self.file_sender.CHUNK_SIZE = chunk_size
        self.signal_exit = False
        self.defer = None
        self.kill_process = False
        self.timeout_call = None  # scheduled in connectionMade

    def dataReceived(self, data):
        """Buffer incoming bytes and publish every complete ACK record.

        Incomplete trailing bytes are kept for the next call. If ``ack_queue``
        stays full for 0.1 seconds the record is dropped and a warning logged.
        """
        from queue import Full

        if self.timeout_call is not None and self.timeout_call.active():
            self.timeout_call.reset(self.TIMEOUT)

        # Append at the end, remember the total size, then rewind to the unparsed part.
        self.buffer.seek(0, io.SEEK_END)
        self.buffer.write(data)
        buffered = self.buffer.tell()
        self.buffer.seek(self.tell)

        # Compare against the *unread* remainder; checking the whole buffer
        # length could trigger a short read and a struct.error.
        while buffered - self.tell >= self.ACK_RECORD_SIZE:
            ack, seq_number, rank, step = struct.unpack(self.ACK_RECORD_FORMAT,
                                                        self.buffer.read(self.ACK_RECORD_SIZE))
            self.tell += self.ACK_RECORD_SIZE
            if ack == b"KILL_":
                self.kill_process = True
                logger.debug(f"接收到KILL信号, PID {os.getpid()}")
            if ack == b"OVER_":
                self.factory.num_connections -= 1
            try:
                # Wait briefly for the consumer instead of silently discarding.
                self.ack_queue.put((ack, seq_number, rank, step), timeout=0.1)
            except Full:
                logger.warning(f"ACK queue is full, drop ack {ack} of sequence number {seq_number}.")

        # Discard everything already parsed so the buffer does not grow without bound.
        self.buffer = io.BytesIO(self.buffer.getvalue()[self.tell:])
        self.tell = 0

    def send_wrapped_data(self, data, sequence_number: int = 0, rank: int = 0, step: int = 0):
        """Frame ``data`` and send it once the previous transfer has finished.

        Frame layout: four 8-byte big-endian integers (payload length, sequence
        number, rank, step), then the MD5 hex digest of the payload (empty when
        ``check_sum`` is false), then the payload itself.
        """
        length = len(data)
        md5_hash = hashlib.md5(data).hexdigest() if self.check_sum else ""
        # NOTE(review): this polls with time.sleep, so it presumably runs off the
        # reactor thread - confirm that starting the transfer from there is safe.
        while True:
            if self.defer is None or self.defer.called:
                self.defer = self.send_large_data(
                    length.to_bytes(8, byteorder='big') +
                    sequence_number.to_bytes(8, byteorder='big') +
                    rank.to_bytes(8, byteorder='big') +
                    step.to_bytes(8, byteorder='big') +
                    md5_hash.encode() +
                    data)
                break
            time.sleep(0.01)

    def send_large_data(self, data):
        """Stream ``data`` to the transport via ``FileSender``; return its Deferred."""
        d = self.file_sender.beginFileTransfer(io.BytesIO(data), self.transport)
        return d

    def connection_timeout(self):
        """Close the connection unless the factory already counts no connections."""
        if self.factory.num_connections <= 0:
            return

        self.factory.num_connections -= 1
        logger.debug(f"超时退出{self.transport.addr}, PID {os.getpid()}")
        self.transport.loseConnection()

    def connectionMade(self):
        """Arm the inactivity timer and register the new connection."""
        self.timeout_call = reactor.callLater(self.TIMEOUT, self.connection_timeout)
        self.is_connected = True
        self.factory.num_connections += 1
        logger.info("successfully connect server")

    def connectionLost(self, reason):
        """Signal exit and decrement the factory's connection counter."""
        self.signal_exit = True
        self.factory.num_connections -= 1
        logger.info(f"Lost connection with server, reason is : {reason}")
313
+
314
+
315
class MessageClientFactory(protocol.ClientFactory):
    """Client factory that counts open connections and stops the reactor on loss or failure."""

    def __init__(self):
        # Counter adjusted by the protocol as connections open and close.
        self.num_connections = 0

    def clientConnectionFailed(self, connector, reason):
        """Log why the connection attempt failed, then stop the reactor."""
        error_message = reason.getErrorMessage()
        logger.info(f"Fail to connection with server: {error_message}")
        reactor.stop()

    def clientConnectionLost(self, connector, reason):
        """Log why the established connection was lost, then stop the reactor."""
        error_message = reason.getErrorMessage()
        logger.info(f"Client lost connection with server: {error_message}")
        reactor.stop()