mindstudio-probe 8.1.0__py3-none-any.whl → 8.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/METADATA +3 -2
- {mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/RECORD +46 -47
- msprobe/core/common/const.py +1 -0
- msprobe/core/common/file_utils.py +36 -18
- msprobe/core/common/utils.py +19 -8
- msprobe/core/compare/acc_compare.py +14 -5
- msprobe/core/compare/utils.py +7 -1
- msprobe/core/data_dump/data_collector.py +144 -90
- msprobe/core/data_dump/json_writer.py +31 -1
- msprobe/core/debugger/precision_debugger.py +19 -18
- msprobe/core/service.py +1 -0
- msprobe/core/single_save/single_comparator.py +25 -25
- msprobe/core/single_save/single_saver.py +5 -16
- msprobe/docs/01.installation.md +1 -0
- msprobe/docs/05.data_dump_PyTorch.md +3 -0
- msprobe/docs/06.data_dump_MindSpore.md +3 -0
- msprobe/docs/08.accuracy_checker_online_PyTorch.md +2 -2
- msprobe/docs/25.tool_function_introduction.md +19 -19
- msprobe/docs/33.generate_operator_MindSpore.md +10 -19
- msprobe/mindspore/api_accuracy_checker/api_accuracy_checker.py +1 -0
- msprobe/mindspore/api_accuracy_checker/compute_element.py +0 -1
- msprobe/mindspore/api_accuracy_checker/generate_op_script/op_generator.py +10 -1
- msprobe/mindspore/api_accuracy_checker/torch_mindtorch_importer.py +2 -1
- msprobe/mindspore/common/utils.py +1 -0
- msprobe/mindspore/debugger/precision_debugger.py +4 -4
- msprobe/mindspore/dump/cell_dump_process.py +13 -38
- msprobe/mindspore/dump/cell_dump_with_insert_gradient.py +1 -26
- msprobe/mindspore/dump/hook_cell/api_register.py +3 -3
- msprobe/mindspore/dym_loader/hook_dynamic_loader.cpp +4 -4
- msprobe/mindspore/mindspore_service.py +3 -0
- msprobe/mindspore/monitor/features.py +10 -9
- msprobe/mindspore/monitor/optimizer_collect.py +4 -1
- msprobe/pytorch/api_accuracy_checker/compare/compare_utils.py +20 -20
- msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py +7 -7
- msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py +2 -0
- msprobe/pytorch/common/utils.py +1 -1
- msprobe/pytorch/debugger/precision_debugger.py +28 -25
- msprobe/pytorch/hook_module/api_register.py +3 -3
- msprobe/pytorch/monitor/optimizer_collect.py +4 -1
- msprobe/pytorch/pytorch_service.py +3 -0
- msprobe/visualization/compare/mode_adapter.py +9 -0
- msprobe/visualization/utils.py +3 -0
- msprobe/mindspore/api_accuracy_checker/generate_op_script/config_op.json +0 -9
- {mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE +0 -0
- {mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL +0 -0
- {mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt +0 -0
- {mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt +0 -0
msprobe/pytorch/api_accuracy_checker/run_ut/multi_run_ut.py
CHANGED

@@ -70,7 +70,7 @@ def split_json_file(input_file, num_splits, filter_api):
         split_forward_data = dict(items[start:end])
         temp_data = {
             **input_data,
-            "data":{
+            "data": {
                 **split_forward_data,
                 **backward_data
             }
@@ -141,7 +141,7 @@ def run_parallel_ut(config):
 
     for api_info in config.api_files:
         cmd = create_cmd(api_info, next(device_id_cycle))
-        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
+        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL,
                                    text=True, bufsize=1, shell=False)
         processes.append(process)
         threading.Thread(target=read_process_output, args=(process,), daemon=True).start()
@@ -187,8 +187,8 @@ def run_parallel_ut(config):
 
 
 def prepare_config(args):
-    api_info_file_checker = FileChecker(file_path=args.api_info_file, path_type=FileCheckConst.FILE,
-
+    api_info_file_checker = FileChecker(file_path=args.api_info_file, path_type=FileCheckConst.FILE,
+                                        ability=FileCheckConst.READ_ABLE, file_type=FileCheckConst.JSON_SUFFIX)
     api_info = api_info_file_checker.common_check()
     out_path = args.out_path if args.out_path else Const.DEFAULT_PATH
     create_directory(out_path)
@@ -197,11 +197,11 @@ def prepare_config(args):
     split_files, total_items = split_json_file(api_info, args.num_splits, args.filter_api)
     config_path = args.config_path if args.config_path else None
     if config_path:
-        config_path_checker = FileChecker(config_path, FileCheckConst.FILE,
+        config_path_checker = FileChecker(config_path, FileCheckConst.FILE,
                                           FileCheckConst.READ_ABLE, FileCheckConst.JSON_SUFFIX)
         config_path = config_path_checker.common_check()
     result_csv_path = args.result_csv_path or os.path.join(
-
+        out_path, f"accuracy_checking_result_{time.strftime('%Y%m%d%H%M%S')}.csv")
     if not args.result_csv_path:
         details_csv_path = os.path.join(out_path, f"accuracy_checking_details_{time.strftime('%Y%m%d%H%M%S')}.csv")
     comparator = Comparator(result_csv_path, details_csv_path, False)
@@ -220,7 +220,7 @@ def main():
     signal.signal(signal.SIGTERM, signal_handler)
     parser = argparse.ArgumentParser(description='Run UT in parallel')
     _run_ut_parser(parser)
-    parser.add_argument('-n', '--num_splits', type=int, choices=range(1, 65), default=8,
+    parser.add_argument('-n', '--num_splits', type=int, choices=range(1, 65), default=8,
                         help='Number of splits for parallel processing. Range: 1-64')
     args = parser.parse_args()
     config = prepare_config(args)
msprobe/pytorch/api_accuracy_checker/tensor_transport_layer/utils.py
CHANGED

@@ -12,6 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import gc
 import os
 from datetime import datetime, timezone
@@ -117,6 +118,7 @@ def load_ssl_pem(key_file, cert_file, ca_file, crl_file):
     with FileOpen(key_file, "rb") as f:
         key = crypto.load_privatekey(crypto.FILETYPE_PEM, f.read(), passphrase.encode())
     del passphrase
+    gc.collect()
     with FileOpen(cert_file, "rb") as f:
         crt = crypto.load_certificate(crypto.FILETYPE_PEM, f.read())
     check_crt_valid(crt)
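A small aside on the gc.collect() added after del passphrase: deleting the name only drops one reference, and the explicit collection is a best-effort way to reclaim the sensitive value sooner. A minimal standalone sketch of the pattern (the passphrase value here is made up):

```python
# Illustrative only: drop a sensitive value and prompt collection,
# mirroring the del/gc.collect() pairing added to load_ssl_pem above.
import gc

passphrase = "s3cret"              # stand-in value, not the real passphrase source
key_material = passphrase.encode()

del passphrase                     # remove the last strong reference
gc.collect()                       # also sweep any cyclic garbage still holding it
```

CPython frees most objects by reference counting as soon as del runs; the explicit collect() mainly helps when the value is still reachable through a cycle, so this is hygiene rather than a guarantee.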
msprobe/pytorch/common/utils.py
CHANGED
@@ -406,7 +406,7 @@ def load_api_data(api_data_bytes):
     """Load data from bytes stream"""
     try:
         buffer = io.BytesIO(api_data_bytes)
-        buffer = torch.load(buffer, map_location="cpu")
+        buffer = torch.load(buffer, map_location="cpu", weights_only=False)
     except Exception as e:
         raise RuntimeError("load api_data from bytes failed") from e
     return buffer
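For context on the weights_only change: recent PyTorch releases move torch.load toward weights_only=True by default, which rejects arbitrary pickled objects such as the argument dictionaries this transport layer ships, so the call now opts out explicitly. A minimal round-trip sketch, assuming a PyTorch version that accepts the weights_only keyword; the payload contents are invented:

```python
# Sketch: serialize a dict of tensors/kwargs to bytes and load it back,
# as load_api_data does. weights_only=False restores full-pickle loading.
import io
import torch

payload = {"args": (torch.randn(2, 3),), "kwargs": {"alpha": 1.0}}

buf = io.BytesIO()
torch.save(payload, buf)
api_data_bytes = buf.getvalue()

restored = torch.load(io.BytesIO(api_data_bytes), map_location="cpu",
                      weights_only=False)
print(restored["kwargs"])  # {'alpha': 1.0}
```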
msprobe/pytorch/debugger/precision_debugger.py
CHANGED

@@ -53,19 +53,36 @@ class PrecisionDebugger(BasePrecisionDebugger):
         self.module_dumper = ModuleDumper(self.service)
         self.ori_customer_func = {}
         self.enable_dataloader = self.config.enable_dataloader
-        self.
-
-    @property
-    def instance(self):
-        return self._instance
+        self._param_warning()
 
     @staticmethod
-    def
+    def _get_task_config(task, json_config):
         return parse_task_config(task, json_config)
 
+    @staticmethod
+    def _iter_tracer(func):
+        def func_wrapper(*args, **kwargs):
+            debugger_instance = PrecisionDebugger._instance
+            if not debugger_instance:
+                raise MsprobeException(
+                    MsprobeException.INTERFACE_USAGE_ERROR,
+                    f"PrecisionDebugger must be instantiated before executing the dataloader iteration"
+                )
+
+            debugger_instance.enable_dataloader = False
+            if not debugger_instance.service.first_start:
+                debugger_instance.stop()
+                debugger_instance.step()
+            result = func(*args, **kwargs)
+            debugger_instance.start()
+            debugger_instance.enable_dataloader = True
+            return result
+
+        return func_wrapper
+
     @classmethod
     def start(cls, model=None, token_range=None):
-        instance = cls.
+        instance = cls._get_instance()
         if instance is None:
             return
@@ -79,7 +96,7 @@ class PrecisionDebugger(BasePrecisionDebugger):
 
     @classmethod
     def stop(cls):
-        instance = cls.
+        instance = cls._get_instance()
         if instance is None:
             return
         if instance.enable_dataloader:
@@ -89,7 +106,7 @@ class PrecisionDebugger(BasePrecisionDebugger):
 
     @classmethod
     def step(cls):
-        instance = cls.
+        instance = cls._get_instance()
         if instance is None:
             return
         cls._instance.service.step()
@@ -115,7 +132,7 @@ class PrecisionDebugger(BasePrecisionDebugger):
             return
         instance.service.save(variable, name, save_backward)
 
-    def
+    def _param_warning(self):
         if self.model is not None:
             logger.warning_on_rank_0(
                 "The 'model' parameter in the PrecisionDebugger will be deprecated in the future."
@@ -123,7 +140,7 @@ class PrecisionDebugger(BasePrecisionDebugger):
             )
         if self.enable_dataloader:
             logger.warning_on_rank_0("The enable_dataloader feature will be deprecated in the future.")
-            dataloader._BaseDataLoaderIter.__next__ =
+            dataloader._BaseDataLoaderIter.__next__ = self._iter_tracer(dataloader._BaseDataLoaderIter.__next__)
 
 
 def module_dump(module, dump_name):
@@ -155,17 +172,3 @@ def module_dump_end():
             f"PrecisionDebugger must be instantiated before using module_dump_end interface"
         )
     instance.module_dumper.stop_module_dump()
-
-
-def iter_tracer(func):
-    def func_wrapper(*args, **kwargs):
-        debugger_instance = PrecisionDebugger.instance
-        debugger_instance.enable_dataloader = False
-        if not debugger_instance.service.first_start:
-            debugger_instance.stop()
-            debugger_instance.step()
-        result = func(*args, **kwargs)
-        debugger_instance.start()
-        debugger_instance.enable_dataloader = True
-        return result
-    return func_wrapper
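The module-level iter_tracer removed above has moved into the class as the _iter_tracer static method shown earlier: wrapping _BaseDataLoaderIter.__next__ makes each batch fetch close the previous dump step, advance the step counter, and restart dumping. A standalone sketch of that wrapping pattern, with invented stand-ins (DemoDebugger, DemoIter) rather than msprobe classes:

```python
# Standalone illustration of the __next__-wrapping pattern (DemoDebugger and
# DemoIter are invented for this sketch; they are not msprobe classes).
class DemoDebugger:
    first_start = False
    def start(self): print("start dump")
    def stop(self): print("stop dump")
    def step(self): print("advance step")

def iter_tracer(func, debugger):
    def func_wrapper(*args, **kwargs):
        if not debugger.first_start:
            debugger.stop()    # close out the previous step...
            debugger.step()    # ...and advance the step counter
        result = func(*args, **kwargs)   # fetch the next batch
        debugger.start()                 # resume dumping for the new step
        return result
    return func_wrapper

class DemoIter:
    def __init__(self):
        self.n = 0
    def __next__(self):
        self.n += 1
        return self.n

debugger = DemoDebugger()
DemoIter.__next__ = iter_tracer(DemoIter.__next__, debugger)
it = DemoIter()
print(next(it), next(it))  # dump hooks fire around each batch fetch
```

Keeping the wrapper on the class also lets it check PrecisionDebugger._instance and raise an explicit usage error when the debugger was never instantiated, as the new code above does.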
@@ -89,12 +89,12 @@ def dist_module_forward(module, *args, **kwargs):
     try:
         bound = inspect.signature(module.api_func).bind(*args, **kwargs)
         bound.apply_defaults()
-
+        use_async_op_flag = bound.arguments.get("async_op", False)
     except Exception as e:
-
+        use_async_op_flag = False
         logger.warning(f"fail to get dist api's func signature because {e}, no wait")
 
-    if
+    if use_async_op_flag or module.api_name in ["isend", "irecv"]:
         if handle and hasattr(handle, 'wait'):
             handle.wait()
         if module.api_name == "batch_isend_irecv":
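The replaced lines recover the effective async_op argument of the wrapped distributed call by binding the observed arguments against the API's signature, so keyword defaults become visible as well. A self-contained sketch of that binding trick; dist_op is a stand-in, not a real torch.distributed API:

```python
# Sketch of reading a (possibly defaulted) keyword via signature binding.
import inspect

def dist_op(tensor, dst=0, async_op=False):   # stand-in signature
    return "handle" if async_op else None

def get_async_op_flag(func, *args, **kwargs):
    try:
        bound = inspect.signature(func).bind(*args, **kwargs)
        bound.apply_defaults()                 # make defaults visible in .arguments
        return bound.arguments.get("async_op", False)
    except (TypeError, ValueError):
        return False                           # fall back to "no wait" on failure

print(get_async_op_flag(dist_op, "tensor"))                 # False
print(get_async_op_flag(dist_op, "tensor", async_op=True))  # True
```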
msprobe/pytorch/monitor/optimizer_collect.py
CHANGED

@@ -109,6 +109,9 @@ class OptimizerMon(object):
             else:
                 logger.warning(f"step of {name} is None, maybe something wrong happened.")
                 continue
+            if exp_avg is None or exp_avg_sq is None:
+                logger.warning(f"exp_avg or exp_avg_sq of {name} is None, skip calculation.")
+                continue
             exp_avg_hat = exp_avg / (1 - self.torch_opt.defaults['betas'][0] ** step)
             exp_avg_sq_hat = exp_avg_sq / (1 - self.torch_opt.defaults['betas'][1] ** step)
             update_dict[name] = exp_avg_hat / (torch.sqrt(exp_avg_sq_hat) + self.torch_opt.defaults['eps'])
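The guarded block reconstructs Adam's bias-corrected update from the optimizer state, which is why a missing exp_avg or exp_avg_sq now skips the parameter instead of failing on the arithmetic. A standalone sketch of the same calculation; the hyperparameter values are assumptions, the real ones come from torch_opt.defaults:

```python
# Sketch of the bias-corrected Adam update reconstructed by the monitor.
import torch

def adam_update(exp_avg, exp_avg_sq, step, beta1=0.9, beta2=0.999, eps=1e-8):
    # Mirrors the new guard: a parameter with missing state is skipped.
    if exp_avg is None or exp_avg_sq is None:
        return None
    exp_avg_hat = exp_avg / (1 - beta1 ** step)        # bias-corrected m_t
    exp_avg_sq_hat = exp_avg_sq / (1 - beta2 ** step)  # bias-corrected v_t
    return exp_avg_hat / (torch.sqrt(exp_avg_sq_hat) + eps)

print(adam_update(torch.tensor([0.05, -0.02]), torch.tensor([0.004, 0.001]), step=10))
print(adam_update(None, None, step=10))  # skipped instead of raising a TypeError
```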
@@ -296,7 +299,7 @@ class DeepSpeedZeroOptimizerStage3Mon(DeepSpeedZeroOptimizerMon):
         self.fp32_flat_groups = torch_opt.fp32_partitioned_groups_flat
         self.param2group = self.get_group_index()
 
-    def param_not_in_partition(self,
+    def param_not_in_partition(self, lp_param, group_idx):
         """Each param partioned across all zero ranks"""
         return False
msprobe/visualization/compare/mode_adapter.py
CHANGED

@@ -161,6 +161,7 @@ class ModeAdapter:
             else change_percentage
         precision_index = GraphConst.MAX_INDEX_KEY \
             if change_percentage > GraphConst.MAX_INDEX_KEY else change_percentage
+        precision_index = self._ignore_precision_index(node.id, precision_index)
         return precision_index, other_dict
 
     def prepare_real_data(self, node):
@@ -197,3 +198,11 @@ class ModeAdapter:
                 CompareConst.MAX_ABS_ERR: ToolTip.MAX_ABS_ERR,
                 CompareConst.MAX_RELATIVE_ERR: ToolTip.MAX_RELATIVE_ERR}
         return json.dumps(tips)
+
+    def _ignore_precision_index(self, node_id, precision_index):
+        node_id_split = node_id.split(Const.SEP)
+        if len(node_id_split) < 2:
+            return precision_index
+        if node_id.split(Const.SEP)[1] in GraphConst.IGNORE_PRECISION_INDEX:
+            return GraphConst.MAX_INDEX_KEY if self.compare_mode == GraphConst.MD5_COMPARE else GraphConst.MIN_INDEX_KEY
+        return precision_index
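The new _ignore_precision_index helper keys off the second dot-separated segment of the node id to pin the precision index for certain node types. A tiny sketch of that parsing; the separator, the ignore set, and the index constants below are invented stand-ins for Const.SEP and the GraphConst values:

```python
# Invented constants for illustration; msprobe's real values live in Const/GraphConst.
SEP = "."
IGNORE_PRECISION_INDEX = {"empty", "empty_like", "empty_with_format"}
MAX_INDEX_KEY, MIN_INDEX_KEY = 1, 0

def ignore_precision_index(node_id, precision_index, md5_mode=False):
    parts = node_id.split(SEP)
    if len(parts) < 2:
        return precision_index
    if parts[1] in IGNORE_PRECISION_INDEX:
        return MAX_INDEX_KEY if md5_mode else MIN_INDEX_KEY
    return precision_index

print(ignore_precision_index("Tensor.empty.0.forward", 0.7))  # 0 (pinned)
print(ignore_precision_index("Tensor.add.0.forward", 0.7))    # 0.7 (unchanged)
```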
msprobe/visualization/utils.py
CHANGED

{mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/LICENSE: File without changes
{mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/WHEEL: File without changes
{mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/entry_points.txt: File without changes
{mindstudio_probe-8.1.0.dist-info → mindstudio_probe-8.1.1.dist-info}/top_level.txt: File without changes