python-wml 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of python-wml might be problematic. Click here for more details.
- python_wml-3.0.0.dist-info/LICENSE +23 -0
- python_wml-3.0.0.dist-info/METADATA +51 -0
- python_wml-3.0.0.dist-info/RECORD +164 -0
- python_wml-3.0.0.dist-info/WHEEL +5 -0
- python_wml-3.0.0.dist-info/top_level.txt +1 -0
- wml/__init__.py +0 -0
- wml/basic_data_def/__init__.py +2 -0
- wml/basic_data_def/detection_data_def.py +279 -0
- wml/basic_data_def/io_data_def.py +2 -0
- wml/basic_img_utils.py +816 -0
- wml/img_patch.py +92 -0
- wml/img_utils.py +571 -0
- wml/iotoolkit/__init__.py +17 -0
- wml/iotoolkit/aic_keypoint.py +115 -0
- wml/iotoolkit/baidu_mask_toolkit.py +244 -0
- wml/iotoolkit/base_dataset.py +210 -0
- wml/iotoolkit/bboxes_statistics.py +515 -0
- wml/iotoolkit/build.py +0 -0
- wml/iotoolkit/cityscapes_toolkit.py +183 -0
- wml/iotoolkit/classification_data_statistics.py +25 -0
- wml/iotoolkit/coco_data_fwd.py +225 -0
- wml/iotoolkit/coco_keypoints.py +118 -0
- wml/iotoolkit/coco_keypoints_fmt2.py +103 -0
- wml/iotoolkit/coco_toolkit.py +397 -0
- wml/iotoolkit/coco_wholebody.py +269 -0
- wml/iotoolkit/common.py +108 -0
- wml/iotoolkit/crowd_pose.py +146 -0
- wml/iotoolkit/fast_labelme.py +110 -0
- wml/iotoolkit/image_folder.py +95 -0
- wml/iotoolkit/imgs_cache.py +58 -0
- wml/iotoolkit/imgs_reader_mt.py +73 -0
- wml/iotoolkit/labelme_base.py +102 -0
- wml/iotoolkit/labelme_json_to_img.py +49 -0
- wml/iotoolkit/labelme_toolkit.py +117 -0
- wml/iotoolkit/labelme_toolkit_fwd.py +733 -0
- wml/iotoolkit/labelmemckeypoints_dataset.py +169 -0
- wml/iotoolkit/lspet.py +48 -0
- wml/iotoolkit/mapillary_vistas_toolkit.py +269 -0
- wml/iotoolkit/mat_data.py +90 -0
- wml/iotoolkit/mckeypoints_statistics.py +28 -0
- wml/iotoolkit/mot_datasets.py +62 -0
- wml/iotoolkit/mpii.py +108 -0
- wml/iotoolkit/npmckeypoints_dataset.py +164 -0
- wml/iotoolkit/o365_to_coco.py +136 -0
- wml/iotoolkit/object365_toolkit.py +156 -0
- wml/iotoolkit/object365v2_toolkit.py +71 -0
- wml/iotoolkit/pascal_voc_data.py +51 -0
- wml/iotoolkit/pascal_voc_toolkit.py +194 -0
- wml/iotoolkit/pascal_voc_toolkit_fwd.py +473 -0
- wml/iotoolkit/penn_action.py +57 -0
- wml/iotoolkit/rawframe_dataset.py +129 -0
- wml/iotoolkit/rewrite_pascal_voc.py +28 -0
- wml/iotoolkit/semantic_data.py +49 -0
- wml/iotoolkit/split_file_by_type.py +29 -0
- wml/iotoolkit/sports_mot_datasets.py +78 -0
- wml/iotoolkit/vis_objectdetection_dataset.py +70 -0
- wml/iotoolkit/vis_torch_data.py +39 -0
- wml/iotoolkit/yolo_toolkit.py +38 -0
- wml/object_detection2/__init__.py +4 -0
- wml/object_detection2/basic_visualization.py +37 -0
- wml/object_detection2/bboxes.py +812 -0
- wml/object_detection2/data_process_toolkit.py +146 -0
- wml/object_detection2/keypoints.py +292 -0
- wml/object_detection2/mask.py +120 -0
- wml/object_detection2/metrics/__init__.py +3 -0
- wml/object_detection2/metrics/build.py +15 -0
- wml/object_detection2/metrics/classifier_toolkit.py +440 -0
- wml/object_detection2/metrics/common.py +71 -0
- wml/object_detection2/metrics/mckps_toolkit.py +338 -0
- wml/object_detection2/metrics/toolkit.py +1953 -0
- wml/object_detection2/npod_toolkit.py +361 -0
- wml/object_detection2/odtools.py +243 -0
- wml/object_detection2/standard_names.py +75 -0
- wml/object_detection2/visualization.py +956 -0
- wml/object_detection2/wmath.py +34 -0
- wml/semantic/__init__.py +0 -0
- wml/semantic/basic_toolkit.py +65 -0
- wml/semantic/mask_utils.py +156 -0
- wml/semantic/semantic_test.py +21 -0
- wml/semantic/structures.py +1 -0
- wml/semantic/toolkit.py +105 -0
- wml/semantic/visualization_utils.py +658 -0
- wml/threadtoolkit.py +50 -0
- wml/walgorithm.py +228 -0
- wml/wcollections.py +212 -0
- wml/wfilesystem.py +487 -0
- wml/wml_utils.py +657 -0
- wml/wstructures/__init__.py +4 -0
- wml/wstructures/common.py +9 -0
- wml/wstructures/keypoints_train_toolkit.py +149 -0
- wml/wstructures/kps_structures.py +579 -0
- wml/wstructures/mask_structures.py +1161 -0
- wml/wtorch/__init__.py +8 -0
- wml/wtorch/bboxes.py +104 -0
- wml/wtorch/classes_suppression.py +24 -0
- wml/wtorch/conv_module.py +181 -0
- wml/wtorch/conv_ws.py +144 -0
- wml/wtorch/data/__init__.py +16 -0
- wml/wtorch/data/_utils/__init__.py +45 -0
- wml/wtorch/data/_utils/collate.py +183 -0
- wml/wtorch/data/_utils/fetch.py +47 -0
- wml/wtorch/data/_utils/pin_memory.py +121 -0
- wml/wtorch/data/_utils/signal_handling.py +72 -0
- wml/wtorch/data/_utils/worker.py +227 -0
- wml/wtorch/data/base_data_loader_iter.py +93 -0
- wml/wtorch/data/dataloader.py +501 -0
- wml/wtorch/data/datapipes/__init__.py +1 -0
- wml/wtorch/data/datapipes/iter/__init__.py +12 -0
- wml/wtorch/data/datapipes/iter/batch.py +126 -0
- wml/wtorch/data/datapipes/iter/callable.py +92 -0
- wml/wtorch/data/datapipes/iter/listdirfiles.py +37 -0
- wml/wtorch/data/datapipes/iter/loadfilesfromdisk.py +30 -0
- wml/wtorch/data/datapipes/iter/readfilesfromtar.py +60 -0
- wml/wtorch/data/datapipes/iter/readfilesfromzip.py +63 -0
- wml/wtorch/data/datapipes/iter/sampler.py +94 -0
- wml/wtorch/data/datapipes/utils/__init__.py +0 -0
- wml/wtorch/data/datapipes/utils/common.py +65 -0
- wml/wtorch/data/dataset.py +354 -0
- wml/wtorch/data/datasets/__init__.py +4 -0
- wml/wtorch/data/datasets/common.py +53 -0
- wml/wtorch/data/datasets/listdirfilesdataset.py +36 -0
- wml/wtorch/data/datasets/loadfilesfromdiskdataset.py +30 -0
- wml/wtorch/data/distributed.py +135 -0
- wml/wtorch/data/multi_processing_data_loader_iter.py +866 -0
- wml/wtorch/data/sampler.py +267 -0
- wml/wtorch/data/single_process_data_loader_iter.py +24 -0
- wml/wtorch/data/test_data_loader.py +26 -0
- wml/wtorch/dataset_toolkit.py +67 -0
- wml/wtorch/depthwise_separable_conv_module.py +98 -0
- wml/wtorch/dist.py +591 -0
- wml/wtorch/dropblock/__init__.py +6 -0
- wml/wtorch/dropblock/dropblock.py +228 -0
- wml/wtorch/dropblock/dropout.py +40 -0
- wml/wtorch/dropblock/scheduler.py +48 -0
- wml/wtorch/ema.py +61 -0
- wml/wtorch/fc_module.py +73 -0
- wml/wtorch/functional.py +34 -0
- wml/wtorch/iter_dataset.py +26 -0
- wml/wtorch/loss.py +69 -0
- wml/wtorch/nets/__init__.py +0 -0
- wml/wtorch/nets/ckpt_toolkit.py +219 -0
- wml/wtorch/nets/fpn.py +276 -0
- wml/wtorch/nets/hrnet/__init__.py +0 -0
- wml/wtorch/nets/hrnet/config.py +2 -0
- wml/wtorch/nets/hrnet/hrnet.py +494 -0
- wml/wtorch/nets/misc.py +249 -0
- wml/wtorch/nets/resnet/__init__.py +0 -0
- wml/wtorch/nets/resnet/layers/__init__.py +17 -0
- wml/wtorch/nets/resnet/layers/aspp.py +144 -0
- wml/wtorch/nets/resnet/layers/batch_norm.py +231 -0
- wml/wtorch/nets/resnet/layers/blocks.py +111 -0
- wml/wtorch/nets/resnet/layers/wrappers.py +110 -0
- wml/wtorch/nets/resnet/r50_config.py +38 -0
- wml/wtorch/nets/resnet/resnet.py +691 -0
- wml/wtorch/nets/shape_spec.py +20 -0
- wml/wtorch/nets/simple_fpn.py +101 -0
- wml/wtorch/nms.py +109 -0
- wml/wtorch/nn.py +896 -0
- wml/wtorch/ocr_block.py +193 -0
- wml/wtorch/summary.py +331 -0
- wml/wtorch/train_toolkit.py +603 -0
- wml/wtorch/transformer_blocks.py +266 -0
- wml/wtorch/utils.py +719 -0
- wml/wtorch/wlr_scheduler.py +100 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
r""""Contains definitions of the methods used by the _BaseDataLoaderIter workers to
|
|
2
|
+
collate samples fetched from dataset into Tensor(s).
|
|
3
|
+
|
|
4
|
+
These **needs** to be in global scope since Py2 doesn't support serializing
|
|
5
|
+
static methods.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import torch
|
|
9
|
+
import re
|
|
10
|
+
if torch.__version__ < "1.9.0":
|
|
11
|
+
from torch._six import container_abcs, string_classes, int_classes
|
|
12
|
+
else:
|
|
13
|
+
import collections as container_abcs
|
|
14
|
+
string_classes = (str, bytes)
|
|
15
|
+
int_classes = int
|
|
16
|
+
|
|
17
|
+
np_str_obj_array_pattern = re.compile(r'[SaUO]')
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def default_convert(data):
    r"""Recursively convert each NumPy array field in *data* to a tensor.

    Tensors pass through untouched; mappings, namedtuples and sequences are
    rebuilt with their elements converted; anything else is returned as-is.
    """
    kind = type(data)
    if isinstance(data, torch.Tensor):
        return data
    if kind.__module__ == 'numpy' and kind.__name__ not in ('str_', 'string_'):
        # numpy arrays of strings/objects cannot become tensors: pass through.
        if kind.__name__ == 'ndarray' \
                and np_str_obj_array_pattern.search(data.dtype.str) is not None:
            return data
        return torch.as_tensor(data)
    if isinstance(data, container_abcs.Mapping):
        return {key: default_convert(value) for key, value in data.items()}
    if isinstance(data, tuple) and hasattr(data, '_fields'):  # namedtuple
        return kind(*(default_convert(field) for field in data))
    if isinstance(data, container_abcs.Sequence) and not isinstance(data, string_classes):
        return [default_convert(item) for item in data]
    return data
|
|
40
|
+
|
|
41
|
+
def null_convert(data):
    """Identity convert hook: hand *data* back unchanged."""
    return data
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Error text raised by the collate functions below when a batch element has an
# unsupported type; "{}" is filled with the offending type (or numpy dtype).
default_collate_err_msg_format = (
    "default_collate: batch must contain tensors, numpy arrays, numbers, "
    "dicts or lists; found {}")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def default_collate(batch):
    r"""Collate *batch* samples into tensors with an outer batch dimension.

    Dispatches on the type of the first sample: tensors are stacked, numpy
    arrays are converted then stacked, numbers become 1-D tensors, strings
    pass through, and mappings / namedtuples / sequences are collated
    recursively, element-wise.

    Raises:
        TypeError: for unsupported element types or string/object ndarrays.
        RuntimeError: if sequence elements have inconsistent lengths.
    """
    first = batch[0]
    first_type = type(first)

    if isinstance(first, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # Inside a worker process: stack straight into shared memory so
            # the main process does not copy the result a second time.
            total = sum(sample.numel() for sample in batch)
            shared = first.storage()._new_shared(total)
            out = first.new(shared)
        return torch.stack(batch, 0, out=out)

    if first_type.__module__ == 'numpy' and first_type.__name__ not in ('str_', 'string_'):
        if first_type.__name__ in ('ndarray', 'memmap'):
            # Arrays of string classes and objects have no tensor equivalent.
            if np_str_obj_array_pattern.search(first.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(first.dtype))
            return default_collate([torch.as_tensor(sample) for sample in batch])
        if first.shape == ():  # numpy scalars
            return torch.as_tensor(batch)
        raise TypeError(default_collate_err_msg_format.format(first_type))

    if isinstance(first, float):
        return torch.tensor(batch, dtype=torch.float64)
    if isinstance(first, int_classes):
        return torch.tensor(batch)
    if isinstance(first, string_classes):
        return batch
    if isinstance(first, container_abcs.Mapping):
        return {key: default_collate([sample[key] for sample in batch]) for key in first}
    if isinstance(first, tuple) and hasattr(first, '_fields'):  # namedtuple
        return first_type(*(default_collate(group) for group in zip(*batch)))
    if isinstance(first, container_abcs.Sequence):
        # All sequence samples must agree in length before transposing.
        remaining = iter(batch)
        expected = len(next(remaining))
        if not all(len(sample) == expected for sample in remaining):
            raise RuntimeError('each element in list of batch should be of equal size')
        return [default_collate(group) for group in zip(*batch)]

    raise TypeError(default_collate_err_msg_format.format(first_type))
|
|
94
|
+
|
|
95
|
+
def detection_default_collate_cat(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    # Variant of default_collate whose only code difference is the tensor
    # branch: it joins with torch.cat along dim 0 instead of torch.stack —
    # samples are assumed to already carry their own leading dimension
    # (e.g. per-image variable-length detection boxes).

    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        # cat preserves total element count, so `numel` sizes the shared
        # storage exactly.
        return torch.cat(batch, dim=0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))

            return detection_default_collate_cat([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        # Recurse per key so nested tensors are also cat-joined.
        return {key: detection_default_collate_cat([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(detection_default_collate_cat(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError('each element in list of batch should be of equal size')
        transposed = zip(*batch)
        return [detection_default_collate_cat(samples) for samples in transposed]

    # Reached for unsupported types, and for numpy elements that matched
    # neither the array nor the scalar sub-branch above.
    raise TypeError(default_collate_err_msg_format.format(elem_type))
|
|
139
|
+
|
|
140
|
+
def detection_default_collate(batch):
    r"""Puts each data field into a tensor with outer dimension batch size"""
    # NOTE(review): line-for-line identical to default_collate except that
    # nested fields recurse through this function — apparently kept as a
    # separate entry point so detection pipelines can swap recursion targets.
    # A future cleanup could share the body with default_collate.

    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, torch.Tensor):
        out = None
        if torch.utils.data.get_worker_info() is not None:
            # If we're in a background process, concatenate directly into a
            # shared memory tensor to avoid an extra copy
            numel = sum([x.numel() for x in batch])
            storage = elem.storage()._new_shared(numel)
            out = elem.new(storage)
        return torch.stack(batch, 0, out=out)
    elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
            and elem_type.__name__ != 'string_':
        if elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap':
            # array of string classes and object
            if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                raise TypeError(default_collate_err_msg_format.format(elem.dtype))

            return detection_default_collate([torch.as_tensor(b) for b in batch])
        elif elem.shape == ():  # scalars
            return torch.as_tensor(batch)
    elif isinstance(elem, float):
        return torch.tensor(batch, dtype=torch.float64)
    elif isinstance(elem, int_classes):
        return torch.tensor(batch)
    elif isinstance(elem, string_classes):
        return batch
    elif isinstance(elem, container_abcs.Mapping):
        return {key: detection_default_collate([d[key] for d in batch]) for key in elem}
    elif isinstance(elem, tuple) and hasattr(elem, '_fields'):  # namedtuple
        return elem_type(*(detection_default_collate(samples) for samples in zip(*batch)))
    elif isinstance(elem, container_abcs.Sequence):
        # check to make sure that the elements in batch have consistent size
        it = iter(batch)
        elem_size = len(next(it))
        if not all(len(elem) == elem_size for elem in it):
            raise RuntimeError('each element in list of batch should be of equal size')
        transposed = zip(*batch)
        return [detection_default_collate(samples) for samples in transposed]

    # Unsupported element type (or a numpy element no sub-branch handled).
    raise TypeError(default_collate_err_msg_format.format(elem_type))
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
r""""Contains definitions of the methods used by the _BaseDataLoaderIter to fetch
|
|
2
|
+
data from an iterable-style or map-style dataset. This logic is shared in both
|
|
3
|
+
single- and multi-processing data loading.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _BaseDatasetFetcher(object):
|
|
8
|
+
def __init__(self, dataset, auto_collation, collate_fn, drop_last):
|
|
9
|
+
self.dataset = dataset
|
|
10
|
+
self.auto_collation = auto_collation
|
|
11
|
+
self.collate_fn = collate_fn
|
|
12
|
+
self.drop_last = drop_last
|
|
13
|
+
|
|
14
|
+
def fetch(self, possibly_batched_index):
|
|
15
|
+
raise NotImplementedError()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class _IterableDatasetFetcher(_BaseDatasetFetcher):
    """Fetcher for iterable-style datasets: draws items from one iterator."""

    def __init__(self, dataset, auto_collation, collate_fn, drop_last):
        super(_IterableDatasetFetcher, self).__init__(dataset, auto_collation, collate_fn, drop_last)
        # One iterator per fetcher; successive fetches resume where the
        # previous one stopped.
        self.dataset_iter = iter(dataset)

    def fetch(self, possibly_batched_index):
        if not self.auto_collation:
            # Single-sample mode: the index is ignored for iterables.
            return self.collate_fn(next(self.dataset_iter))

        # Batched mode: the index list only determines the batch size; the
        # items themselves come from the shared iterator in order.
        data = []
        for _ in possibly_batched_index:
            try:
                data.append(next(self.dataset_iter))
            except StopIteration:
                break
        if not data or (self.drop_last and len(data) < len(possibly_batched_index)):
            # Exhausted (or a short final batch with drop_last): signal the
            # caller that this worker's shard is done.
            raise StopIteration
        return self.collate_fn(data)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _MapDatasetFetcher(_BaseDatasetFetcher):
    """Fetcher for map-style datasets: indexes the dataset directly."""

    def __init__(self, dataset, auto_collation, collate_fn, drop_last):
        super(_MapDatasetFetcher, self).__init__(dataset, auto_collation, collate_fn, drop_last)

    def fetch(self, possibly_batched_index):
        if self.auto_collation:
            # Batch of indices -> list of samples.
            samples = [self.dataset[index] for index in possibly_batched_index]
        else:
            # Dataset handles the (single or fancy) index itself.
            samples = self.dataset[possibly_batched_index]
        return self.collate_fn(samples)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
r""""Contains definitions of the methods used by the _BaseDataLoaderIter to put
|
|
2
|
+
fetched tensors into pinned memory.
|
|
3
|
+
|
|
4
|
+
These **needs** to be in global scope since Py2 doesn't support serializing
|
|
5
|
+
static methods.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import time
|
|
9
|
+
import torch
|
|
10
|
+
if torch.__version__ < "1.9.0":
|
|
11
|
+
from torch._six import queue, container_abcs, string_classes
|
|
12
|
+
else:
|
|
13
|
+
import queue
|
|
14
|
+
import collections as container_abcs
|
|
15
|
+
string_classes = (str, bytes)
|
|
16
|
+
|
|
17
|
+
from . import MP_STATUS_CHECK_INTERVAL
|
|
18
|
+
import os
|
|
19
|
+
from torch._utils import ExceptionWrapper
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _pin_memory_loop(in_queue, out_queue, device_id, done_event):
    # Thread body: moves fetched batches from `in_queue` into pinned memory
    # (and, in this fork, onto the device — see pin_memory below), then
    # forwards them on `out_queue` until `done_event` is set.
    # This setting is thread local, and prevents the copy in pin_memory from
    # consuming all CPU cores.
    torch.set_num_threads(1)

    torch.cuda.set_device(device_id)

    # See NOTE [ Data Loader Multiprocessing Shutdown Logic ] for details on the
    # logic of this function.
    while not done_event.is_set():
        try:
            r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
        except queue.Empty:
            continue
        idx, data = r
        if not done_event.is_set() and not isinstance(data, ExceptionWrapper):
            try:
                data = pin_memory(data)
            except Exception:
                # Wrap so the main process can re-raise with location info.
                data = ExceptionWrapper(
                    where="in pin memory thread for device {}".format(device_id))
        r = (idx, data)
        while not done_event.is_set():
            try:
                out_queue.put(r, timeout=MP_STATUS_CHECK_INTERVAL)
                # NOTE(review): short sleep after a *successful* put — looks
                # like a deliberate throttle for the consumer; confirm intent.
                time.sleep(1e-4)
                break
            except queue.Full:
                # Consumer is behind; back off before retrying.
                time.sleep(1)
                continue
        del r  # save memory
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def pin_memory(data):
    """Recursively pin *data*'s tensors and move them to the current device.

    Unlike torch's stock helper, tensors here are also sent to the GPU via
    ``.cuda()``, so the pin-memory thread doubles as the host-to-device
    transfer stage. Containers are rebuilt with their elements processed;
    strings and unrecognized objects pass through unchanged.
    """
    if isinstance(data, torch.Tensor):
        return data.pin_memory().cuda()
    if isinstance(data, string_classes):
        # Strings are Sequences — short-circuit before the Sequence branch.
        return data
    if isinstance(data, container_abcs.Mapping):
        return {key: pin_memory(value) for key, value in data.items()}
    if isinstance(data, tuple) and hasattr(data, '_fields'):  # namedtuple
        return type(data)(*(pin_memory(field) for field in data))
    if isinstance(data, container_abcs.Sequence):
        return [pin_memory(item) for item in data]
    if hasattr(data, "pin_memory"):
        # NOTE(review): custom objects are pinned but not moved to the GPU,
        # unlike the tensor branch — confirm this asymmetry is intended.
        return data.pin_memory()
    return data
|
|
70
|
+
|
|
71
|
+
def _pin_memory_loop_stream(in_queue, out_queue, device_id, done_event,stream):
    # Variant of _pin_memory_loop that performs the pin + device copy inside
    # the caller-provided CUDA `stream`, so the async transfers issued by
    # pin_memory_stream can overlap with compute on the default stream.
    # This setting is thread local, and prevents the copy in pin_memory from
    # consuming all CPU cores.
    torch.set_num_threads(1)

    torch.cuda.set_device(device_id)

    # See NOTE [ Data Loader Multiprocessing Shutdown Logic ] for details on the
    # logic of this function.
    while not done_event.is_set():
        try:
            r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
        except queue.Empty:
            continue
        idx, data = r
        if not done_event.is_set() and not isinstance(data, ExceptionWrapper):
            try:
                # All non_blocking copies below are queued on `stream`.
                with torch.cuda.stream(stream):
                    data = pin_memory_stream(data)
            except Exception:
                # Wrap so the main process can re-raise with location info.
                data = ExceptionWrapper(
                    where="in pin memory thread for device {}".format(device_id))
        r = (idx, data)
        while not done_event.is_set():
            try:
                out_queue.put(r, timeout=MP_STATUS_CHECK_INTERVAL)
                # NOTE(review): short sleep after a *successful* put — looks
                # like a deliberate throttle for the consumer; confirm intent.
                time.sleep(1e-4)
                break
            except queue.Full:
                # Consumer is behind; back off before retrying.
                time.sleep(1)
                continue
        del r  # save memory
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def pin_memory_stream(data):
    # Recursive pin + asynchronous device copy; intended to run under
    # torch.cuda.stream(...) (see _pin_memory_loop_stream above).
    if isinstance(data, torch.Tensor):
        if data.dtype==torch.int16: # hack: don't handle int16 — left unpinned on the CPU
            return data
        return data.pin_memory().cuda(non_blocking=True)
    elif isinstance(data, string_classes):
        # Strings are Sequences — short-circuit before the Sequence branch.
        return data
    elif isinstance(data, container_abcs.Mapping):
        return {k: pin_memory_stream(sample) for k, sample in data.items()}
    elif isinstance(data, tuple) and hasattr(data, '_fields'):  # namedtuple
        return type(data)(*(pin_memory_stream(sample) for sample in data))
    elif isinstance(data, container_abcs.Sequence):
        return [pin_memory_stream(sample) for sample in data]
    elif hasattr(data, "pin_memory"):
        # Custom pinnable objects are also copied asynchronously here,
        # unlike in pin_memory above.
        return data.pin_memory().cuda(non_blocking=True)
    else:
        return data
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
r""""Signal handling for multiprocessing data loading.
|
|
2
|
+
|
|
3
|
+
NOTE [ Signal handling in multiprocessing data loading ]
|
|
4
|
+
|
|
5
|
+
In cases like DataLoader, if a worker process dies due to bus error/segfault
|
|
6
|
+
or just hang, the main process will hang waiting for data. This is difficult
|
|
7
|
+
to avoid on PyTorch side as it can be caused by limited shm, or other
|
|
8
|
+
libraries users call in the workers. In this file and `DataLoader.cpp`, we make
|
|
9
|
+
our best effort to provide some error message to users when such unfortunate
|
|
10
|
+
events happen.
|
|
11
|
+
|
|
12
|
+
When a _BaseDataLoaderIter starts worker processes, their pids are registered in a
|
|
13
|
+
defined in `DataLoader.cpp`: id(_BaseDataLoaderIter) => Collection[ Worker pids ]
|
|
14
|
+
via `_set_worker_pids`.
|
|
15
|
+
|
|
16
|
+
When an error happens in a worker process, the main process received a SIGCHLD,
|
|
17
|
+
and Python will eventually call the handler registered below
|
|
18
|
+
(in `_set_SIGCHLD_handler`). In the handler, the `_error_if_any_worker_fails`
|
|
19
|
+
call checks all registered worker pids and raise proper error message to
|
|
20
|
+
prevent main process from hanging waiting for data from worker.
|
|
21
|
+
|
|
22
|
+
Additionally, at the beginning of each worker's `_utils.worker._worker_loop`,
|
|
23
|
+
`_set_worker_signal_handlers` is called to register critical signal handlers
|
|
24
|
+
(e.g., for SIGSEGV, SIGBUS, SIGFPE, SIGTERM) in C, which just prints an error
|
|
25
|
+
message to stderr before triggering the default handler. So a message will also
|
|
26
|
+
be printed from the worker process when it is killed by such signals.
|
|
27
|
+
|
|
28
|
+
See NOTE [ Data Loader Multiprocessing Shutdown Logic ] for the reasoning of
|
|
29
|
+
this signal handling design and other mechanism we implement to make our
|
|
30
|
+
multiprocessing data loading robust to errors.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
import signal
|
|
34
|
+
import threading
|
|
35
|
+
from . import IS_WINDOWS
|
|
36
|
+
|
|
37
|
+
# Some of the following imported functions are not used in this file, but are to
|
|
38
|
+
# be used `_utils.signal_handling.XXXXX`.
|
|
39
|
+
from torch._C import _set_worker_pids, _remove_worker_pids # noqa: F401
|
|
40
|
+
from torch._C import _error_if_any_worker_fails, _set_worker_signal_handlers # noqa: F401
|
|
41
|
+
|
|
42
|
+
# Process-wide guard checked/flipped by _set_SIGCHLD_handler below.
_SIGCHLD_handler_set = False
r"""Whether SIGCHLD handler is set for DataLoader worker failures. Only one
handler needs to be set for all DataLoaders in a process."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _set_SIGCHLD_handler():
    # Install (once per process, main thread only, non-Windows) a SIGCHLD
    # handler that raises a descriptive error if any DataLoader worker dies,
    # instead of letting the main process hang waiting for data.
    # Windows doesn't support SIGCHLD handler
    if IS_WINDOWS:
        return
    # can't set signal in child threads
    if not isinstance(threading.current_thread(), threading._MainThread):  # type: ignore
        return
    global _SIGCHLD_handler_set
    if _SIGCHLD_handler_set:
        return
    previous_handler = signal.getsignal(signal.SIGCHLD)
    if not callable(previous_handler):
        # This doesn't catch default handler, but SIGCHLD default handler is a
        # no-op.
        previous_handler = None

    def handler(signum, frame):
        # This following call uses `waitid` with WNOHANG from C side. Therefore,
        # Python can still get and update the process status successfully.
        _error_if_any_worker_fails()
        if previous_handler is not None:
            assert callable(previous_handler)
            # Chain to whatever handler was installed before ours.
            previous_handler(signum, frame)

    signal.signal(signal.SIGCHLD, handler)
    _SIGCHLD_handler_set = True
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
r""""Contains definitions of the methods used by the _BaseDataLoaderIter workers.
|
|
2
|
+
|
|
3
|
+
These **needs** to be in global scope since Py2 doesn't support serializing
|
|
4
|
+
static methods.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import torch
|
|
8
|
+
import random
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
if torch.__version__ < "1.9.0":
|
|
13
|
+
from torch._six import queue
|
|
14
|
+
else:
|
|
15
|
+
import queue
|
|
16
|
+
from torch._utils import ExceptionWrapper
|
|
17
|
+
from typing import Union
|
|
18
|
+
from . import signal_handling, MP_STATUS_CHECK_INTERVAL, IS_WINDOWS
|
|
19
|
+
|
|
20
|
+
if IS_WINDOWS:
    import ctypes
    from ctypes.wintypes import DWORD, BOOL, HANDLE

    # On Windows, the parent ID of the worker process remains unchanged when the manager process
    # is gone, and the only way to check it through OS is to let the worker have a process handle
    # of the manager and ask if the process status has changed.
    class ManagerWatchdog(object):
        # Lets a worker cheaply poll whether its manager (parent) process is
        # still alive, so it can exit instead of blocking forever.
        def __init__(self):
            self.manager_pid = os.getppid()

            # mypy cannot detect this code is windows only
            self.kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)  # type: ignore
            self.kernel32.OpenProcess.argtypes = (DWORD, BOOL, DWORD)
            self.kernel32.OpenProcess.restype = HANDLE
            self.kernel32.WaitForSingleObject.argtypes = (HANDLE, DWORD)
            self.kernel32.WaitForSingleObject.restype = DWORD

            # Value obtained from https://msdn.microsoft.com/en-us/library/ms684880.aspx
            SYNCHRONIZE = 0x00100000
            self.manager_handle = self.kernel32.OpenProcess(SYNCHRONIZE, 0, self.manager_pid)

            if not self.manager_handle:
                raise ctypes.WinError(ctypes.get_last_error())  # type: ignore

            self.manager_dead = False

        def is_alive(self):
            # Once observed dead, stay dead: skip further OS calls.
            if not self.manager_dead:
                # Value obtained from https://msdn.microsoft.com/en-us/library/windows/desktop/ms687032.aspx
                self.manager_dead = self.kernel32.WaitForSingleObject(self.manager_handle, 0) == 0
            return not self.manager_dead
else:
    class ManagerWatchdog(object):  # type: ignore[no-redef]
        # POSIX: if the parent dies the worker is re-parented, so getppid()
        # stops matching the pid recorded at construction time.
        def __init__(self):
            self.manager_pid = os.getppid()
            self.manager_dead = False

        def is_alive(self):
            if not self.manager_dead:
                self.manager_dead = os.getppid() != self.manager_pid
            return not self.manager_dead
|
|
62
|
+
|
|
63
|
+
# Per-process WorkerInfo, populated by the worker loop inside worker
# processes; stays None in the main process (see get_worker_info()).
_worker_info = None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class WorkerInfo(object):
    """Immutable bag of per-worker attributes (id, num_workers, seed, dataset).

    Every keyword given to the constructor becomes an attribute; once
    ``__init__`` finishes, any further assignment raises ``RuntimeError``.
    """

    # Class-level default; flipped to True per instance at the end of
    # __init__ so __setattr__ can tell construction from later mutation.
    __initialized = False

    def __init__(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.__keys = tuple(kwargs.keys())
        self.__initialized = True

    def __setattr__(self, key, val):
        if self.__initialized:
            raise RuntimeError("Cannot assign attributes to {} objects".format(self.__class__.__name__))
        return super(WorkerInfo, self).__setattr__(key, val)

    def __repr__(self):
        pairs = ('{}={}'.format(name, getattr(self, name)) for name in self.__keys)
        return '{}({})'.format(self.__class__.__name__, ', '.join(pairs))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def get_worker_info():
    r"""Return the :class:`WorkerInfo` for the current DataLoader worker.

    Inside a worker process the returned object carries at least:

    * :attr:`id`: this worker's index.
    * :attr:`num_workers`: the total number of workers.
    * :attr:`seed`: the per-worker random seed, derived from the main
      process RNG and the worker id (see
      :class:`~torch.utils.data.DataLoader` documentation).
    * :attr:`dataset`: this process's own copy of the dataset object —
      a different object from the one in the main process.

    Called from the main process, this returns ``None``.

    .. note::
        Handy inside a :attr:`worker_init_fn` passed to
        :class:`~torch.utils.data.DataLoader`, e.g. to shard the dataset by
        ``worker_id`` or to seed third-party libraries (NumPy, ...) from
        ``seed``.
    """
    return _worker_info
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
r"""Dummy class used to signal the end of an IterableDataset"""
@dataclass(frozen=True)
class _IterableDatasetStopIteration(object):
    # Sentinel message carrying the id of the worker whose shard of the
    # IterableDataset is exhausted; frozen so it is hashable and safe to
    # pass between processes.
    worker_id: int
|
|
120
|
+
|
|
121
|
+
@dataclass(frozen=True)
class _ResumeIteration(object):
    """Dummy class used to resume the fetching when worker reuse is enabled.

    The main process sends this marker to a persistent worker to start a new
    epoch; the worker acknowledges it and recreates its fetcher.  Fix: the
    description used to be a bare string *above* the class and therefore
    never became ``__doc__``; it is now a proper class docstring.
    """
|
|
125
|
+
|
|
126
|
+
def _worker_loop(dataset_kind, dataset, index_queue, data_queue, done_event,
                 auto_collation, collate_fn, drop_last, seed, init_fn, worker_id,
                 num_workers, persistent_workers):
    """Main function run by each DataLoader worker process.

    Repeatedly pulls tasks from ``index_queue``, fetches the corresponding
    data through a dataset fetcher, and puts ``(idx, data)`` results on
    ``data_queue`` until it receives the final ``None`` signal.

    Args:
        dataset_kind: a ``_DatasetKind`` value selecting the fetcher type.
        dataset: this worker process's copy of the dataset object.
        index_queue: queue fed by the main process with ``(idx, index)``
            tasks, ``_ResumeIteration`` markers, or ``None`` (final signal).
        data_queue: queue this worker writes ``(idx, data)`` results to.
        done_event: shared event set by the main process to request shutdown.
        auto_collation, collate_fn, drop_last: forwarded to the fetcher.
        seed: per-worker RNG seed (applied to ``random`` and ``torch`` below).
        init_fn: optional user ``worker_init_fn``, called with ``worker_id``.
        worker_id: this worker's index.
        num_workers: total number of workers (exposed via ``WorkerInfo``).
        persistent_workers: accepted but not read in this body.
            NOTE(review): presumably consumed by the main-process side of the
            worker-reuse protocol — confirm against the caller.
    """
    # See NOTE [ Data Loader Multiprocessing Shutdown Logic ] for details on the
    # logic of this function.

    try:
        # Initialize C side signal handlers for SIGBUS and SIGSEGV. Python signal
        # module's handlers are executed after Python returns from C low-level
        # handlers, likely when the same fatal signal had already happened
        # again.
        # https://docs.python.org/3/library/signal.html#execution-of-python-signal-handlers
        signal_handling._set_worker_signal_handlers()

        # Single-threaded torch per worker; seed both RNGs deterministically.
        torch.set_num_threads(1)
        random.seed(seed)
        torch.manual_seed(seed)

        # Publish this worker's metadata so get_worker_info() works in
        # dataset code running inside this process.
        global _worker_info
        _worker_info = WorkerInfo(id=worker_id, num_workers=num_workers,
                                  seed=seed, dataset=dataset)

        # Local import — NOTE(review): presumably deferred to avoid a
        # circular import with the package's dataloader module; confirm.
        from wml.wtorch.data import _DatasetKind

        init_exception = None

        try:
            if init_fn is not None:
                init_fn(worker_id)

            fetcher = _DatasetKind.create_fetcher(dataset_kind, dataset, auto_collation, collate_fn, drop_last)
        except Exception:
            # Defer reporting: the wrapped exception is sent back in place of
            # the first requested batch below.
            init_exception = ExceptionWrapper(
                where="in DataLoader worker process {}".format(worker_id))

        # When using Iterable mode, some worker can exit earlier than others due
        # to the IterableDataset behaving differently for different workers.
        # When such things happen, an `_IterableDatasetStopIteration` object is
        # sent over to the main process with the ID of this worker, so that the
        # main process won't send more tasks to this worker, and will send
        # `None` to this worker to properly exit it.
        #
        # Note that we cannot set `done_event` from a worker as it is shared
        # among all processes. Instead, we set the `iteration_end` flag to
        # signify that the iterator is exhausted. When either `done_event` or
        # `iteration_end` is set, we skip all processing step and just wait for
        # `None`.
        iteration_end = False

        # Watchdog lets the loop exit if the main (manager) process dies.
        watchdog = ManagerWatchdog()

        while watchdog.is_alive():
            try:
                # Timeout so we periodically re-check watchdog liveness.
                r = index_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
            except queue.Empty:
                continue
            if isinstance(r, _ResumeIteration):
                # Acknowledge the main process
                data_queue.put((r, None))
                iteration_end = False
                # Recreate the fetcher for worker-reuse policy
                fetcher = _DatasetKind.create_fetcher(
                    dataset_kind, dataset, auto_collation, collate_fn, drop_last)
                continue
            elif r is None:
                # Received the final signal
                assert done_event.is_set() or iteration_end
                break
            elif done_event.is_set() or iteration_end:
                # `done_event` is set. But I haven't received the final signal
                # (None) yet. I will keep continuing until get it, and skip the
                # processing steps.
                continue
            idx, index = r
            data: Union[_IterableDatasetStopIteration, ExceptionWrapper]
            if init_exception is not None:
                # Report the deferred startup failure instead of data, once.
                data = init_exception
                init_exception = None
            else:
                try:
                    data = fetcher.fetch(index)
                except Exception as e:
                    if isinstance(e, StopIteration) and dataset_kind == _DatasetKind.Iterable:
                        data = _IterableDatasetStopIteration(worker_id)
                        # Set `iteration_end`
                        # (1) to save future `next(...)` calls, and
                        # (2) to avoid sending multiple `_IterableDatasetStopIteration`s.
                        iteration_end = True
                    else:
                        # It is important that we don't store exc_info in a variable.
                        # `ExceptionWrapper` does the correct thing.
                        # See NOTE [ Python Traceback Reference Cycle Problem ]
                        data = ExceptionWrapper(
                            where=f"in DataLoader worker process {worker_id}, msg: {e}")
            data_queue.put((idx, data))
            del data, idx, index, r  # save memory
    except KeyboardInterrupt:
        # Main process will raise KeyboardInterrupt anyways.
        pass
    if done_event.is_set():
        # Shutting down: don't let the queue's feeder thread block exit.
        data_queue.cancel_join_thread()
        data_queue.close()
|