mindspore-2.7.0rc1-cp311-cp311-win_amd64.whl → mindspore-2.7.1-cp311-cp311-win_amd64.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +5 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
- mindspore/_extends/parse/parser.py +28 -22
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +23 -2
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
- mindspore/amp.py +0 -18
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/__init__.py +18 -12
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +371 -96
- mindspore/common/_utils.py +7 -43
- mindspore/common/api.py +434 -135
- mindspore/common/dtype.py +98 -57
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/hook_handle.py +82 -3
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +17 -127
- mindspore/common/recompute.py +4 -13
- mindspore/common/tensor.py +50 -217
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +20 -106
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +35 -1
- mindspore/dataset/engine/datasets.py +338 -319
- mindspore/dataset/engine/datasets_user_defined.py +38 -22
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/api/cell.h +28 -4
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +0 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +5 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +6 -1
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +4 -3
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/__init__.py +4 -0
- mindspore/mint/distributed/distributed.py +392 -69
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/_functions.py +1 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +10 -10
- mindspore/mint/nn/layer/normalization.py +11 -16
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +231 -239
- mindspore/nn/layer/activation.py +4 -2
- mindspore/nn/layer/basic.py +56 -14
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +32 -127
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +1 -4
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +2 -4
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +39 -5
- mindspore/nn/wrap/grad_reducer.py +4 -89
- mindspore/numpy/array_creations.py +4 -4
- mindspore/numpy/fft.py +9 -9
- mindspore/numpy/utils_const.py +1 -1
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +1 -5
- mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
- mindspore/ops/auto_generate/gen_extend_func.py +6 -11
- mindspore/ops/auto_generate/gen_ops_def.py +385 -154
- mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +16 -2
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +2 -0
- mindspore/ops/function/array_func.py +24 -18
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +7 -6
- mindspore/ops/function/grad/grad_func.py +4 -12
- mindspore/ops/function/math_func.py +89 -86
- mindspore/ops/function/nn_func.py +92 -313
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +4 -1
- mindspore/ops/functional_overload.py +377 -30
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +12 -50
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +5 -50
- mindspore/ops/operations/comm_ops.py +95 -17
- mindspore/ops/operations/custom_ops.py +237 -22
- mindspore/ops/operations/debug_ops.py +33 -35
- mindspore/ops/operations/manually_defined/ops_def.py +39 -318
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +3 -3
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +4 -27
- mindspore/ops/tensor_method.py +88 -10
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_auto_parallel_context.py +5 -15
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +4 -6
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_utils.py +34 -17
- mindspore/parallel/auto_parallel.py +23 -9
- mindspore/parallel/checkpoint_transform.py +20 -2
- mindspore/parallel/cluster/process_entity/_api.py +28 -33
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/parallel/cluster/run.py +5 -3
- mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/function/reshard_func.py +6 -5
- mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
- mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
- mindspore/parallel/shard.py +7 -21
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +127 -20
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +40 -4
- mindspore/profiler/common/path_manager.py +65 -24
- mindspore/profiler/common/profiler_context.py +27 -14
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +10 -6
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/dynamic_profiler.py +91 -46
- mindspore/profiler/envprofiler.py +30 -5
- mindspore/profiler/experimental_config.py +18 -2
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +34 -7
- mindspore/profiler/profiler.py +193 -145
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +9 -6
- mindspore/runtime/executor.py +35 -0
- mindspore/runtime/memory.py +113 -0
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +8 -21
- mindspore/train/amp.py +6 -7
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +1 -17
- mindspore/train/callback/_flops_collector.py +10 -6
- mindspore/train/callback/_train_fault_tolerance.py +72 -25
- mindspore/train/data_sink.py +5 -9
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +41 -230
- mindspore/train/serialization.py +160 -401
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +152 -16
- mindspore/version.py +1 -1
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/__init__.py +0 -23
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/train/memory_profiling_pb2.py +0 -298
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
Diff of mindspore/dataset/engine/datasets.py (+338 -319):

```diff
@@ -29,10 +29,8 @@ import atexit
 import glob
 import json
 import os
-import queue
 import signal
 import stat
-import subprocess
 import warnings
 
 import time
@@ -41,6 +39,7 @@ import multiprocessing
 from importlib import import_module
 import sys
 import threading
+from types import GeneratorType
 
 import copy
 import weakref
@@ -65,7 +64,6 @@ from mindspore.dataset.engine import samplers
 from mindspore.dataset.engine.samplers import Shuffle
 from .iterators import DictIterator, TupleIterator, DummyIterator, check_iterator_cleanup, _set_iterator_cleanup, \
     ITERATORS_LIST, _unset_iterator_cleanup, _cleanup_the_iterators_if_created
-from .queue import _SharedQueue, _Queue
 from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
     check_rename, check_device_send, check_take, check_output_shape, check_project, \
     check_sync_wait, check_zip_dataset, check_add_column, check_concat, check_split, check_bucket_batch_by_length, \
@@ -73,7 +71,8 @@ from .validators import check_batch, check_shuffle, check_map, check_filter, che
     check_total_batch, check_sync_update
 from ..core.config import get_callback_timeout, _init_device_info, get_enable_shared_mem, get_num_parallel_workers, \
     get_enable_watchdog, get_seed, set_seed, get_debug_mode, get_multiprocessing_timeout_interval, \
-    _get_debug_hook_list, get_multiprocessing_start_method
+    _get_debug_hook_list, get_multiprocessing_start_method, get_video_backend, set_video_backend, \
+    get_error_samples_mode, ErrorSamplesMode
 from ..core.datatypes import mstype_to_detype
 from ..core.validator_helpers import replace_none
 from ..core.py_util_helpers import ExceptionHandler
```
```diff
@@ -458,8 +457,10 @@ class Dataset:
             each bucket. Must contain len(bucket_boundaries)+1 elements.
         element_length_function (Callable, optional): A function that takes in
             M arguments where M = len(column_names) and returns an integer. If no value
-            provided, parameter M the len(column_names) must be 1,
-
+            provided, parameter M the len(column_names) must be 1. At this time, the length of the data in this
+            column is determined based on its ndim. If ndim=0, the data length is 0, indicating a str, bool, int,
+            or float scalar; if it is an array with ndim > 0, the length of the data is array.shape[0].
+            Default: ``None``, indicating this parameter is not specified.
         pad_info (dict, optional): The information about how to batch each column. The key
             corresponds to the column name, and the value must be a tuple of 2 elements.
             The first element corresponds to the shape to pad to, and the second
```
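For context, here is a minimal sketch of how `element_length_function` interacts with the ndim rule described in the docstring above; the generator data, bucket boundaries, and batch sizes are illustrative values, not taken from the diff:

```python
import numpy as np
import mindspore.dataset as ds

def gen():
    # Variable-length 1-D rows; with element_length_function omitted, the
    # length of each row would be taken from array.shape[0] per the docstring.
    for n in (3, 5, 1, 4):
        yield (np.arange(n, dtype=np.int32),)

dataset = ds.GeneratorDataset(gen, column_names=["col1"])
dataset = dataset.bucket_batch_by_length(
    column_names=["col1"],
    bucket_boundaries=[4],        # two buckets: len < 4 and len >= 4
    bucket_batch_sizes=[2, 2],    # len(bucket_boundaries) + 1 entries
    element_length_function=lambda col: col.shape[0])
```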
```diff
@@ -820,8 +821,7 @@ class Dataset:
         return dataset
 
     @check_map
-    def map(self, operations, input_columns=None, output_columns=None,
-            num_parallel_workers=None, **kwargs):
+    def map(self, operations, input_columns=None, output_columns=None, num_parallel_workers=None, **kwargs):
         """
         Apply each operation in operations to this dataset.
 
```
```diff
@@ -2752,8 +2752,6 @@ class BatchDataset(UnionBaseDataset):
 
             self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(), self.num_parallel_workers,
                                                        str(self), [self.per_batch_map], self.max_rowsize)
-            # Wrap per_batch_map into _PythonCallable
-            self.per_batch_map = _PythonCallable(self.per_batch_map, 0, self.process_pool)
         else:
             if self.per_batch_map is not None:
                 self.per_batch_map = FuncWrapper(self.per_batch_map)
```
```diff
@@ -3057,95 +3055,6 @@ _OP_NAME = dict()
 _OP_PROCESS = dict()
 
 
-# PythonCallable wrapper for multiprocess pyfunc
-class _PythonCallable:
-    """
-    Internal Python function wrapper for multiprocessing pyfunc.
-    """
-
-    def __init__(self, py_callable, idx, pool=None):
-        # Original Python callable from user.
-        self.py_callable = py_callable
-        # Process pool created for current iterator.
-        self.pool = pool
-        # Python callable index
-        self.idx = idx
-
-    def __call__(self, *args):
-        result = None
-        get_data_from_worker_process = False
-        while get_data_from_worker_process is False:
-            if self.pool.is_running() and check_iterator_cleanup() is False:
-                try:
-                    result = self.pool.execute(self.idx, *args)
-                except multiprocessing.TimeoutError:
-                    continue
-                get_data_from_worker_process = True
-            else:
-                # worker process is stopped
-                logger.info("The worker process of map operation is stopped. "
-                            "So return None to main thread and break the main thread.")
-                return None
-        # got value from worker process
-        if not isinstance(result, tuple) and get_data_from_worker_process is True:
-            result = (result,)
-        return result
-
-    def to_json(self):
-        return self.py_callable.to_json()
-
-
-# used when python_multiprocessing=True in map
-class Pipe:
-    """
-    Class to handle communication between the master process and the worker processes.
-    """
-
-    def __init__(self, warning_ctl, shared_memory=False, max_rowsize=(-1, -1)):
-        self.shared_memory = shared_memory
-        self.eof = multiprocessing.Event()
-        if self.shared_memory:
-            self.in_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[0])
-            self.res_queue = _SharedQueue(1, warning_ctl, max_rowsize=max_rowsize[1])
-        else:
-            self.in_queue = _Queue(1)
-            self.res_queue = _Queue(1)
-        self.in_queue.cancel_join_thread()  # Ensure that the process does not hang when exiting
-
-    def master_send(self, func_index, data):
-        self.in_queue.put_nowait((func_index, *data))
-
-    def master_receive(self):
-        if self.eof is None:
-            raise RuntimeError("EOF is none when get data from worker.")
-        if self.eof.is_set():
-            return None
-        return self.res_queue.get(timeout=1)
-
-    def master_close(self):
-        self.eof.set()
-        self.send_finish_signal_to_worker()
-        self.send_finish_signal()
-
-    def send_finish_signal(self):
-        self.worker_send(None)
-
-    def send_finish_signal_to_worker(self):
-        self.master_send(0, "QUIT")
-
-    def worker_send(self, data):
-        self.res_queue.put_until(data, timeout=1, exit_signal=self.eof)
-
-    def worker_receive(self):
-        result = self.in_queue.get_until(timeout=1, exit_signal=self.eof)
-        if result is None:
-            return result
-        if len(result) == 1:
-            raise RuntimeError(f"Corrupted data. Worker received {len(result)} elements, it should be more than 1.")
-        func_index, *data = result
-        return func_index, tuple(data)
-
-
 def _main_process_already_exit():
     """
     Judge whether main process already exit.
```
```diff
@@ -3158,15 +3067,21 @@ def _main_process_already_exit():
     return False
 
 
-def _worker_loop(operations, pipe, worker_id):
+def _worker_loop(quit_signal, operations, worker_id, op_type, key, video_backend=None):
     """
     Multiprocess worker process loop.
+
+    The worker process (Python layer) gets data from / sends data to the map / batch thread (C++ layer) through a
+    message queue and shared memory. This logic no longer uses the Python multiprocessing pool, in_queue and
+    res_queue for data transfer.
     """
+    # Release the lock which had been held in map_op.cc::Launch() / batch_op.cc::Launch()
+    cde.unlock_shm_id_and_msg_id_mutex()
+
     # Initialize C++ side signal handlers
     cde.register_worker_handlers()
 
-
-
+    if video_backend is not None:
+        set_video_backend(video_backend)
 
     def _ignore_sigint():
         """
@@ -3180,121 +3095,197 @@ def _worker_loop(operations, pipe, worker_id):
     if get_seed() != 5489:
         set_seed(get_seed() + worker_id)
 
+    msg_queue = cde.MessageQueue(key)
+    msg_queue.set_release_flag(False)
+    shm_queue = cde.SharedMemoryQueue(key)
+    shm_queue.set_release_flag(False)
+
+    pid = str(os.getpid())
+    ppid = str(os.getppid())
+
+    # Scenario: when the main process is killed, the worker process needs to release shm & msg.
+    # The shm id and msg id should be released by SIGTERM in the worker handler
+    cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                   msg_queue.msg_queue_id)
+
+    num_receive = 0
+    num_send = 0
     while not _main_process_already_exit():
         _ignore_sigint()
 
-
-        if …
+        # quit by close_worker
+        if quit_signal.is_set():
             return
-…
+
+        # >> receive procedure >>
+        ## 1. get message queue which contains shared memory info from map C++ thread in main process
         try:
-…
+            cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                           msg_queue.msg_queue_id)
+            msg_queue.msg_rcv(cde.MASTER_SEND_DATA_MSG)
+            cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                           msg_queue.msg_queue_id)
+        except RuntimeError as err:
+            cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                           msg_queue.msg_queue_id)
+            # the msg_queue had been released by the main process, ignore it in the worker process
+            if "errno: 2" in str(err):
+                # Because the worker process does not release msg and shm, continue
+                continue
+            raise err
 
-…
+        ## when the message queue had been released, break the loop
+        if msg_queue.message_queue_state() == cde.MessageState.RELEASED:
+            logger.info("The message queue had been released, worker loop end.")
+            break
 
-
-        del pipe.in_queue
-        del pipe.res_queue
+        num_receive += 1
 
+        logger.info("Python process {} worker({}) receives {} samples from map thread.".format(op_type, worker_id,
+                                                                                               num_receive))
 
-…
+        # convert the data from shm to python data
+        if op_type == cde.MAP_OP:
+            ## 2. construct shared memory to TensorRow which contains one / more columns
+            tensor_row = shm_queue.to_tensor_row(msg_queue.shm_id, msg_queue.shm_size)
 
+            ## 3. convert TensorRow to a Python tuple whose elements are columns
+            tuple_column = cde.convert_tensor_row_to_py_tuple(tensor_row)
 
-…
-        self.worker_id = worker_id
-        logger.info("Multiprocessing start method: {}".format(multiprocessing.get_start_method()))
+            py_func_input = tuple_column
+        elif op_type == cde.BATCH_OP:
+            ## 2. construct shared memory to TensorTable which contains one / more TensorRow & CBatchInfo
+            tensor_table, batch_info, _ = shm_queue.to_tensor_table(msg_queue.shm_id, msg_queue.shm_size)
 
-…
+            ## 3. convert TensorTable to a Python tuple of tuples
+            # The tuple indicates the multiple columns
+            # The list indicates the multiple rows
+            tuple_list_column = cde.convert_tensor_table_to_py_tuple_list(tensor_table)
 
+            py_func_input = (*tuple_list_column, batch_info)
+        else:
+            raise RuntimeError("The op_type: {} is invalid.".format(op_type))
 
-…
-        """
+        # execute the pyfunc
+        try:
+            py_func_output = py_func_input
 
-…
-        super().__init__(target=worker_target(operations, worker_id), name="MapWorker" + str(worker_id),
-                         args=(self.pipe,), daemon=True)
-
-    def execute(self, idx, *args):
-        """Acquiring data from a worker in an infinite loop"""
-        self.pipe.master_send(idx, args)
-        time_s = time.time()
-        wait_count = 1
-        while True:
-            cost_time = time.time() - time_s
-            if cost_time / self.check_interval >= wait_count:
-                wait_count += 1
-                logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
-                               "worker of the map operation is hanging. "
-                               "Check whether the user defined data transform is too slow or the "
-                               "output data is too large. You can also set the timeout interval by "
-                               "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
-                               "of this log.")
-                pid = self.pid
-                logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
-                install_status, _ = subprocess.getstatusoutput("py-spy --version")
-                if install_status == 0:
-                    stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
-                    logger.warning("Map worker subprocess stack:\n{}".format(stack))
                 else:
-…
+                    py_func_output = operations[idx](py_func_output)
+
+            # execute the remaining operations
+            for idx in range(len(operations)):
+                if isinstance(py_func_output, tuple):
+                    py_func_output = operations[idx](*py_func_output)
+
+            # << send procedure <<
+            # the result is None
+            if py_func_output is None:
+                raise RuntimeError("Got None from Python Function which is defined by {}".format(op_type))
+
+            # convert the output to tuple
+            if not isinstance(py_func_output, tuple):
+                py_func_output = (py_func_output,)
+
+            if op_type == cde.MAP_OP:
+                # check if the map returns a Generator type
+                for item in py_func_output:
+                    if isinstance(item, GeneratorType):
+                        raise RuntimeError("Cannot pickle <class 'generator'> object, please verify pyfunc "
+                                           "return with numpy array")
+
+                ## 1. convert Python tuple to TensorRow
+                output_tensor_row = cde.convert_py_tuple_to_tensor_row(py_func_output)
+
+                ## 2. convert TensorRow to shared memory
+                shm_queue.from_tensor_row(output_tensor_row)
+            elif op_type == cde.BATCH_OP:
+                ## 1. convert Python tuple of tuples to TensorTable
+                output_tensor_table, concat_batch = cde.convert_py_tuple_list_to_tensor_table(py_func_output)
+
+                ## 2. convert TensorTable to shared memory
+                shm_queue.from_tensor_table(output_tensor_table, batch_info, concat_batch)
+            else:
+                raise RuntimeError("The op_type: {} is invalid.".format(op_type))
+
+            ## 3. send message queue which contains shared memory to map C++ thread in main process
+            cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                           msg_queue.msg_queue_id)
+            msg_queue.msg_snd(cde.WORKER_SEND_DATA_MSG, shm_queue.get_shm_id(), shm_queue.get_shm_size())
+            cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                           msg_queue.msg_queue_id)
+
+            num_send += 1
+            logger.info("Python process {} worker({}) sends {} samples to map thread.".format(op_type, worker_id,
+                                                                                              num_send))
+        except Exception:
             try:
-…
+                if op_type == cde.MAP_OP:
+                    pyfunc_err = ExceptionHandler(where="in map worker and execute Python function")
+                elif op_type == cde.BATCH_OP:
+                    pyfunc_err = ExceptionHandler(where="in batch(per_batch_map) worker and execute Python function")
+                else:
+                    pyfunc_err = "The op_type: {} is invalid.".format(op_type)
+                pyfunc_err.reraise()
+            except Exception as err:
+                _, _, exc_tb = sys.exc_info()
+                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+
+                if op_type == cde.MAP_OP:
+                    logger.info("Got exception {} from Map Worker({})".format(str(err), worker_id))
+                elif op_type == cde.BATCH_OP:
+                    logger.info("Got exception {} from Batch Worker({})".format(str(err), worker_id))
+                else:
+                    logger.info("The op_type: {} is invalid.".format(op_type))
+
+                # err_code, lineno, filename, err_desc
+                msg_queue.serialize_status(cde.StatusCode.MD_PY_FUNC_EXCEPTION, exc_tb.tb_lineno, fname, str(err))
+
+                cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                               msg_queue.msg_queue_id)
+                msg_queue.msg_snd(cde.WORKER_SEND_DATA_MSG, shm_queue.get_shm_id(), shm_queue.get_shm_size())
+                cde.register_shm_id_and_msg_id(pid + "_" + ppid + "_" + str(op_type), shm_queue.get_shm_id(),
+                                               msg_queue.msg_queue_id)
+
+                # worker error
+                if get_error_samples_mode() == ErrorSamplesMode.RETURN:
+                    break
+                else:
+                    # continue the loop when get_error_samples_mode() is REPLACE or SKIP
+                    continue
+
+    # release the eager executor which is used by the current process
+    transforms.transforms.clean_unused_executors()
+
+    while not _main_process_already_exit():
+        # quit by close_worker
+        if quit_signal.is_set():
             return
-        return
 
-…
+        logger.info("The worker process is waiting for the main process to exit.")
+        time.sleep(0.1)
+
+    # the main process no longer exists (it may have been killed with -9), so release shm & msg here
+    msg_queue.set_release_flag(True)
+    msg_queue.release()
+    shm_queue.set_release_flag(True)
+    shm_queue.release()
+
+
+class WorkerTarget:
+    """Multiprocess mode for dataset map or batch"""
+    def __init__(self, quit_signal, operations, worker_id, op_type, ftok_key):
+        self.quit_signal = quit_signal
+        self.operations = operations
+        self.worker_id = worker_id
+        self.op_type = op_type
+        self.ftok_key = ftok_key
+        start_method = multiprocessing.get_start_method()
+        logger.info("Multiprocessing start method: {}".format(start_method))
+        self.video_backend = get_video_backend() if start_method == 'spawn' else None
+
+    def __call__(self):
+        return _worker_loop(self.quit_signal, self.operations, self.worker_id, self.op_type, self.ftok_key,
+                            self.video_backend)
 
 
 def worker_is_alive(worker):
```

(Removed lines shown as `…` were not preserved in this rendering.)
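The new `_worker_loop` above drives everything through MindSpore's internal `cde` message-queue and shared-memory bindings, which have no public Python equivalent. Below is a rough, stdlib-only sketch of the same control flow — quit event, blocking receive, pyfunc chain, result or serialized error sent back — in which `mp.Queue` objects stand in for the real msg-queue/shm transport and all names (`worker_loop`, `double`, the `("data", ...)`/`("error", ...)` envelope) are illustrative:

```python
import queue
import multiprocessing as mp

def worker_loop(quit_signal, operations, in_queue, out_queue):
    """Sketch of the worker control flow; mp.Queue stands in for msg queue + shm."""
    while not quit_signal.is_set():
        try:
            sample = in_queue.get(timeout=0.1)   # ~ msg_queue.msg_rcv + shm decode
        except queue.Empty:
            continue                             # re-check the quit flag and retry
        try:
            output = sample
            for op in operations:                # run the user pyfunc chain
                output = op(*output) if isinstance(output, tuple) else op(output)
            if output is None:
                raise RuntimeError("Got None from Python function")
            if not isinstance(output, tuple):
                output = (output,)
            out_queue.put(("data", output))      # ~ shm encode + msg_snd
        except Exception as err:                 # errors travel back as data,
            out_queue.put(("error", repr(err)))  # so the worker survives bad samples

def double(x):
    return x * 2

if __name__ == "__main__":
    quit_signal, inq, outq = mp.Event(), mp.Queue(), mp.Queue()
    w = mp.Process(target=worker_loop, args=(quit_signal, [double], inq, outq), daemon=True)
    w.start()
    inq.put((21,))
    print(outq.get())                            # ('data', (42,))
    quit_signal.set()
    w.join()
```

The key design point the diff makes, reflected in the sketch: pyfunc exceptions are serialized and sent back to the main process instead of killing the worker, so `ErrorSamplesMode.REPLACE`/`SKIP` can keep the loop alive.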
```diff
@@ -3305,24 +3296,31 @@ def worker_is_alive(worker):
     return False
 
 
-def close_worker(worker, pipe):
+def close_worker(worker, eof):
     """Close the subprocess worker in spawn mode"""
     try:
         if worker_is_alive(worker):
             # release the eager executor which is used by current process
             transforms.transforms.clean_unused_executors()
 
-…
+            # let the worker exit
+            logger.info("Set eof flag for worker with PID: {}.".format(worker.pid))
+            eof.set()
+
+            # wait timeout
+            wait_timeout = 2
+            start_time = time.time()
 
             process_dir = os.path.join('/proc', str(worker.pid))
             while worker_is_alive(worker) and os.path.exists(process_dir):
                 logger.info("Waiting for worker {} closed ...".format(worker.pid))
                 time.sleep(0.5)
 
+                # maybe the worker is hung by msg_queue.MsgRcv, so break the loop and terminate it in the next step
+                if time.time() - start_time > wait_timeout:
+                    break
+
             # del the handle which is held by the master
-            del pipe.in_queue
-            del pipe.res_queue
             worker.terminate()
             worker.join()
             worker.close()
```
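`close_worker` now follows a common two-phase shutdown: signal the worker with an event, wait a bounded time (the worker may be blocked in a receive), then fall back to terminating. A generic sketch of the pattern — the 2-second timeout mirrors the diff, while `shutdown_worker` and its structure are illustrative:

```python
import time
import multiprocessing as mp

def shutdown_worker(worker, eof_event, wait_timeout=2.0):
    """Cooperative stop first, forced terminate as the fallback."""
    if not worker.is_alive():
        return
    eof_event.set()                               # worker loop checks this flag and returns
    deadline = time.monotonic() + wait_timeout
    while worker.is_alive() and time.monotonic() < deadline:
        time.sleep(0.1)                           # worker may be hung in a blocking receive
    worker.terminate()                            # harmless if the worker already exited
    worker.join()
    worker.close()                                # release the Process handle
```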
```diff
@@ -3379,7 +3377,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         self.warning_ctl = None
         # cache thread (get_ident()) to worker_id mapping in Python layer
         self.python_threads_to_workers = {}
-        self.…
+        self.eof_workers = []
+        self.eof_clean_process = None
         self.running = False
 
     def __del__(self):
@@ -3455,19 +3454,39 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         del workers
         os.kill(os.getpid(), signal.SIGTERM)
 
-    def launch(self, op_id):
+    def launch(self, op_id, op_type, ftok_keys):
         """
         Launch Python multiprocessing pool.
 
         Args:
-            op_id: ID for operation to have Python multiprocessing pool launched
+            op_id (int): ID for operation to have Python multiprocessing pool launched
+            op_type (str): Indicates MapOp / BatchOp
+            ftok_keys (list[int]): the list of ftok keys for the msg queue and shm queue
 
         Returns:
             Python multiprocessing pool is launched.
         """
         self.python_threads_to_workers = {}
+
+        if not isinstance(op_id, int):
+            raise RuntimeError("The op_id is not int.")
         self.op_id = op_id
-
+
+        valid_op_type = [cde.MAP_OP, cde.BATCH_OP]
+        if op_type not in valid_op_type:
+            raise RuntimeError("The op_type: {} is not in {}.".format(op_type, valid_op_type))
+        self.op_type = op_type
+
+        if not isinstance(ftok_keys, list):
+            raise RuntimeError("The ftok_keys is not a list.")
+        if not all(isinstance(x, int) for x in ftok_keys):
+            raise RuntimeError("The items in ftok_keys are not all int.")
+        if len(ftok_keys) != self.num_parallel_workers:
+            raise RuntimeError("The length of ftok_keys is not equal to num_parallel_workers.")
+        self.ftok_keys = ftok_keys
+
+        logger.info("Launching new Python multiprocessing pool for Op: " + self.op_type + "(" + str(self.op_id) + \
+                    "), ftok_keys: " + str(self.ftok_keys))
         if self.is_mp_enabled():
             message = "Launching a new Python multiprocessing pool while a pool already exists!" + \
                       " The existing pool will be terminated first."
@@ -3490,30 +3509,21 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             raise Exception("Pool was already created, close it first.")
 
         self.workers = []
-        self.pipes = []
-        self.check_interval = get_multiprocessing_timeout_interval()
         self.warning_ctl = multiprocessing.Value('i', 0)
-        if self.start_method == "fork":
-            # Construct python worker processes
-            for worker_id in range(self.num_parallel_workers):
-                worker = _MPWorker(self.operations, self.warning_ctl, self.max_rowsize, worker_id)
-                worker.start()
-                self.workers.append(worker)
-        else:
-            multiprocessing.set_start_method(self.start_method, True)
 
-…
+        multiprocessing.set_start_method(self.start_method, True)
+
+        # Construct python worker processes
+        for worker_id in range(self.num_parallel_workers):
+            eof = multiprocessing.Event()
+            worker = multiprocessing.Process(target=WorkerTarget(eof, self.operations, worker_id, self.op_type,
+                                                                 self.ftok_keys[worker_id]),
+                                             name="MapWorker" + str(worker_id), daemon=True)
+            self.eof_workers.append(eof)
+            self.workers.append(worker)
+            worker.start()
 
-…
+        multiprocessing.set_start_method("fork", True)
 
         logger.info("Launch worker process(es): {}".format(self.get_pids()))
@@ -3527,6 +3537,20 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         atexit.register(lambda cleanup: cleanup()() if cleanup() is not None else None,
                         weakref.WeakMethod(self.terminate))
 
+        # Ensure that all workers are in the running state
+        start = time.time()
+        wait_time = 120  # 120s
+        while True:
+            if self.is_running():
+                logger.info("All workers have reached the running state.")
+                break
+            else:
+                time.sleep(0.5)
+                if time.time() - start > wait_time:
+                    logger.error("All worker processes have not reached the running state within " + str(wait_time) +
+                                 " seconds, data processing errors may occur.")
+                    break
+
     def terminate(self):
         if self.running:
             # abort the monitor first and then close all the workers
@@ -3555,7 +3579,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                 continue
         return self.pids
 
-    def add_new_workers(self, num_new_workers):
+    def add_new_workers(self, num_new_workers, op_type, ftok_keys):
+        """Used by AutoTune"""
         logger.info(
             "Increasing num_parallel_workers of Python Multiprocessing pool for Op:" + str(self.op_id) +
             ", old num_workers=" + str(self.num_parallel_workers) + " new num_workers=" + str(
@@ -3563,9 +3588,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             num_new_workers) + ".")
         self.terminate()
         self.num_parallel_workers += num_new_workers
-        self.launch(self.op_id)
 
-…
+        if self.num_parallel_workers != len(ftok_keys):
+            raise RuntimeError("Add new workers failed, the num_workers is not equal to the size of ftok_keys.")
+
+        self.launch(self.op_id, op_type, ftok_keys)
+
+    def remove_workers(self, num_removed_workers, op_type, ftok_keys):
+        """Used by AutoTune"""
         logger.info(
             "Decreasing num_parallel_workers of Python Multiprocessing pool for Op:" + str(self.op_id) +
             ", old num_workers=" + str(self.num_parallel_workers) + " new num_workers=" + str(
@@ -3573,59 +3603,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             num_removed_workers) + ".")
         self.terminate()
         self.num_parallel_workers -= num_removed_workers
-        self.launch(self.op_id)
 
-…
+        if self.num_parallel_workers != len(ftok_keys):
+            raise RuntimeError("Remove workers failed, the num_workers is not equal to the size of ftok_keys.")
 
-…
-        """
-        Execute
-        """
-        t_id = threading.get_ident()
-        # get the worker_id from Python layer cache first, get from Cpp layer if not found.
-        worker_id = self.python_threads_to_workers.setdefault(t_id, self.get_thread_to_worker())
-        if worker_id >= len(self.workers):
-            raise RuntimeError("[Internal] worker_id value is greater than number of available workers!")
-
-        # todo check_iterator_cleanup
-        if self.is_running() and check_iterator_cleanup() is False:
-            if self.start_method == "fork":
-                return self.workers[worker_id].execute(idx, *args)
-            # spawn mode
-            self.pipes[worker_id].master_send(idx, args)
-            time_s = time.time()
-            wait_count = 1
-            while True:
-                cost_time = time.time() - time_s
-                if cost_time / self.check_interval >= wait_count:
-                    wait_count += 1
-                    logger.warning("It has been waiting for " + "%.3f" % cost_time + "s because the sub-process "
-                                   "worker of the map operation is hanging. "
-                                   "Check whether the user defined data transform is too slow or the "
-                                   "output data is too large. You can also set the timeout interval by "
-                                   "ds.config.set_multiprocessing_timeout_interval to adjust the output frequency "
-                                   "of this log.")
-                    pid = self.workers[worker_id].pid
-                    logger.warning("Map worker subprocess ID {} is stuck.".format(pid))
-                    install_status, _ = subprocess.getstatusoutput("py-spy --version")
-                    if install_status == 0:
-                        stack = subprocess.getoutput("py-spy dump -p {} -l".format(pid))
-                        logger.warning("Map worker subprocess stack:\n{}".format(stack))
-                    else:
-                        logger.warning("Please `pip install py-spy` to get the stacks of the stuck process.")
-                try:
-                    res = self.pipes[worker_id].master_receive()
-                except queue.Empty:
-                    continue
-                if res is None:
-                    # receive finish signal
-                    return None
-                if isinstance(res, ExceptionHandler):
-                    res.reraise()
-                return res
+        self.launch(self.op_id, op_type, ftok_keys)
 
-…
+    def is_mp_enabled(self):
+        return self.workers is not None
 
     def _launch_monitor(self):
         """
@@ -3634,10 +3619,10 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         The watch dog will clean up subprocesses and main process when any subprocess exited.
         """
         if platform.system().lower() != 'windows':
-            self.…
+            self.eof_clean_process = multiprocessing.Event()
             self.cleaning_process = multiprocessing.Process(target=self._clean_process,
                                                             name="MapCleanProcess",
-                                                            args=(self.ppid, self.workers, self.…
+                                                            args=(self.ppid, self.workers, self.eof_clean_process),
                                                             daemon=True)
             self.cleaning_process.start()
             logger.info("Launch clean process {} to monitor worker "
@@ -3653,8 +3638,9 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
         """Deregister workers monitored by the watch dog and join clean process."""
         if get_enable_watchdog():
             cde.deregister_worker_pids(id(self))
-        if hasattr(self, 'eof') and self.…
-…
+        if hasattr(self, 'eof') and self.eof_clean_process is not None:
+            logger.info("Set eof flag for cleaning_process.")
+            self.eof_clean_process.set()
         if hasattr(self, 'cleaning_process') and self.cleaning_process is not None:
             # let the quit event notify the cleaning process to exit
             self.cleaning_process.join(timeout=5)
@@ -3665,20 +3651,14 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
 
     def is_running(self):
         if hasattr(self, 'workers') and self.workers is not None:
-            if self.start_method == "fork":
-                return all([w.is_alive() for w in self.workers])
             return all([worker_is_alive(w) for w in self.workers])
         return False
 
     def close_all_workers(self):
        """Close all the subprocess workers"""
         if hasattr(self, 'workers') and self.workers is not None:
-…
-                w.close()
-            else:
-                for i, w in enumerate(self.workers):
-                    close_worker(w, self.pipes[i])
+            for index in range(len(self.workers)):
+                close_worker(self.workers[index], self.eof_workers[index])
 
         check_interval = get_multiprocessing_timeout_interval()
         for w in self.workers:
@@ -3695,12 +3675,8 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
                     continue
                 raise e
             try:
-                if …
-…
-                    os.close(subprocess_file_descriptor)
-                else:
-                    if worker_is_alive(w):
-                        os.close(subprocess_file_descriptor)
+                if worker_is_alive(w):
+                    os.close(subprocess_file_descriptor)
             except OSError as e:
                 # Maybe the file descriptor had been released, so ignore the 'Bad file descriptor'
                 if "Bad file descriptor" not in str(e):
@@ -3709,8 +3685,12 @@ class _PythonMultiprocessing(cde.PythonMultiprocessingRuntime):
             # use clear to release the handle which is better than self.workers = None
             self.workers.clear()
             self.workers = None
-            self.…
-            self.…
+            self.eof_workers.clear()
+            self.eof_workers = []
+
+            # as it can cause the main process to not exit when PyFunc executes very slowly, release
+            # the shm & msg here
+            cde.release_shm_and_msg_by_worker_pids(self.pids)
             self.pids = None
 
 
```

(Removed lines shown as `…` were truncated or not preserved in this rendering.)
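`launch` now validates `op_id`, `op_type`, and `ftok_keys` up front, then blocks until `is_running()` reports every worker alive, logging an error after 120 s instead of failing hard. The polling idiom, sketched with an assumed generic predicate (`wait_until_ready` is illustrative; the 120 s cap and 0.5 s poll interval come from the diff):

```python
import time

def wait_until_ready(is_running, wait_time=120, poll_interval=0.5):
    """Poll a readiness predicate with a deadline; on timeout, warn and carry on."""
    start = time.monotonic()
    while not is_running():
        if time.monotonic() - start > wait_time:
            return False   # caller logs "data processing errors may occur" and proceeds
        time.sleep(poll_interval)
    return True
```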
```diff
@@ -3788,7 +3768,22 @@ class MapDataset(UnionBaseDataset):
 
         count_old_transforms, count_new_transforms, count_non_data_vision_transforms = \
             self.__count_transforms(operations)
+        count_py_ops = self.__count_py_ops(operations)
         count_pyfunc = self.__count_pyfuncs(operations)
+
+        # Whether to execute ops in the thread mode
+        # op_type                        python_multiprocessing    run_in_thread
+        # c_op(s)                        false                     yes
+        # c_op(s)                        true                      yes
+        # py_op(s) / PyFunc              false                     yes
+        # py_op(s) / PyFunc              true                      no
+        # c_op(s) + py_op(s) / PyFunc    false                     yes
+        # c_op(s) + py_op(s) / PyFunc    true                      no
+        run_in_thread = not self.python_multiprocessing or (count_pyfunc == 0 and count_py_ops == 0) or get_debug_mode()
+
+        if self.python_multiprocessing and platform.system().lower() == 'windows':
+            run_in_thread = True
+
         if count_new_transforms + count_pyfunc == len(operations):
             prev_op = None
             for op in operations:
@@ -3806,18 +3801,43 @@ class MapDataset(UnionBaseDataset):
                     op.implementation = Implementation.C
                 prev_op = op
             operations = self.__insert_debug_wrapper(operations)
-            operations = transforms.transforms.Compose.reduce(operations)
+            if run_in_thread:
+                operations = transforms.transforms.Compose.reduce(operations)
         elif count_old_transforms + count_pyfunc + count_non_data_vision_transforms == len(operations):
             operations = self.__insert_debug_wrapper(operations)
-            operations = transforms.py_transforms.Compose.reduce(operations)
+            if run_in_thread:
+                operations = transforms.py_transforms.Compose.reduce(operations)
         else:
             raise RuntimeError("Mixing old legacy c/py_transforms and new unified transforms is not allowed.")
 
-        self.operations = self.__process_final_operations(operations)
+        if run_in_thread:
+            self.operations = self.__process_final_operations(operations)
+        else:
+            self.operations = operations
         self.prepare_multiprocessing()
 
         callbacks = [cb.create_runtime_obj() for cb in self.callbacks]
-        return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
+
+        ## thread mode
+        if run_in_thread:
+            return cde.MapNode(children[0], self.operations, self.input_columns, self.output_columns,
+                               callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
+
+        # Bind self.operations with self.process_pool
+        class _BindProcessPoolWithOperations:
+            def __init__(self, pool, operations):
+                self.pool = pool
+                self.operations = operations
+
+            def __call__(self):
+                pass
+
+        self.bound = _BindProcessPoolWithOperations(self.process_pool, self.operations)
+
+        ## process mode
+        # in multiprocess mode, we just transfer self.bound, which is not really used in the C++ layer,
+        # because when the pipeline is running, the map thread transfers data through C++ shm & msg to the
+        # Python worker process
+        return cde.MapNode(children[0], [self.bound], self.input_columns, self.output_columns,
                            callbacks, OffloadToManualOffloadMode.get(self.offload), self.process_pool)
 
     def __deepcopy__(self, memodict):
@@ -3870,10 +3890,22 @@ class MapDataset(UnionBaseDataset):
     @staticmethod
     def __count_pyfuncs(operations):
         """
-        Count the number of pyfuncs operations
+        Count the number of pyfunc operations which are defined by the user
         """
         return sum([1 if isinstance(op, FuncWrapper) else 0 for op in operations])
 
+    @staticmethod
+    def __count_py_ops(operations):
+        """
+        Count the number of Python operations which are built-in
+        """
+        count = 0
+        for op in operations:
+            if hasattr(op, "implementation") and op.implementation != Implementation.C \
+                    and op.implementation is not None:
+                count += 1
+        return count
+
     @staticmethod
     def __count_transforms(operations):
         """
@@ -3937,7 +3969,6 @@ class MapDataset(UnionBaseDataset):
                            " Ignoring Python multiprocessing for map operation.")
                 return
             if self.python_multiprocessing:
-                iter_specific_operations = []
                 callable_list = []
 
                 # If user didn't specify num_parallel_workers, set it to default
@@ -3954,18 +3985,6 @@ class MapDataset(UnionBaseDataset):
                 self.process_pool = _PythonMultiprocessing(get_multiprocessing_start_method(),
                                                            self.num_parallel_workers, str(self),
                                                            callable_list, self.max_rowsize)
-                # Pass #2
-                idx = 0
-                for op in self.operations:
-                    # our c transforms is now callable and should not be run in Python multithreading
-                    if MapDataset.__operation_valid_for_multiprocessing(op):
-                        # Wrap Python callable into _PythonCallable
-                        iter_specific_operations.append(_PythonCallable(op, idx, self.process_pool))
-                        idx += 1
-                    else:
-                        # CPP ops remain the same
-                        iter_specific_operations.append(op)
-                self.operations = iter_specific_operations
 
     def __insert_debug_wrapper(self, operations):
         """
```
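The thread-vs-process decision table added to `MapDataset.parse()` reduces to a short predicate. The sketch below restates it; the standalone function is illustrative, while the conditions mirror `run_in_thread = not self.python_multiprocessing or (count_pyfunc == 0 and count_py_ops == 0) or get_debug_mode()` plus the Windows fallback:

```python
def run_in_thread(python_multiprocessing, count_pyfunc, count_py_ops,
                  debug_mode, is_windows):
    """True -> execute the map ops in C++ threads; False -> ship them to
    Python worker processes over the new shm/msg-queue transport."""
    if not python_multiprocessing:
        return True          # multiprocessing not requested
    if count_pyfunc == 0 and count_py_ops == 0:
        return True          # pure C ops: nothing Python to run in workers
    if debug_mode or is_windows:
        return True          # debug mode and Windows fall back to threads
    return False
```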