PyPI - mindspore - Versions diffs - 2.5.0__cp39-cp39-win_amd64.whl → 2.6.0rc1__cp39-cp39-win_amd64.whl - Mend

mindspore 2.5.0__cp39-cp39-win_amd64.whl → 2.6.0rc1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic. Click here for more details.

Files changed (491)

mindspore/.commit_id +1 -1
mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
mindspore/Newtonsoft.Json.dll +0 -0
mindspore/__init__.py +6 -4
mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
mindspore/_check_jit_forbidden_api.py +3 -0
mindspore/_checkparam.py +3 -33
mindspore/_deprecated/__init__.py +17 -0
mindspore/_deprecated/jit.py +198 -0
mindspore/_extends/builtin_operations.py +1 -1
mindspore/_extends/parse/__init__.py +6 -7
mindspore/_extends/parse/compile_config.py +19 -0
mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +22 -3
mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
mindspore/_extends/parse/parser.py +24 -193
mindspore/_extends/parse/resources.py +1 -5
mindspore/_extends/parse/standard_method.py +97 -74
mindspore/_extends/pijit/__init__.py +2 -2
mindspore/_extends/pijit/pijit_func_white_list.py +16 -11
mindspore/_extends/pijit/tensor_func_list.py +27 -0
mindspore/_extends/utils.py +1 -1
mindspore/amp.py +4 -4
mindspore/atlprov.dll +0 -0
mindspore/avcodec-59.dll +0 -0
mindspore/avdevice-59.dll +0 -0
mindspore/avfilter-8.dll +0 -0
mindspore/avformat-59.dll +0 -0
mindspore/avutil-57.dll +0 -0
mindspore/boost/__init__.py +2 -2
mindspore/boost/base.py +3 -7
mindspore/boost/boost_cell_wrapper.py +2 -2
mindspore/c1.dll +0 -0
mindspore/c1xx.dll +0 -0
mindspore/c2.dll +0 -0
mindspore/common/__init__.py +4 -3
mindspore/common/_grad_function.py +56 -0
mindspore/common/_pijit_context.py +14 -5
mindspore/common/_register_for_tensor.py +1 -1
mindspore/common/_stub_tensor.py +5 -10
mindspore/common/_tensor_cpp_method.py +1 -1
mindspore/common/_tensor_docs.py +1915 -3287
mindspore/common/api.py +341 -354
mindspore/common/auto_dynamic_shape.py +41 -44
mindspore/common/dtype.py +5 -2
mindspore/common/dump.py +7 -5
mindspore/common/file_system.py +3 -0
mindspore/common/hook_handle.py +5 -3
mindspore/common/initializer.py +10 -6
mindspore/common/jit_begin_end.py +94 -0
mindspore/common/jit_config.py +6 -1
mindspore/common/jit_context.py +76 -0
mindspore/common/jit_trace.py +378 -0
mindspore/common/lazy_inline.py +2 -2
mindspore/common/mutable.py +5 -4
mindspore/common/parameter.py +106 -39
mindspore/common/seed.py +2 -2
mindspore/common/sparse_tensor.py +23 -17
mindspore/common/tensor.py +297 -714
mindspore/communication/__init__.py +7 -5
mindspore/communication/_comm_helper.py +47 -2
mindspore/communication/comm_func.py +70 -53
mindspore/communication/management.py +83 -17
mindspore/context.py +214 -560
mindspore/dataset/__init__.py +44 -20
mindspore/dataset/audio/__init__.py +2 -8
mindspore/dataset/audio/transforms.py +3 -17
mindspore/dataset/core/config.py +3 -3
mindspore/dataset/engine/cache_client.py +1 -1
mindspore/dataset/engine/datasets.py +102 -120
mindspore/dataset/engine/datasets_audio.py +22 -22
mindspore/dataset/engine/datasets_standard_format.py +43 -24
mindspore/dataset/engine/datasets_text.py +78 -85
mindspore/dataset/engine/datasets_user_defined.py +108 -76
mindspore/dataset/engine/datasets_vision.py +111 -108
mindspore/dataset/engine/iterators.py +5 -3
mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
mindspore/dataset/engine/samplers.py +279 -57
mindspore/dataset/engine/serializer_deserializer.py +2 -1
mindspore/dataset/engine/validators.py +10 -0
mindspore/dataset/text/__init__.py +7 -6
mindspore/dataset/text/transforms.py +6 -5
mindspore/dataset/text/utils.py +3 -3
mindspore/dataset/transforms/__init__.py +0 -9
mindspore/dataset/transforms/transforms.py +3 -3
mindspore/dataset/utils/browse_dataset.py +1 -1
mindspore/dataset/vision/__init__.py +2 -9
mindspore/dataset/vision/transforms.py +202 -158
mindspore/dataset/vision/utils.py +7 -5
mindspore/device_context/ascend/op_debug.py +60 -1
mindspore/device_context/ascend/op_tuning.py +0 -4
mindspore/device_manager.py +39 -3
mindspore/dnnl.dll +0 -0
mindspore/dpcmi.dll +0 -0
mindspore/experimental/es/embedding_service.py +35 -27
mindspore/experimental/map_parameter.py +4 -4
mindspore/experimental/optim/adadelta.py +22 -26
mindspore/experimental/optim/adagrad.py +4 -4
mindspore/experimental/optim/adam.py +4 -0
mindspore/experimental/optim/adamax.py +4 -4
mindspore/experimental/optim/adamw.py +4 -0
mindspore/experimental/optim/asgd.py +1 -1
mindspore/experimental/optim/lr_scheduler.py +40 -22
mindspore/experimental/optim/radam.py +5 -5
mindspore/experimental/optim/rprop.py +1 -1
mindspore/experimental/optim/sgd.py +1 -1
mindspore/hal/contiguous_tensors_handle.py +6 -10
mindspore/hal/device.py +55 -81
mindspore/hal/event.py +38 -55
mindspore/hal/memory.py +93 -144
mindspore/hal/stream.py +81 -125
mindspore/include/dataset/constants.h +7 -4
mindspore/include/dataset/execute.h +2 -2
mindspore/jpeg62.dll +0 -0
mindspore/log.py +40 -2
mindspore/mindrecord/__init__.py +20 -7
mindspore/mindspore_backend_common.dll +0 -0
mindspore/mindspore_backend_manager.dll +0 -0
mindspore/mindspore_common.dll +0 -0
mindspore/mindspore_core.dll +0 -0
mindspore/mindspore_dump.dll +0 -0
mindspore/mindspore_frontend.dll +0 -0
mindspore/mindspore_glog.dll +0 -0
mindspore/mindspore_memory_pool.dll +0 -0
mindspore/mindspore_ms_backend.dll +0 -0
mindspore/mindspore_ops.dll +0 -0
mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
mindspore/mindspore_ops_kernel_common.dll +0 -0
mindspore/mindspore_profiler.dll +0 -0
mindspore/mindspore_pyboost.dll +0 -0
mindspore/mindspore_pynative.dll +0 -0
mindspore/mindspore_res_manager.dll +0 -0
mindspore/mindspore_runtime_pipeline.dll +0 -0
mindspore/mint/__init__.py +131 -700
mindspore/mint/distributed/__init__.py +5 -1
mindspore/mint/distributed/distributed.py +194 -109
mindspore/mint/linalg/__init__.py +2 -0
mindspore/mint/nn/__init__.py +280 -18
mindspore/mint/nn/functional.py +282 -64
mindspore/mint/nn/layer/__init__.py +4 -0
mindspore/mint/nn/layer/_functions.py +7 -3
mindspore/mint/nn/layer/activation.py +120 -13
mindspore/mint/nn/layer/conv.py +218 -24
mindspore/mint/nn/layer/normalization.py +15 -16
mindspore/mint/nn/layer/padding.py +1 -1
mindspore/mint/nn/layer/pooling.py +66 -1
mindspore/mint/optim/__init__.py +2 -1
mindspore/mint/optim/sgd.py +171 -0
mindspore/msobj140.dll +0 -0
mindspore/mspdb140.dll +0 -0
mindspore/mspdbcore.dll +0 -0
mindspore/mspdbst.dll +0 -0
mindspore/mspft140.dll +0 -0
mindspore/msvcdis140.dll +0 -0
mindspore/msvcp140_1.dll +0 -0
mindspore/msvcp140_2.dll +0 -0
mindspore/msvcp140_atomic_wait.dll +0 -0
mindspore/msvcp140_codecvt_ids.dll +0 -0
mindspore/nn/__init__.py +4 -1
mindspore/nn/cell.py +1250 -176
mindspore/nn/layer/activation.py +23 -21
mindspore/nn/layer/basic.py +22 -16
mindspore/nn/layer/container.py +1 -1
mindspore/nn/layer/conv.py +22 -17
mindspore/nn/layer/embedding.py +9 -8
mindspore/nn/layer/normalization.py +48 -42
mindspore/nn/layer/pooling.py +75 -31
mindspore/nn/layer/transformer.py +11 -10
mindspore/nn/learning_rate_schedule.py +4 -2
mindspore/nn/loss/loss.py +27 -19
mindspore/nn/optim/ada_grad.py +6 -5
mindspore/nn/optim/adadelta.py +9 -7
mindspore/nn/optim/adafactor.py +1 -1
mindspore/nn/optim/adam.py +16 -12
mindspore/nn/optim/adamax.py +8 -7
mindspore/nn/optim/adasum.py +5 -5
mindspore/nn/optim/asgd.py +1 -1
mindspore/nn/optim/ftrl.py +11 -9
mindspore/nn/optim/lamb.py +1 -1
mindspore/nn/optim/lazyadam.py +12 -10
mindspore/nn/optim/momentum.py +7 -6
mindspore/nn/optim/optimizer.py +2 -2
mindspore/nn/optim/proximal_ada_grad.py +12 -10
mindspore/nn/optim/rmsprop.py +13 -12
mindspore/nn/optim/rprop.py +9 -7
mindspore/nn/optim/sgd.py +9 -6
mindspore/nn/optim/tft_wrapper.py +5 -2
mindspore/nn/probability/bijector/bijector.py +17 -11
mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
mindspore/nn/probability/bijector/invert.py +2 -2
mindspore/nn/probability/bijector/scalar_affine.py +3 -3
mindspore/nn/probability/bijector/softplus.py +3 -2
mindspore/nn/probability/distribution/beta.py +3 -3
mindspore/nn/probability/distribution/categorical.py +1 -1
mindspore/nn/probability/distribution/cauchy.py +4 -2
mindspore/nn/probability/distribution/exponential.py +6 -7
mindspore/nn/probability/distribution/gamma.py +2 -2
mindspore/nn/probability/distribution/gumbel.py +2 -2
mindspore/nn/probability/distribution/half_normal.py +5 -3
mindspore/nn/probability/distribution/logistic.py +5 -3
mindspore/nn/probability/distribution/poisson.py +1 -1
mindspore/nn/probability/distribution/uniform.py +5 -3
mindspore/nn/reinforcement/_tensors_queue.py +1 -1
mindspore/nn/reinforcement/tensor_array.py +1 -1
mindspore/nn/wrap/__init__.py +6 -6
mindspore/nn/wrap/cell_wrapper.py +178 -117
mindspore/nn/wrap/grad_reducer.py +45 -36
mindspore/nn/wrap/loss_scale.py +3 -3
mindspore/numpy/array_creations.py +3 -3
mindspore/numpy/array_ops.py +1 -1
mindspore/numpy/math_ops.py +4 -4
mindspore/numpy/utils.py +1 -2
mindspore/numpy/utils_const.py +1 -2
mindspore/opencv_core452.dll +0 -0
mindspore/opencv_imgcodecs452.dll +0 -0
mindspore/opencv_imgproc452.dll +0 -0
mindspore/ops/__init__.py +3 -2
mindspore/ops/_grad_experimental/grad_comm_ops.py +18 -3
mindspore/ops/_grad_experimental/grad_debug_ops.py +8 -1
mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
mindspore/ops/_register_for_op.py +0 -11
mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -4
mindspore/ops/_vmap/vmap_array_ops.py +7 -6
mindspore/ops/_vmap/vmap_grad_nn_ops.py +2 -1
mindspore/ops/_vmap/vmap_math_ops.py +4 -7
mindspore/ops/_vmap/vmap_nn_ops.py +9 -8
mindspore/ops/auto_generate/__init__.py +4 -3
mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +102 -49
mindspore/ops/auto_generate/gen_extend_func.py +281 -135
mindspore/ops/auto_generate/gen_ops_def.py +2574 -2326
mindspore/ops/auto_generate/gen_ops_prim.py +8566 -2755
mindspore/ops/auto_generate/pyboost_inner_prim.py +106 -76
mindspore/ops/composite/__init__.py +2 -1
mindspore/ops/composite/base.py +19 -24
mindspore/ops/composite/math_ops.py +6 -16
mindspore/ops/composite/multitype_ops/__init__.py +5 -2
mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -3
mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
mindspore/ops/composite/multitype_ops/pow_impl.py +2 -1
mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
mindspore/ops/function/__init__.py +28 -2
mindspore/ops/function/_add_attr_func.py +58 -0
mindspore/ops/function/array_func.py +1629 -2345
mindspore/ops/function/clip_func.py +38 -45
mindspore/ops/function/debug_func.py +36 -44
mindspore/ops/function/grad/__init__.py +1 -0
mindspore/ops/function/grad/grad_func.py +104 -71
mindspore/ops/function/image_func.py +1 -1
mindspore/ops/function/linalg_func.py +46 -78
mindspore/ops/function/math_func.py +3035 -3705
mindspore/ops/function/nn_func.py +676 -241
mindspore/ops/function/other_func.py +159 -1
mindspore/ops/function/parameter_func.py +17 -30
mindspore/ops/function/random_func.py +204 -361
mindspore/ops/function/reshard_func.py +4 -70
mindspore/ops/function/sparse_func.py +3 -3
mindspore/ops/function/sparse_unary_func.py +5 -5
mindspore/ops/function/spectral_func.py +25 -58
mindspore/ops/function/vmap_func.py +24 -17
mindspore/ops/functional.py +6 -4
mindspore/ops/functional_overload.py +547 -4
mindspore/ops/op_info_register.py +32 -244
mindspore/ops/operations/__init__.py +10 -5
mindspore/ops/operations/_custom_ops_utils.py +247 -0
mindspore/ops/operations/_grad_ops.py +1 -10
mindspore/ops/operations/_inner_ops.py +5 -76
mindspore/ops/operations/_ms_kernel.py +4 -10
mindspore/ops/operations/_rl_inner_ops.py +1 -1
mindspore/ops/operations/_scalar_ops.py +3 -2
mindspore/ops/operations/_sequence_ops.py +1 -1
mindspore/ops/operations/_tensor_array.py +1 -1
mindspore/ops/operations/array_ops.py +37 -22
mindspore/ops/operations/comm_ops.py +150 -107
mindspore/ops/operations/custom_ops.py +221 -23
mindspore/ops/operations/debug_ops.py +115 -16
mindspore/ops/operations/inner_ops.py +1 -1
mindspore/ops/operations/linalg_ops.py +1 -58
mindspore/ops/operations/manually_defined/_inner.py +1 -1
mindspore/ops/operations/manually_defined/ops_def.py +746 -79
mindspore/ops/operations/math_ops.py +21 -18
mindspore/ops/operations/nn_ops.py +65 -191
mindspore/ops/operations/other_ops.py +62 -9
mindspore/ops/operations/random_ops.py +13 -7
mindspore/ops/operations/reshard_ops.py +1 -1
mindspore/ops/operations/sparse_ops.py +2 -2
mindspore/ops/primitive.py +43 -32
mindspore/ops/tensor_method.py +232 -13
mindspore/ops_generate/__init__.py +0 -5
mindspore/ops_generate/aclnn/__init__.py +0 -0
mindspore/ops_generate/{aclnn_kernel_register_auto_cc_generator.py → aclnn/aclnn_kernel_register_auto_cc_generator.py} +43 -18
mindspore/ops_generate/{gen_aclnn_implement.py → aclnn/gen_aclnn_implement.py} +49 -51
mindspore/ops_generate/api/__init__.py +0 -0
mindspore/ops_generate/{add_tensor_docs_generator.py → api/add_tensor_docs_generator.py} +9 -7
mindspore/ops_generate/{cpp_create_prim_instance_helper_generator.py → api/cpp_create_prim_instance_helper_generator.py} +6 -9
mindspore/ops_generate/{functional_map_cpp_generator.py → api/functional_map_cpp_generator.py} +25 -12
mindspore/ops_generate/{functional_overload_py_generator.py → api/functional_overload_py_generator.py} +8 -6
mindspore/ops_generate/{functions_cc_generator.py → api/functions_cc_generator.py} +14 -10
mindspore/ops_generate/api/gen_api.py +103 -0
mindspore/ops_generate/{op_api_proto.py → api/op_api_proto.py} +98 -69
mindspore/ops_generate/{tensor_func_reg_cpp_generator.py → api/tensor_func_reg_cpp_generator.py} +82 -43
mindspore/ops_generate/common/__init__.py +0 -0
mindspore/ops_generate/common/gen_constants.py +91 -0
mindspore/ops_generate/{gen_utils.py → common/gen_utils.py} +72 -19
mindspore/ops_generate/{op_proto.py → common/op_proto.py} +64 -1
mindspore/ops_generate/{template.py → common/template.py} +96 -84
mindspore/ops_generate/gen_ops.py +23 -325
mindspore/ops_generate/op_def/__init__.py +0 -0
mindspore/ops_generate/op_def/gen_op_def.py +90 -0
mindspore/ops_generate/{lite_ops_cpp_generator.py → op_def/lite_ops_cpp_generator.py} +47 -11
mindspore/ops_generate/{ops_def_cc_generator.py → op_def/ops_def_cc_generator.py} +18 -7
mindspore/ops_generate/{ops_def_h_generator.py → op_def/ops_def_h_generator.py} +5 -5
mindspore/ops_generate/{ops_name_h_generator.py → op_def/ops_name_h_generator.py} +30 -15
mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
mindspore/ops_generate/op_def_py/__init__.py +0 -0
mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
mindspore/ops_generate/{op_def_py_generator.py → op_def_py/op_def_py_generator.py} +6 -5
mindspore/ops_generate/{op_prim_py_generator.py → op_def_py/op_prim_py_generator.py} +24 -15
mindspore/ops_generate/pyboost/__init__.py +0 -0
mindspore/ops_generate/{auto_grad_impl_cc_generator.py → pyboost/auto_grad_impl_cc_generator.py} +11 -7
mindspore/ops_generate/{auto_grad_reg_cc_generator.py → pyboost/auto_grad_reg_cc_generator.py} +7 -7
mindspore/ops_generate/{gen_pyboost_func.py → pyboost/gen_pyboost_func.py} +40 -16
mindspore/ops_generate/{op_template_parser.py → pyboost/op_template_parser.py} +105 -24
mindspore/ops_generate/{pyboost_functions_cpp_generator.py → pyboost/pyboost_functions_cpp_generator.py} +55 -18
mindspore/ops_generate/{pyboost_functions_h_generator.py → pyboost/pyboost_functions_h_generator.py} +42 -10
mindspore/ops_generate/{pyboost_functions_py_generator.py → pyboost/pyboost_functions_py_generator.py} +6 -6
mindspore/ops_generate/{pyboost_grad_function_cpp_generator.py → pyboost/pyboost_grad_function_cpp_generator.py} +11 -10
mindspore/ops_generate/{pyboost_inner_prim_generator.py → pyboost/pyboost_inner_prim_generator.py} +8 -7
mindspore/ops_generate/{pyboost_native_grad_functions_generator.py → pyboost/pyboost_native_grad_functions_generator.py} +14 -10
mindspore/ops_generate/{pyboost_op_cpp_code_generator.py → pyboost/pyboost_op_cpp_code_generator.py} +140 -53
mindspore/ops_generate/{pyboost_overload_functions_cpp_generator.py → pyboost/pyboost_overload_functions_cpp_generator.py} +28 -15
mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +88 -4
mindspore/ops_generate/resources/__init__.py +0 -0
mindspore/ops_generate/resources/resource_list.py +30 -0
mindspore/ops_generate/resources/resource_loader.py +36 -0
mindspore/ops_generate/resources/resource_manager.py +64 -0
mindspore/ops_generate/resources/yaml_loader.py +88 -0
mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
mindspore/parallel/__init__.py +6 -2
mindspore/parallel/_auto_parallel_context.py +133 -6
mindspore/parallel/_cell_wrapper.py +130 -15
mindspore/parallel/_parallel_serialization.py +95 -4
mindspore/parallel/_ps_context.py +1 -1
mindspore/parallel/_recovery_context.py +7 -2
mindspore/parallel/_tensor.py +142 -18
mindspore/parallel/_utils.py +198 -25
mindspore/parallel/algo_parameter_config.py +3 -3
mindspore/parallel/auto_parallel.py +732 -0
mindspore/parallel/checkpoint_convert.py +159 -0
mindspore/parallel/checkpoint_transform.py +656 -37
mindspore/parallel/cluster/process_entity/_api.py +151 -19
mindspore/parallel/cluster/run.py +1 -1
mindspore/parallel/function/__init__.py +24 -0
mindspore/parallel/function/reshard_func.py +259 -0
mindspore/parallel/nn/__init__.py +25 -0
mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
mindspore/parallel/parameter_broadcast.py +24 -13
mindspore/parallel/shard.py +137 -61
mindspore/parallel/transform_safetensors.py +287 -95
mindspore/pgodb140.dll +0 -0
mindspore/pgort140.dll +0 -0
mindspore/profiler/__init__.py +9 -5
mindspore/profiler/analysis/parser/ascend_cann_parser.py +6 -2
mindspore/profiler/analysis/parser/ms_framework_parser.py +4 -4
mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -4
mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +22 -0
mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +241 -86
mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +41 -2
mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +33 -35
mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +7 -0
mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +8 -3
mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +141 -30
mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +5 -6
mindspore/profiler/common/ascend_msprof_exporter.py +5 -4
mindspore/profiler/common/constant.py +12 -0
mindspore/profiler/common/msprof_cmd_tool.py +42 -23
mindspore/profiler/common/path_manager.py +24 -0
mindspore/profiler/common/profiler_context.py +26 -2
mindspore/profiler/common/profiler_meta_data.py +74 -0
mindspore/profiler/common/profiler_parameters.py +59 -18
mindspore/profiler/common/profiler_path_manager.py +66 -7
mindspore/profiler/dynamic_profiler.py +112 -79
mindspore/profiler/envprofiler.py +26 -1
mindspore/profiler/experimental_config.py +197 -0
mindspore/profiler/mstx.py +57 -14
mindspore/profiler/platform/npu_profiler.py +33 -7
mindspore/profiler/profiler.py +541 -45
mindspore/profiler/profiler_action_controller.py +1 -1
mindspore/profiler/profiler_interface.py +4 -0
mindspore/profiler/schedule.py +57 -22
mindspore/rewrite/api/node.py +15 -13
mindspore/rewrite/api/symbol_tree.py +1 -1
mindspore/run_check/_check_version.py +25 -14
mindspore/run_check/run_check.py +1 -1
mindspore/runtime/__init__.py +2 -2
mindspore/runtime/executor.py +40 -11
mindspore/runtime/memory.py +25 -8
mindspore/safeguard/rewrite_obfuscation.py +12 -9
mindspore/swresample-4.dll +0 -0
mindspore/swscale-6.dll +0 -0
mindspore/tbbmalloc.dll +0 -0
mindspore/tinyxml2.dll +0 -0
mindspore/train/__init__.py +8 -8
mindspore/train/_utils.py +35 -7
mindspore/train/amp.py +1 -1
mindspore/train/callback/__init__.py +2 -2
mindspore/train/callback/_callback.py +2 -16
mindspore/train/callback/_checkpoint.py +24 -40
mindspore/train/callback/_cluster_monitor.py +14 -18
mindspore/train/callback/_flops_collector.py +2 -3
mindspore/train/callback/_history.py +7 -4
mindspore/train/callback/_lambda_callback.py +2 -2
mindspore/train/callback/_landscape.py +0 -3
mindspore/train/callback/_loss_monitor.py +2 -1
mindspore/train/callback/_on_request_exit.py +6 -5
mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
mindspore/train/callback/_summary_collector.py +8 -13
mindspore/train/callback/_time_monitor.py +2 -1
mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +179 -103
mindspore/train/data_sink.py +25 -2
mindspore/train/dataset_helper.py +4 -5
mindspore/train/loss_scale_manager.py +8 -7
mindspore/train/metrics/accuracy.py +3 -3
mindspore/train/metrics/confusion_matrix.py +9 -9
mindspore/train/metrics/error.py +3 -3
mindspore/train/metrics/hausdorff_distance.py +4 -4
mindspore/train/metrics/mean_surface_distance.py +3 -3
mindspore/train/metrics/metric.py +0 -12
mindspore/train/metrics/occlusion_sensitivity.py +4 -2
mindspore/train/metrics/precision.py +8 -6
mindspore/train/metrics/recall.py +9 -9
mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
mindspore/train/mind_ir_pb2.py +19 -12
mindspore/train/model.py +176 -103
mindspore/train/serialization.py +246 -988
mindspore/train/summary/_summary_adapter.py +2 -2
mindspore/train/summary/summary_record.py +1 -1
mindspore/turbojpeg.dll +0 -0
mindspore/utils/__init__.py +3 -2
mindspore/utils/dryrun.py +4 -2
mindspore/utils/hooks.py +81 -0
mindspore/utils/utils.py +138 -4
mindspore/vcmeta.dll +0 -0
mindspore/vcruntime140.dll +0 -0
mindspore/vcruntime140_1.dll +0 -0
mindspore/version.py +1 -1
{mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +2 -1
{mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +483 -438
mindspore/_install_custom.py +0 -43
mindspore/common/_register_for_adapter.py +0 -74
mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
mindspore/ops/auto_generate/gen_arg_handler.py +0 -136
mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
mindspore/ops_generate/gen_constants.py +0 -190
mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
mindspore/ops_generate/ops_primitive_h_generator.py +0 -81
mindspore/ops_generate/{base_generator.py → common/base_generator.py} +0 -0
{mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
{mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +0 -0
{mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0

mindspore/dataset/engine/obs/obs_mindrecord_dataset.py CHANGED Viewed

@@ -29,7 +29,7 @@ import sys
 import time
 from mindspore import log as logger
-from ..datasets import Shuffle
+from ..samplers import Shuffle
 from ...core.config import set_seed

mindspore/dataset/engine/samplers.py CHANGED Viewed

@@ -22,12 +22,61 @@ Users can also define a custom sampler by extending from the Sampler class.
 import copy
 import numbers
+from enum import Enum
 import numpy as np
 import mindspore._c_dataengine as cde
 import mindspore.dataset as ds
 from ..core import validator_helpers as validator
+class Shuffle(str, Enum):
+    """Specify the shuffle mode.
+    - ``Shuffle.FALSE`` : Disable the shuffle.
+    - ``Shuffle.ADAPTIVE`` : When the number of dataset samples is less than or equal to 100 million,
+      global shuffle is used. When the number of dataset samples is greater than 100 million, partial shuffle is used.
+    - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
+    - ``Shuffle.PARTIAL`` : Shuffle data with every 1 million samples
+    - ``Shuffle.FILES`` : Shuffle files only.
+    - ``Shuffle.INFILE`` : Shuffle data within each file.
+    """
+    FALSE: str = "false"
+    ADAPTIVE: str = "adaptive"
+    GLOBAL: str = "global"
+    PARTIAL: str = "partial"
+    FILES: str = "files"
+    INFILE: str = "infile"
+ShuffleToShuffleMode = {Shuffle.FALSE: cde.ShuffleMode.FALSE,
+                        Shuffle.ADAPTIVE: cde.ShuffleMode.ADAPTIVE,
+                        Shuffle.GLOBAL: cde.ShuffleMode.GLOBAL,
+                        Shuffle.PARTIAL: cde.ShuffleMode.PARTIAL,
+                        Shuffle.FILES: cde.ShuffleMode.FILES,
+                        Shuffle.INFILE: cde.ShuffleMode.INFILE}
+def shuffle_to_shuffle_mode(shuffle):
+    """
+    Shuffle Enum to Shuffle Mode
+    Args:
+        shuffle (Shuffle): shuffle flag to shuffle mode in C layer
+    Returns:
+        ShuffleMode, shuffle mode
+    """
+    shuffle_mode = cde.ShuffleMode.GLOBAL  # Global shuffle
+    if not isinstance(shuffle, Shuffle):
+        if shuffle is None or shuffle:
+            shuffle_mode = cde.ShuffleMode.GLOBAL  # Global shuffle
+        else:
+            shuffle_mode = cde.ShuffleMode.FALSE  # No shuffle
+    else:
+        shuffle_mode = ShuffleToShuffleMode[shuffle]
+    return shuffle_mode
 def select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id):
     """
     Create sampler based on user input.
@@ -35,13 +84,16 @@ def select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id):
     Args:
         num_samples (int): Number of samples.
         input_sampler (Union[Iterable, Sampler]): Sampler from user.
-        shuffle (bool): Shuffle.
+        shuffle (Shuffle): Shuffle is FALSE / ADAPTIVE / GLOBAL / PARTIAL / FILES / INFILE
         num_shards (int): Number of shard for sharding.
         shard_id (int): Shard ID.
     Returns:
         Sampler, sampler selected based on user input.
     """
+    if input_sampler is None and shuffle not in (Shuffle.FALSE, Shuffle.ADAPTIVE, Shuffle.GLOBAL, Shuffle.PARTIAL,
+                                                 Shuffle.FILES, Shuffle.INFILE):
+        raise RuntimeError("The input parameter shuffle: {} is not valid.".format(shuffle))
     if input_sampler is not None:
         # If the user provided a sampler, then it doesn't matter what the other args are because
@@ -67,23 +119,14 @@ def select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id):
         if isinstance(input_sampler, int):
             return SubsetSampler([input_sampler])
         raise TypeError('Unsupported sampler object of type ({})'.format(type(input_sampler)))
-    if shuffle is None:
-        if num_shards is not None:
-            # If shuffle is not specified, sharding enabled, use distributed random sampler
-            shuffle = True
-            return DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)
-        # If shuffle is not specified, sharding disabled, use random sampler
-        if num_samples is not None and num_samples != 0:
-            return RandomSampler(replacement=True, num_samples=num_samples)
-        return RandomSampler(num_samples=num_samples)
-    if shuffle is True:
+    if shuffle is not Shuffle.FALSE:
         if num_shards is not None:
             # If shuffle enabled, sharding enabled, use distributed random sampler
             return DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)
         # If shuffle enabled, sharding disabled, use random sampler
         if num_samples is not None:
-            return RandomSampler(replacement=True, num_samples=num_samples)
-        return RandomSampler(num_samples=num_samples)
+            return RandomSampler(replacement=True, num_samples=num_samples, shuffle=shuffle)
+        return RandomSampler(num_samples=num_samples, shuffle=shuffle)
     if num_shards is not None:
         # If shuffle disabled, sharding enabled, use distributed sequential sampler
         return DistributedSampler(num_shards, shard_id, shuffle=shuffle, num_samples=num_samples)
@@ -110,6 +153,10 @@ class BuiltinSampler:
         Add a sub-sampler for given sampler. The parent will receive all data from the
         output of sub-sampler sampler and apply its sample logic to return new samples.
+        Note:
+            - If a child sampler is added and it has a shuffle option, its value cannot be ``Shuffle.PARTIAL`` .
+              Additionally, the parent sampler's shuffle value must be ``Shuffle.GLOBAL`` .
         Args:
             sampler (Sampler): Object used to choose samples from the dataset. Only builtin
                 samplers(:class:`mindspore.dataset.DistributedSampler` ,
@@ -127,6 +174,15 @@ class BuiltinSampler:
         """
         if self.child_sampler is not None:
             raise RuntimeError("Cannot add child sampler, this sampler already has a child.")
+        if sampler is not None and sampler.get_shuffle_mode() == Shuffle.PARTIAL:
+            raise RuntimeError("When multiple samplers are used, ensure that the shuffle of the input sampler "
+                               "must not be Shuffle.PARTIAL.")
+        if self.get_shuffle_mode() != Shuffle.GLOBAL and self.get_shuffle_mode() != Shuffle.FALSE:
+            raise RuntimeError("When multiple samplers are used, ensure that the shuffle of the current sampler "
+                               "must be Shuffle.FALSE or Shuffle.GLOBAL, but got: {}.".format(self.get_shuffle_mode()))
         self.child_sampler = sampler
     def get_child(self):
@@ -229,6 +285,10 @@ class BuiltinSampler:
         return self.num_samples
+    def get_shuffle_mode(self):
+        """ Not implemented. """
+        return Shuffle.FALSE
 class Sampler(BuiltinSampler):
     """
@@ -255,6 +315,8 @@ class Sampler(BuiltinSampler):
         self.dataset_size = 0
         self.child_sampler = None
         self.num_samples = num_samples
+        if self.num_samples is None and hasattr(self, '__len__'):
+            self.num_samples = len(self)
         self.batch_sizes = []
     def __iter__(self):
@@ -271,32 +333,30 @@ class Sampler(BuiltinSampler):
     # Initialization handshake callback
     # Do not override this method!
-    def _handshake(self, ds_size, num_samples):
+    def _handshake(self, ds_size):
         self.dataset_size = ds_size
-        self.num_samples = num_samples
-    # Indices fetcher
-    # Do not override this method!
-    # pylint: disable=missing-docstring
-    def _get_indices(self):
-        sampler_iter = iter(self)
+    def get_indices(self):
+        """
+        Get the indices of the sampler.
+        Do not override this method!
+        """
         ret = []
         batch_sizes = []
-        for _ in range(self.num_samples):
-            try:
-                idx = next(sampler_iter)
-                # The idx can be either a number (for sampler) or a list (for batch sampler).
-                # If number, we convert it to list first. So they can be handled in the same way.
-                if isinstance(idx, numbers.Number):
-                    idx = [idx]
-                    # normal sampler does not have batch sizes
-                    batch_sizes.append(0)
-                else:
-                    # Using extend instead of append will flatten the list, so we need to save the
-                    # batch size information here.
-                    batch_sizes.append(len(idx))
-                ret.extend(idx)
-            except StopIteration:
+        for count, idx in enumerate(self):
+            # The idx can be either a number (for sampler) or a list (for batch sampler).
+            # If number, we convert it to list first. So they can be handled in the same way.
+            if isinstance(idx, numbers.Number):
+                idx = [idx]
+                # normal sampler does not have batch sizes
+                batch_sizes.append(0)
+            else:
+                # Using extend instead of append will flatten the list, so we need to save the
+                # batch size information here.
+                batch_sizes.append(len(idx))
+            ret.extend(idx)
+            if self.num_samples is not None and count + 1 >= self.num_samples:
                 break
         self.batch_sizes.append(batch_sizes)
         indices = np.array(ret)
@@ -345,20 +405,71 @@ class Sampler(BuiltinSampler):
         return self.child_sampler.is_sharded()
     def get_num_samples(self):
-        if self.num_samples is None:
-            return None
-        return self._get_indices().size
+        if self.num_samples is not None:
+            return self.num_samples
+        # deepcopy self to avoid changing the random state
+        fake_sampler = copy.deepcopy(self)
+        fake_sampler.get_indices()
+        return len(fake_sampler.batch_sizes[-1])
 class DistributedSampler(BuiltinSampler):
     """
     A sampler that accesses a shard of the dataset, it helps divide dataset into multi-subset for distributed training.
+    Note:
+        The shuffling modes supported for different datasets are as follows:
+        .. list-table:: List of support for shuffling mode
+            :widths: 50 50 50 50
+            :header-rows: 1
+            * - Shuffling Mode
+              - MindDataset
+              - TFRecordDataset
+              - Others
+            * - ``Shuffle.ADAPTIVE``
+              - Supported
+              - Not Supported
+              - Not Supported
+            * - ``Shuffle.GLOBAL``
+              - Supported
+              - Supported
+              - Supported
+            * - ``Shuffle.PARTIAL``
+              - Supported
+              - Not Supported
+              - Not Supported
+            * - ``Shuffle.FILES``
+              - Supported
+              - Supported
+              - Not Supported
+            * - ``Shuffle.INFILE``
+              - Supported
+              - Not Supported
+              - Not Supported
     Args:
         num_shards (int): Number of shards to divide the dataset into.
         shard_id (int): Shard ID of the current shard, which should be within the range of [0, `num_shards` - 1].
-        shuffle (bool, optional): If True, the indices are shuffled, otherwise it will not be shuffled.
-            Default: ``True``.
+        shuffle (Union[bool, Shuffle], optional): Specify the shuffle mode.
+            Default: ``True``, performs ``mindspore.dataset.Shuffle.GLOBAL`` . If `shuffle` is ``False`` ,
+            no shuffling will be performed.
+            There are several levels of shuffling, desired shuffle enum defined by :class:`mindspore.dataset.Shuffle` .
+            - ``Shuffle.ADAPTIVE`` : When the number of dataset samples is less than or equal to 100 million,
+              ``Shuffle.GLOBAL`` is used. When the number of dataset samples is greater than 100
+              million, ``Shuffle.PARTIAL`` is used. The shuffle is performed once every 1 million samples.
+            - ``Shuffle.GLOBAL`` : Global shuffle of all rows of data in dataset. The memory usage is large.
+            - ``Shuffle.PARTIAL`` : Partial shuffle of data in dataset for every 1 million samples.
+              The memory usage is less than ``Shuffle.GLOBAL`` .
+            - ``Shuffle.FILES`` : Shuffle the file sequence but keep the order of data within each file.
+            - ``Shuffle.INFILE`` : Keep the file sequence the same but shuffle the data within each file.
         num_samples (int, optional): The number of samples to draw. Default: ``None``, which means sample all elements.
         offset (int, optional): The starting shard ID where the elements in the dataset are sent to, which
             should be no more than `num_shards` . This parameter is only valid when a ConcatDataset takes
@@ -368,7 +479,7 @@ class DistributedSampler(BuiltinSampler):
     Raises:
         TypeError: If `num_shards` is not of type int.
         TypeError: If `shard_id` is not of type int.
-        TypeError: If `shuffle` is not of type bool.
+        TypeError: If `shuffle` is not of type bool or Shuffle.
         TypeError: If `num_samples` is not of type int.
         TypeError: If `offset` is not of type int.
         ValueError: If `num_samples` is a negative value.
@@ -392,8 +503,9 @@ class DistributedSampler(BuiltinSampler):
         if not isinstance(shard_id, int):
             raise TypeError("shard_id must be integer but was: {}.".format(shard_id))
-        if not isinstance(shuffle, bool):
-            raise TypeError("shuffle must be a boolean value but was: {}.".format(shuffle))
+        if not isinstance(shuffle, bool) and shuffle not in (Shuffle.FALSE, Shuffle.ADAPTIVE, Shuffle.GLOBAL,
+                                                             Shuffle.PARTIAL, Shuffle.FILES, Shuffle.INFILE):
+            raise TypeError("shuffle must be a boolean value or valid shuffle mode but was: {}.".format(shuffle))
         if num_samples is not None:
             if not isinstance(num_samples, int):
@@ -407,7 +519,11 @@ class DistributedSampler(BuiltinSampler):
         self.num_shards = num_shards
         self.shard_id = shard_id
+        if isinstance(shuffle, bool):
+            shuffle = Shuffle.GLOBAL if shuffle is True else Shuffle.FALSE
         self.shuffle = shuffle
         # get seed in distributed scenario
         # Example 1. if user set seeds by ds.config.set_seed(4321), then seed 4321 is used
         # Example 2. if user does not set the seed, then existing or default seed (like 5489) is used
@@ -419,11 +535,18 @@ class DistributedSampler(BuiltinSampler):
         """ Parse the sampler."""
         num_samples = self.num_samples if self.num_samples is not None else 0
         shuffle = self.shuffle if self.shuffle is not None else True
+        if isinstance(shuffle, bool):
+            shuffle = Shuffle.GLOBAL if shuffle else Shuffle.FALSE
+        if shuffle not in (Shuffle.FALSE, Shuffle.GLOBAL):
+            raise RuntimeError("The shuffle mode: {} is not supported with current dataset.".format(self.shuffle))
         offset = self.offset if self.offset is not None else -1
         # each time user calls create_dict_iterator() (to do repeat) sampler would get a different seed to shuffle
         self.seed += 1
         c_sampler = cde.DistributedSamplerObj(self.num_shards, self.shard_id,
-                                              shuffle, num_samples, self.seed, offset, True)
+                                              shuffle_to_shuffle_mode(shuffle), num_samples, self.seed, offset, True)
         c_child_sampler = self.parse_child()
         c_sampler.add_child(c_child_sampler)
         return c_sampler
@@ -432,7 +555,14 @@ class DistributedSampler(BuiltinSampler):
         """ Parse the sampler for MindRecord."""
         num_samples = self.num_samples if self.num_samples is not None else 0
         shuffle = self.shuffle if self.shuffle is not None else True
-        c_sampler = cde.MindrecordDistributedSampler(self.num_shards, self.shard_id, shuffle,
+        # convert shuffle=True to Shuffle.ADAPTIVE, convert shuffle=False to Shuffle.FALSE
+        if isinstance(shuffle, bool):
+            if shuffle:
+                shuffle = Shuffle.ADAPTIVE
+            else:
+                shuffle = Shuffle.FALSE
+        c_sampler = cde.MindrecordDistributedSampler(self.num_shards, self.shard_id, shuffle_to_shuffle_mode(shuffle),
                                                      self.seed, num_samples, self.offset)
         c_child_sampler = self.parse_child_for_minddataset()
         c_sampler.add_child(c_child_sampler)
@@ -441,7 +571,9 @@ class DistributedSampler(BuiltinSampler):
     def is_shuffled(self):
         if self.child_sampler is None:
-            return self.shuffle
+            if self.shuffle == Shuffle.FALSE:
+                return False
+            return True
         return self.child_sampler.is_shuffled()
@@ -455,6 +587,10 @@ class DistributedSampler(BuiltinSampler):
         self.offset = offset
         return self
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return self.shuffle
 class PKSampler(BuiltinSampler):
     """
@@ -543,19 +679,72 @@ class PKSampler(BuiltinSampler):
         c_sampler.set_num_samples(num_samples)
         return c_sampler
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return Shuffle.FALSE
 class RandomSampler(BuiltinSampler):
     """
     Samples the elements randomly.
+    Note:
+        The shuffling modes supported for different datasets are as follows:
+        .. list-table:: List of support for shuffling mode
+            :widths: 50 50 50 50
+            :header-rows: 1
+            * - Shuffling Mode
+              - MindDataset
+              - TFRecordDataset
+              - Others
+            * - ``Shuffle.ADAPTIVE``
+              - Supported
+              - Not Supported
+              - Not Supported
+            * - ``Shuffle.GLOBAL``
+              - Supported
+              - Supported
+              - Supported
+            * - ``Shuffle.PARTIAL``
+              - Supported
+              - Not Supported
+              - Not Supported
+            * - ``Shuffle.FILES``
+              - Supported
+              - Supported
+              - Not Supported
+            * - ``Shuffle.INFILE``
+              - Supported
+              - Not Supported
+              - Not Supported
     Args:
         replacement (bool, optional): If True, put the sample ID back for the next draw. Default: ``False``.
         num_samples (int, optional): Number of elements to sample. Default: ``None`` , which means sample all elements.
+        shuffle (Shuffle, optional): Specify the shuffle mode.
+            Default: ``Shuffle.GLOBAL``, Global shuffle of all rows of data in dataset.
+            There are several levels of shuffling, desired shuffle enum defined by :class:`mindspore.dataset.Shuffle` .
+            - ``Shuffle.ADAPTIVE`` : When the number of dataset samples is less than or equal to 100 million,
+              ``Shuffle.GLOBAL`` is used. When the number of dataset samples is greater than 100
+              million, ``Shuffle.PARTIAL`` is used. The shuffle is performed once every 1 million samples.
+            - ``Shuffle.GLOBAL`` : Global shuffle of all rows of data in dataset. The memory usage is large.
+            - ``Shuffle.PARTIAL`` : Partial shuffle of data in dataset for every 1 million samples.
+              The memory usage is less than ``Shuffle.GLOBAL`` .
+            - ``Shuffle.FILES`` : Shuffle the file sequence but keep the order of data within each file.
+            - ``Shuffle.INFILE`` : Keep the file sequence the same but shuffle the data within each file.
     Raises:
         TypeError: If `replacement` is not of type bool.
         TypeError: If `num_samples` is not of type int.
         ValueError: If `num_samples` is a negative value.
+        TypeError: If `shuffle` is not of type Shuffle.
     Examples:
         >>> import mindspore.dataset as ds
@@ -566,7 +755,7 @@ class RandomSampler(BuiltinSampler):
         ...                                 sampler=sampler)
      """
-    def __init__(self, replacement=False, num_samples=None):
+    def __init__(self, replacement=False, num_samples=None, shuffle=Shuffle.GLOBAL):
         if not isinstance(replacement, bool):
             raise TypeError("replacement must be a boolean value but was: {}.".format(replacement))
@@ -577,6 +766,10 @@ class RandomSampler(BuiltinSampler):
                 raise ValueError("num_samples exceeds the boundary between {} and {}(INT64_MAX)!"
                                  .format(0, validator.INT64_MAX))
+        if shuffle not in (Shuffle.ADAPTIVE, Shuffle.GLOBAL, Shuffle.PARTIAL, Shuffle.FILES, Shuffle.INFILE):
+            raise TypeError("shuffle must be valid shuffle mode but was: {}.".format(shuffle))
+        self.shuffle = shuffle
         self.deterministic = False
         self.replacement = replacement
         self.reshuffle_each_epoch = True
@@ -586,7 +779,11 @@ class RandomSampler(BuiltinSampler):
         """ Parse the sampler."""
         num_samples = self.num_samples if self.num_samples is not None else 0
         replacement = self.replacement if self.replacement is not None else False
-        c_sampler = cde.RandomSamplerObj(replacement, num_samples, self.reshuffle_each_epoch)
+        # only Shuffle.GLOBAL is accepted on this parse path; any other shuffle mode is rejected below
+        if self.shuffle is not Shuffle.GLOBAL:
+            raise RuntimeError("The shuffle mode: {} is not supported with current dataset.".format(self.shuffle))
+        c_sampler = cde.RandomSamplerObj(replacement, num_samples, self.reshuffle_each_epoch,
+                                         shuffle_to_shuffle_mode(self.shuffle))
         c_child_sampler = self.parse_child()
         c_sampler.add_child(c_child_sampler)
         return c_sampler
@@ -594,7 +791,15 @@ class RandomSampler(BuiltinSampler):
     def parse_for_minddataset(self):
         """Parse the sampler for MindRecord."""
         num_samples = self.num_samples if self.num_samples is not None else 0
-        c_sampler = cde.MindrecordRandomSampler(num_samples, self.replacement, self.reshuffle_each_epoch)
+        shuffle = self.shuffle if self.shuffle is not None else True
+        # convert shuffle=True to Shuffle.ADAPTIVE; shuffle=False is rejected as invalid for RandomSampler
+        if isinstance(shuffle, bool):
+            if shuffle:
+                shuffle = Shuffle.ADAPTIVE
+            else:
+                raise RuntimeError("The shuffle: False is invalid for RandomSampler.")
+        c_sampler = cde.MindrecordRandomSampler(num_samples, self.replacement, self.reshuffle_each_epoch,
+                                                shuffle_to_shuffle_mode(shuffle))
         c_child_sampler = self.parse_child_for_minddataset()
         c_sampler.add_child(c_child_sampler)
         c_sampler.set_num_samples(num_samples)
@@ -609,6 +814,10 @@ class RandomSampler(BuiltinSampler):
         return self.child_sampler.is_sharded()
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return self.shuffle
 class SequentialSampler(BuiltinSampler):
     """
@@ -678,6 +887,10 @@ class SequentialSampler(BuiltinSampler):
         return self.child_sampler.is_sharded()
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return Shuffle.FALSE
 class SubsetSampler(BuiltinSampler):
     """
@@ -767,6 +980,10 @@ class SubsetSampler(BuiltinSampler):
         return min(len(self.indices), num_samples)
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return Shuffle.FALSE
 class SubsetRandomSampler(SubsetSampler):
     """
@@ -809,6 +1026,10 @@ class SubsetRandomSampler(SubsetSampler):
         c_sampler.set_num_samples(self.get_num_samples())
         return c_sampler
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return Shuffle.GLOBAL
 class IterSampler(Sampler):
     """
@@ -837,12 +1058,8 @@ class IterSampler(Sampler):
      """
     def __init__(self, sampler, num_samples=None):
-        if num_samples is None:
-            if hasattr(sampler, "__len__"):
-                num_samples = len(sampler)
-            else:
-                # counting on a copied sampler to prevent changing the random state of the original one
-                num_samples = len(list(copy.deepcopy(sampler)))
+        if num_samples is None and hasattr(sampler, '__len__'):
+            num_samples = len(sampler)
         super().__init__(num_samples=num_samples)
         self.sampler = sampler
@@ -856,8 +1073,9 @@ class WeightedRandomSampler(BuiltinSampler):
     Args:
         weights (list[float, int]): A sequence of weights, not necessarily summing up to 1.
-        num_samples (int, optional): Number of elements to sample. Default: ``None`` , which means sample all elements.
-        replacement (bool): If ``True``, put the sample ID back for the next draw. Default: ``True``.
+        num_samples (int, optional): Number of elements to sample. Default: ``None`` ,
+            which means sample all elements.
+        replacement (bool, optional): If ``True``, put the sample ID back for the next draw. Default: ``True``.
     Raises:
         TypeError: If elements of `weights` are not of type number.
@@ -917,3 +1135,7 @@ class WeightedRandomSampler(BuiltinSampler):
             return False
         return self.child_sampler.is_sharded()
+    def get_shuffle_mode(self):
+        """Get the shuffle mode"""
+        return Shuffle.GLOBAL

mindspore/dataset/engine/serializer_deserializer.py CHANGED Viewed

@@ -63,7 +63,8 @@ def serialize(dataset, json_filepath=""):
 def deserialize(input_dict=None, json_filepath=None):
     """
-    Construct dataset pipeline from a JSON file produced by dataset serialize function.
+    Deserialize a data processing pipeline. The API accepts a Python dictionary or
+    a JSON file generated by :func:`mindspore.dataset.serialize`.
     Args:
         input_dict (dict): A Python dictionary containing a serialized dataset graph. Default: ``None``.

mindspore/dataset/engine/validators.py CHANGED Viewed

@@ -1133,9 +1133,13 @@ def check_generator_dataset(method):
             raise ValueError("Neither columns_names nor schema are provided.")
         if schema is not None:
+            logger.warning("'schema' is deprecated from version 2.6 and will be removed in a future version.")
             if not isinstance(schema, (datasets.Schema, str)):
                 raise ValueError("schema should be a path to schema file or a schema object.")
+        if param_dict.get('column_types') is not None:
+            logger.warning("'column_types' is deprecated from version 2.6 and will be removed in a future version.")
         # check optional argument
         nreq_param_int = ["max_rowsize", "num_samples", "num_parallel_workers", "num_shards", "shard_id"]
         validate_dataset_param_value(nreq_param_int, param_dict, int)
@@ -1146,6 +1150,7 @@ def check_generator_dataset(method):
         max_rowsize = param_dict.get("max_rowsize")
         if max_rowsize is not None:
+            logger.warning("'max_rowsize' is deprecated from version 2.6 and will be removed in a future version.")
             check_value(max_rowsize, [-1, INT32_MAX], "max_rowsize")
         num_shards = param_dict.get("num_shards")
@@ -1345,6 +1350,8 @@ def check_batch(method):
             check_num_parallel_workers(num_parallel_workers)
         type_check(drop_remainder, (bool,), "drop_remainder")
+        if max_rowsize is not None:
+            logger.warning("'max_rowsize' is deprecated from version 2.6 and will be removed in a future version.")
         check_max_rowsize(max_rowsize)
         if (input_columns is not None) and (per_batch_map is None):
@@ -1523,6 +1530,9 @@ def check_map(method):
             check_num_parallel_workers(num_parallel_workers)
         type_check(python_multiprocessing, (bool,), "python_multiprocessing")
         check_cache_option(cache)
+        if max_rowsize is not None:
+            logger.warning("'max_rowsize' is deprecated from version 2.6 and will be removed in a future version.")
         check_max_rowsize(max_rowsize)
         if offload is not None:
             type_check(offload, (bool,), "offload")

mindspore/dataset/text/__init__.py CHANGED Viewed

@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-This module is to support text processing for NLP. It includes two parts:
-text transforms and utils. text transforms is a high performance
-NLP text processing module which is developed with ICU4C and cppjieba.
-utils provides some general methods for NLP text processing.
+This module is designed for text data augmentation and comprises two submodules: `transforms` and `utils`.
+`transforms` is a high-performance text data augmentation lib that supports common text data augmentation operations.
+`utils` provides a collection of utility methods for text processing.
 Common imported modules in corresponding API examples are as follows:
@@ -33,9 +34,9 @@ The data transform operation can be executed in the data processing pipeline or
 - Pipeline mode is generally used to process big datasets. Examples refer to
   `introduction to data processing pipeline <https://www.mindspore.cn/docs/en/master/api_python/
-  mindspore.dataset.html#introduction-to-data-processing-pipeline>`_ .
+  mindspore.dataset.loading.html#introduction-to-data-processing-pipeline>`_ .
 - Eager mode is more like a function call to process data. Examples refer to
-  `Lightweight Data Processing <https://www.mindspore.cn/docs/en/master/model_train/dataset/eager.html>`_ .
+  `Lightweight Data Processing <https://www.mindspore.cn/tutorials/en/master/dataset/eager.html>`_ .
 """
 import platform