PyPI - mindspore - Versions diffs - 2.3.0__cp310-cp310-win_amd64.whl → 2.4.0__cp310-cp310-win_amd64.whl - Mend

mindspore 2.3.0__cp310-cp310-win_amd64.whl → 2.4.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic. Click here for more details.

Files changed (308) hide show

mindspore/.commit_id +1 -1
mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
mindspore/Newtonsoft.Json.dll +0 -0
mindspore/__init__.py +3 -1
mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
mindspore/_checkparam.py +50 -9
mindspore/_extends/parse/compile_config.py +41 -0
mindspore/_extends/parse/parser.py +9 -7
mindspore/_extends/parse/standard_method.py +52 -14
mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
mindspore/amp.py +24 -10
mindspore/atlprov.dll +0 -0
mindspore/avcodec-59.dll +0 -0
mindspore/avdevice-59.dll +0 -0
mindspore/avfilter-8.dll +0 -0
mindspore/avformat-59.dll +0 -0
mindspore/avutil-57.dll +0 -0
mindspore/c1.dll +0 -0
mindspore/c1xx.dll +0 -0
mindspore/c2.dll +0 -0
mindspore/common/__init__.py +6 -4
mindspore/common/_pijit_context.py +190 -0
mindspore/common/_register_for_tensor.py +2 -1
mindspore/common/_tensor_overload.py +139 -0
mindspore/common/api.py +102 -87
mindspore/common/dump.py +5 -6
mindspore/common/generator.py +1 -7
mindspore/common/hook_handle.py +14 -26
mindspore/common/mindir_util.py +2 -2
mindspore/common/parameter.py +46 -13
mindspore/common/recompute.py +39 -9
mindspore/common/sparse_tensor.py +7 -3
mindspore/common/tensor.py +209 -29
mindspore/communication/__init__.py +1 -1
mindspore/communication/_comm_helper.py +38 -3
mindspore/communication/comm_func.py +310 -55
mindspore/communication/management.py +14 -14
mindspore/context.py +123 -22
mindspore/dataset/__init__.py +1 -1
mindspore/dataset/audio/__init__.py +1 -1
mindspore/dataset/core/config.py +7 -0
mindspore/dataset/core/validator_helpers.py +7 -0
mindspore/dataset/engine/cache_client.py +1 -1
mindspore/dataset/engine/datasets.py +72 -44
mindspore/dataset/engine/datasets_audio.py +7 -7
mindspore/dataset/engine/datasets_standard_format.py +53 -3
mindspore/dataset/engine/datasets_text.py +20 -20
mindspore/dataset/engine/datasets_user_defined.py +174 -104
mindspore/dataset/engine/datasets_vision.py +33 -33
mindspore/dataset/engine/iterators.py +29 -0
mindspore/dataset/engine/obs/util.py +7 -0
mindspore/dataset/engine/queue.py +114 -60
mindspore/dataset/engine/serializer_deserializer.py +2 -2
mindspore/dataset/engine/validators.py +34 -14
mindspore/dataset/text/__init__.py +1 -4
mindspore/dataset/transforms/__init__.py +0 -3
mindspore/dataset/utils/line_reader.py +2 -0
mindspore/dataset/vision/__init__.py +1 -4
mindspore/dataset/vision/utils.py +1 -1
mindspore/dataset/vision/validators.py +2 -1
mindspore/dnnl.dll +0 -0
mindspore/dpcmi.dll +0 -0
mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
mindspore/experimental/es/embedding_service.py +883 -0
mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
mindspore/experimental/llm_boost/__init__.py +21 -0
mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
mindspore/experimental/llm_boost/register.py +129 -0
mindspore/experimental/llm_boost/utils.py +31 -0
mindspore/experimental/optim/adamw.py +85 -0
mindspore/experimental/optim/optimizer.py +3 -0
mindspore/hal/__init__.py +3 -3
mindspore/hal/contiguous_tensors_handle.py +175 -0
mindspore/hal/stream.py +18 -0
mindspore/include/api/model_group.h +13 -1
mindspore/include/api/types.h +10 -10
mindspore/include/dataset/config.h +2 -2
mindspore/include/dataset/constants.h +2 -2
mindspore/include/dataset/execute.h +2 -2
mindspore/include/dataset/vision.h +4 -0
mindspore/jpeg62.dll +0 -0
mindspore/log.py +1 -1
mindspore/mindrecord/filewriter.py +68 -51
mindspore/mindspore_backend.dll +0 -0
mindspore/mindspore_common.dll +0 -0
mindspore/mindspore_core.dll +0 -0
mindspore/mindspore_glog.dll +0 -0
mindspore/mindspore_np_dtype.dll +0 -0
mindspore/mindspore_ops.dll +0 -0
mindspore/mint/__init__.py +495 -46
mindspore/mint/distributed/__init__.py +31 -0
mindspore/mint/distributed/distributed.py +254 -0
mindspore/mint/nn/__init__.py +266 -21
mindspore/mint/nn/functional.py +125 -19
mindspore/mint/nn/layer/__init__.py +39 -0
mindspore/mint/nn/layer/activation.py +133 -0
mindspore/mint/nn/layer/normalization.py +477 -0
mindspore/mint/nn/layer/pooling.py +110 -0
mindspore/mint/optim/adamw.py +28 -7
mindspore/mint/special/__init__.py +63 -0
mindspore/msobj140.dll +0 -0
mindspore/mspdb140.dll +0 -0
mindspore/mspdbcore.dll +0 -0
mindspore/mspdbst.dll +0 -0
mindspore/mspft140.dll +0 -0
mindspore/msvcdis140.dll +0 -0
mindspore/msvcp140_1.dll +0 -0
mindspore/msvcp140_2.dll +0 -0
mindspore/msvcp140_atomic_wait.dll +0 -0
mindspore/msvcp140_codecvt_ids.dll +0 -0
mindspore/multiprocessing/__init__.py +2 -1
mindspore/nn/__init__.py +0 -1
mindspore/nn/cell.py +275 -93
mindspore/nn/layer/activation.py +211 -44
mindspore/nn/layer/basic.py +113 -3
mindspore/nn/layer/embedding.py +120 -2
mindspore/nn/layer/normalization.py +101 -5
mindspore/nn/layer/padding.py +34 -48
mindspore/nn/layer/pooling.py +161 -7
mindspore/nn/layer/transformer.py +3 -3
mindspore/nn/loss/__init__.py +2 -2
mindspore/nn/loss/loss.py +84 -6
mindspore/nn/optim/__init__.py +2 -1
mindspore/nn/optim/adadelta.py +1 -1
mindspore/nn/optim/adam.py +1 -1
mindspore/nn/optim/lamb.py +1 -1
mindspore/nn/optim/tft_wrapper.py +127 -0
mindspore/nn/wrap/cell_wrapper.py +12 -23
mindspore/nn/wrap/grad_reducer.py +5 -5
mindspore/nn/wrap/loss_scale.py +17 -3
mindspore/numpy/__init__.py +1 -1
mindspore/numpy/array_creations.py +65 -68
mindspore/numpy/array_ops.py +64 -60
mindspore/numpy/fft.py +610 -75
mindspore/numpy/logic_ops.py +11 -10
mindspore/numpy/math_ops.py +85 -84
mindspore/numpy/utils_const.py +4 -4
mindspore/opencv_core452.dll +0 -0
mindspore/opencv_imgcodecs452.dll +0 -0
mindspore/opencv_imgproc452.dll +0 -0
mindspore/ops/__init__.py +6 -4
mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
mindspore/ops/_vmap/vmap_array_ops.py +2 -4
mindspore/ops/_vmap/vmap_math_ops.py +17 -1
mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
mindspore/ops/auto_generate/gen_extend_func.py +734 -13
mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
mindspore/ops/composite/base.py +85 -48
mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
mindspore/ops/function/__init__.py +22 -0
mindspore/ops/function/array_func.py +490 -153
mindspore/ops/function/debug_func.py +113 -1
mindspore/ops/function/fft_func.py +15 -2
mindspore/ops/function/grad/grad_func.py +3 -2
mindspore/ops/function/math_func.py +558 -207
mindspore/ops/function/nn_func.py +817 -383
mindspore/ops/function/other_func.py +3 -2
mindspore/ops/function/random_func.py +184 -8
mindspore/ops/function/reshard_func.py +13 -11
mindspore/ops/function/sparse_unary_func.py +1 -1
mindspore/ops/function/vmap_func.py +3 -2
mindspore/ops/functional.py +24 -14
mindspore/ops/op_info_register.py +3 -3
mindspore/ops/operations/__init__.py +6 -1
mindspore/ops/operations/_grad_ops.py +2 -76
mindspore/ops/operations/_infer_ops.py +1 -1
mindspore/ops/operations/_inner_ops.py +71 -94
mindspore/ops/operations/array_ops.py +12 -146
mindspore/ops/operations/comm_ops.py +42 -53
mindspore/ops/operations/custom_ops.py +83 -19
mindspore/ops/operations/debug_ops.py +42 -10
mindspore/ops/operations/manually_defined/_inner.py +12 -0
mindspore/ops/operations/manually_defined/ops_def.py +265 -10
mindspore/ops/operations/math_ops.py +12 -223
mindspore/ops/operations/nn_ops.py +20 -114
mindspore/ops/operations/other_ops.py +7 -4
mindspore/ops/operations/random_ops.py +46 -1
mindspore/ops/primitive.py +18 -6
mindspore/ops_generate/arg_dtype_cast.py +2 -0
mindspore/ops_generate/gen_aclnn_implement.py +11 -11
mindspore/ops_generate/gen_constants.py +36 -0
mindspore/ops_generate/gen_ops.py +67 -52
mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
mindspore/ops_generate/gen_pyboost_func.py +131 -47
mindspore/ops_generate/op_proto.py +10 -3
mindspore/ops_generate/pyboost_utils.py +14 -1
mindspore/ops_generate/template.py +43 -21
mindspore/parallel/__init__.py +3 -1
mindspore/parallel/_auto_parallel_context.py +28 -8
mindspore/parallel/_cell_wrapper.py +83 -0
mindspore/parallel/_parallel_serialization.py +47 -19
mindspore/parallel/_tensor.py +81 -11
mindspore/parallel/_utils.py +13 -1
mindspore/parallel/algo_parameter_config.py +5 -5
mindspore/parallel/checkpoint_transform.py +46 -39
mindspore/parallel/cluster/process_entity/__init__.py +1 -1
mindspore/parallel/cluster/process_entity/_api.py +31 -23
mindspore/parallel/cluster/process_entity/_utils.py +2 -27
mindspore/parallel/parameter_broadcast.py +3 -4
mindspore/parallel/shard.py +162 -31
mindspore/parallel/transform_safetensors.py +993 -0
mindspore/pgodb140.dll +0 -0
mindspore/pgort140.dll +0 -0
mindspore/profiler/__init__.py +2 -1
mindspore/profiler/common/constant.py +29 -0
mindspore/profiler/common/registry.py +47 -0
mindspore/profiler/common/util.py +28 -0
mindspore/profiler/dynamic_profiler.py +694 -0
mindspore/profiler/envprofiling.py +17 -19
mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
mindspore/profiler/parser/base_timeline_generator.py +19 -25
mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
mindspore/profiler/parser/framework_parser.py +1 -391
mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
mindspore/profiler/parser/memory_usage_parser.py +0 -154
mindspore/profiler/parser/profiler_info.py +78 -6
mindspore/profiler/profiler.py +153 -0
mindspore/profiler/profiling.py +280 -412
mindspore/rewrite/__init__.py +1 -2
mindspore/rewrite/common/namespace.py +4 -4
mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
mindspore/run_check/_check_version.py +36 -103
mindspore/safeguard/rewrite_obfuscation.py +591 -247
mindspore/swresample-4.dll +0 -0
mindspore/swscale-6.dll +0 -0
mindspore/tbbmalloc.dll +0 -0
mindspore/tinyxml2.dll +0 -0
mindspore/train/__init__.py +4 -3
mindspore/train/_utils.py +28 -2
mindspore/train/amp.py +171 -53
mindspore/train/callback/__init__.py +2 -2
mindspore/train/callback/_callback.py +4 -4
mindspore/train/callback/_checkpoint.py +85 -22
mindspore/train/callback/_cluster_monitor.py +1 -1
mindspore/train/callback/_flops_collector.py +1 -0
mindspore/train/callback/_loss_monitor.py +3 -3
mindspore/train/callback/_on_request_exit.py +134 -31
mindspore/train/callback/_summary_collector.py +5 -5
mindspore/train/callback/_tft_register.py +352 -0
mindspore/train/dataset_helper.py +7 -3
mindspore/train/metrics/metric.py +3 -3
mindspore/train/metrics/roc.py +4 -4
mindspore/train/mind_ir_pb2.py +44 -39
mindspore/train/model.py +134 -58
mindspore/train/serialization.py +336 -112
mindspore/turbojpeg.dll +0 -0
mindspore/utils/__init__.py +21 -0
mindspore/utils/utils.py +60 -0
mindspore/vcmeta.dll +0 -0
mindspore/vcruntime140.dll +0 -0
mindspore/vcruntime140_1.dll +0 -0
mindspore/version.py +1 -1
{mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
{mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +281 -275
mindspore/include/c_api/ms/abstract.h +0 -67
mindspore/include/c_api/ms/attribute.h +0 -197
mindspore/include/c_api/ms/base/handle_types.h +0 -43
mindspore/include/c_api/ms/base/macros.h +0 -32
mindspore/include/c_api/ms/base/status.h +0 -33
mindspore/include/c_api/ms/base/types.h +0 -283
mindspore/include/c_api/ms/context.h +0 -102
mindspore/include/c_api/ms/graph.h +0 -160
mindspore/include/c_api/ms/node.h +0 -606
mindspore/include/c_api/ms/tensor.h +0 -161
mindspore/include/c_api/ms/value.h +0 -84
mindspore/mindspore_shared_lib.dll +0 -0
mindspore/nn/extend/basic.py +0 -140
mindspore/nn/extend/embedding.py +0 -143
mindspore/nn/extend/layer/normalization.py +0 -109
mindspore/nn/extend/pooling.py +0 -117
mindspore/nn/layer/embedding_service.py +0 -531
mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
mindspore/ops/extend/__init__.py +0 -53
mindspore/ops/extend/array_func.py +0 -218
mindspore/ops/extend/math_func.py +0 -76
mindspore/ops/extend/nn_func.py +0 -308
mindspore/ops/silent_check.py +0 -162
mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
mindspore/profiler/parser/msadvisor_parser.py +0 -240
mindspore/train/callback/_mindio_ttp.py +0 -443
{mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
{mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
{mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0

mindspore/dataset/engine/datasets_vision.py CHANGED Viewed

@@ -287,7 +287,7 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -399,7 +399,7 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
         decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
             and returns the decrypted bytes data. Default: ``None`` , no decryption.
@@ -552,7 +552,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -666,7 +666,7 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -780,7 +780,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -931,7 +931,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
         extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
             output at the end :py:obj:`[_meta-filename, dtype=string]` . Default: ``False``.
@@ -1173,7 +1173,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -1341,7 +1341,7 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -1456,7 +1456,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -1527,7 +1527,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -1632,7 +1632,7 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2005,7 +2005,7 @@ class Food101Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . This argument can only be specified
             when `num_shards` is also specified. Default: ``None`` .
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2126,7 +2126,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
         decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
             and returns the decrypted bytes data. Default: ``None`` , no decryption.
@@ -2270,7 +2270,7 @@ class KITTIDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2390,7 +2390,7 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2500,7 +2500,7 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2639,7 +2639,7 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2760,7 +2760,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2881,7 +2881,7 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -2986,7 +2986,7 @@ class OmniglotDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -3106,7 +3106,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -3234,7 +3234,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -3319,7 +3319,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
         super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                          shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
-        self.dataset_dir = os.path.abspath(dataset_dir)
+        self.dataset_dir = os.path.realpath(dataset_dir)
         self.usage = replace_none(usage, "train-standard")
         self.small = small
         self.decode = decode
@@ -3356,7 +3356,7 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -3454,7 +3454,7 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
             Default: ``None`` , will use global default workers(8), it can be set
             by :func:`mindspore.dataset.config.set_num_parallel_workers` .
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
         shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
             Default: ``None`` , expected order behavior shown in the table below.
@@ -3539,7 +3539,7 @@ class RenderedSST2Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . This
             argument can only be specified when `num_shards` is also specified. Default: ``None`` .
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -3847,7 +3847,7 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -3944,7 +3944,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -4054,7 +4054,7 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -4167,7 +4167,7 @@ class SUN397Dataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . This
             argument can only be specified when `num_shards` is also specified. Default: ``None`` .
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -4428,7 +4428,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:
@@ -4536,7 +4536,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
         extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
             :py:obj:`[_meta-filename, dtype=string]` will be output at the end. Default: ``False``.
@@ -4718,7 +4718,7 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
         shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` .
             This argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
             Default: ``None`` , which means no cache is used.
     Raises:

mindspore/dataset/engine/iterators.py CHANGED Viewed

@@ -19,6 +19,7 @@ import json
 import os
 import signal
 import weakref
+from functools import wraps
 import numpy as np
 import mindspore._c_dataengine as cde
@@ -58,6 +59,34 @@ def _cleanup():
             itr.release()
+def _cleanup_the_iterators_if_created(method):
+    """Release the iterators which is new created by the method"""
+    @wraps(method)
+    def wrapper(self, *args, **kwargs):
+        original_iterators = deepcopy(ITERATORS_LIST)
+        result = method(self, *args, **kwargs)
+        # it is used to attribute function like: dataset_size / output_shapes / output_types and
+        # it is a GeneratorDataset with two stage pipeline. The first pipeline will create a new iterator
+        # which need to be released after dataset_size / output_shapes / output_types end.
+        # 1. find the iterators which are started by dataset_size / output_shapes / output_types with two stage pipeline
+        iterators_to_be_released = []
+        for index, item in enumerate(ITERATORS_LIST):
+            if item not in original_iterators:
+                iterators_to_be_released.append(index)
+        # 2. release the iterators
+        for index in reversed(iterators_to_be_released):
+            itr = ITERATORS_LIST[index]()
+            if itr is not None:
+                itr.release()
+        return result
+    return wrapper
 class Iterator:
     """
     General Iterator over a dataset.

mindspore/dataset/engine/obs/util.py CHANGED Viewed

@@ -47,6 +47,13 @@ def get_used_disk_per():
             os.makedirs(config.WORKING_PATH)
         except FileExistsError:
             pass
+        except PermissionError as e:
+            err_msg =\
+                str(e) + ". Suggestion: " \
+                "1) It is recommended to manually create the {} directory and add read/write permissions to it. " \
+                "2) If you can't create, we suggest you download MindRecord manually and "\
+                "read it using the MindDataset interface.".format(config.WORKING_PATH)
+            raise RuntimeError(err_msg)
     total, used, _ = shutil.disk_usage(config.WORKING_PATH)
     return used / total

mindspore/dataset/engine/queue.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2021 Huawei Technologies Co., Ltd
+# Copyright 2021-2024 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,18 +31,28 @@ import mindspore._c_dataengine as cde
 from ..transforms.py_transforms_util import ExceptionHandler
+def get_total_size(data):
+    """Calculate the total size of numpy arrays."""
+    total_size = 0
+    for column in data:
+        if isinstance(column, np.ndarray):
+            total_size += column.nbytes
+    return total_size
 class _SharedQueue(multiprocessing.queues.Queue):
     """
     Class to implement a queue using shared memory for better performance.
     Args:
         size: Number of elements in the queue.
         count: Shared variable to suppress log printing.
-        copy_out: Flag to indidcate whether an extra copy should be done before returning.  If data will immediately be
-                  copied before returning, then this can be set to False.
-        max_rowsize: Maximum size of any element in the Queue in MB.
+        copy_out: Whether to copy the data from shared memory to process virtual memory. Default: ``True``.
+        max_rowsize: Maximum size of row in MB that is used for shared memory allocation to copy
+            data between processes. If set to -1, shared memory will be dynamically allocated with
+            the actual size of data. Default: -1.
     """
-    def __init__(self, size, count, copy_out=False, max_rowsize=6):
+    def __init__(self, size, count, copy_out=True, max_rowsize=-1):
         super().__init__(size, ctx=multiprocessing.get_context())
         self.copy_out = copy_out
@@ -55,28 +65,29 @@ class _SharedQueue(multiprocessing.queues.Queue):
         self.data_shared = 1
         self.count = count
         self.print_error = True
+        self.shm_list = []
+        self.seg_pos = 0
+        # num_seg has to be 2 more than the queue size. We can have remote worker filling a buffer, main process
+        # reading a buffer and also have a full queue of buffers in the meta-data queue
+        self.num_seg = size + 2
         if platform.system().lower() != 'windows' and max_rowsize == -1:
             self.dynamic_shm = True
+            self.fd_list = []
         else:
             self.dynamic_shm = False
             # change max_rowsize in MB into bytes
             self.seg_size = max_rowsize * 1024 * 1024
-            self.shm_list = []
-            self.seg_pos = 0
-            # num_seg has to be 2 more than the queue size.  We can have remote worker filling a buffer, main process
-            # reading a buffer and also have a full queue of buffers in the meta-data queue
-            self.num_seg = size + 2
             for _ in range(self.num_seg):
                 try:
-                    a = multiprocessing.Array("b", self.seg_size)
+                    shared_array = multiprocessing.Array("b", self.seg_size)
                 except OSError as e:
                     if e.errno == errno.ENOMEM:
                         raise RuntimeError("Failed to allocate shared memory for {0} elements of {1}MB: {2}"
                                            .format(self.num_seg, self.seg_size / 1024 / 1024, e))
                     raise
                 else:
-                    self.shm_list.append(a)
+                    self.shm_list.append(shared_array)
     def put_until(self, data, timeout=None, exit_signal=None):
         """Put data into the queue. Block until timeout is reached or exit_signal is set."""
@@ -102,32 +113,36 @@ class _SharedQueue(multiprocessing.queues.Queue):
             if isinstance(data, np.ndarray):
                 name_list.append((self.data_immediate, np.array(data)))
             else:
-                for r in data:
-                    # the map:pyfunc is a yield generator which can't be serialize
-                    if isinstance(r, types.GeneratorType):
+                if self.dynamic_shm:
+                    total_size = get_total_size(data)
+                    if total_size > 0:
+                        self.check_and_create_shm(total_size)
+                for column in data:
+                    # the map:pyfunc is a yield generator which can't be serialized
+                    if isinstance(column, types.GeneratorType):
                         raise TypeError("Cannot pickle {} object, please verify pyfunc return with numpy array"
-                                        .format(type(r)))
-                    if isinstance(r, np.ndarray) and self.dynamic_shm:
-                        byte = r.nbytes
-                        shm = cde.SharedMemory(None, True, -1, byte)
-                        dest = np.ndarray(r.shape, r.dtype, buffer=shm.buf())
-                        np.copyto(dest, r)
-                        fd = shm.fd()
-                        df = multiprocessing.reduction.DupFd(fd)
-                        name_list.append((self.data_shared, r.dtype, r.shape, shm.name(), df, shm.size()))
-                    elif (isinstance(r, np.ndarray) and r.size > self.min_shared_mem
-                          and start_bytes + r.nbytes < self.seg_size):
+                                        .format(type(column)))
+                    if self.dynamic_shm and isinstance(column, np.ndarray) and column.nbytes > 0:
+                        shm = self.shm_list[self.seg_pos]
+                        fd = self.fd_list[self.seg_pos]
+                        dest = np.ndarray(column.shape, column.dtype, buffer=shm.buf(), offset=start_bytes)
+                        np.copyto(dest, column)
+                        start_bytes += column.nbytes
+                        shm_metadata = (shm.name(), fd, total_size)
+                        name_list.append((self.data_shared, self.seg_pos, column.dtype, column.shape, shm_metadata))
+                    elif (isinstance(column, np.ndarray) and column.size > self.min_shared_mem
+                          and start_bytes + column.nbytes < self.seg_size):
                         # need to convert start_bytes to offset in array
                         start_offset = start_bytes
-                        dest = np.ndarray(r.shape, r.dtype, buffer=self.shm_list[self.seg_pos].get_obj(),
-                                          offset=start_offset)
-                        np.copyto(dest, r)
-                        byte = r.nbytes
+                        shm = self.shm_list[self.seg_pos]
+                        dest = np.ndarray(column.shape, column.dtype, buffer=shm.get_obj(), offset=start_offset)
+                        np.copyto(dest, column)
+                        byte = column.nbytes
                         byte = 8 * ((byte + 7) // 8)
                         start_bytes += byte
-                        name_list.append((self.data_shared, self.seg_pos, byte, r.dtype, r.shape))
+                        name_list.append((self.data_shared, self.seg_pos, byte, column.dtype, column.shape))
                     else:
-                        if isinstance(r, np.ndarray) and r.size > self.min_shared_mem:
+                        if isinstance(column, np.ndarray) and column.size > self.min_shared_mem:
                             # Only print out error the first time it happens
                             if self.count.value == 0 and self.print_error:
                                 logger.warning(
@@ -135,12 +150,12 @@ class _SharedQueue(multiprocessing.queues.Queue):
                                     + "max_rowsize: "
                                     + str(self.seg_size / 1024 / 1024)
                                     + "MB, current rowsize: "
-                                    + str((start_bytes + r.nbytes) / 1024 / 1024)
+                                    + str((start_bytes + column.nbytes) / 1024 / 1024)
                                     + "MB."
                                 )
                                 self.print_error = False
                                 self.count.value += 1
-                        name_list.append((self.data_immediate, r))
+                        name_list.append((self.data_immediate, column))
             super().put(name_list, timeout=timeout)
             # note above could generate a queue full exception.  It will be handled by teh caller
             # only increment seg_pos after successfully adding to metadata queue
@@ -152,52 +167,91 @@ class _SharedQueue(multiprocessing.queues.Queue):
         """Get data from the queue. Block until timeout is reached or exit_signal is set."""
         while True:
             try:
-                r = self.get(timeout=timeout)
+                result = self.get(timeout=timeout)
             except queue.Empty as e:
                 if exit_signal is None:
                     raise e
                 if exit_signal.is_set():
                     return None
                 continue
-            if r is None:
+            if result is None:
                 # receive finish signal
                 return None
             if exit_signal.is_set():
                 # loop until the queue becomes empty
                 continue
-            return r
+            return result
     def get(self, timeout=None):
-        result = super().get(timeout=timeout)
-        if isinstance(result, ExceptionHandler):
-            return result
-        r = []
+        raw_data = super().get(timeout=timeout)
+        if isinstance(raw_data, ExceptionHandler):
+            return raw_data
+        result = []
         start_bytes = 0
-        for x in result:
-            if x[0] == self.data_shared:
+        for column in raw_data:
+            if column[0] == self.data_shared:
                 if self.dynamic_shm:
-                    dtype, shape, shm_name, df, buf_size = x[1:]
-                    fd = df.detach()
-                    shm = cde.SharedMemory(shm_name, False, fd, buf_size)
-                    data = np.ndarray(shape, dtype, buffer=shm.buf())
-                    dest = np.copy(data)
-                    r.append(dest)
+                    seg_pos, dtype, shape, shm_metadata = column[1:]
+                    if start_bytes == 0:
+                        # only need to check once since all the columns are stored in the same shared memory
+                        self.check_and_attach_shm(seg_pos, shm_metadata)
+                    shm = self.shm_list[seg_pos]
+                    array = np.ndarray(shape, dtype, buffer=shm.buf(), offset=start_bytes)
+                    start_bytes += array.nbytes
                 else:
-                    seg_pos, byte, dtype, shape = x[1:]
+                    seg_pos, byte, dtype, shape = column[1:]
                     start_offset = start_bytes
-                    b = self.shm_list[seg_pos]
-                    data = np.ndarray(shape, dtype, buffer=b.get_obj(), offset=start_offset)
+                    shm = self.shm_list[seg_pos]
+                    array = np.ndarray(shape, dtype, buffer=shm.get_obj(), offset=start_offset)
                     start_bytes += byte
-                    if self.copy_out:
-                        dest = np.copy(data)
-                        r.append(dest)
-                    else:
-                        r.append(data)
-            elif x[0] == self.data_immediate:
-                r.append(x[1])
+                if self.copy_out:
+                    result.append(np.copy(array))
+                else:
+                    result.append(array)
+            elif column[0] == self.data_immediate:
+                result.append(column[1])
             else:
                 raise RuntimeError("SharedQueue, invalid entry in metadata.")
-        return tuple(r)
+        return tuple(result)
+    def check_and_create_shm(self, size):
+        """Check if the shared memory is initialized and of sufficient size."""
+        if len(self.shm_list) == self.seg_pos:
+            # shared memory has not been created and appended to the cache list
+            shm = cde.SharedMemory(None, True, -1, size)
+            shared_fd = multiprocessing.reduction.DupFd(shm.fd())
+            self.shm_list.append(shm)
+            self.fd_list.append(shared_fd)
+        elif len(self.shm_list) > self.seg_pos:
+            if self.shm_list[self.seg_pos].size() < size:
+                # shared memory is not big enough to hold the data
+                shm = cde.SharedMemory(None, True, -1, size)
+                shared_fd = multiprocessing.reduction.DupFd(shm.fd())
+                self.shm_list[self.seg_pos] = shm
+                self.fd_list[self.seg_pos] = shared_fd
+        else:
+            raise RuntimeError("The shared memory index is larger than the length of shared memory list. "
+                               "Uninitialized shared memory may exist.")
+    def check_and_attach_shm(self, shm_index, shm_metadata):
+        """Check if the shared memory is initialized and is the same as the current one."""
+        shm_name, fd, size = shm_metadata
+        if len(self.shm_list) == shm_index:
+            # shared memory has not been created and appended to the cache list
+            fd = fd.detach()
+            shm = cde.SharedMemory(shm_name, False, fd, size)
+            self.shm_list.append(shm)
+            self.fd_list.append(fd)
+        elif len(self.shm_list) > shm_index:
+            if self.shm_list[shm_index].name() != shm_name:
+                # shared memory has changed
+                fd = fd.detach()
+                shm = cde.SharedMemory(shm_name, False, fd, size)
+                self.shm_list[shm_index] = shm
+                self.fd_list[shm_index] = fd
+        else:
+            raise RuntimeError("The shared memory index is larger than the length of shared memory list. "
+                               "Uninitialized shared memory may exist.")
     def __del__(self):
         if not self.dynamic_shm:

mindspore/dataset/engine/serializer_deserializer.py CHANGED Viewed

@@ -107,9 +107,9 @@ def deserialize(input_dict=None, json_filepath=None):
 def expand_path(node_repr, key, val):
     """Convert relative to absolute path."""
     if isinstance(val, list):
-        node_repr[key] = [os.path.abspath(file) for file in val]
+        node_repr[key] = [os.path.realpath(file) for file in val]
     else:
-        node_repr[key] = os.path.abspath(val)
+        node_repr[key] = os.path.realpath(val)
 def show(dataset, indentation=2):