mindspore 2.3.0__cp310-cp310-win_amd64.whl → 2.4.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +46 -13
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +209 -29
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +310 -55
- mindspore/communication/management.py +14 -14
- mindspore/context.py +123 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +495 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +266 -21
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +28 -7
- mindspore/mint/special/__init__.py +63 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +275 -93
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +113 -3
- mindspore/nn/layer/embedding.py +120 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +127 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +734 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
- mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +490 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +558 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +184 -8
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +6 -1
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +12 -146
- mindspore/ops/operations/comm_ops.py +42 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +265 -10
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +28 -8
- mindspore/parallel/_cell_wrapper.py +83 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +81 -11
- mindspore/parallel/_utils.py +13 -1
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +993 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +280 -412
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +36 -103
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +28 -2
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +85 -22
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +134 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +352 -0
- mindspore/train/dataset_helper.py +7 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +134 -58
- mindspore/train/serialization.py +336 -112
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +281 -275
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
|
@@ -287,7 +287,7 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
|
|
|
287
287
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
288
288
|
argument can only be specified when `num_shards` is also specified.
|
|
289
289
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
290
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
290
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
291
291
|
Default: ``None`` , which means no cache is used.
|
|
292
292
|
|
|
293
293
|
Raises:
|
|
@@ -399,7 +399,7 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
|
|
|
399
399
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
400
400
|
argument can only be specified when `num_shards` is also specified.
|
|
401
401
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
402
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
402
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
403
403
|
Default: ``None`` , which means no cache is used.
|
|
404
404
|
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
|
|
405
405
|
and returns the decrypted bytes data. Default: ``None`` , no decryption.
|
|
@@ -552,7 +552,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
552
552
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
553
553
|
argument can only be specified when `num_shards` is also specified.
|
|
554
554
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
555
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
555
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
556
556
|
Default: ``None`` , which means no cache is used.
|
|
557
557
|
|
|
558
558
|
Raises:
|
|
@@ -666,7 +666,7 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
|
|
|
666
666
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
667
667
|
argument can only be specified when `num_shards` is also specified.
|
|
668
668
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
669
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
669
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
670
670
|
Default: ``None`` , which means no cache is used.
|
|
671
671
|
|
|
672
672
|
Raises:
|
|
@@ -780,7 +780,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
|
|
|
780
780
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
781
781
|
argument can only be specified when `num_shards` is also specified.
|
|
782
782
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
783
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
783
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
784
784
|
Default: ``None`` , which means no cache is used.
|
|
785
785
|
|
|
786
786
|
Raises:
|
|
@@ -931,7 +931,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
|
|
|
931
931
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
932
932
|
argument can only be specified when `num_shards` is also specified.
|
|
933
933
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
934
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
934
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
935
935
|
Default: ``None`` , which means no cache is used.
|
|
936
936
|
extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
|
|
937
937
|
output at the end :py:obj:`[_meta-filename, dtype=string]` . Default: ``False``.
|
|
@@ -1173,7 +1173,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
|
|
|
1173
1173
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1174
1174
|
argument can only be specified when `num_shards` is also specified.
|
|
1175
1175
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1176
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
1176
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
1177
1177
|
Default: ``None`` , which means no cache is used.
|
|
1178
1178
|
|
|
1179
1179
|
Raises:
|
|
@@ -1341,7 +1341,7 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1341
1341
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1342
1342
|
argument can only be specified when `num_shards` is also specified.
|
|
1343
1343
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1344
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
1344
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
1345
1345
|
Default: ``None`` , which means no cache is used.
|
|
1346
1346
|
|
|
1347
1347
|
Raises:
|
|
@@ -1456,7 +1456,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
|
|
|
1456
1456
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1457
1457
|
argument can only be specified when `num_shards` is also specified.
|
|
1458
1458
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1459
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
1459
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
1460
1460
|
Default: ``None`` , which means no cache is used.
|
|
1461
1461
|
|
|
1462
1462
|
Raises:
|
|
@@ -1527,7 +1527,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
1527
1527
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1528
1528
|
argument can only be specified when `num_shards` is also specified.
|
|
1529
1529
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1530
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
1530
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
1531
1531
|
Default: ``None`` , which means no cache is used.
|
|
1532
1532
|
|
|
1533
1533
|
Raises:
|
|
@@ -1632,7 +1632,7 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
|
|
|
1632
1632
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
1633
1633
|
argument can only be specified when `num_shards` is also specified.
|
|
1634
1634
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
1635
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
1635
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
1636
1636
|
Default: ``None`` , which means no cache is used.
|
|
1637
1637
|
|
|
1638
1638
|
Raises:
|
|
@@ -2005,7 +2005,7 @@ class Food101Dataset(MappableDataset, VisionBaseDataset):
|
|
|
2005
2005
|
shard_id (int, optional): The shard ID within `num_shards` . This argument can only be specified
|
|
2006
2006
|
when `num_shards` is also specified. Default: ``None`` .
|
|
2007
2007
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2008
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2008
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2009
2009
|
Default: ``None`` , which means no cache is used.
|
|
2010
2010
|
|
|
2011
2011
|
Raises:
|
|
@@ -2126,7 +2126,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
|
|
|
2126
2126
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
2127
2127
|
argument can only be specified when `num_shards` is also specified.
|
|
2128
2128
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2129
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2129
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2130
2130
|
Default: ``None`` , which means no cache is used.
|
|
2131
2131
|
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
|
|
2132
2132
|
and returns the decrypted bytes data. Default: ``None`` , no decryption.
|
|
@@ -2270,7 +2270,7 @@ class KITTIDataset(MappableDataset, VisionBaseDataset):
|
|
|
2270
2270
|
shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
|
|
2271
2271
|
argument can only be specified when `num_shards` is also specified.
|
|
2272
2272
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2273
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2273
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2274
2274
|
Default: ``None`` , which means no cache is used.
|
|
2275
2275
|
|
|
2276
2276
|
Raises:
|
|
@@ -2390,7 +2390,7 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2390
2390
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
2391
2391
|
argument can only be specified when `num_shards` is also specified.
|
|
2392
2392
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2393
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2393
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2394
2394
|
Default: ``None`` , which means no cache is used.
|
|
2395
2395
|
|
|
2396
2396
|
Raises:
|
|
@@ -2500,7 +2500,7 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
|
|
|
2500
2500
|
shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
|
|
2501
2501
|
argument can only be specified when `num_shards` is also specified.
|
|
2502
2502
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2503
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2503
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2504
2504
|
Default: ``None`` , which means no cache is used.
|
|
2505
2505
|
|
|
2506
2506
|
Raises:
|
|
@@ -2639,7 +2639,7 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
|
|
|
2639
2639
|
shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
|
|
2640
2640
|
argument can only be specified when `num_shards` is also specified.
|
|
2641
2641
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2642
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2642
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2643
2643
|
Default: ``None`` , which means no cache is used.
|
|
2644
2644
|
|
|
2645
2645
|
Raises:
|
|
@@ -2760,7 +2760,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2760
2760
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
2761
2761
|
argument can only be specified when `num_shards` is also specified.
|
|
2762
2762
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2763
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2763
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2764
2764
|
Default: ``None`` , which means no cache is used.
|
|
2765
2765
|
|
|
2766
2766
|
Raises:
|
|
@@ -2881,7 +2881,7 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2881
2881
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
2882
2882
|
argument can only be specified when `num_shards` is also specified.
|
|
2883
2883
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2884
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2884
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2885
2885
|
Default: ``None`` , which means no cache is used.
|
|
2886
2886
|
|
|
2887
2887
|
Raises:
|
|
@@ -2986,7 +2986,7 @@ class OmniglotDataset(MappableDataset, VisionBaseDataset):
|
|
|
2986
2986
|
shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
|
|
2987
2987
|
argument can only be specified when `num_shards` is also specified.
|
|
2988
2988
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2989
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
2989
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
2990
2990
|
Default: ``None`` , which means no cache is used.
|
|
2991
2991
|
|
|
2992
2992
|
Raises:
|
|
@@ -3106,7 +3106,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3106
3106
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
3107
3107
|
argument can only be specified when `num_shards` is also specified.
|
|
3108
3108
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3109
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3109
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3110
3110
|
Default: ``None`` , which means no cache is used.
|
|
3111
3111
|
|
|
3112
3112
|
Raises:
|
|
@@ -3234,7 +3234,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3234
3234
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
3235
3235
|
argument can only be specified when `num_shards` is also specified.
|
|
3236
3236
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3237
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3237
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3238
3238
|
Default: ``None`` , which means no cache is used.
|
|
3239
3239
|
|
|
3240
3240
|
Raises:
|
|
@@ -3319,7 +3319,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3319
3319
|
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
|
|
3320
3320
|
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
|
|
3321
3321
|
|
|
3322
|
-
self.dataset_dir = os.path.
|
|
3322
|
+
self.dataset_dir = os.path.realpath(dataset_dir)
|
|
3323
3323
|
self.usage = replace_none(usage, "train-standard")
|
|
3324
3324
|
self.small = small
|
|
3325
3325
|
self.decode = decode
|
|
@@ -3356,7 +3356,7 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
3356
3356
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
3357
3357
|
argument can only be specified when `num_shards` is also specified.
|
|
3358
3358
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3359
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3359
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3360
3360
|
Default: ``None`` , which means no cache is used.
|
|
3361
3361
|
|
|
3362
3362
|
Raises:
|
|
@@ -3454,7 +3454,7 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
|
|
|
3454
3454
|
Default: ``None`` , will use global default workers(8), it can be set
|
|
3455
3455
|
by :func:`mindspore.dataset.config.set_num_parallel_workers` .
|
|
3456
3456
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3457
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3457
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3458
3458
|
Default: ``None`` , which means no cache is used.
|
|
3459
3459
|
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3460
3460
|
Default: ``None`` , expected order behavior shown in the table below.
|
|
@@ -3539,7 +3539,7 @@ class RenderedSST2Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3539
3539
|
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
3540
3540
|
argument can only be specified when `num_shards` is also specified. Default: ``None`` .
|
|
3541
3541
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3542
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3542
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3543
3543
|
Default: ``None`` , which means no cache is used.
|
|
3544
3544
|
|
|
3545
3545
|
Raises:
|
|
@@ -3847,7 +3847,7 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
|
|
|
3847
3847
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
3848
3848
|
argument can only be specified when `num_shards` is also specified.
|
|
3849
3849
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3850
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3850
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3851
3851
|
Default: ``None`` , which means no cache is used.
|
|
3852
3852
|
|
|
3853
3853
|
Raises:
|
|
@@ -3944,7 +3944,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
|
|
|
3944
3944
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
3945
3945
|
argument can only be specified when `num_shards` is also specified.
|
|
3946
3946
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3947
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
3947
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
3948
3948
|
Default: ``None`` , which means no cache is used.
|
|
3949
3949
|
|
|
3950
3950
|
Raises:
|
|
@@ -4054,7 +4054,7 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
|
|
|
4054
4054
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
4055
4055
|
argument can only be specified when `num_shards` is also specified.
|
|
4056
4056
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4057
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
4057
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
4058
4058
|
Default: ``None`` , which means no cache is used.
|
|
4059
4059
|
|
|
4060
4060
|
Raises:
|
|
@@ -4167,7 +4167,7 @@ class SUN397Dataset(MappableDataset, VisionBaseDataset):
|
|
|
4167
4167
|
shard_id (int, optional): The shard ID within `num_shards` . This
|
|
4168
4168
|
argument can only be specified when `num_shards` is also specified. Default: ``None`` .
|
|
4169
4169
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4170
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
4170
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
4171
4171
|
Default: ``None`` , which means no cache is used.
|
|
4172
4172
|
|
|
4173
4173
|
Raises:
|
|
@@ -4428,7 +4428,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
|
|
|
4428
4428
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
4429
4429
|
argument can only be specified when `num_shards` is also specified.
|
|
4430
4430
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4431
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
4431
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
4432
4432
|
Default: ``None`` , which means no cache is used.
|
|
4433
4433
|
|
|
4434
4434
|
Raises:
|
|
@@ -4536,7 +4536,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
|
|
|
4536
4536
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
|
|
4537
4537
|
argument can only be specified when `num_shards` is also specified.
|
|
4538
4538
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4539
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
4539
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
4540
4540
|
Default: ``None`` , which means no cache is used.
|
|
4541
4541
|
extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
|
|
4542
4542
|
:py:obj:`[_meta-filename, dtype=string]` will be output at the end. Default: ``False``.
|
|
@@ -4718,7 +4718,7 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
|
|
|
4718
4718
|
shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` .
|
|
4719
4719
|
This argument can only be specified when `num_shards` is also specified.
|
|
4720
4720
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
4721
|
-
`Single-Node Data Cache <https://www.mindspore.cn/
|
|
4721
|
+
`Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
|
|
4722
4722
|
Default: ``None`` , which means no cache is used.
|
|
4723
4723
|
|
|
4724
4724
|
Raises:
|
|
@@ -19,6 +19,7 @@ import json
|
|
|
19
19
|
import os
|
|
20
20
|
import signal
|
|
21
21
|
import weakref
|
|
22
|
+
from functools import wraps
|
|
22
23
|
import numpy as np
|
|
23
24
|
|
|
24
25
|
import mindspore._c_dataengine as cde
|
|
@@ -58,6 +59,34 @@ def _cleanup():
|
|
|
58
59
|
itr.release()
|
|
59
60
|
|
|
60
61
|
|
|
62
|
+
def _cleanup_the_iterators_if_created(method):
|
|
63
|
+
"""Release the iterators which is new created by the method"""
|
|
64
|
+
|
|
65
|
+
@wraps(method)
|
|
66
|
+
def wrapper(self, *args, **kwargs):
|
|
67
|
+
original_iterators = deepcopy(ITERATORS_LIST)
|
|
68
|
+
|
|
69
|
+
result = method(self, *args, **kwargs)
|
|
70
|
+
|
|
71
|
+
# it is used to attribute function like: dataset_size / output_shapes / output_types and
|
|
72
|
+
# it is a GeneratorDataset with two stage pipeline. The first pipeline will create a new iterator
|
|
73
|
+
# which need to be released after dataset_size / output_shapes / output_types end.
|
|
74
|
+
# 1. find the iterators which are started by dataset_size / output_shapes / output_types with two stage pipeline
|
|
75
|
+
iterators_to_be_released = []
|
|
76
|
+
for index, item in enumerate(ITERATORS_LIST):
|
|
77
|
+
if item not in original_iterators:
|
|
78
|
+
iterators_to_be_released.append(index)
|
|
79
|
+
|
|
80
|
+
# 2. release the iterators
|
|
81
|
+
for index in reversed(iterators_to_be_released):
|
|
82
|
+
itr = ITERATORS_LIST[index]()
|
|
83
|
+
if itr is not None:
|
|
84
|
+
itr.release()
|
|
85
|
+
|
|
86
|
+
return result
|
|
87
|
+
return wrapper
|
|
88
|
+
|
|
89
|
+
|
|
61
90
|
class Iterator:
|
|
62
91
|
"""
|
|
63
92
|
General Iterator over a dataset.
|
|
@@ -47,6 +47,13 @@ def get_used_disk_per():
|
|
|
47
47
|
os.makedirs(config.WORKING_PATH)
|
|
48
48
|
except FileExistsError:
|
|
49
49
|
pass
|
|
50
|
+
except PermissionError as e:
|
|
51
|
+
err_msg =\
|
|
52
|
+
str(e) + ". Suggestion: " \
|
|
53
|
+
"1) It is recommended to manually create the {} directory and add read/write permissions to it. " \
|
|
54
|
+
"2) If you can't create, we suggest you download MindRecord manually and "\
|
|
55
|
+
"read it using the MindDataset interface.".format(config.WORKING_PATH)
|
|
56
|
+
raise RuntimeError(err_msg)
|
|
50
57
|
|
|
51
58
|
total, used, _ = shutil.disk_usage(config.WORKING_PATH)
|
|
52
59
|
return used / total
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
1
|
+
# Copyright 2021-2024 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -31,18 +31,28 @@ import mindspore._c_dataengine as cde
|
|
|
31
31
|
from ..transforms.py_transforms_util import ExceptionHandler
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
def get_total_size(data):
|
|
35
|
+
"""Calculate the total size of numpy arrays."""
|
|
36
|
+
total_size = 0
|
|
37
|
+
for column in data:
|
|
38
|
+
if isinstance(column, np.ndarray):
|
|
39
|
+
total_size += column.nbytes
|
|
40
|
+
return total_size
|
|
41
|
+
|
|
42
|
+
|
|
34
43
|
class _SharedQueue(multiprocessing.queues.Queue):
|
|
35
44
|
"""
|
|
36
45
|
Class to implement a queue using shared memory for better performance.
|
|
37
46
|
Args:
|
|
38
47
|
size: Number of elements in the queue.
|
|
39
48
|
count: Shared variable to suppress log printing.
|
|
40
|
-
copy_out:
|
|
41
|
-
|
|
42
|
-
|
|
49
|
+
copy_out: Whether to copy the data from shared memory to process virtual memory. Default: ``True``.
|
|
50
|
+
max_rowsize: Maximum size of row in MB that is used for shared memory allocation to copy
|
|
51
|
+
data between processes. If set to -1, shared memory will be dynamically allocated with
|
|
52
|
+
the actual size of data. Default: -1.
|
|
43
53
|
"""
|
|
44
54
|
|
|
45
|
-
def __init__(self, size, count, copy_out=
|
|
55
|
+
def __init__(self, size, count, copy_out=True, max_rowsize=-1):
|
|
46
56
|
super().__init__(size, ctx=multiprocessing.get_context())
|
|
47
57
|
|
|
48
58
|
self.copy_out = copy_out
|
|
@@ -55,28 +65,29 @@ class _SharedQueue(multiprocessing.queues.Queue):
|
|
|
55
65
|
self.data_shared = 1
|
|
56
66
|
self.count = count
|
|
57
67
|
self.print_error = True
|
|
68
|
+
self.shm_list = []
|
|
69
|
+
self.seg_pos = 0
|
|
70
|
+
# num_seg has to be 2 more than the queue size. We can have remote worker filling a buffer, main process
|
|
71
|
+
# reading a buffer and also have a full queue of buffers in the meta-data queue
|
|
72
|
+
self.num_seg = size + 2
|
|
58
73
|
|
|
59
74
|
if platform.system().lower() != 'windows' and max_rowsize == -1:
|
|
60
75
|
self.dynamic_shm = True
|
|
76
|
+
self.fd_list = []
|
|
61
77
|
else:
|
|
62
78
|
self.dynamic_shm = False
|
|
63
79
|
# change max_rowsize in MB into bytes
|
|
64
80
|
self.seg_size = max_rowsize * 1024 * 1024
|
|
65
|
-
self.shm_list = []
|
|
66
|
-
self.seg_pos = 0
|
|
67
|
-
# num_seg has to be 2 more than the queue size. We can have remote worker filling a buffer, main process
|
|
68
|
-
# reading a buffer and also have a full queue of buffers in the meta-data queue
|
|
69
|
-
self.num_seg = size + 2
|
|
70
81
|
for _ in range(self.num_seg):
|
|
71
82
|
try:
|
|
72
|
-
|
|
83
|
+
shared_array = multiprocessing.Array("b", self.seg_size)
|
|
73
84
|
except OSError as e:
|
|
74
85
|
if e.errno == errno.ENOMEM:
|
|
75
86
|
raise RuntimeError("Failed to allocate shared memory for {0} elements of {1}MB: {2}"
|
|
76
87
|
.format(self.num_seg, self.seg_size / 1024 / 1024, e))
|
|
77
88
|
raise
|
|
78
89
|
else:
|
|
79
|
-
self.shm_list.append(
|
|
90
|
+
self.shm_list.append(shared_array)
|
|
80
91
|
|
|
81
92
|
def put_until(self, data, timeout=None, exit_signal=None):
|
|
82
93
|
"""Put data into the queue. Block until timeout is reached or exit_signal is set."""
|
|
@@ -102,32 +113,36 @@ class _SharedQueue(multiprocessing.queues.Queue):
|
|
|
102
113
|
if isinstance(data, np.ndarray):
|
|
103
114
|
name_list.append((self.data_immediate, np.array(data)))
|
|
104
115
|
else:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
if
|
|
116
|
+
if self.dynamic_shm:
|
|
117
|
+
total_size = get_total_size(data)
|
|
118
|
+
if total_size > 0:
|
|
119
|
+
self.check_and_create_shm(total_size)
|
|
120
|
+
for column in data:
|
|
121
|
+
# the map:pyfunc is a yield generator which can't be serialized
|
|
122
|
+
if isinstance(column, types.GeneratorType):
|
|
108
123
|
raise TypeError("Cannot pickle {} object, please verify pyfunc return with numpy array"
|
|
109
|
-
.format(type(
|
|
110
|
-
if isinstance(
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
dest = np.ndarray(
|
|
114
|
-
np.copyto(dest,
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
name_list.append((self.data_shared,
|
|
118
|
-
elif (isinstance(
|
|
119
|
-
and start_bytes +
|
|
124
|
+
.format(type(column)))
|
|
125
|
+
if self.dynamic_shm and isinstance(column, np.ndarray) and column.nbytes > 0:
|
|
126
|
+
shm = self.shm_list[self.seg_pos]
|
|
127
|
+
fd = self.fd_list[self.seg_pos]
|
|
128
|
+
dest = np.ndarray(column.shape, column.dtype, buffer=shm.buf(), offset=start_bytes)
|
|
129
|
+
np.copyto(dest, column)
|
|
130
|
+
start_bytes += column.nbytes
|
|
131
|
+
shm_metadata = (shm.name(), fd, total_size)
|
|
132
|
+
name_list.append((self.data_shared, self.seg_pos, column.dtype, column.shape, shm_metadata))
|
|
133
|
+
elif (isinstance(column, np.ndarray) and column.size > self.min_shared_mem
|
|
134
|
+
and start_bytes + column.nbytes < self.seg_size):
|
|
120
135
|
# need to convert start_bytes to offset in array
|
|
121
136
|
start_offset = start_bytes
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
np.copyto(dest,
|
|
125
|
-
byte =
|
|
137
|
+
shm = self.shm_list[self.seg_pos]
|
|
138
|
+
dest = np.ndarray(column.shape, column.dtype, buffer=shm.get_obj(), offset=start_offset)
|
|
139
|
+
np.copyto(dest, column)
|
|
140
|
+
byte = column.nbytes
|
|
126
141
|
byte = 8 * ((byte + 7) // 8)
|
|
127
142
|
start_bytes += byte
|
|
128
|
-
name_list.append((self.data_shared, self.seg_pos, byte,
|
|
143
|
+
name_list.append((self.data_shared, self.seg_pos, byte, column.dtype, column.shape))
|
|
129
144
|
else:
|
|
130
|
-
if isinstance(
|
|
145
|
+
if isinstance(column, np.ndarray) and column.size > self.min_shared_mem:
|
|
131
146
|
# Only print out error the first time it happens
|
|
132
147
|
if self.count.value == 0 and self.print_error:
|
|
133
148
|
logger.warning(
|
|
@@ -135,12 +150,12 @@ class _SharedQueue(multiprocessing.queues.Queue):
|
|
|
135
150
|
+ "max_rowsize: "
|
|
136
151
|
+ str(self.seg_size / 1024 / 1024)
|
|
137
152
|
+ "MB, current rowsize: "
|
|
138
|
-
+ str((start_bytes +
|
|
153
|
+
+ str((start_bytes + column.nbytes) / 1024 / 1024)
|
|
139
154
|
+ "MB."
|
|
140
155
|
)
|
|
141
156
|
self.print_error = False
|
|
142
157
|
self.count.value += 1
|
|
143
|
-
name_list.append((self.data_immediate,
|
|
158
|
+
name_list.append((self.data_immediate, column))
|
|
144
159
|
super().put(name_list, timeout=timeout)
|
|
145
160
|
# note above could generate a queue full exception. It will be handled by teh caller
|
|
146
161
|
# only increment seg_pos after successfully adding to metadata queue
|
|
@@ -152,52 +167,91 @@ class _SharedQueue(multiprocessing.queues.Queue):
|
|
|
152
167
|
"""Get data from the queue. Block until timeout is reached or exit_signal is set."""
|
|
153
168
|
while True:
|
|
154
169
|
try:
|
|
155
|
-
|
|
170
|
+
result = self.get(timeout=timeout)
|
|
156
171
|
except queue.Empty as e:
|
|
157
172
|
if exit_signal is None:
|
|
158
173
|
raise e
|
|
159
174
|
if exit_signal.is_set():
|
|
160
175
|
return None
|
|
161
176
|
continue
|
|
162
|
-
if
|
|
177
|
+
if result is None:
|
|
163
178
|
# receive finish signal
|
|
164
179
|
return None
|
|
165
180
|
if exit_signal.is_set():
|
|
166
181
|
# loop until the queue becomes empty
|
|
167
182
|
continue
|
|
168
|
-
return
|
|
183
|
+
return result
|
|
169
184
|
|
|
170
185
|
def get(self, timeout=None):
|
|
171
|
-
|
|
172
|
-
if isinstance(
|
|
173
|
-
return
|
|
174
|
-
|
|
186
|
+
raw_data = super().get(timeout=timeout)
|
|
187
|
+
if isinstance(raw_data, ExceptionHandler):
|
|
188
|
+
return raw_data
|
|
189
|
+
result = []
|
|
175
190
|
start_bytes = 0
|
|
176
|
-
for
|
|
177
|
-
if
|
|
191
|
+
for column in raw_data:
|
|
192
|
+
if column[0] == self.data_shared:
|
|
178
193
|
if self.dynamic_shm:
|
|
179
|
-
dtype, shape,
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
194
|
+
seg_pos, dtype, shape, shm_metadata = column[1:]
|
|
195
|
+
if start_bytes == 0:
|
|
196
|
+
# only need to check once since all the columns are stored in the same shared memory
|
|
197
|
+
self.check_and_attach_shm(seg_pos, shm_metadata)
|
|
198
|
+
shm = self.shm_list[seg_pos]
|
|
199
|
+
array = np.ndarray(shape, dtype, buffer=shm.buf(), offset=start_bytes)
|
|
200
|
+
start_bytes += array.nbytes
|
|
185
201
|
else:
|
|
186
|
-
seg_pos, byte, dtype, shape =
|
|
202
|
+
seg_pos, byte, dtype, shape = column[1:]
|
|
187
203
|
start_offset = start_bytes
|
|
188
|
-
|
|
189
|
-
|
|
204
|
+
shm = self.shm_list[seg_pos]
|
|
205
|
+
array = np.ndarray(shape, dtype, buffer=shm.get_obj(), offset=start_offset)
|
|
190
206
|
start_bytes += byte
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
r.append(x[1])
|
|
207
|
+
if self.copy_out:
|
|
208
|
+
result.append(np.copy(array))
|
|
209
|
+
else:
|
|
210
|
+
result.append(array)
|
|
211
|
+
elif column[0] == self.data_immediate:
|
|
212
|
+
result.append(column[1])
|
|
198
213
|
else:
|
|
199
214
|
raise RuntimeError("SharedQueue, invalid entry in metadata.")
|
|
200
|
-
return tuple(
|
|
215
|
+
return tuple(result)
|
|
216
|
+
|
|
217
|
+
def check_and_create_shm(self, size):
|
|
218
|
+
"""Check if the shared memory is initialized and of sufficient size."""
|
|
219
|
+
if len(self.shm_list) == self.seg_pos:
|
|
220
|
+
# shared memory has not been created and appended to the cache list
|
|
221
|
+
shm = cde.SharedMemory(None, True, -1, size)
|
|
222
|
+
shared_fd = multiprocessing.reduction.DupFd(shm.fd())
|
|
223
|
+
self.shm_list.append(shm)
|
|
224
|
+
self.fd_list.append(shared_fd)
|
|
225
|
+
elif len(self.shm_list) > self.seg_pos:
|
|
226
|
+
if self.shm_list[self.seg_pos].size() < size:
|
|
227
|
+
# shared memory is not big enough to hold the data
|
|
228
|
+
shm = cde.SharedMemory(None, True, -1, size)
|
|
229
|
+
shared_fd = multiprocessing.reduction.DupFd(shm.fd())
|
|
230
|
+
self.shm_list[self.seg_pos] = shm
|
|
231
|
+
self.fd_list[self.seg_pos] = shared_fd
|
|
232
|
+
else:
|
|
233
|
+
raise RuntimeError("The shared memory index is larger than the length of shared memory list. "
|
|
234
|
+
"Uninitialized shared memory may exist.")
|
|
235
|
+
|
|
236
|
+
def check_and_attach_shm(self, shm_index, shm_metadata):
|
|
237
|
+
"""Check if the shared memory is initialized and is the same as the current one."""
|
|
238
|
+
shm_name, fd, size = shm_metadata
|
|
239
|
+
if len(self.shm_list) == shm_index:
|
|
240
|
+
# shared memory has not been created and appended to the cache list
|
|
241
|
+
fd = fd.detach()
|
|
242
|
+
shm = cde.SharedMemory(shm_name, False, fd, size)
|
|
243
|
+
self.shm_list.append(shm)
|
|
244
|
+
self.fd_list.append(fd)
|
|
245
|
+
elif len(self.shm_list) > shm_index:
|
|
246
|
+
if self.shm_list[shm_index].name() != shm_name:
|
|
247
|
+
# shared memory has changed
|
|
248
|
+
fd = fd.detach()
|
|
249
|
+
shm = cde.SharedMemory(shm_name, False, fd, size)
|
|
250
|
+
self.shm_list[shm_index] = shm
|
|
251
|
+
self.fd_list[shm_index] = fd
|
|
252
|
+
else:
|
|
253
|
+
raise RuntimeError("The shared memory index is larger than the length of shared memory list. "
|
|
254
|
+
"Uninitialized shared memory may exist.")
|
|
201
255
|
|
|
202
256
|
def __del__(self):
|
|
203
257
|
if not self.dynamic_shm:
|
|
@@ -107,9 +107,9 @@ def deserialize(input_dict=None, json_filepath=None):
|
|
|
107
107
|
def expand_path(node_repr, key, val):
|
|
108
108
|
"""Convert relative to absolute path."""
|
|
109
109
|
if isinstance(val, list):
|
|
110
|
-
node_repr[key] = [os.path.
|
|
110
|
+
node_repr[key] = [os.path.realpath(file) for file in val]
|
|
111
111
|
else:
|
|
112
|
-
node_repr[key] = os.path.
|
|
112
|
+
node_repr[key] = os.path.realpath(val)
|
|
113
113
|
|
|
114
114
|
|
|
115
115
|
def show(dataset, indentation=2):
|