mindspore-1.10.0-cp38-cp38-win_amd64.whl → mindspore-2.0.0rc1-cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/ConcurrencyCheck.dll +0 -0
- mindspore/CppBuildInsights.dll +0 -0
- mindspore/CppCoreCheck.dll +0 -0
- mindspore/EnumIndex.dll +0 -0
- mindspore/EspXEngine.dll +0 -0
- mindspore/HResultCheck.dll +0 -0
- mindspore/KernelTraceControl.dll +0 -0
- mindspore/LocalESPC.dll +0 -0
- mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
- mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
- mindspore/VariantClear.dll +0 -0
- mindspore/__init__.py +9 -4
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/builtin_operations.py +32 -4
- mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +17 -2
- mindspore/_extends/parse/parser.py +193 -34
- mindspore/_extends/parse/resources.py +7 -8
- mindspore/_extends/parse/standard_method.py +1780 -435
- mindspore/_extends/parse/trope.py +3 -1
- mindspore/amp.py +53 -58
- mindspore/atlprov.dll +0 -0
- mindspore/boost/adasum.py +3 -2
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +46 -26
- mindspore/boost/dim_reduce.py +6 -5
- mindspore/boost/grad_accumulation.py +2 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/cfgpersist.dll +0 -0
- mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
- mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
- mindspore/common/__init__.py +11 -10
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +57 -0
- mindspore/common/api.py +582 -297
- mindspore/common/dtype.py +66 -18
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +38 -1
- mindspore/common/jit_config.py +25 -13
- mindspore/common/mutable.py +53 -24
- mindspore/common/parameter.py +60 -37
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +927 -0
- mindspore/common/tensor.py +1627 -3900
- mindspore/communication/__init__.py +10 -5
- mindspore/communication/_comm_helper.py +78 -214
- mindspore/communication/_hccl_management.py +2 -1
- mindspore/communication/management.py +136 -47
- mindspore/config/op_info.config +501 -1008
- mindspore/context.py +291 -56
- mindspore/d3dcompiler_47.dll +0 -0
- mindspore/dataset/__init__.py +12 -8
- mindspore/dataset/audio/__init__.py +9 -9
- mindspore/dataset/audio/transforms.py +1090 -228
- mindspore/dataset/audio/utils.py +87 -39
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +17 -15
- mindspore/dataset/core/config.py +246 -17
- mindspore/dataset/core/py_util_helpers.py +4 -3
- mindspore/dataset/core/validator_helpers.py +10 -10
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +9 -9
- mindspore/dataset/engine/datasets.py +648 -477
- mindspore/dataset/engine/datasets_audio.py +165 -167
- mindspore/dataset/engine/datasets_standard_format.py +93 -67
- mindspore/dataset/engine/datasets_text.py +492 -342
- mindspore/dataset/engine/datasets_user_defined.py +85 -50
- mindspore/dataset/engine/datasets_vision.py +1224 -699
- mindspore/dataset/engine/graphdata.py +134 -69
- mindspore/dataset/engine/iterators.py +50 -9
- mindspore/dataset/engine/offload.py +52 -31
- mindspore/dataset/engine/samplers.py +27 -24
- mindspore/dataset/engine/serializer_deserializer.py +14 -15
- mindspore/dataset/engine/validators.py +213 -52
- mindspore/dataset/text/__init__.py +10 -8
- mindspore/dataset/text/transforms.py +152 -57
- mindspore/dataset/text/utils.py +98 -49
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +4 -2
- mindspore/dataset/transforms/c_transforms.py +11 -13
- mindspore/dataset/transforms/py_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms_util.py +10 -0
- mindspore/dataset/transforms/transforms.py +13 -15
- mindspore/dataset/transforms/validators.py +7 -7
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/browse_dataset.py +13 -13
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +8 -7
- mindspore/dataset/vision/c_transforms.py +125 -126
- mindspore/dataset/vision/py_transforms.py +37 -37
- mindspore/dataset/vision/py_transforms_util.py +23 -20
- mindspore/dataset/vision/transforms.py +316 -315
- mindspore/dataset/vision/utils.py +313 -17
- mindspore/dataset/vision/validators.py +6 -6
- mindspore/default_config.py +0 -1
- mindspore/dpcmi.dll +0 -0
- mindspore/{compression → experimental}/__init__.py +6 -5
- mindspore/experimental/map_parameter.py +275 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +70 -9
- mindspore/include/api/delegate.h +8 -1
- mindspore/include/api/dual_abi_helper.h +8 -24
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_group.h +68 -0
- mindspore/include/api/model_parallel_runner.h +17 -17
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +20 -4
- mindspore/include/api/status.h +7 -1
- mindspore/include/api/types.h +25 -21
- mindspore/include/api/visible.h +4 -0
- mindspore/include/c_api/model_c.h +5 -0
- mindspore/include/c_api/status_c.h +1 -1
- mindspore/include/dataset/config.h +1 -1
- mindspore/include/dataset/constants.h +14 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/dataset/vision.h +56 -117
- mindspore/include/dataset/vision_lite.h +102 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +28 -28
- mindspore/mindrecord/common/exceptions.py +2 -4
- mindspore/mindrecord/filereader.py +19 -1
- mindspore/mindrecord/filewriter.py +250 -88
- mindspore/mindrecord/mindpage.py +13 -13
- mindspore/mindrecord/shardheader.py +15 -15
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +29 -29
- mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
- mindspore/mindrecord/tools/csv_to_mr.py +4 -4
- mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
- mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +1 -5
- mindspore/nn/cell.py +297 -234
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +17 -42
- mindspore/nn/layer/__init__.py +7 -4
- mindspore/nn/layer/activation.py +131 -88
- mindspore/nn/layer/basic.py +313 -613
- mindspore/nn/layer/channel_shuffle.py +103 -0
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +52 -6
- mindspore/nn/layer/conv.py +112 -43
- mindspore/nn/layer/dense.py +10 -9
- mindspore/nn/layer/embedding.py +36 -34
- mindspore/nn/layer/image.py +123 -27
- mindspore/nn/layer/math.py +108 -107
- mindspore/nn/layer/normalization.py +212 -366
- mindspore/nn/layer/padding.py +370 -42
- mindspore/nn/layer/pooling.py +1443 -219
- mindspore/nn/layer/rnn_cells.py +11 -16
- mindspore/nn/layer/rnns.py +38 -39
- mindspore/nn/layer/thor_layer.py +24 -25
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +9 -6
- mindspore/nn/loss/loss.py +678 -142
- mindspore/nn/metrics.py +53 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
- mindspore/nn/optim/ada_grad.py +8 -8
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +18 -14
- mindspore/nn/optim/adam.py +429 -87
- mindspore/nn/optim/adamax.py +5 -6
- mindspore/nn/optim/adasum.py +10 -8
- mindspore/nn/optim/asgd.py +7 -7
- mindspore/nn/optim/ftrl.py +81 -11
- mindspore/nn/optim/lamb.py +7 -8
- mindspore/nn/optim/lars.py +4 -4
- mindspore/nn/optim/lazyadam.py +82 -7
- mindspore/nn/optim/momentum.py +8 -7
- mindspore/nn/optim/optimizer.py +19 -10
- mindspore/nn/optim/proximal_ada_grad.py +6 -5
- mindspore/nn/optim/rmsprop.py +3 -3
- mindspore/nn/optim/rprop.py +20 -16
- mindspore/nn/optim/sgd.py +21 -15
- mindspore/nn/optim/thor.py +23 -21
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -6
- mindspore/nn/probability/bijector/invert.py +4 -2
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/__init__.py +6 -0
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +11 -17
- mindspore/nn/probability/distribution/bernoulli.py +6 -6
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +9 -9
- mindspore/nn/probability/distribution/cauchy.py +8 -8
- mindspore/nn/probability/distribution/distribution.py +12 -6
- mindspore/nn/probability/distribution/exponential.py +5 -5
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +6 -5
- mindspore/nn/probability/distribution/gumbel.py +5 -5
- mindspore/nn/probability/distribution/half_normal.py +133 -0
- mindspore/nn/probability/distribution/laplace.py +128 -0
- mindspore/nn/probability/distribution/log_normal.py +0 -1
- mindspore/nn/probability/distribution/logistic.py +4 -5
- mindspore/nn/probability/distribution/normal.py +11 -15
- mindspore/nn/probability/distribution/poisson.py +6 -2
- mindspore/nn/probability/distribution/student_t.py +150 -0
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +5 -5
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +8 -1
- mindspore/nn/wrap/cell_wrapper.py +55 -27
- mindspore/nn/wrap/grad_reducer.py +20 -11
- mindspore/nn/wrap/loss_scale.py +47 -30
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +46 -42
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +26 -19
- mindspore/numpy/utils.py +1 -8
- mindspore/numpy/utils_const.py +112 -62
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -3
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +209 -152
- mindspore/ops/_grad/grad_base.py +55 -17
- mindspore/ops/_grad/grad_clip_ops.py +11 -3
- mindspore/ops/_grad/grad_comm_ops.py +58 -47
- mindspore/ops/_grad/grad_implementations.py +21 -61
- mindspore/ops/_grad/grad_inner_ops.py +48 -6
- mindspore/ops/_grad/grad_math_ops.py +306 -161
- mindspore/ops/_grad/grad_nn_ops.py +192 -181
- mindspore/ops/_grad/grad_other_ops.py +1 -1
- mindspore/ops/_grad/grad_quant_ops.py +5 -5
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +15 -9
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
- mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
- mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
- mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
- mindspore/ops/_op_impl/__init__.py +3 -3
- mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
- mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
- mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
- mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
- mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
- mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
- mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
- mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
- mindspore/ops/_op_impl/aicpu/diag.py +36 -0
- mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
- mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
- mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
- mindspore/ops/_op_impl/aicpu/eig.py +35 -0
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/glu.py +33 -0
- mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
- mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
- mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
- mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
- mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
- mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
- mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
- mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
- mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/qr.py +36 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/range.py +36 -0
- mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
- mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
- mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sort.py +39 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
- mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
- mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
- mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
- mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
- mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/__init__.py +1 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
- mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
- mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -608
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/greater.py +2 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
- mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
- mindspore/ops/_op_impl/tbe/slice.py +26 -15
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +3 -2
- mindspore/ops/_register_for_op.py +11 -0
- mindspore/ops/_utils/__init__.py +1 -1
- mindspore/ops/_utils/utils.py +20 -41
- mindspore/ops/_vmap/__init__.py +2 -2
- mindspore/ops/_vmap/vmap_array_ops.py +170 -78
- mindspore/ops/_vmap/vmap_base.py +24 -10
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
- mindspore/ops/_vmap/vmap_image_ops.py +52 -0
- mindspore/ops/_vmap/vmap_math_ops.py +77 -6
- mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
- mindspore/ops/_vmap/vmap_other_ops.py +3 -1
- mindspore/ops/_vmap/vmap_random_ops.py +55 -3
- mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
- mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/__init__.py +1 -4
- mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
- mindspore/ops/composite/__init__.py +12 -13
- mindspore/ops/composite/base.py +261 -254
- mindspore/ops/composite/env_ops.py +41 -0
- mindspore/ops/composite/math_ops.py +197 -156
- mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
- mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
- mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
- mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
- mindspore/ops/function/__init__.py +323 -8
- mindspore/ops/function/array_func.py +3511 -780
- mindspore/ops/function/clip_func.py +329 -0
- mindspore/ops/function/debug_func.py +6 -6
- mindspore/ops/function/grad/__init__.py +5 -1
- mindspore/ops/function/grad/grad_func.py +736 -65
- mindspore/ops/function/image_func.py +270 -0
- mindspore/ops/function/linalg_func.py +268 -8
- mindspore/ops/function/math_func.py +8032 -3164
- mindspore/ops/function/nn_func.py +5619 -1855
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +11 -10
- mindspore/ops/function/random_func.py +939 -77
- mindspore/ops/function/sparse_func.py +249 -84
- mindspore/ops/function/sparse_unary_func.py +2303 -0
- mindspore/ops/function/spectral_func.py +146 -0
- mindspore/ops/function/vmap_func.py +114 -0
- mindspore/ops/functional.py +182 -254
- mindspore/ops/op_info_register.py +79 -34
- mindspore/ops/operations/__init__.py +210 -118
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +25 -15
- mindspore/ops/operations/_grad_ops.py +447 -322
- mindspore/ops/operations/_inner_ops.py +547 -176
- mindspore/ops/operations/_map_tensor_ops.py +112 -0
- mindspore/ops/operations/_ms_kernel.py +29 -27
- mindspore/ops/operations/_ocr_ops.py +11 -11
- mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
- mindspore/ops/operations/_quant_ops.py +186 -101
- mindspore/ops/operations/_rl_inner_ops.py +122 -61
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1047 -0
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +4 -4
- mindspore/ops/operations/array_ops.py +1428 -1226
- mindspore/ops/operations/comm_ops.py +180 -117
- mindspore/ops/operations/control_ops.py +4 -2
- mindspore/ops/operations/custom_ops.py +185 -98
- mindspore/ops/operations/debug_ops.py +92 -54
- mindspore/ops/operations/image_ops.py +406 -211
- mindspore/ops/operations/inner_ops.py +42 -53
- mindspore/ops/operations/linalg_ops.py +32 -29
- mindspore/ops/operations/math_ops.py +2076 -897
- mindspore/ops/operations/nn_ops.py +1282 -1252
- mindspore/ops/operations/other_ops.py +124 -278
- mindspore/ops/operations/random_ops.py +345 -178
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +502 -157
- mindspore/ops/operations/spectral_ops.py +107 -0
- mindspore/ops/primitive.py +192 -15
- mindspore/ops/vm_impl_registry.py +23 -2
- mindspore/parallel/__init__.py +6 -1
- mindspore/parallel/_auto_parallel_context.py +199 -92
- mindspore/parallel/_cell_wrapper.py +4 -2
- mindspore/parallel/_cost_model_context.py +3 -0
- mindspore/parallel/_dp_allreduce_fusion.py +2 -1
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +167 -28
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +9 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
- mindspore/parallel/_utils.py +47 -7
- mindspore/parallel/algo_parameter_config.py +5 -1
- mindspore/parallel/checkpoint_transform.py +329 -0
- mindspore/parallel/shard.py +229 -0
- mindspore/perf_msvcbuildinsights.dll +0 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/util.py +4 -3
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +249 -0
- mindspore/profiler/parser/aicpu_data_parser.py +38 -39
- mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
- mindspore/profiler/parser/base_timeline_generator.py +471 -0
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
- mindspore/profiler/parser/framework_parser.py +42 -16
- mindspore/profiler/parser/hccl_parser.py +158 -158
- mindspore/profiler/parser/hwts_log_parser.py +7 -6
- mindspore/profiler/parser/integrator.py +18 -1579
- mindspore/profiler/parser/minddata_analyzer.py +8 -8
- mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +108 -0
- mindspore/profiler/parser/step_trace_parser.py +1 -1
- mindspore/profiler/profiling.py +396 -194
- mindspore/rewrite/__init__.py +6 -2
- mindspore/rewrite/api/node.py +51 -110
- mindspore/rewrite/api/node_type.py +10 -6
- mindspore/rewrite/api/pattern_engine.py +51 -7
- mindspore/rewrite/api/scoped_value.py +64 -53
- mindspore/rewrite/api/symbol_tree.py +108 -61
- mindspore/rewrite/api/tree_node_helper.py +2 -3
- mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
- mindspore/rewrite/ast_helpers/__init__.py +6 -3
- mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
- mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
- mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
- mindspore/rewrite/ast_transformers/__init__.py +0 -1
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
- mindspore/rewrite/common/__init__.py +2 -0
- mindspore/rewrite/common/event.py +1 -1
- mindspore/rewrite/common/observable.py +1 -1
- mindspore/rewrite/common/observer.py +1 -1
- mindspore/rewrite/common/rewrite_elog.py +35 -0
- mindspore/rewrite/namer.py +2 -2
- mindspore/rewrite/namespace.py +14 -4
- mindspore/rewrite/node.py +161 -13
- mindspore/rewrite/parser.py +0 -1
- mindspore/rewrite/parser_register.py +0 -1
- mindspore/rewrite/parsers/arguments_parser.py +3 -2
- mindspore/rewrite/parsers/assign_parser.py +267 -67
- mindspore/rewrite/parsers/attribute_parser.py +56 -0
- mindspore/rewrite/parsers/class_def_parser.py +191 -108
- mindspore/rewrite/parsers/constant_parser.py +101 -0
- mindspore/rewrite/parsers/container_parser.py +88 -0
- mindspore/rewrite/parsers/for_parser.py +28 -15
- mindspore/rewrite/parsers/function_def_parser.py +21 -5
- mindspore/rewrite/parsers/if_parser.py +11 -28
- mindspore/rewrite/parsers/module_parser.py +9 -6
- mindspore/rewrite/parsers/return_parser.py +3 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +322 -109
- mindspore/rewrite/symbol_tree_builder.py +45 -8
- mindspore/rewrite/symbol_tree_dumper.py +0 -1
- mindspore/rewrite/topological_manager.py +1 -2
- mindspore/run_check/_check_version.py +209 -112
- mindspore/run_check/run_check.py +2 -1
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +6 -4
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +321 -50
- mindspore/train/callback/__init__.py +3 -1
- mindspore/train/callback/_backup_and_restore.py +120 -0
- mindspore/train/callback/_callback.py +8 -8
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_early_stop.py +13 -7
- mindspore/train/callback/_history.py +8 -8
- mindspore/train/callback/_lambda_callback.py +6 -6
- mindspore/train/callback/_landscape.py +36 -38
- mindspore/train/callback/_loss_monitor.py +12 -6
- mindspore/train/callback/_lr_scheduler_callback.py +2 -4
- mindspore/train/callback/_on_request_exit.py +212 -0
- mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
- mindspore/train/callback/_summary_collector.py +27 -19
- mindspore/train/callback/_time_monitor.py +13 -7
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +122 -33
- mindspore/train/dataset_helper.py +28 -87
- mindspore/train/loss_scale_manager.py +4 -7
- mindspore/{nn → train}/metrics/__init__.py +20 -20
- mindspore/{nn → train}/metrics/accuracy.py +12 -10
- mindspore/{nn → train}/metrics/auc.py +4 -4
- mindspore/{nn → train}/metrics/bleu_score.py +4 -4
- mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
- mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
- mindspore/{nn → train}/metrics/dice.py +6 -5
- mindspore/{nn → train}/metrics/error.py +7 -5
- mindspore/{nn → train}/metrics/fbeta.py +9 -7
- mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
- mindspore/{nn → train}/metrics/loss.py +4 -3
- mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/metric.py +6 -5
- mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
- mindspore/{nn → train}/metrics/perplexity.py +5 -4
- mindspore/{nn → train}/metrics/precision.py +5 -4
- mindspore/{nn → train}/metrics/recall.py +5 -4
- mindspore/{nn → train}/metrics/roc.py +7 -6
- mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/topk.py +7 -5
- mindspore/train/mind_ir_pb2.py +339 -32
- mindspore/train/model.py +113 -84
- mindspore/train/serialization.py +547 -167
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -12
- mindspore/train/train_thor/convert_utils.py +7 -1
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/train/train_thor/model_thor.py +0 -4
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -514
- mindspore/compression/quant/qat.py +0 -636
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/libatomic-1.dll +0 -0
- mindspore/libgcc_s_seh-1.dll +0 -0
- mindspore/libgfortran-4.dll +0 -0
- mindspore/libgomp-1.dll +0 -0
- mindspore/libjpeg-62.dll +0 -0
- mindspore/libmindspore.dll +0 -0
- mindspore/libmindspore_common.dll +0 -0
- mindspore/libmindspore_core.dll +0 -0
- mindspore/libmindspore_glog.dll +0 -0
- mindspore/libnnacl.dll +0 -0
- mindspore/libopencv_core452.dll +0 -0
- mindspore/libopencv_imgcodecs452.dll +0 -0
- mindspore/libopencv_imgproc452.dll +0 -0
- mindspore/libquadmath-0.dll +0 -0
- mindspore/libsqlite3.dll +0 -0
- mindspore/libssp-0.dll +0 -0
- mindspore/libstdc++-6.dll +0 -0
- mindspore/libtinyxml2.dll +0 -0
- mindspore/libturbojpeg.dll +0 -0
- mindspore/libwinpthread-1.dll +0 -0
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -138
- mindspore/nn/probability/dpn/vae/vae.py +0 -122
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
- mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
- mindspore/ops/composite/array_ops.py +0 -210
- mindspore/ops/composite/clip_ops.py +0 -238
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/ops/operations/sponge_ops.py +0 -3531
- mindspore/ops/operations/sponge_update_ops.py +0 -2546
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- mindspore/run_check/_check_deps_version.py +0 -84
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -32,14 +32,14 @@ import mindspore._c_dataengine as cde
 
 from .datasets import VisionBaseDataset, SourceDataset, MappableDataset, Shuffle, Schema
 from .datasets_user_defined import GeneratorDataset
-from .validators import 
-
-
-
-
-
-
-
+from .validators import check_caltech101_dataset, check_caltech256_dataset, check_celebadataset, \
+    check_cityscapes_dataset, check_cocodataset, check_div2k_dataset, check_emnist_dataset, check_fake_image_dataset, \
+    check_flickr_dataset, check_flowers102dataset, check_food101_dataset, check_imagefolderdataset, \
+    check_kittidataset, check_lfw_dataset, check_lsun_dataset, check_manifestdataset, check_mnist_cifar_dataset, \
+    check_omniglotdataset, check_photo_tour_dataset, check_places365_dataset, check_qmnist_dataset, \
+    check_random_dataset, check_rendered_sst2_dataset, check_sb_dataset, check_sbu_dataset, check_semeion_dataset, \
+    check_stl10_dataset, check_sun397_dataset, check_svhn_dataset, check_usps_dataset, check_vocdataset, \
+    check_wider_face_dataset
 
 from ..core.validator_helpers import replace_none
 
@@ -108,12 +108,14 @@ class _Caltech101Dataset:
 
 class Caltech101Dataset(GeneratorDataset):
     """
-
+    Caltech 101 dataset.
+
+    The columns of the generated dataset depend on the value of `target_type` .
+
+    - When `target_type` is 'category', the columns are :py:obj:`[image, category]` .
+    - When `target_type` is 'annotation', the columns are :py:obj:`[image, annotation]` .
+    - When `target_type` is 'all', the columns are :py:obj:`[image, category, annotation]` .
 
-    The columns of the generated dataset depend on the value of `target_type`.
-    When `target_type` is 'category', the columns are :py:obj:`[image, category]`.
-    When `target_type` is 'annotation', the columns are :py:obj:`[image, annotation]`.
-    When `target_type` is 'all', the columns are :py:obj:`[image, category, annotation]`.
     The tensor of column :py:obj:`image` is of the uint8 type.
     The tensor of column :py:obj:`category` is of the uint32 type.
     The tensor of column :py:obj:`annotation` is a 2-dimensional ndarray that stores the contour of the image
@@ -125,33 +127,33 @@ class Caltech101Dataset(GeneratorDataset):
             and the other is called Annotations, which stores annotations.
         target_type (str, optional): Target of the image. If `target_type` is 'category', return category represents
             the target class. If `target_type` is 'annotation', return annotation.
-            If `target_type` is 'all', return category and annotation
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-        shuffle (bool, optional): Whether or not to perform shuffle on the dataset
-
-        decode (bool, optional): Whether or not to decode the images after reading
+            If `target_type` is 'all', return category and annotation. Default: None, means 'category'.
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, all images.
+        num_parallel_workers (int, optional): Number of worker subprocesses to read the data. Default: 1.
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
+        decode (bool, optional): Whether or not to decode the images after reading. Default: False.
         sampler (Sampler, optional): Object used to choose samples from the
-            dataset
+            dataset. Default: None, expected order behavior shown in the table below.
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into
+            into. Default: None. When this argument is specified, `num_samples` reflects
             the maximum sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
             argument can only be specified when `num_shards` is also specified.
 
     Raises:
         RuntimeError: If `dataset_dir` does not contain data files.
-        ValueError: If `target_type` is not set correctly.
-        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
         RuntimeError: If `sampler` and `shuffle` are specified at the same time.
         RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
         RuntimeError: If `num_shards` is specified but `shard_id` is None.
         RuntimeError: If `shard_id` is specified but `num_shards` is None.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+        ValueError: If `target_type` is not set correctly.
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 
     Note:
-        - This dataset can take in a `sampler
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
           The table below shows what input arguments are allowed and their expected behavior.
 
         .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -191,11 +193,13 @@ class Caltech101Dataset(GeneratorDataset):

  About Caltech101Dataset:

- Pictures of objects belonging to 101 categories
- Most categories have about 50 images.
- and Marc 'Aurelio Ranzato. The size of each image is roughly 300 x 200 pixels.
+ Pictures of objects belonging to 101 categories, about 40 to 800 images per category.
+ Most categories have about 50 images. The size of each image is roughly 300 x 200 pixels.
  The official provides the contour data of each object in each picture, which is the annotation.

+ Here is the original Caltech101 dataset structure,
+ and you can unzip the dataset files into the following directory structure, which can be read by MindSpore's API.
+
  .. code-block::

  .
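
A minimal usage sketch for the `target_type` values documented above, assuming a local Caltech101 copy at a placeholder path; 'all' yields the image, category and annotation columns:

>>> import mindspore.dataset as ds
>>> caltech101_dir = "/path/to/caltech101_root"  # placeholder path, not part of the package
>>> dataset = ds.Caltech101Dataset(caltech101_dir, target_type="all", decode=True)
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
...     image, category, annotation = item["image"], item["category"], item["annotation"]
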
@@ -276,44 +280,45 @@ class Caltech101Dataset(GeneratorDataset):

  class Caltech256Dataset(MappableDataset, VisionBaseDataset):
  """
-
+ Caltech 256 dataset.

- The generated dataset has two columns: :py:obj:`[image, label]
+ The generated dataset has two columns: :py:obj:`[image, label]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`label` is of the uint32 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- num_samples (int, optional): The number of images to be included in the dataset
-
- num_parallel_workers (int, optional): Number of
-
-
-
-
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+ Default: None, expected order behavior shown in the table below.
+ decode (bool, optional): Whether or not to decode the images after reading. Default: False.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
+ dataset. Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `target_type` is not 'category', 'annotation' or 'all'.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ ValueError: If `target_type` is not 'category', 'annotation' or 'all'.
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
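
A minimal sharding sketch for the `num_shards` / `shard_id` arguments documented above, assuming a placeholder dataset path; each shard reads a disjoint subset, and `num_samples` then caps the per-shard count:

>>> import mindspore.dataset as ds
>>> caltech256_dir = "/path/to/caltech256_root"  # placeholder path
>>> # shard 0 of 4: roughly one quarter of the samples, capped at 100 per shard
>>> dataset = ds.Caltech256Dataset(caltech256_dir, num_shards=4, shard_id=0, num_samples=100)
>>> print(dataset.get_dataset_size())
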
@@ -404,48 +409,49 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):

  class CelebADataset(MappableDataset, VisionBaseDataset):
  """
-
- Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset.
+ CelebA(CelebFaces Attributes) dataset.

-
+ Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset.
+ The generated dataset has two columns: :py:obj:`[image, attr]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`attr` is of the uint32 type and one hot encoded.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- num_parallel_workers (int, optional): Number of
-
-
-
-
-
-
-
-
-
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None.
+ usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset.
+ Default: 'all', will read all samples.
+ sampler (Sampler, optional): Object used to choose samples from the dataset. Default: None.
+ decode (bool, optional): Whether to decode the images after reading. Default: False.
+ extensions (list[str], optional): List of file extensions to be included in the dataset. Default: None.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, will include all images.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
  and returns the decrypted bytes data. Default: None, no decryption.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
- ValueError: If `usage` is not 'train', 'valid', 'test' or 'all'.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ ValueError: If `usage` is not 'train', 'valid', 'test' or 'all'.

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
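
A minimal usage sketch for the `usage` and `decode` arguments documented above, assuming a placeholder CelebA directory that contains `list_attr_celeba.txt`:

>>> import mindspore.dataset as ds
>>> celeba_dir = "/path/to/celeba_root"  # placeholder path
>>> dataset = ds.CelebADataset(celeba_dir, usage="train", decode=True, num_samples=5)
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
...     image, attr = item["image"], item["attr"]  # attr is a one-hot uint32 vector
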
@@ -574,47 +580,48 @@ class CelebADataset(MappableDataset, VisionBaseDataset):

  class Cifar10Dataset(MappableDataset, VisionBaseDataset):
  """
-
- This api only supports parsing Cifar10 file in binary version now.
+ CIFAR-10 dataset.

-
+ This api only supports parsing CIFAR-10 file in binary version now.
+ The generated dataset has two columns :py:obj:`[image, label]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 50,000
- train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples
-
- num_samples (int, optional): The number of images to be included in the dataset
-
- num_parallel_workers (int, optional): Number of
-
-
-
+ train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples.
+ Default: None, all samples.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+ order behavior shown in the table below.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
+ dataset. Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
- ValueError: If `usage` is not 'train', 'test' or 'all'.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ ValueError: If `usage` is not 'train', 'test' or 'all'.

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
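
A minimal sketch of the `sampler` / `shuffle` exclusivity noted above, assuming a placeholder directory with the CIFAR-10 binary files; pass one of the two, never both:

>>> import mindspore.dataset as ds
>>> cifar10_dir = "/path/to/cifar10_binary"  # placeholder path
>>> # either let the dataset shuffle ...
>>> train_set = ds.Cifar10Dataset(cifar10_dir, usage="train", shuffle=True)
>>> # ... or hand over a sampler; combining both raises RuntimeError
>>> eval_set = ds.Cifar10Dataset(cifar10_dir, usage="test",
...                              sampler=ds.SequentialSampler(num_samples=100))
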
@@ -707,46 +714,47 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):

  class Cifar100Dataset(MappableDataset, VisionBaseDataset):
  """
-
+ CIFAR-100 dataset.

- The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]
+ The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`coarse_label` and :py:obj:`fine_labels` are each a scalar of uint32 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 50,000
- train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples
-
- num_samples (int, optional): The number of images to be included in the dataset
-
- num_parallel_workers (int, optional): Number of
-
-
-
+ train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples.
+ Default: None, all samples.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+ order behavior shown in the table below.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
+ dataset. Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
- ValueError: If `usage` is not 'train', 'test' or 'all'.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ ValueError: If `usage` is not 'train', 'test' or 'all'.

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and shuffle
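
A minimal sketch of the global worker default mentioned above ("will use global default workers(8)"), using the `mindspore.dataset.config` module; the paths are placeholders:

>>> import mindspore.dataset as ds
>>> ds.config.set_num_parallel_workers(4)  # lower the global default of 8 for every dataset op
>>> cifar100_dir = "/path/to/cifar100_binary"  # placeholder path
>>> dataset = ds.Cifar100Dataset(cifar100_dir, usage="train")  # picks up the global setting
>>> small_set = ds.Cifar100Dataset(cifar100_dir, usage="test", num_parallel_workers=2)  # per-op override
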
@@ -833,52 +841,54 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):

  class CityscapesDataset(MappableDataset, VisionBaseDataset):
  """
-
+ Cityscapes dataset.

- The generated dataset has two columns :py:obj:`[image, task]
+ The generated dataset has two columns :py:obj:`[image, task]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`task` is of the uint8 type if task is not 'polygon' otherwise task is
  a string tensor with serialize json.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- usage (str): Acceptable usages include 'train', 'test', 'val' or 'all' if quality_mode is 'fine'
- otherwise 'train', 'train_extra', 'val' or 'all'
- quality_mode (str): Acceptable quality_modes include 'fine' or 'coarse'
- task (str): Acceptable tasks include 'instance',
+ usage (str, optional): Acceptable usages include 'train', 'test', 'val' or 'all' if quality_mode is 'fine'
+ otherwise 'train', 'train_extra', 'val' or 'all'. Default: 'train'.
+ quality_mode (str, optional): Acceptable quality_modes include 'fine' or 'coarse'. Default: 'fine'.
+ task (str, optional): Acceptable tasks include 'instance',
+ 'semantic', 'polygon' or 'color'. Default: 'instance'.
  num_samples (int, optional): The number of images to be included in the dataset.
-
- num_parallel_workers (int, optional): Number of
-
-
-
-
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+ order behavior shown in the table below.
+ decode (bool, optional): Decode the images after reading. Default: False.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
+ dataset. Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` is invalid or does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  ValueError: If `dataset_dir` is not exist.
  ValueError: If `task` is invalid.
  ValueError: If `quality_mode` is invalid.
  ValueError: If `usage` is invalid.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
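
A minimal usage sketch for the `task` and `quality_mode` arguments documented above, with a placeholder Cityscapes root; with task='polygon' the task column would instead be a serialized JSON string:

>>> import mindspore.dataset as ds
>>> cityscapes_dir = "/path/to/cityscapes_root"  # placeholder path
>>> dataset = ds.CityscapesDataset(cityscapes_dir, usage="train", quality_mode="fine",
...                                task="semantic", decode=True)
>>> for item in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
...     image, label_map = item["image"], item["task"]  # uint8 label map for 'semantic'
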
@@ -1004,7 +1014,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):

  class CocoDataset(MappableDataset, VisionBaseDataset):
  """
-
+ COCO(Common Objects in Context) dataset.

  CocoDataset supports five kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation,
  Panoptic Segmentation and Captioning of 2017 Train/Val/Test dataset.
@@ -1013,26 +1023,27 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
  dataset_dir (str): Path to the root directory that contains the dataset.
  annotation_file (str): Path to the annotation JSON file.
  task (str, optional): Set the task type for reading COCO data. Supported task types:
- 'Detection', 'Stuff', 'Panoptic', 'Keypoint' and 'Captioning'
- num_samples (int, optional): The number of images to be included in the dataset
-
- num_parallel_workers (int, optional): Number of
-
-
-
-
-
-
+ 'Detection', 'Stuff', 'Panoptic', 'Keypoint' and 'Captioning'. Default: 'Detection'.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+ order behavior shown in the table below.
+ decode (bool, optional): Decode the images after reading. Default: False.
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
+ Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.
  extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
- output at the end :py:obj:`[_meta-filename, dtype=string]`
+ output at the end :py:obj:`[_meta-filename, dtype=string]` . Default: False.
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
  and returns the decrypted bytes data. Default: None, no decryption.

@@ -1083,13 +1094,13 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
  ValueError: If `task` is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'].
  ValueError: If `annotation_file` is not exist.
  ValueError: If `dataset_dir` is not exist.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
  - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op is added
  to remove the prefix('_meta-').
- -
- - This dataset can take in a `sampler
+ - Not support `mindspore.dataset.PKSampler` for `sampler` parameter yet.
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
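
A minimal sketch of the rename step required by the note above, assuming placeholder COCO paths; the extra '_meta-filename' column only becomes visible after the '_meta-' prefix is renamed away:

>>> import mindspore.dataset as ds
>>> coco_dir = "/path/to/coco2017/train2017"  # placeholder image root
>>> anno_file = "/path/to/annotations/instances_train2017.json"  # placeholder annotation file
>>> dataset = ds.CocoDataset(coco_dir, annotation_file=anno_file, task="Detection",
...                          decode=True, extra_metadata=True)
>>> dataset = dataset.rename(input_columns=["_meta-filename"], output_columns=["filename"])
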
@@ -1249,37 +1260,38 @@ class CocoDataset(MappableDataset, VisionBaseDataset):

  class DIV2KDataset(MappableDataset, VisionBaseDataset):
  """
-
+ DIV2K(DIVerse 2K resolution image) dataset.

- The generated dataset has two columns :py:obj:`[hr_image, lr_image]
+ The generated dataset has two columns :py:obj:`[hr_image, lr_image]` .
  The tensor of column :py:obj:`hr_image` and the tensor of column :py:obj:`lr_image` are of the uint8 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- usage (str, optional): Acceptable usages include 'train', 'valid' or 'all'
+ usage (str, optional): Acceptable usages include 'train', 'valid' or 'all'. Default: 'train'.
  downgrade (str, optional): Acceptable downgrades include 'bicubic', 'unknown', 'mild', 'difficult' or
- 'wild'
- scale (str, optional): Acceptable scales include 2, 3, 4 or 8
+ 'wild'. Default: 'bicubic'.
+ scale (str, optional): Acceptable scales include 2, 3, 4 or 8. Default: 2.
  When `downgrade` is 'bicubic', scale can be 2, 3, 4, 8.
  When `downgrade` is 'unknown', scale can only be 2, 3, 4.
  When `downgrade` is 'mild', 'difficult' or 'wild', scale can only be 4.
  num_samples (int, optional): The number of images to be included in the dataset.
-
- num_parallel_workers (int, optional): Number of
-
-
-
-
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+ order behavior shown in the table below.
+ decode (bool, optional): Decode the images after reading. Default: False.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
+ dataset. Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` is invalid or does not contain data files.
@@ -1294,10 +1306,10 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
  ValueError: If `scale` is invalid.
  ValueError: If `scale` equal to 8 and downgrade not equal to 'bicubic'.
  ValueError: If `downgrade` in ['mild', 'difficult', 'wild'] and `scale` not equal to 4.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
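
A minimal usage sketch for the `downgrade` / `scale` constraints documented above, with a placeholder DIV2K root:

>>> import mindspore.dataset as ds
>>> div2k_dir = "/path/to/div2k_root"  # placeholder path
>>> # 'bicubic' accepts scale 2, 3, 4 or 8
>>> dataset = ds.DIV2KDataset(div2k_dir, usage="train", downgrade="bicubic", scale=4, decode=True)
>>> # 'mild', 'difficult' and 'wild' only accept scale=4
>>> wild_set = ds.DIV2KDataset(div2k_dir, usage="train", downgrade="wild", scale=4)
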
@@ -1437,9 +1449,9 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):

  class EMnistDataset(MappableDataset, VisionBaseDataset):
  """
-
+ EMNIST(Extended MNIST) dataset.

- The generated dataset has two columns :py:obj:`[image, label]
+ The generated dataset has two columns :py:obj:`[image, label]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.

@@ -1447,33 +1459,35 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
  dataset_dir (str): Path to the root directory that contains the dataset.
  name (str): Name of splits for this dataset, can be 'byclass', 'bymerge', 'balanced', 'letters', 'digits'
  or 'mnist'.
- usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
-
-
-
-
-
-
-
+ usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 60,000
+ train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
+ Default: None, will read all samples.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, will read all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+ Default: None, expected order behavior shown in the table below.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
- num_shards (int, optional): Number of shards that the dataset will be divided into
+ dataset. Default: None, expected order behavior shown in the table below.
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1572,44 +1586,45 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
  """
  A source dataset for generating fake images.

- The generated dataset has two columns :py:obj:`[image, label]
+ The generated dataset has two columns :py:obj:`[image, label]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
- The
+ The column :py:obj:`label` is a scalar of the uint32 type.

  Args:
- num_images (int, optional): Number of images to generate in the dataset
- image_size (tuple, optional): Size of the fake image (
- num_classes (int, optional): Number of classes in the dataset
- base_seed (int, optional): Offsets the index-based random seed used to generate each image
- num_samples (int, optional): The number of images to be included in the dataset
-
- num_parallel_workers (int, optional): Number of
-
-
-
+ num_images (int, optional): Number of images to generate in the dataset. Default: 1000.
+ image_size (tuple, optional): Size of the fake image. Default: (224, 224, 3).
+ num_classes (int, optional): Number of classes in the dataset. Default: 10.
+ base_seed (int, optional): Offsets the index-based random seed used to generate each image. Default: 0.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, will read all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+ Default: None, expected order behavior shown in the table below.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
- num_shards (int, optional): Number of shards that the dataset will be divided into
+ dataset. Default: None, expected order behavior shown in the table below.
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a sampler.
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

- .. list-table:: Expected Order Behavior of Using
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
  :widths: 25 25 50
  :header-rows: 1

@@ -1639,8 +1654,6 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
  >>> # Read 3 samples from FakeImage dataset
  >>> dataset = ds.FakeImageDataset(num_images=1000, image_size=(224,224,3),
  ... num_classes=10, base_seed=0, num_samples=3)
- >>>
- >>> # Note: In FakeImage dataset, each dictionary has keys "image" and "label"
  """

  @check_fake_image_dataset
@@ -1660,44 +1673,45 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):

  class FashionMnistDataset(MappableDataset, VisionBaseDataset):
  """
-
+ Fashion-MNIST dataset.

- The generated dataset has two columns :py:obj:`[image, label]
+ The generated dataset has two columns :py:obj:`[image, label]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
- The
+ The column :py:obj:`label` is a scalar of the uint32 type.

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 60,000
  train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
-
- num_samples (int, optional): The number of images to be included in the dataset
-
- num_parallel_workers (int, optional): Number of
-
-
-
-
-
-
+ Default: None, will read all samples.
+ num_samples (int, optional): The number of images to be included in the dataset.
+ Default: None, will read all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+ Default: None, expected order behavior shown in the table below.
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
+ Default: None, expected order behavior shown in the table below.
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1782,9 +1796,9 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):

  class FlickrDataset(MappableDataset, VisionBaseDataset):
  """
-
+ Flickr8k and Flickr30k datasets.

- The generated dataset has two columns :py:obj:`[image, annotation]
+ The generated dataset has two columns :py:obj:`[image, annotation]` .
  The tensor of column :py:obj:`image` is of the uint8 type.
  The tensor of column :py:obj:`annotation` is a tensor which contains 5 annotations string,
  such as ["a", "b", "c", "d", "e"].
@@ -1793,22 +1807,23 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
  dataset_dir (str): Path to the root directory that contains the dataset.
  annotation_file (str): Path to the root directory that contains the annotation.
  num_samples (int, optional): The number of images to be included in the dataset.
-
- num_parallel_workers (int, optional): Number of
-
-
-
-
+ Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
+ Default: None, will use global default workers(8), it can be set
+ by `mindspore.dataset.config.set_num_parallel_workers` .
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+ order behavior shown in the table below.
+ decode (bool, optional): Decode the images after reading. Default: None.
  sampler (Sampler, optional): Object used to choose samples from the
- dataset
+ dataset. Default: None, expected order behavior shown in the table below.
  num_shards (int, optional): Number of shards that the dataset will be divided
- into
+ into. Default: None. When this argument is specified, `num_samples` reflects
  the max sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
  argument can only be specified when `num_shards` is also specified.
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+ Default: None, which means no cache is used.

  Raises:
  RuntimeError: If `dataset_dir` is not valid or does not contain data files.
@@ -1819,10 +1834,10 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
  ValueError: If `dataset_dir` is not exist.
  ValueError: If `annotation_file` is not exist.
- ValueError: If `shard_id` is
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a `sampler
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2026,45 +2041,45 @@ class _Flowers102Dataset:

  class Flowers102Dataset(GeneratorDataset):
  """
-
+ Oxford 102 Flower dataset.

-
-
-
-
+ According to the given `task` configuration, the generated dataset has different output columns:
+ - `task` = 'Classification', output columns: `[image, dtype=uint8]` , `[label, dtype=uint32]` .
+ - `task` = 'Segmentation',
+ output columns: `[image, dtype=uint8]` , `[segmentation, dtype=uint8]` , `[label, dtype=uint32]` .

  Args:
  dataset_dir (str): Path to the root directory that contains the dataset.
- task (str): Specify the 'Classification' or 'Segmentation' task
- usage (str): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset
-
- num_samples (int, optional): The number of samples to be included in the dataset
- num_parallel_workers (int, optional): Number of subprocesses used to
-
-
-
-
-
-
-
- sample number of per shard.
- shard_id (int, optional): The shard ID within `num_shards`
- when num_shards is also specified.
+ task (str, optional): Specify the 'Classification' or 'Segmentation' task. Default: 'Classification'.
+ usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset.
+ Default: 'all', will read all samples.
+ num_samples (int, optional): The number of samples to be included in the dataset. Default: None, all images.
+ num_parallel_workers (int, optional): Number of worker subprocesses used to
+ fetch the dataset in parallel. Default: 1.
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+ Default: None, expected order behavior shown in the table below.
+ decode (bool, optional): Whether or not to decode the images and segmentations after reading. Default: False.
+ sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset.
+ Default: None, expected order behavior shown in the table below.
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument must be specified only
+ when `num_shards` is also specified.

  Raises:
  RuntimeError: If `dataset_dir` does not contain data files.
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
- ValueError: If `
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).

  Note:
- - This dataset can take in a sampler.
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
  The table below shows what input arguments are allowed and their expected behavior.

- .. list-table:: Expected Order Behavior of Using
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
  :widths: 25 25 50
  :header-rows: 1

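
A minimal usage sketch for the `task` output columns documented above, with a placeholder Flowers102 root:

>>> import mindspore.dataset as ds
>>> flowers102_dir = "/path/to/flowers102_root"  # placeholder path
>>> dataset = ds.Flowers102Dataset(flowers102_dir, task="Segmentation", usage="all", decode=True)
>>> print(dataset.get_col_names())  # expected: ['image', 'segmentation', 'label']
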
@@ -2187,40 +2202,176 @@ class Flowers102Dataset(GeneratorDataset):
|
|
|
2187
2202
|
return class_dict
|
|
2188
2203
|
|
|
2189
2204
|
|
|
2205
|
+
class Food101Dataset(MappableDataset, VisionBaseDataset):
|
|
2206
|
+
"""
|
|
2207
|
+
Food101 dataset.
|
|
2208
|
+
|
|
2209
|
+
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
2210
|
+
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2211
|
+
The tensor of column :py:obj:`label` is of the string type.
|
|
2212
|
+
|
|
2213
|
+
Args:
|
|
2214
|
+
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2215
|
+
usage (str, optional): Usage of this dataset, can be 'train', 'test', or 'all'. 'train' will read
|
|
2216
|
+
from 75,750 samples, 'test' will read from 25,250 samples, and 'all' will read all 'train'
|
|
2217
|
+
and 'test' samples. Default: None, will be set to 'all'.
|
|
2218
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2219
|
+
Default: None, will read all images.
|
|
2220
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2221
|
+
Default: None, will use global default workers(8), it can be set
|
|
2222
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2223
|
+
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
+        decode (bool, optional): Decode the images after reading. Default: False.
+        sampler (Sampler, optional): Object used to choose samples from the dataset.
+            Default: None, expected order behavior shown in the table below.
+        num_shards (int, optional): Number of shards that the dataset will be divided into. When this argument
+            is specified, `num_samples` reflects the maximum sample number of per shard. Default: None.
+        shard_id (int, optional): The shard ID within `num_shards` . This argument can only be specified
+            when `num_shards` is also specified. Default: None.
+        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.
+
+    Raises:
+        RuntimeError: If `dataset_dir` does not contain data files.
+        RuntimeError: If `sampler` and `shuffle` are specified at the same time.
+        RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
+        RuntimeError: If `num_shards` is specified but `shard_id` is None.
+        RuntimeError: If `shard_id` is specified but `num_shards` is None.
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+        ValueError: If the value of `usage` is not 'train', 'test', or 'all'.
+        ValueError: If `dataset_dir` does not exist.
+
+    Note:
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
+          The table below shows what input arguments are allowed and their expected behavior.
+
+          .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
+             :widths: 25 25 50
+             :header-rows: 1
+
+             * - Parameter `sampler`
+               - Parameter `shuffle`
+               - Expected Order Behavior
+             * - None
+               - None
+               - random order
+             * - None
+               - True
+               - random order
+             * - None
+               - False
+               - sequential order
+             * - Sampler object
+               - None
+               - order defined by sampler
+             * - Sampler object
+               - True
+               - not allowed
+             * - Sampler object
+               - False
+               - not allowed
+
+    Examples:
+        >>> food101_dataset_dir = "/path/to/food101_dataset_directory"
+        >>>
+        >>> # Read 3 samples from Food101 dataset
+        >>> dataset = ds.Food101Dataset(dataset_dir=food101_dataset_dir, num_samples=3)
+
+    About Food101 dataset:
+
+    The Food101 is a dataset of 101 food categories, with 101,000 images.
+    There are 250 test images and 750 training images in each class. All images were rescaled
+    to have a maximum side length of 512 pixels.
+
+    The following is the original Food101 dataset structure.
+    You can unzip the dataset files into this directory structure and read by MindSpore's API.
+
+    .. code-block::
+
+        .
+        └── food101_dir
+             ├── images
+             │    ├── apple_pie
+             │    │    ├── 1005649.jpg
+             │    │    ├── 1014775.jpg
+             │    │    ├──...
+             │    ├── baby_back_ribs
+             │    │    ├── 1005293.jpg
+             │    │    ├── 1007102.jpg
+             │    │    ├──...
+             │    └──...
+             └── meta
+                  ├── train.txt
+                  ├── test.txt
+                  ├── classes.txt
+                  ├── train.json
+                  ├── test.json
+                  └── labels.txt
+
+    Citation:
+
+    .. code-block::
+
+        @inproceedings{bossard14,
+        title = {Food-101 -- Mining Discriminative Components with Random Forests},
+        author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc},
+        booktitle = {European Conference on Computer Vision},
+        year = {2014}
+        }
+    """
+
+    @check_food101_dataset
+    def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
+                 decode=False, sampler=None, num_shards=None, shard_id=None, cache=None):
+        super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
+                         shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
+
+        self.dataset_dir = dataset_dir
+        self.usage = replace_none(usage, "all")
+        self.decode = replace_none(decode, False)
+
+    def parse(self, children=None):
+        return cde.Food101Node(self.dataset_dir, self.usage, self.decode, self.sampler)
+
+
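For readers skimming this diff, a minimal usage sketch of the new Food101Dataset API documented above, in the same doctest style the docstrings use. The directory path is a placeholder, and the `create_dict_iterator(output_numpy=True)` call is the generic MindSpore dataset iterator rather than anything introduced by this change; the `[image, label]` column names are assumed from the surrounding docstring conventions.

>>> import mindspore.dataset as ds
>>> # Placeholder root; it should contain the images/ and meta/ layout shown above.
>>> food101_dir = "/path/to/food101_dir"
>>> dataset = ds.Food101Dataset(dataset_dir=food101_dir, usage="train", decode=True, num_samples=3)
>>> for item in dataset.create_dict_iterator(output_numpy=True):
...     # decode=True yields HWC uint8 images
...     print(item["image"].shape, item["label"])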
 class ImageFolderDataset(MappableDataset, VisionBaseDataset):
     """
     A source dataset that reads images from a tree of directories.
     All images within one folder have the same label.

-    The generated dataset has two columns: :py:obj:`[image, label]
+    The generated dataset has two columns: :py:obj:`[image, label]` .
     The tensor of column :py:obj:`image` is of the uint8 type.
     The tensor of column :py:obj:`label` is of a scalar of uint32 type.

     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, all images.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
         sampler (Sampler, optional): Object used to choose samples from the
-            dataset
+            dataset. Default: None, expected order behavior shown in the table below.
         extensions (list[str], optional): List of file extensions to be
-            included in the dataset
+            included in the dataset. Default: None.
         class_indexing (dict, optional): A str-to-int mapping from folder name to index
-
+            Default: None, the folder names will be sorted
             alphabetically and each class will be given a
-            unique index starting from 0
-        decode (bool, optional): Decode the images after reading
+            unique index starting from 0.
+        decode (bool, optional): Decode the images after reading. Default: False.
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into
+            into. Default: None. When this argument is specified, `num_samples` reflects
             the maximum sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.
         decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
             and returns the decrypted bytes data. Default: None, no decryption.

@@ -2232,11 +2383,11 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
         RuntimeError: If `num_shards` is specified but `shard_id` is None.
         RuntimeError: If `shard_id` is specified but `num_shards` is None.
         RuntimeError: If `class_indexing` is not a dictionary.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).

     Note:
         - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise.
-        - This dataset can take in a `sampler
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
          The table below shows what input arguments are allowed and their expected behavior.

          .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2322,14 +2473,34 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
         return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing,
                                    self.decrypt)

+    def get_class_indexing(self):
+        """
+        Get the class index.
+
+        Returns:
+            dict, a str-to-int mapping from label name to index.
+
+        Examples:
+            >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
+            >>>
+            >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
+            >>> class_indexing = dataset.get_class_indexing()
+        """
+        if self.class_indexing is None or not self.class_indexing:
+            runtime_getter = self._init_tree_getters()
+            _class_indexing = runtime_getter[0].GetClassIndexing()
+            for pair in _class_indexing:
+                self.class_indexing[pair[0]] = pair[1][0]
+        return self.class_indexing
+

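A short sketch of how the newly added `get_class_indexing` helper might be combined with an explicit `class_indexing` mapping. This is illustrative only: the folder layout, extensions and class names are assumptions, not part of this diff, and the conventional `import mindspore.dataset as ds` alias is taken from the surrounding examples.

>>> import mindspore.dataset as ds
>>> # Assumed layout: /path/to/image_folder_dataset_directory/<class_name>/*.jpg
>>> dataset = ds.ImageFolderDataset(dataset_dir="/path/to/image_folder_dataset_directory",
...                                 extensions=[".jpg", ".JPEG"], decode=True)
>>> mapping = dataset.get_class_indexing()   # e.g. {"class_a": 0, "class_b": 1} for two folders
>>> print(len(mapping))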
-class KITTIDataset(MappableDataset):
+class KITTIDataset(MappableDataset, VisionBaseDataset):
     """
-
+    KITTI dataset.

-    When usage is "train", the generated dataset has multiple columns: :py:obj:`[image, label, truncated,
-    occluded, alpha, bbox, dimensions, location, rotation_y]
-    has only one column: :py:obj:`[image]
+    When `usage` is "train", the generated dataset has multiple columns: :py:obj:`[image, label, truncated,
+    occluded, alpha, bbox, dimensions, location, rotation_y]` ; When `usage` is "test", the generated dataset
+    has only one column: :py:obj:`[image]` .
     The tensor of column :py:obj:`image` is of the uint8 type.
     The tensor of column :py:obj:`label` is of the uint32 type.
     The tensor of column :py:obj:`truncated` is of the float32 type.
@@ -2342,25 +2513,26 @@ class KITTIDataset(MappableDataset):

     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
-        usage (str, optional): Usage of this dataset, can be `train` or `test
-            train samples, `test` will read from 7518 test samples without label
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
-
-
-
+        usage (str, optional): Usage of this dataset, can be `train` or `test` . `train` will read 7481
+            train samples, `test` will read from 7518 test samples without label. Default: None, will use `train` .
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, will include all images.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+            order behavior shown in the table below.
+        decode (bool, optional): Decode the images after reading. Default: False.
+        sampler (Sampler, optional): Object used to choose samples from the dataset.
+            Default: None, expected order behavior shown in the table below.
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into
+            into. Default: None. When this argument is specified, `num_samples` reflects
             the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
+            argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

     Raises:
         RuntimeError: If `sampler` and `shuffle` are specified at the same time.
@@ -2368,10 +2540,10 @@ class KITTIDataset(MappableDataset):
         RuntimeError: If `num_shards` is specified but `shard_id` is None.
         RuntimeError: If `shard_id` is specified but `num_shards` is None.
         ValueError: If `dataset_dir` does not exist.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).

     Note:
-        - This dataset can take in a `sampler
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
          The table below shows what input arguments are allowed and their expected behavior.

          .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2418,13 +2590,14 @@ class KITTIDataset(MappableDataset):
     and a 3D laser scanner. Despite its popularity, the dataset itself does not contain ground truth for
     semantic segmentation. However, various researchers have manually annotated parts of the dataset to fit
     their necessities. Álvarez et al. generated ground truth for 323 images from the road detection challenge
-    with three classes: road,
+    with three classes: road, vehicles and sky. Zhang et al. annotated 252 (140 for training and 112 for testing)
     acquisitions – RGB and Velodyne scans – from the tracking challenge for ten object categories: building, sky,
     road, vegetation, sidewalk, car, pedestrian, cyclist, sign/pole, and fence.

     You can unzip the original KITTI dataset files into this directory structure and read by MindSpore's API.

     .. code-block::
+
         .
         └── kitti_dataset_directory
             ├── data_object_image_2
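A minimal sketch of the KITTIDataset usage implied by the column list above, assuming the placeholder path and the conventional `ds` alias; the `"image"` and `"bbox"` column names come from the docstring's train-mode column list, and the iterator call is the generic MindSpore API rather than something added in this diff.

>>> import mindspore.dataset as ds
>>> kitti = ds.KITTIDataset(dataset_dir="/path/to/kitti_dataset_directory", usage="train",
...                         decode=True, num_samples=2)
>>> for item in kitti.create_dict_iterator(output_numpy=True):
...     # train usage exposes the annotation columns alongside the image
...     print(item["image"].shape, item["bbox"].shape)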
|
|
2472
2645
|
|
|
2473
2646
|
class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
2474
2647
|
"""
|
|
2475
|
-
|
|
2648
|
+
KMNIST(Kuzushiji-MNIST) dataset.
|
|
2476
2649
|
|
|
2477
|
-
The generated dataset has two columns :py:obj:`[image, label]
|
|
2650
|
+
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
2478
2651
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2479
|
-
The
|
|
2652
|
+
The column :py:obj:`label` is a scalar of the uint32 type.
|
|
2480
2653
|
|
|
2481
2654
|
Args:
|
|
2482
2655
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2483
2656
|
usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 60,000
|
|
2484
2657
|
train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
|
|
2485
|
-
|
|
2486
|
-
num_samples (int, optional): The number of images to be included in the dataset
|
|
2487
|
-
|
|
2488
|
-
num_parallel_workers (int, optional): Number of
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
|
|
2494
|
-
|
|
2658
|
+
Default: None, will read all samples.
|
|
2659
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2660
|
+
Default: None, will read all images.
|
|
2661
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2662
|
+
Default: None, will use global default workers(8), it can be set
|
|
2663
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2664
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2665
|
+
Default: None, expected order behavior shown in the table below.
|
|
2666
|
+
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
2667
|
+
Default: None, expected order behavior shown in the table below.
|
|
2668
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
2495
2669
|
When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
|
|
2496
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
2670
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
2497
2671
|
argument can only be specified when `num_shards` is also specified.
|
|
2498
2672
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2499
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
2500
|
-
|
|
2673
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2674
|
+
Default: None, which means no cache is used.
|
|
2501
2675
|
|
|
2502
2676
|
Raises:
|
|
2503
2677
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
2504
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
2505
2678
|
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
2506
2679
|
RuntimeError: If `sampler` and sharding are specified at the same time.
|
|
2507
2680
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2508
2681
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2509
|
-
ValueError: If `
|
|
2682
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
2683
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2510
2684
|
|
|
2511
2685
|
Note:
|
|
2512
|
-
- This dataset can take in a `sampler
|
|
2686
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
2513
2687
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
2514
2688
|
|
|
2515
2689
|
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
@@ -2543,8 +2717,6 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
|
|
|
2543
2717
|
>>>
|
|
2544
2718
|
>>> # Read 3 samples from KMNIST dataset
|
|
2545
2719
|
>>> dataset = ds.KMnistDataset(dataset_dir=kmnist_dataset_dir, num_samples=3)
|
|
2546
|
-
>>>
|
|
2547
|
-
>>> # Note: In kmnist_dataset dataset, each dictionary has keys "image" and "label"
|
|
2548
2720
|
|
|
2549
2721
|
About KMNIST dataset:
|
|
2550
2722
|
|
|
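A small KMnistDataset sketch mirroring the docstring's own example, assuming a placeholder directory and the generic dictionary iterator; the `[image, label]` columns are taken from the class description above.

>>> import mindspore.dataset as ds
>>> kmnist = ds.KMnistDataset(dataset_dir="/path/to/kmnist_dataset_directory", usage="test",
...                           num_samples=3, shuffle=False)
>>> for item in kmnist.create_dict_iterator(output_numpy=True):
...     print(item["image"].shape, item["label"])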
@@ -2594,10 +2766,10 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):

 class LFWDataset(MappableDataset, VisionBaseDataset):
     """
-
+    LFW (Labeled Faces in the Wild) dataset.

-    When task is
-    When task is
+    When `task` is 'people', the generated dataset has two columns: :py:obj:`[image, label]`;
+    When `task` is 'pairs', the generated dataset has three columns: :py:obj:`[image1, image2, label]` .
     The tensor of column :py:obj:`image` is of the uint8 type.
     The tensor of column :py:obj:`image1` is of the uint8 type.
     The tensor of column :py:obj:`image2` is of the uint8 type.
@@ -2605,38 +2777,44 @@ class LFWDataset(MappableDataset, VisionBaseDataset):

     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
-        task (str, optional): Set the task type of reading lfw data, support
-
-        usage (str, optional): The image split to use, support
-
-        image_set (str, optional):
-
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
-
+        task (str, optional): Set the task type of reading lfw data, support 'people' and 'pairs'.
+            Default: None, means 'people'.
+        usage (str, optional): The image split to use, support '10fold', 'train', 'test' and 'all'.
+            Default: None, will read samples including train and test.
+        image_set (str, optional): Type of image funneling to use, support 'original', 'funneled' or
+            'deepfunneled'. Default: None, will use 'funneled'.
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, all images.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
+        decode (bool, optional): Decode the images after reading. Default: False.
         sampler (Sampler, optional): Object used to choose samples from the
-            dataset
+            dataset. Default: None, expected order behavior shown in the table below.
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into
+            into. Default: None. When this argument is specified, `num_samples` reflects
             the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
+            argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

     Raises:
+        RuntimeError: If `dataset_dir` does not contain data files.
         RuntimeError: If sampler and shuffle are specified at the same time.
         RuntimeError: If sampler and sharding are specified at the same time.
-        RuntimeError: If num_shards is specified but shard_id is None.
-        RuntimeError: If shard_id is specified but num_shards is None.
-        ValueError: If shard_id is invalid (< 0 or >= num_shards).
+        RuntimeError: If `num_shards` is specified but shard_id is None.
+        RuntimeError: If `shard_id` is specified but num_shards is None.
+        ValueError: If `shard_id` is invalid (< 0 or >= `num_shards` ).

-
+    Note:
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
+          The table below shows what input arguments are allowed and their expected behavior.
+
+          .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
             :widths: 25 25 50
             :header-rows: 1

@@ -2674,15 +2852,17 @@ class LFWDataset(MappableDataset, VisionBaseDataset):

     About LFW dataset:

-    LFW
-
-    of
-
-
+    LFW (Labeled Faces in the Wild) dataset is one of the most commonly used and widely open datasets in
+    the field of face recognition. It was released by Gary B. Huang and his team at the University of
+    Massachusetts, Amherst in 2007. The dataset includes nearly 50,000 images of 13,233 individuals, which are sourced
+    from various internet platforms and contain diverse environmental factors such as different poses, lighting
+    conditions, and angles. Most of the images in the dataset are frontal and cover a wide range of ages, genders,
+    and ethnicities.

     You can unzip the original LFW dataset files into this directory structure and read by MindSpore's API.

     .. code-block::
+
         .
         └── lfw_dataset_directory
             ├── lfw
|
|
|
2749
2929
|
|
|
2750
2930
|
class LSUNDataset(MappableDataset, VisionBaseDataset):
|
|
2751
2931
|
"""
|
|
2752
|
-
|
|
2932
|
+
LSUN(Large-scale Scene UNderstarding) dataset.
|
|
2753
2933
|
|
|
2754
|
-
The generated dataset has two columns: :py:obj:`[image, label]
|
|
2934
|
+
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
2755
2935
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2756
2936
|
The tensor of column :py:obj:`label` is of a scalar of uint32 type.
|
|
2757
2937
|
|
|
2758
2938
|
Args:
|
|
2759
2939
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
2760
|
-
usage (str, optional): Usage of this dataset, can be `train
|
|
2761
|
-
|
|
2762
|
-
classes(Union[str, list[str]], optional): Choose the specific classes to load
|
|
2763
|
-
all classes in root directory
|
|
2764
|
-
num_samples (int, optional): The number of images to be included in the dataset
|
|
2765
|
-
|
|
2766
|
-
num_parallel_workers (int, optional): Number of
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2940
|
+
usage (str, optional): Usage of this dataset, can be `train` , `test` , `valid` or `all`
|
|
2941
|
+
Default: None, will be set to `all` .
|
|
2942
|
+
classes (Union[str, list[str]], optional): Choose the specific classes to load. Default: None, means loading
|
|
2943
|
+
all classes in root directory.
|
|
2944
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2945
|
+
Default: None, all images.
|
|
2946
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
2947
|
+
Default: None, will use global default workers(8), it can be set
|
|
2948
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
2949
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
2950
|
+
Default: None, expected order behavior shown in the table below.
|
|
2951
|
+
decode (bool, optional): Decode the images after reading. Default: False.
|
|
2771
2952
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
2772
|
-
dataset
|
|
2953
|
+
dataset. Default: None, expected order behavior shown in the table below.
|
|
2773
2954
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
2774
|
-
into
|
|
2955
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
2775
2956
|
the max sample number of per shard.
|
|
2776
|
-
shard_id (int, optional): The shard ID within num_shards
|
|
2777
|
-
argument can only be specified when num_shards is also specified.
|
|
2957
|
+
shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
|
|
2958
|
+
argument can only be specified when `num_shards` is also specified.
|
|
2778
2959
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2779
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
2780
|
-
|
|
2960
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
2961
|
+
Default: None, which means no cache is used.
|
|
2781
2962
|
|
|
2782
2963
|
Raises:
|
|
2783
|
-
RuntimeError: If
|
|
2784
|
-
RuntimeError: If
|
|
2785
|
-
RuntimeError: If
|
|
2786
|
-
RuntimeError: If
|
|
2787
|
-
|
|
2788
|
-
ValueError: If
|
|
2789
|
-
|
|
2790
|
-
|
|
2964
|
+
RuntimeError: If `dataset_dir` does not contain data files.
|
|
2965
|
+
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
2966
|
+
RuntimeError: If `sampler` and sharding are specified at the same time.
|
|
2967
|
+
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2968
|
+
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2969
|
+
ValueError: If `shard_id` is invalid (< 0 or >= `num_shards` ).
|
|
2970
|
+
ValueError: If `usage` or `classes` is invalid (not in specific types).
|
|
2971
|
+
|
|
2972
|
+
Note:
|
|
2973
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
2974
|
+
The table below shows what input arguments are allowed and their expected behavior.
|
|
2975
|
+
|
|
2976
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
2791
2977
|
:widths: 25 25 50
|
|
2792
2978
|
:header-rows: 1
|
|
2793
2979
|
|
|
@@ -2826,15 +3012,17 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
|
|
|
2826
3012
|
|
|
2827
3013
|
About LSUN dataset:
|
|
2828
3014
|
|
|
2829
|
-
The LSUN
|
|
2830
|
-
in
|
|
3015
|
+
The LSUN (Large-Scale Scene Understanding) is a large-scale dataset used for indoors scene
|
|
3016
|
+
understanding. It was originally launched by Stanford University in 2015 with the aim of
|
|
3017
|
+
providing a challenging and diverse dataset for research in computer vision and machine
|
|
3018
|
+
learning. The main application of this dataset for research is indoor scene analysis.
|
|
2831
3019
|
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
3020
|
+
This dataset contains ten different categories of scenes, including bedrooms, living rooms,
|
|
3021
|
+
restaurants, lounges, studies, kitchens, bathrooms, corridors, children's room, and outdoors.
|
|
3022
|
+
Each category contains tens of thousands of images from different perspectives, and these
|
|
3023
|
+
images are high-quality, high-resolusion real-world images.
|
|
2835
3024
|
|
|
2836
|
-
You can unzip the
|
|
2837
|
-
read by MindSpore's API.
|
|
3025
|
+
You can unzip the dataset files into this directory structure and read by MindSpore's API.
|
|
2838
3026
|
|
|
2839
3027
|
.. code-block::
|
|
2840
3028
|
|
|
@@ -2882,33 +3070,34 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2882
3070
|
"""
|
|
2883
3071
|
A source dataset for reading images from a Manifest file.
|
|
2884
3072
|
|
|
2885
|
-
The generated dataset has two columns: :py:obj:`[image, label]
|
|
3073
|
+
The generated dataset has two columns: :py:obj:`[image, label]` .
|
|
2886
3074
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
2887
3075
|
The tensor of column :py:obj:`label` is of a scalar of uint64 type.
|
|
2888
3076
|
|
|
2889
3077
|
Args:
|
|
2890
3078
|
dataset_file (str): File to be read.
|
|
2891
|
-
usage (str, optional): Acceptable usages include 'train', 'eval' and 'inference'
|
|
3079
|
+
usage (str, optional): Acceptable usages include 'train', 'eval' and 'inference'. Default: 'train'.
|
|
2892
3080
|
num_samples (int, optional): The number of images to be included in the dataset.
|
|
2893
|
-
|
|
2894
|
-
num_parallel_workers (int, optional): Number of
|
|
2895
|
-
|
|
2896
|
-
|
|
2897
|
-
|
|
3081
|
+
Default: None, will include all images.
|
|
3082
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3083
|
+
Default: None, will use global default workers(8), it can be set
|
|
3084
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3085
|
+
shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
|
|
3086
|
+
order behavior shown in the table below.
|
|
2898
3087
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
2899
|
-
dataset
|
|
2900
|
-
class_indexing (dict, optional): A str-to-int mapping from label name to index
|
|
2901
|
-
|
|
2902
|
-
class will be given a unique index starting from 0
|
|
2903
|
-
decode (bool, optional): decode the images after reading
|
|
3088
|
+
dataset. Default: None, expected order behavior shown in the table below.
|
|
3089
|
+
class_indexing (dict, optional): A str-to-int mapping from label name to index.
|
|
3090
|
+
Default: None, the folder names will be sorted alphabetically and each
|
|
3091
|
+
class will be given a unique index starting from 0.
|
|
3092
|
+
decode (bool, optional): decode the images after reading. Default: False.
|
|
2904
3093
|
num_shards (int, optional): Number of shards that the dataset will be divided
|
|
2905
|
-
into
|
|
3094
|
+
into. Default: None. When this argument is specified, `num_samples` reflects
|
|
2906
3095
|
the max number of samples per shard.
|
|
2907
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
3096
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
2908
3097
|
argument can only be specified when `num_shards` is also specified.
|
|
2909
3098
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
2910
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
2911
|
-
|
|
3099
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3100
|
+
Default: None, which means no cache is used.
|
|
2912
3101
|
|
|
2913
3102
|
Raises:
|
|
2914
3103
|
RuntimeError: If dataset_files are not valid or do not exist.
|
|
@@ -2918,11 +3107,11 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2918
3107
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
2919
3108
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
2920
3109
|
RuntimeError: If class_indexing is not a dictionary.
|
|
2921
|
-
ValueError: If `shard_id` is
|
|
3110
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
2922
3111
|
|
|
2923
3112
|
Note:
|
|
2924
3113
|
- The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise.
|
|
2925
|
-
- This dataset can take in a `sampler
|
|
3114
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
|
|
2926
3115
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
2927
3116
|
|
|
2928
3117
|
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
@@ -2959,6 +3148,26 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
2959
3148
|
>>>
|
|
2960
3149
|
>>> # 2) Read samples (specified in manifest_file.manifest) for shard 0 in a 2-way distributed training setup
|
|
2961
3150
|
>>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, num_shards=2, shard_id=0)
|
|
3151
|
+
|
|
3152
|
+
About Manifest dataset:
|
|
3153
|
+
|
|
3154
|
+
Manifest file contains a list of files included in a dataset, including basic file info such as File name and File
|
|
3155
|
+
ID, along with extended file metadata. Manifest is a data format file supported by Huawei Modelarts. For details,
|
|
3156
|
+
see `Specifications for Importing the Manifest File <https://support.huaweicloud.com/engineers-modelarts/
|
|
3157
|
+
modelarts_23_0009.html>`_ .
|
|
3158
|
+
|
|
3159
|
+
.. code-block::
|
|
3160
|
+
|
|
3161
|
+
.
|
|
3162
|
+
└── manifest_dataset_directory
|
|
3163
|
+
├── train
|
|
3164
|
+
│ ├── 1.JPEG
|
|
3165
|
+
│ ├── 2.JPEG
|
|
3166
|
+
│ ├── ...
|
|
3167
|
+
├── eval
|
|
3168
|
+
│ ├── 1.JPEG
|
|
3169
|
+
│ ├── 2.JPEG
|
|
3170
|
+
│ ├── ...
|
|
2962
3171
|
"""
|
|
2963
3172
|
|
|
2964
3173
|
@check_manifestdataset
|
|
@@ -3000,9 +3209,9 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
|
|
|
3000
3209
|
|
|
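A short ManifestDataset sketch under stated assumptions: the manifest path is a placeholder and the `class_indexing` mapping is a hypothetical example of the str-to-int mapping documented above; the dictionary iterator is the generic MindSpore API.

>>> import mindspore.dataset as ds
>>> manifest = ds.ManifestDataset(dataset_file="/path/to/manifest_file.manifest", usage="train",
...                               class_indexing={"cat": 0, "dog": 1}, decode=True)
>>> for item in manifest.create_dict_iterator(output_numpy=True):
...     print(item["image"].shape, item["label"])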
 class MnistDataset(MappableDataset, VisionBaseDataset):
     """
-
+    MNIST dataset.

-    The generated dataset has two columns :py:obj:`[image, label]
+    The generated dataset has two columns :py:obj:`[image, label]` .
     The tensor of column :py:obj:`image` is of the uint8 type.
     The tensor of column :py:obj:`label` is a scalar of the uint32 type.

@@ -3010,22 +3219,23 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
         dataset_dir (str): Path to the root directory that contains the dataset.
         usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 60,000
             train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
-
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
+            Default: None, will read all samples.
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, will read all images.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
         sampler (Sampler, optional): Object used to choose samples from the
-            dataset
-        num_shards (int, optional): Number of shards that the dataset will be divided into
+            dataset. Default: None, expected order behavior shown in the table below.
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
             When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

     Raises:
         RuntimeError: If `dataset_dir` does not contain data files.
@@ -3035,10 +3245,10 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
         RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
         RuntimeError: If `num_shards` is specified but shard_id is None.
         RuntimeError: If `shard_id` is specified but `num_shards` is None.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).

     Note:
-        - This dataset can take in a `sampler
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
          The table below shows what input arguments are allowed and their expected behavior.

          .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -3120,42 +3330,44 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
         return cde.MnistNode(self.dataset_dir, self.usage, self.sampler)


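A minimal MnistDataset sketch matching the arguments documented above; the directory path is a placeholder and the dictionary iterator is the generic MindSpore API.

>>> import mindspore.dataset as ds
>>> mnist = ds.MnistDataset(dataset_dir="/path/to/mnist_dataset_directory", usage="train",
...                         num_samples=3, shuffle=False)
>>> for item in mnist.create_dict_iterator(output_numpy=True):
...     print(item["image"].shape, item["label"])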
-class OmniglotDataset(MappableDataset):
+class OmniglotDataset(MappableDataset, VisionBaseDataset):
     """
-
+    Omniglot dataset.

-    The generated dataset has two columns :py:obj:`[image, label]
+    The generated dataset has two columns :py:obj:`[image, label]` .
     The tensor of column :py:obj:`image` is of the uint8 type.
     The tensor of column :py:obj:`label` is a scalar of the uint32 type.

     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
-        background(bool, optional):
-
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
-
+        background (bool, optional): Whether to create dataset from the "background" set.
+            Otherwise create from the "evaluation" set. Default: None, set to True.
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, all images.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
+        decode (bool, optional): Decode the images after reading. Default: False.
         sampler (Sampler, optional): Object used to choose samples from the
-            dataset
+            dataset. Default: None, expected order behavior shown in the table below.
         num_shards (int, optional): Number of shards that the dataset will be divided
-            into
+            into. Default: None. When this argument is specified, `num_samples` reflects
             the max sample number of per shard.
-        shard_id (int, optional): The shard ID within num_shards
-            argument can only be specified when num_shards is also specified.
+        shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
+            argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

     Raises:
+        RuntimeError: If `dataset_dir` does not contain data files.
         RuntimeError: If `sampler` and `shuffle` are specified at the same time.
         RuntimeError: If `sampler` and `sharding` are specified at the same time.
         RuntimeError: If `num_shards` is specified but `shard_id` is None.
         RuntimeError: If `shard_id` is specified but `num_shards` is None.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).

     Note:
         - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive.
@@ -3194,14 +3406,15 @@ class OmniglotDataset(MappableDataset):

     About Omniglot dataset:

-    The Omniglot dataset is designed for developing more human-like learning algorithms.
-
-
-
+    The Omniglot dataset is designed for developing more human-like learning algorithms. It contains 1623 different
+    handwritten characters from 50 different alphabets. Each of the 1623 characters was drawn online via Amazon's
+    Mechanical Turk by 20 different people. Each image is paired with stroke data, a sequence of [x, y, t] coordinates
+    with time in milliseconds.

     You can unzip the original Omniglot dataset files into this directory structure and read by MindSpore's API.

     .. code-block::
+
         .
         └── omniglot_dataset_directory
             ├── images_background/
|
|
|
3252
3465
|
|
|
3253
3466
|
class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
3254
3467
|
"""
|
|
3255
|
-
|
|
3468
|
+
PhotoTour dataset.
|
|
3256
3469
|
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
|
|
3260
|
-
The tensor of column :py:obj:`image`, :py:obj:`image1` and :py:obj:`image2` is of the uint8 type.
|
|
3261
|
-
The tensor of column :py:obj:`matches` is a scalar of the uint32 type.
|
|
3470
|
+
According to the given `usage` configuration, the generated dataset has different output columns:
|
|
3471
|
+
- `usage` = 'train', output columns: `[image, dtype=uint8]` .
|
|
3472
|
+
- `usage` ≠ 'train', output columns: `[image1, dtype=uint8]` , `[image2, dtype=uint8]` , `[matches, dtype=uint32]` .
|
|
3262
3473
|
|
|
3263
3474
|
Args:
|
|
3264
3475
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
3265
3476
|
name (str): Name of the dataset to load,
|
|
3266
3477
|
should be one of 'notredame', 'yosemite', 'liberty', 'notredame_harris',
|
|
3267
3478
|
'yosemite_harris' or 'liberty_harris'.
|
|
3268
|
-
usage (str, optional): Usage of the dataset, can be 'train' or 'test'
|
|
3479
|
+
usage (str, optional): Usage of the dataset, can be 'train' or 'test'. Default: None, will be set to 'train'.
|
|
3269
3480
|
When usage is 'train', number of samples for each `name` is
|
|
3270
3481
|
{'notredame': 468159, 'yosemite': 633587, 'liberty': 450092, 'liberty_harris': 379587,
|
|
3271
3482
|
'yosemite_harris': 450912, 'notredame_harris': 325295}.
|
|
3272
3483
|
When usage is 'test', will read 100,000 samples for testing.
|
|
3273
|
-
num_samples (int, optional): The number of images to be included in the dataset
|
|
3274
|
-
|
|
3275
|
-
num_parallel_workers (int, optional): Number of
|
|
3276
|
-
|
|
3277
|
-
|
|
3278
|
-
|
|
3279
|
-
|
|
3280
|
-
|
|
3281
|
-
|
|
3484
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3485
|
+
Default: None, will read all images.
|
|
3486
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3487
|
+
Default: None, will use global default workers(8), it can be set
|
|
3488
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3489
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3490
|
+
Default: None, expected order behavior shown in the table below.
|
|
3491
|
+
sampler (Sampler, optional): Object used to choose samples from the dataset.
|
|
3492
|
+
Default: None, expected order behavior shown in the table below.
|
|
3493
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
3282
3494
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
3283
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
3495
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3284
3496
|
argument can only be specified when `num_shards` is also specified.
|
|
3285
3497
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3286
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
3287
|
-
|
|
3498
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3499
|
+
Default: None, which means no cache is used.
|
|
3288
3500
|
|
|
3289
3501
|
Raises:
|
|
3290
3502
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
3291
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3292
3503
|
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
3293
3504
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
3294
3505
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
@@ -3297,13 +3508,14 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3297
3508
|
ValueError: If `usage` is not in ["train", "test"].
|
|
3298
3509
|
ValueError: If name is not in ["notredame", "yosemite", "liberty",
|
|
3299
3510
|
"notredame_harris", "yosemite_harris", "liberty_harris"].
|
|
3300
|
-
ValueError: If `
|
|
3511
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3512
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3301
3513
|
|
|
3302
3514
|
Note:
|
|
3303
|
-
- This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
|
|
3515
|
+
- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive. The table
|
|
3304
3516
|
below shows what input arguments are allowed and their expected behavior.
|
|
3305
3517
|
|
|
3306
|
-
.. list-table:: Expected Order Behavior of Using
|
|
3518
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
3307
3519
|
:widths: 64 64 1
|
|
3308
3520
|
:header-rows: 1
|
|
3309
3521
|
|
|
@@ -3333,9 +3545,6 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3333
3545
|
>>> # Read 3 samples from PhotoTour dataset.
|
|
3334
3546
|
>>> dataset = ds.PhotoTourDataset(dataset_dir="/path/to/photo_tour_dataset_directory",
|
|
3335
3547
|
... name='liberty', usage='train', num_samples=3)
|
|
3336
|
-
>>>
|
|
3337
|
-
>>> # In PhotoTourDataset dataset, if usage is 'train', each dictionary has key "image",
|
|
3338
|
-
>>> # else has keys "image1" "image2" and "matches".
|
|
3339
3548
|
|
|
3340
3549
|
About PhotoTour dataset:
|
|
3341
3550
|
|
|
@@ -3407,49 +3616,50 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
|
|
|
3407
3616
|
|
|
3408
3617
|
class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
3409
3618
|
"""
|
|
3410
|
-
|
|
3619
|
+
Places365 dataset.
|
|
3411
3620
|
|
|
3412
|
-
The generated dataset has two columns :py:obj:`[image, label]
|
|
3621
|
+
The generated dataset has two columns :py:obj:`[image, label]` .
|
|
3413
3622
|
The tensor of column :py:obj:`image` is of the uint8 type.
|
|
3414
|
-
The tensor of column :py:obj:`label` is
|
|
3623
|
+
The tensor of column :py:obj:`label` is of the uint32 type.
|
|
3415
3624
|
|
|
3416
3625
|
Args:
|
|
3417
3626
|
dataset_dir (str): Path to the root directory that contains the dataset.
|
|
3418
|
-
usage (str, optional): Usage of this dataset, can be 'train-standard', 'train-challenge' or 'val'
|
|
3419
|
-
|
|
3420
|
-
small (bool, optional): Use 256 * 256 images (True) or high resolution images (False)
|
|
3421
|
-
decode (bool, optional): Decode the images after reading
|
|
3422
|
-
num_samples (int, optional): The number of images to be included in the dataset
|
|
3423
|
-
|
|
3424
|
-
num_parallel_workers (int, optional): Number of
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
|
|
3627
|
+
usage (str, optional): Usage of this dataset, can be 'train-standard', 'train-challenge' or 'val'.
|
|
3628
|
+
Default: None, will be set to 'train-standard'.
|
|
3629
|
+
small (bool, optional): Use 256 * 256 images (True) or high resolution images (False). Default: False.
|
|
3630
|
+
decode (bool, optional): Decode the images after reading. Default: False.
|
|
3631
|
+
num_samples (int, optional): The number of images to be included in the dataset.
|
|
3632
|
+
Default: None, will read all images.
|
|
3633
|
+
num_parallel_workers (int, optional): Number of worker threads to read the data.
|
|
3634
|
+
Default: None, will use global default workers(8), it can be set
|
|
3635
|
+
by `mindspore.dataset.config.set_num_parallel_workers` .
|
|
3636
|
+
shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
|
|
3637
|
+
Default: None, expected order behavior shown in the table below.
|
|
3428
3638
|
sampler (Sampler, optional): Object used to choose samples from the
|
|
3429
|
-
dataset
|
|
3430
|
-
num_shards (int, optional): Number of shards that the dataset will be divided into
|
|
3639
|
+
dataset. Default: None, expected order behavior shown in the table below.
|
|
3640
|
+
num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
|
|
3431
3641
|
When this argument is specified, `num_samples` reflects the max sample number of per shard.
|
|
3432
|
-
shard_id (int, optional): The shard ID within `num_shards`
|
|
3642
|
+
shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
|
|
3433
3643
|
argument can only be specified when `num_shards` is also specified.
|
|
3434
3644
|
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
|
|
3435
|
-
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
|
|
3436
|
-
|
|
3645
|
+
`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
|
|
3646
|
+
Default: None, which means no cache is used.
|
|
3437
3647
|
|
|
3438
3648
|
Raises:
|
|
3439
3649
|
RuntimeError: If `dataset_dir` does not contain data files.
|
|
3440
|
-
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3441
3650
|
RuntimeError: If `sampler` and `shuffle` are specified at the same time.
|
|
3442
3651
|
RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
|
|
3443
3652
|
RuntimeError: If `num_shards` is specified but `shard_id` is None.
|
|
3444
3653
|
RuntimeError: If `shard_id` is specified but `num_shards` is None.
|
|
3445
|
-
ValueError: If `
|
|
3654
|
+
ValueError: If `num_parallel_workers` exceeds the max thread numbers.
|
|
3655
|
+
ValueError: If `shard_id` is not in range of [0, `num_shards` ).
|
|
3446
3656
|
ValueError: If `usage` is not in ["train-standard", "train-challenge", "val"].
|
|
3447
3657
|
|
|
3448
3658
|
Note:
|
|
3449
3659
|
- This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
|
|
3450
3660
|
The table below shows what input arguments are allowed and their expected behavior.
|
|
3451
3661
|
|
|
3452
|
-
.. list-table:: Expected Order Behavior of Using
|
|
3662
|
+
.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
|
3453
3663
|
:widths: 25 25 50
|
|
3454
3664
|
:header-rows: 1
|
|
3455
3665
|
|
|
@@ -3481,8 +3691,6 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
|
|
|
3481
3691
|
>>> # Read 3 samples from Places365 dataset
|
|
3482
3692
|
>>> dataset = ds.Places365Dataset(dataset_dir=place365_dataset_dir, usage='train-standard',
|
|
3483
3693
|
... small=True, decode=True, num_samples=3)
|
|
3484
|
-
>>>
|
|
3485
|
-
>>> # In places365 dataset, each dictionary has keys "image" and "label".
|
|
3486
3694
|
|
|
3487
3695
|
About Places365 dataset:
|
|
3488
3696
|
|
|
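A minimal Places365Dataset sketch using the arguments documented above; the path is a placeholder and the iterator call is the generic MindSpore API.

>>> import mindspore.dataset as ds
>>> places = ds.Places365Dataset(dataset_dir="/path/to/places365_dataset_directory",
...                              usage="val", small=True, decode=True, num_samples=2)
>>> for item in places.create_dict_iterator(output_numpy=True):
...     print(item["image"].shape, item["label"])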
@@ -3549,45 +3757,46 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):

 class QMnistDataset(MappableDataset, VisionBaseDataset):
     """
-
+    QMNIST dataset.

-    The generated dataset has two columns :py:obj:`[image, label]
+    The generated dataset has two columns :py:obj:`[image, label]` .
     The tensor of column :py:obj:`image` is of the uint8 type.
-    The tensor of column :py:obj:`label` is
+    The tensor of column :py:obj:`label` is of the uint32 type.

     Args:
         dataset_dir (str): Path to the root directory that contains the dataset.
         usage (str, optional): Usage of this dataset, can be 'train', 'test', 'test10k', 'test50k', 'nist'
-            or 'all'
+            or 'all'. Default: None, will read all samples.
         compat (bool, optional): Whether the label for each example is class number (compat=True) or the full QMNIST
-            information (compat=False)
-        num_samples (int, optional): The number of images to be included in the dataset
-
-        num_parallel_workers (int, optional): Number of
-
-
-
+            information (compat=False). Default: True.
+        num_samples (int, optional): The number of images to be included in the dataset.
+            Default: None, will read all images.
+        num_parallel_workers (int, optional): Number of worker threads to read the data.
+            Default: None, will use global default workers(8), it can be set
+            by `mindspore.dataset.config.set_num_parallel_workers` .
+        shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+            Default: None, expected order behavior shown in the table below.
         sampler (Sampler, optional): Object used to choose samples from the
-            dataset
-        num_shards (int, optional): Number of shards that the dataset will be divided into
+            dataset. Default: None, expected order behavior shown in the table below.
+        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
             When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
-        shard_id (int, optional): The shard ID within `num_shards`
+        shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+            `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+            Default: None, which means no cache is used.

     Raises:
         RuntimeError: If `dataset_dir` does not contain data files.
-        ValueError: If `num_parallel_workers` exceeds the max thread numbers.
         RuntimeError: If `sampler` and `shuffle` are specified at the same time.
         RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
         RuntimeError: If `num_shards` is specified but `shard_id` is None.
         RuntimeError: If `shard_id` is specified but `num_shards` is None.
-        ValueError: If `shard_id` is
+        ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

     Note:
-        - This dataset can take in a `sampler
+        - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
          The table below shows what input arguments are allowed and their expected behavior.

          .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
|
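A minimal usage sketch for the QMnistDataset signature shown above (the directory path is hypothetical and the call simply mirrors the documented defaults, not an official example):

.. code-block:: python

    import mindspore.dataset as ds

    # compat=True keeps plain class-number labels; sampler and shuffle stay mutually exclusive.
    qmnist_dir = "/path/to/qmnist_dataset_directory"   # hypothetical path
    dataset = ds.QMnistDataset(dataset_dir=qmnist_dir, usage='test10k', compat=True, num_samples=3)
    for item in dataset.create_dict_iterator(output_numpy=True):
        print(item["image"].shape, item["label"])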
@@ -3678,26 +3887,49 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
 A source dataset that generates random data.
 
 Args:
-total_rows (int, optional): Number of samples for the dataset to generate
-
-schema (Union[str, Schema], optional):
-
-
-
-
-
-
-
+total_rows (int, optional): Number of samples for the dataset to generate.
+Default: None, number of samples is random.
+schema (Union[str, Schema], optional): Data format policy, which specifies the data types and shapes of the data
+column to be read. Both JSON file path and objects constructed by mindspore.dataset.Schema are acceptable.
+Default: None.
+columns_list (list[str], optional): List of column names of the dataset.
+Default: None, the columns will be named like this "c0", "c1", "c2" etc.
+num_samples (int, optional): The number of samples to be included in the dataset.
+Default: None, all samples.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
-shuffle (bool, optional): Whether or not to perform shuffle on the dataset
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
+shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+Default: None, expected order behavior shown in the table below.
 num_shards (int, optional): Number of shards that the dataset will be divided
-into
+into. Default: None. When this argument is specified, `num_samples` reflects
 the maximum sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
+
+Raises:
+RuntimeError: If `num_shards` is specified but `shard_id` is None.
+RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+TypeError: If `total_rows` is not of type int.
+TypeError: If `num_shards` is not of type int.
+TypeError: If `num_parallel_workers` is not of type int.
+TypeError: If `shuffle` is not of type bool.
+TypeError: If `columns_list` is not of type list.
+
+Examples:
+>>> from mindspore import dtype as mstype
+>>> import mindspore.dataset as ds
+>>>
+>>> schema = ds.Schema()
+>>> schema.add_column('image', de_type=mstype.uint8, shape=[2])
+>>> schema.add_column('label', de_type=mstype.uint8, shape=[1])
+>>> # apply dataset operations
+>>> ds1 = ds.RandomDataset(schema=schema, total_rows=50, num_parallel_workers=4)
 """
 
 @check_random_dataset
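A sketch of iterating the rows produced by the Schema-based call documented above (assumes only the column definitions from the added example):

.. code-block:: python

    import mindspore.dataset as ds
    from mindspore import dtype as mstype

    schema = ds.Schema()
    schema.add_column('image', de_type=mstype.uint8, shape=[2])
    schema.add_column('label', de_type=mstype.uint8, shape=[1])
    dataset = ds.RandomDataset(schema=schema, total_rows=8, num_parallel_workers=2)
    for row in dataset.create_dict_iterator(output_numpy=True):
        # random uint8 values with the declared shapes
        print(row['image'], row['label'])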
@@ -3721,6 +3953,159 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
 return cde.RandomNode(self.total_rows, schema, self.columns_list)
 
 
+class RenderedSST2Dataset(MappableDataset, VisionBaseDataset):
+"""
+RenderedSST2(Rendered Stanford Sentiment Treebank v2) dataset.
+
+The generated dataset has two columns: :py:obj:`[image, label]`.
+The tensor of column :py:obj:`image` is of the uint8 type.
+The tensor of column :py:obj:`label` is of the uint32 type.
+
+Args:
+dataset_dir (str): Path to the root directory that contains the dataset.
+usage (str, optional): Usage of this dataset, can be 'train', 'val', 'test' or 'all'.
+Default: None, will read all samples.
+num_samples (int, optional): The number of images to be included in the dataset.
+Default: None, will include all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+Default: None, expected order behavior shown in the table below.
+decode (bool, optional): Whether or not to decode the images after reading. Default: False.
+sampler (Sampler, optional): Object used to choose samples from the
+dataset. Default: None, expected order behavior shown in the table below.
+num_shards (int, optional): Number of shards that the dataset will be divided
+into. When this argument is specified, `num_samples` reflects
+the maximum sample number of per shard. Default: None.
+shard_id (int, optional): The shard ID within `num_shards` . This
+argument can only be specified when `num_shards` is also specified. Default: None.
+cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
+
+Raises:
+RuntimeError: If `dataset_dir` does not contain data files.
+ValueError: If `usage` is not 'train', 'test', 'val' or 'all'.
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+RuntimeError: If `sampler` and `shuffle` are specified at the same time.
+RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
+RuntimeError: If `num_shards` is specified but `shard_id` is None.
+RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+
+Note:
+- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
+The table below shows what input arguments are allowed and their expected behavior.
+
+.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
+:widths: 25 25 50
+:header-rows: 1
+
+* - Parameter `sampler`
+- Parameter `shuffle`
+- Expected Order Behavior
+* - None
+- None
+- random order
+* - None
+- True
+- random order
+* - None
+- False
+- sequential order
+* - Sampler object
+- None
+- order defined by sampler
+* - Sampler object
+- True
+- not allowed
+* - Sampler object
+- False
+- not allowed
+
+Examples:
+>>> rendered_sst2_dataset_dir = "/path/to/rendered_sst2_dataset_directory"
+>>>
+>>> # 1) Read all samples (image files) in rendered_sst2_dataset_dir with 8 threads
+>>> dataset = ds.RenderedSST2Dataset(dataset_dir=rendered_sst2_dataset_dir,
+... usage="all", num_parallel_workers=8)
+
+About RenderedSST2Dataset:
+
+Rendered SST2 is an image classification dataset which was generated by rendering sentences in the Standford
+Sentiment Treebank v2 dataset. There are three splits in this dataset and each split contains two classes
+(positive and negative): a train split containing 6920 images (3610 positive and 3310 negative), a validation
+split containing 872 images (444 positive and 428 negative), and a test split containing 1821 images
+(909 positive and 912 negative).
+
+Here is the original RenderedSST2 dataset structure.
+You can unzip the dataset files into the following directory structure and read by MindSpore's API.
+
+.. code-block::
+
+.
+└── rendered_sst2_dataset_directory
+├── train
+│ ├── negative
+│ │ ├── 0001.jpg
+│ │ ├── 0002.jpg
+│ │ ...
+│ └── positive
+│ ├── 0001.jpg
+│ ├── 0002.jpg
+│ ...
+├── test
+│ ├── negative
+│ │ ├── 0001.jpg
+│ │ ├── 0002.jpg
+│ │ ...
+│ └── positive
+│ ├── 0001.jpg
+│ ├── 0002.jpg
+│ ...
+└── valid
+├── negative
+│ ├── 0001.jpg
+│ ├── 0002.jpg
+│ ...
+└── positive
+├── 0001.jpg
+├── 0002.jpg
+...
+
+Citation:
+
+.. code-block::
+
+@inproceedings{socher-etal-2013-recursive,
+title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
+author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
+Christopher D. and Ng, Andrew and Potts, Christopher},
+booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
+month = oct,
+year = {2013},
+address = {Seattle, Washington, USA},
+publisher = {Association for Computational Linguistics},
+url = {https://www.aclweb.org/anthology/D13-1170},
+pages = {1631--1642},
+}
+"""
+
+@check_rendered_sst2_dataset
+def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
+decode=False, sampler=None, num_shards=None, shard_id=None, cache=None):
+super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
+shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
+
+self.dataset_dir = dataset_dir
+self.usage = replace_none(usage, "all")
+self.decode = replace_none(decode, False)
+
+def parse(self, children=None):
+return cde.RenderedSST2Node(self.dataset_dir, self.usage, self.decode, self.sampler)
+
+
 class _SBDataset:
 """
 Dealing with the data file with .mat extension, and return one row in tuple (image, task) each time.
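A sketch of the newly added RenderedSST2Dataset with the sharding arguments from the docstring above (path hypothetical):

.. code-block:: python

    import mindspore.dataset as ds

    sst2_dir = "/path/to/rendered_sst2_dataset_directory"   # hypothetical path
    # Read the validation split on shard 0 of 2; num_samples then caps the per-shard count.
    dataset = ds.RenderedSST2Dataset(dataset_dir=sst2_dir, usage='val', decode=True,
                                     num_shards=2, shard_id=0, num_samples=4)
    print(dataset.get_dataset_size())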
@@ -3783,43 +4168,44 @@ class _SBDataset:
 
 class SBDataset(GeneratorDataset):
 """
-
+SB(Semantic Boundaries) Dataset.
 
-
-
-
-
+By configuring the 'Task' parameter, the generated dataset has different output columns.
+
+- 'task' = 'Boundaries' , there are two output columns: the 'image' column has the data type uint8 and
+the 'label' column contains one image of the data type uint8.
+- 'task' = 'Segmentation' , there are two output columns: the 'image' column has the data type uint8 and
+the 'label' column contains 20 images of the data type uint8.
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
-task (str, optional): Acceptable tasks include 'Boundaries' or 'Segmentation'
-usage (str, optional): Acceptable usages include 'train', 'val', 'train_noval' and 'all'
+task (str, optional): Acceptable tasks include 'Boundaries' or 'Segmentation'. Default: 'Boundaries'.
+usage (str, optional): Acceptable usages include 'train', 'val', 'train_noval' and 'all'. Default: 'all'.
 num_samples (int, optional): The number of images to be included in the dataset.
-
-num_parallel_workers (int, optional): Number of
-
-
-
-decode (bool, optional): Decode the images after reading (default=None).
+Default: None, all images.
+num_parallel_workers (int, optional): Number of worker subprocesses to read the data. Default: 1.
+shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+order behavior shown in the table below.
+decode (bool, optional): Decode the images after reading. Default: None.
 sampler (Sampler, optional): Object used to choose samples from the
-dataset
+dataset. Default: None, expected order behavior shown in the table below.
 num_shards (int, optional): Number of shards that the dataset will be divided
-into
+into. Default: None. When this argument is specified, `num_samples` reflects
 the max sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
 
 Raises:
 RuntimeError: If `dataset_dir` is not valid or does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
 ValueError: If `dataset_dir` is not exist.
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 ValueError: If `task` is not in ['Boundaries', 'Segmentation'].
 ValueError: If `usage` is not in ['train', 'val', 'train_noval', 'all'].
-ValueError: If `shard_id` is
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Note:
 - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive.
@@ -3869,7 +4255,7 @@ class SBDataset(GeneratorDataset):
 
 The Semantic Boundaries Dataset consists of 11355 color images. There are 8498 images' name in the train.txt,
 2857 images' name in the val.txt and 5623 images' name in the train_noval.txt. The category cls/
-contains the Segmentation and Boundaries results of category-level, the category inst/
+contains the Segmentation and Boundaries results of category-level, the category inst/ contains the
 Segmentation and Boundaries results of instance-level.
 
 You can unzip the dataset files into the following structure and read by MindSpore's API:
@@ -3916,45 +4302,46 @@ class SBDataset(GeneratorDataset):
 
 class SBUDataset(MappableDataset, VisionBaseDataset):
 """
-
+SBU(SBU Captioned Photo) dataset.
 
-The generated dataset has two columns :py:obj:`[image, caption]
+The generated dataset has two columns :py:obj:`[image, caption]` .
 The tensor of column :py:obj:`image` is of the uint8 type.
 The tensor of column :py:obj:`caption` is of the string type.
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
-
-
-
-
-
-shuffle (bool, optional): Whether or not to perform shuffle on the dataset
-
+num_samples (int, optional): The number of images to be included in the dataset.
+Default: None, will read all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+Default: None, expected order behavior shown in the table below.
+decode (bool, optional): Decode the images after reading. Default: False.
 sampler (Sampler, optional): Object used to choose samples from the
-dataset
-num_shards (int, optional): Number of shards that the dataset will be divided into
+dataset. Default: None, expected order behavior shown in the table below.
+num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
 When this argument is specified, `num_samples` reflects the max sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
 
 Raises:
 RuntimeError: If `dataset_dir` does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
-ValueError: If `
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Note:
 - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
 The table below shows what input arguments are allowed and their expected behavior.
 
-.. list-table:: Expected Order Behavior of Using
+.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
 :widths: 25 25 50
 :header-rows: 1
 
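A short SBUDataset sketch with an explicit sampler, which is why `shuffle` must then stay unset (path hypothetical):

.. code-block:: python

    import mindspore.dataset as ds

    sbu_dir = "/path/to/sbu_dataset_directory"   # hypothetical path
    sampler = ds.SequentialSampler(start_index=0, num_samples=4)
    # shuffle is left at None because sampler and shuffle are mutually exclusive
    dataset = ds.SBUDataset(dataset_dir=sbu_dir, decode=True, sampler=sampler)
    for item in dataset.create_dict_iterator(output_numpy=True):
        print(item["caption"])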
@@ -4031,42 +4418,43 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
 
 class SemeionDataset(MappableDataset, VisionBaseDataset):
 """
-
+Semeion dataset.
 
-The generated dataset has two columns :py:obj:`[image, label]
+The generated dataset has two columns :py:obj:`[image, label]` .
 The tensor of column :py:obj:`image` is of the uint8 type.
 The tensor of column :py:obj:`label` is a scalar of the uint32 type.
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
-num_samples (int, optional): The number of samples to be included in the dataset
-
-num_parallel_workers (int, optional): Number of
-
-
-
+num_samples (int, optional): The number of samples to be included in the dataset.
+Default: None, will read all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+order behavior shown in the table below.
 sampler (Sampler, optional): Object used to choose samples from the
-dataset
+dataset. Default: None, expected order behavior shown in the table below.
 num_shards (int, optional): Number of shards that the dataset will be divided
-into
+into. Default: None. When this argument is specified, `num_samples` reflects
 the maximum sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
 
 Raises:
 RuntimeError: If `dataset_dir` does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
-ValueError: If `
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Note:
-- This dataset can take in a `sampler
+- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
 The table below shows what input arguments are allowed and their expected behavior.
 
 .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
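A minimal Semeion sketch matching the documented defaults (path hypothetical):

.. code-block:: python

    import mindspore.dataset as ds

    semeion_dir = "/path/to/semeion_dataset_directory"   # hypothetical path
    dataset = ds.SemeionDataset(dataset_dir=semeion_dir, num_samples=5, shuffle=False)
    print(dataset.get_dataset_size())  # 5 when at least 5 samples exist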
@@ -4153,9 +4541,9 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
 
 class STL10Dataset(MappableDataset, VisionBaseDataset):
 """
-
+STL-10 dataset.
 
-The generated dataset has two columns: :py:obj:`[image, label]
+The generated dataset has two columns: :py:obj:`[image, label]` .
 The tensor of column :py:obj:`image` is of the uint8 type.
 The tensor of column :py:obj:`label` is of a scalar of int32 type.
 
@@ -4166,39 +4554,40 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
 train samples, 'test' will read from 8,000 test samples,
 'unlabeled' will read from all 100,000 samples, and 'train+unlabeled'
 will read from 105000 samples, 'all' will read all the samples
-
+Default: None, all samples.
 num_samples (int, optional): The number of images to be included in the dataset.
-
-num_parallel_workers (int, optional): Number of
-
-
-
+Default: None, all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+order behavior shown in the table below.
 sampler (Sampler, optional): Object used to choose samples from the
-dataset
+dataset. Default: None, expected order behavior shown in the table below.
 num_shards (int, optional): Number of shards that the dataset will be divided
-into
+into. Default: None. When this argument is specified, `num_samples` reflects
 the max sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
 
 Raises:
 RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
 ValueError: If `usage` is invalid.
-ValueError: If `
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Note:
 - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
 The table below shows what input arguments are allowed and their expected behavior.
 
-.. list-table:: Expected Order Behavior of Using
+.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
 :widths: 25 25 50
 :header-rows: 1
 
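A sketch of STL10Dataset with the 'train+unlabeled' usage mentioned above (path hypothetical; unlabeled rows carry label -1):

.. code-block:: python

    import mindspore.dataset as ds

    stl10_dir = "/path/to/stl10_dataset_directory"   # hypothetical path
    dataset = ds.STL10Dataset(dataset_dir=stl10_dir, usage='train+unlabeled', num_samples=8)
    for item in dataset.create_dict_iterator(output_numpy=True):
        print(item["image"].shape, int(item["label"]))   # label is -1 for unlabeled images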
@@ -4239,7 +4628,6 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
 About STL10 dataset:
 
 STL10 dataset consists of 10 classes: airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck.
-STL10 is is inspired by the CIFAR-10 dataset.
 Images are 96x96 pixels, color.
 500 training images, 800 test images per class and 100000 unlabeled images.
 Labels are 0-indexed, and unlabeled images have -1 as their labels.
@@ -4257,7 +4645,7 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
 ├── test_y.bin
 └── unlabeled_X.bin
 
-Citation of STL10 dataset
+Citation of STL10 dataset:
 
 .. code-block::
 
@@ -4288,6 +4676,151 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
 return cde.STL10Node(self.dataset_dir, self.usage, self.sampler)
 
 
+class SUN397Dataset(MappableDataset, VisionBaseDataset):
+"""
+SUN397(Scene UNderstanding) dataset.
+
+The generated dataset has two columns: :py:obj:`[image, label]`.
+The tensor of column :py:obj:`image` is of the uint8 type.
+The tensor of column :py:obj:`label` is of the uint32 type.
+
+Args:
+dataset_dir (str): Path to the root directory that contains the dataset.
+num_samples (int, optional): The number of images to be included in the dataset.
+Default: None, all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+Default: None, expected order behavior shown in the table below.
+decode (bool, optional): Whether or not to decode the images after reading. Default: False.
+sampler (Sampler, optional): Object used to choose samples from the
+dataset. Default: None, expected order behavior shown in the table below.
+num_shards (int, optional): Number of shards that the dataset will be divided
+into. When this argument is specified, `num_samples` reflects
+the maximum sample number of per shard. Default: None.
+shard_id (int, optional): The shard ID within `num_shards` . This
+argument can only be specified when `num_shards` is also specified. Default: None.
+cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
+
+Raises:
+RuntimeError: If `dataset_dir` does not contain data files.
+RuntimeError: If `sampler` and `shuffle` are specified at the same time.
+RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
+RuntimeError: If `num_shards` is specified but `shard_id` is None.
+RuntimeError: If `shard_id` is specified but `num_shards` is None.
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
+
+Note:
+- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
+The table below shows what input arguments are allowed and their expected behavior.
+
+.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
+:widths: 25 25 50
+:header-rows: 1
+
+* - Parameter `sampler`
+- Parameter `shuffle`
+- Expected Order Behavior
+* - None
+- None
+- random order
+* - None
+- True
+- random order
+* - None
+- False
+- sequential order
+* - Sampler object
+- None
+- order defined by sampler
+* - Sampler object
+- True
+- not allowed
+* - Sampler object
+- False
+- not allowed
+
+Examples:
+>>> sun397_dataset_dir = "/path/to/sun397_dataset_directory"
+>>>
+>>> # 1) Read all samples (image files) in sun397_dataset_dir with 8 threads
+>>> dataset = ds.SUN397Dataset(dataset_dir=sun397_dataset_dir, num_parallel_workers=8)
+
+About SUN397Dataset:
+
+The SUN397 or Scene UNderstanding (SUN) is a dataset for scene recognition consisting of 397 categories with
+108,754 images. The number of images varies across categories, but there are at least 100 images per category.
+Images are in jpg, png, or gif format.
+
+Here is the original SUN397 dataset structure.
+You can unzip the dataset files into this directory structure and read by MindSpore's API.
+
+.. code-block::
+
+.
+└── sun397_dataset_directory
+├── ClassName.txt
+├── README.txt
+├── a
+│ ├── abbey
+│ │ ├── sun_aaaulhwrhqgejnyt.jpg
+│ │ ├── sun_aacphuqehdodwawg.jpg
+│ │ ├── ...
+│ ├── apartment_building
+│ │ └── outdoor
+│ │ ├── sun_aamyhslnsnomjzue.jpg
+│ │ ├── sun_abbjzfrsalhqivis.jpg
+│ │ ├── ...
+│ ├── ...
+├── b
+│ ├── badlands
+│ │ ├── sun_aabtemlmesogqbbp.jpg
+│ │ ├── sun_afbsfeexggdhzshd.jpg
+│ │ ├── ...
+│ ├── balcony
+│ │ ├── exterior
+│ │ │ ├── sun_aaxzaiuznwquburq.jpg
+│ │ │ ├── sun_baajuldidvlcyzhv.jpg
+│ │ │ ├── ...
+│ │ └── interior
+│ │ ├── sun_babkzjntjfarengi.jpg
+│ │ ├── sun_bagjvjynskmonnbv.jpg
+│ │ ├── ...
+│ └── ...
+├── ...
+
+
+Citation:
+
+.. code-block::
+
+@inproceedings{xiao2010sun,
+title = {Sun database: Large-scale scene recognition from abbey to zoo},
+author = {Xiao, Jianxiong and Hays, James and Ehinger, Krista A and Oliva, Aude and Torralba, Antonio},
+booktitle = {2010 IEEE computer society conference on computer vision and pattern recognition},
+pages = {3485--3492},
+year = {2010},
+organization = {IEEE}
+}
+"""
+
+@check_sun397_dataset
+def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False,
+sampler=None, num_shards=None, shard_id=None, cache=None):
+super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
+shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
+
+self.dataset_dir = dataset_dir
+self.decode = replace_none(decode, False)
+
+def parse(self, children=None):
+return cde.SUN397Node(self.dataset_dir, self.decode, self.sampler)
+
+
 class _SVHNDataset:
 """
 Mainly for loading SVHN Dataset, and return two rows each time.
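A sketch chaining the newly added SUN397Dataset into a small preprocessing pipeline (path hypothetical; Resize and batch are the usual mindspore.dataset.vision and Dataset operations, not part of this diff):

.. code-block:: python

    import mindspore.dataset as ds
    import mindspore.dataset.vision as vision

    sun397_dir = "/path/to/sun397_dataset_directory"   # hypothetical path
    dataset = ds.SUN397Dataset(dataset_dir=sun397_dir, decode=True, num_samples=4)
    dataset = dataset.map(operations=vision.Resize((256, 256)), input_columns="image")
    dataset = dataset.batch(2)
    print(dataset.get_dataset_size())  # 2 batches of 2 samples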
@@ -4326,43 +4859,43 @@ class _SVHNDataset:
 
 class SVHNDataset(GeneratorDataset):
 """
-
+SVHN(Street View House Numbers) dataset.
 
-The generated dataset has two columns: :py:obj:`[image, label]
+The generated dataset has two columns: :py:obj:`[image, label]` .
 The tensor of column :py:obj:`image` is of the uint8 type.
 The tensor of column :py:obj:`label` is of a scalar of uint32 type.
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
-usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of dataset
-
-num_samples (int, optional): The number of samples to be included in the dataset
-num_parallel_workers (int, optional): Number of subprocesses used to
-
-
+usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of dataset.
+Default: None, will read all samples.
+num_samples (int, optional): The number of samples to be included in the dataset. Default: None, all images.
+num_parallel_workers (int, optional): Number of worker subprocesses used to
+fetch the dataset in parallel. Default: 1.
+shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+Default: None, expected order behavior shown in the table below.
 sampler (Sampler, optional): Object used to choose samples from the dataset. Random accessible
-input is required
-num_shards (int, optional): Number of shards that the dataset will be divided into
-
-
-
-when num_shards is also specified. Random accessible input is required.
+input is required. Default: None, expected order behavior shown in the table below.
+num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
+When this argument is specified, `num_samples` reflects the max sample number of per shard.
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument must be specified only
+when `num_shards` is also specified.
 
 Raises:
 RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
 ValueError: If `usage` is invalid.
-ValueError: If `
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Note:
 - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
 The table below shows what input arguments are allowed and their expected behavior.
 
-.. list-table:: Expected Order Behavior of Using
+.. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
 :widths: 25 25 50
 :header-rows: 1
 
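SVHNDataset is generator-backed, so `num_parallel_workers` defaults to a single worker subprocess as documented above; a minimal sketch (path hypothetical):

.. code-block:: python

    import mindspore.dataset as ds

    svhn_dir = "/path/to/svhn_dataset_directory"   # hypothetical path
    dataset = ds.SVHNDataset(dataset_dir=svhn_dir, usage='train', num_samples=4)
    for item in dataset.create_dict_iterator(output_numpy=True):
        print(item["image"].shape, int(item["label"]))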
@@ -4394,9 +4927,7 @@ class SVHNDataset(GeneratorDataset):
 
 About SVHN dataset:
 
-SVHN dataset consists of 10 digit classes.
-SVHN is obtained from house numbers in Google Street View images.
-73257 digits for training, 26032 digits for testing, and 531131 additional extra training data.
+SVHN dataset consists of 10 digit classes and is obtained from house numbers in Google Street View images.
 
 Here is the original SVHN dataset structure.
 You can unzip the dataset files into this directory structure and read by MindSpore's API.
@@ -4438,54 +4969,53 @@ class SVHNDataset(GeneratorDataset):
 
 class USPSDataset(SourceDataset, VisionBaseDataset):
 """
-
+USPS(U.S. Postal Service) dataset.
 
-The generated dataset has two columns: :py:obj:`[image, label]
+The generated dataset has two columns: :py:obj:`[image, label]` .
 The tensor of column :py:obj:`image` is of the uint8 type.
-The tensor of column :py:obj:`label` is of
+The tensor of column :py:obj:`label` is of the uint32 type.
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
 usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 7,291
 train samples, 'test' will read from 2,007 test samples, 'all' will read from all 9,298 samples.
-
-num_samples (int, optional): The number of images to be included in the dataset
-
-num_parallel_workers (int, optional): Number of
-
-
-
-
-If shuffle is
-
+Default: None, will read all samples.
+num_samples (int, optional): The number of images to be included in the dataset.
+Default: None, will read all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
+Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
+If shuffle is False, no shuffling will be performed.
+If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
+Set the mode of data shuffling by passing in enumeration variables:
 
 - Shuffle.GLOBAL: Shuffle both the files and samples.
 
 - Shuffle.FILES: Shuffle files only.
 
-num_shards (int, optional): Number of shards that the dataset will be divided into
+num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
 When this argument is specified, `num_samples` reflects the max sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
 
 Raises:
 RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
 ValueError: If `usage` is invalid.
-ValueError: If `
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Examples:
 >>> usps_dataset_dir = "/path/to/usps_dataset_directory"
 >>>
 >>> # Read 3 samples from USPS dataset
 >>> dataset = ds.USPSDataset(dataset_dir=usps_dataset_dir, num_samples=3)
->>>
->>> # Note: In USPS dataset, each dictionary has keys "image" and "label"
 
 About USPS dataset:
 
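USPSDataset accepts the Shuffle enum as well as a bool, per the rewritten `shuffle` entry above; a sketch (path hypothetical):

.. code-block:: python

    import mindspore.dataset as ds

    usps_dir = "/path/to/usps_dataset_directory"   # hypothetical path
    # Shuffle.FILES shuffles file order only; True would be equivalent to Shuffle.GLOBAL.
    dataset = ds.USPSDataset(dataset_dir=usps_dir, usage='test', shuffle=ds.Shuffle.FILES,
                             num_samples=3)
    print(dataset.get_dataset_size())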
@@ -4535,52 +5065,53 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
 
 class VOCDataset(MappableDataset, VisionBaseDataset):
 """
-
+VOC(Visual Object Classes) dataset.
 
 The generated dataset with different task setting has different output columns:
 
-- task = :py:obj:`Detection
-:py:obj:`[label, dtype=uint32]
-- task = :py:obj:`Segmentation
+- task = :py:obj:`Detection` , output columns: :py:obj:`[image, dtype=uint8]` , :py:obj:`[bbox, dtype=float32]` , \
+:py:obj:`[label, dtype=uint32]` , :py:obj:`[difficult, dtype=uint32]` , :py:obj:`[truncate, dtype=uint32]` .
+- task = :py:obj:`Segmentation` , output columns: :py:obj:`[image, dtype=uint8]` , :py:obj:`[target,dtype=uint8]` .
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
-task (str, optional): Set the task type of reading voc data, now only support 'Segmentation' or 'Detection'
-
-usage (str, optional): Set the task type of ImageSets
+task (str, optional): Set the task type of reading voc data, now only support 'Segmentation' or 'Detection'.
+Default: 'Segmentation'.
+usage (str, optional): Set the task type of ImageSets. Default: 'train'. If task is 'Segmentation', image and
 annotation list will be loaded in ./ImageSets/Segmentation/usage + ".txt"; If task is 'Detection', image and
 annotation list will be loaded in ./ImageSets/Main/usage + ".txt"; if task and usage are not set, image and
 annotation list will be loaded in ./ImageSets/Segmentation/train.txt as default.
 class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in
-'Detection' task
-class will be given a unique index starting from 0
-num_samples (int, optional): The number of images to be included in the dataset
-
-num_parallel_workers (int, optional): Number of
-
-
-
-
-
-
+'Detection' task. Default: None, the folder names will be sorted alphabetically and each
+class will be given a unique index starting from 0.
+num_samples (int, optional): The number of images to be included in the dataset.
+Default: None, all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
+order behavior shown in the table below.
+decode (bool, optional): Decode the images after reading. Default: False.
+sampler (Sampler, optional): Object used to choose samples from the dataset.
+Default: None, expected order behavior shown in the table below.
 num_shards (int, optional): Number of shards that the dataset will be divided
-into
+into. Default: None. When this argument is specified, `num_samples` reflects
 the maximum sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
 argument can only be specified when `num_shards` is also specified.
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
 extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
-:py:obj:`[_meta-filename, dtype=string]` will be output at the end
+:py:obj:`[_meta-filename, dtype=string]` will be output at the end. Default: False.
 decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
 and returns the decrypted bytes data. Default: None, no decryption.
 
 Raises:
 RuntimeError: If `dataset_dir` does not contain data files.
 RuntimeError: If xml of Annotations is an invalid format.
-RuntimeError: If xml of Annotations loss attribution of `object
-RuntimeError: If xml of Annotations loss attribution of `bndbox
+RuntimeError: If xml of Annotations loss attribution of `object` .
+RuntimeError: If xml of Annotations loss attribution of `bndbox` .
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
@@ -4589,12 +5120,12 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
 ValueError: If task is not equal 'Segmentation' or 'Detection'.
 ValueError: If task equal 'Segmentation' but class_indexing is not None.
 ValueError: If txt related to mode is not exist.
-ValueError: If `shard_id` is
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 
 Note:
 - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op
 is added to remove the prefix('_meta-').
-- This dataset can take in a `sampler
+- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
 The table below shows what input arguments are allowed and their expected behavior.
 
 .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
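For the VOCDataset 'Detection' task described above, a sketch with an explicit `class_indexing` map (path and class names hypothetical):

.. code-block:: python

    import mindspore.dataset as ds

    voc_dir = "/path/to/voc_dataset_directory"   # hypothetical path
    dataset = ds.VOCDataset(dataset_dir=voc_dir, task='Detection', usage='train',
                            class_indexing={'car': 0, 'person': 1},   # only valid for 'Detection'
                            decode=True, num_samples=2)
    for item in dataset.create_dict_iterator(output_numpy=True):
        print(item["bbox"].shape, item["label"])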
@@ -4643,7 +5174,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
 >>> # In VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target"
 >>> # In VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation"
 
-About VOC dataset
+About VOC dataset:
 
 The PASCAL Visual Object Classes (VOC) challenge is a benchmark in visual
 object category recognition and detection, providing the vision and machine
@@ -4741,56 +5272,50 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
 
 class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
 """
-
+WIDERFace dataset.
 
 When usage is "train", "valid" or "all", the generated dataset has eight columns ["image", "bbox", "blur",
-"expression", "illumination", "occlusion", "pose", "invalid"].
-
-
-The tensor of column :py:obj:`bbox` is a scalar of the uint32 type.
-The tensor of column :py:obj:`blur` is a scalar of the uint32 type.
-The tensor of column :py:obj:`expression` is a scalar of the uint32 type.
-The tensor of column :py:obj:`illumination` is a scalar of the uint32 type.
-The tensor of column :py:obj:`occlusion` is a scalar of the uint32 type.
-The tensor of column :py:obj:`pose` is a scalar of the uint32 type.
-The tensor of column :py:obj:`invalid` is a scalar of the uint32 type.
+"expression", "illumination", "occlusion", "pose", "invalid"]. The data type of the `image` column is uint8,
+and all other columns are uint32. When usage is "test", it only has one column
+["image"], with uint8 data type.
 
 Args:
 dataset_dir (str): Path to the root directory that contains the dataset.
 usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train' will read
 from 12,880 samples, 'test' will read from 16,097 samples, 'valid' will read from 3,226 test samples
-and 'all' will read all 'train' and 'valid' samples
-num_samples (int, optional): The number of images to be included in the dataset
-
-num_parallel_workers (int, optional): Number of
-
-
-
-
-
-
-
+and 'all' will read all 'train' and 'valid' samples. Default: None, will be set to 'all'.
+num_samples (int, optional): The number of images to be included in the dataset.
+Default: None, will read all images.
+num_parallel_workers (int, optional): Number of worker threads to read the data.
+Default: None, will use global default workers(8), it can be set
+by `mindspore.dataset.config.set_num_parallel_workers` .
+shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
+Default: None, expected order behavior shown in the table below.
+decode (bool, optional): Decode the images after reading. Default: False.
+sampler (Sampler, optional): Object used to choose samples from the dataset.
+Default: None, expected order behavior shown in the table below.
+num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
 When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
-shard_id (int, optional): The shard ID within `num_shards`
+shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument can only be specified
 when `num_shards` is also specified.
 cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
-`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/
-
+`Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
+Default: None, which means no cache is used.
 
 Raises:
 RuntimeError: If `dataset_dir` does not contain data files.
-ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 RuntimeError: If `sampler` and `shuffle` are specified at the same time.
 RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
 RuntimeError: If `num_shards` is specified but `shard_id` is None.
 RuntimeError: If `shard_id` is specified but `num_shards` is None.
-ValueError: If `shard_id` is
+ValueError: If `shard_id` is not in range of [0, `num_shards` ).
 ValueError: If `usage` is not in ['train', 'test', 'valid', 'all'].
+ValueError: If `num_parallel_workers` exceeds the max thread numbers.
 ValueError: If `annotation_file` is not exist.
 ValueError: If `dataset_dir` is not exist.
 
 Note:
-- This dataset can take in a `sampler
+- This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
 The table below shows what input arguments are allowed and their expected behavior.
 
 .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`