mindspore 1.10.0__cp38-cp38-win_amd64.whl → 2.0.0rc1__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (966):
  1. mindspore/.commit_id +1 -1
  2. mindspore/ConcurrencyCheck.dll +0 -0
  3. mindspore/CppBuildInsights.dll +0 -0
  4. mindspore/CppCoreCheck.dll +0 -0
  5. mindspore/EnumIndex.dll +0 -0
  6. mindspore/EspXEngine.dll +0 -0
  7. mindspore/HResultCheck.dll +0 -0
  8. mindspore/KernelTraceControl.dll +0 -0
  9. mindspore/LocalESPC.dll +0 -0
  10. mindspore/Microsoft.Diagnostics.Tracing.EventSource.dll +0 -0
  11. mindspore/Microsoft.VisualStudio.RemoteControl.dll +0 -0
  12. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  13. mindspore/Microsoft.VisualStudio.Utilities.Internal.dll +0 -0
  14. mindspore/Newtonsoft.Json.dll +0 -0
  15. mindspore/System.Runtime.CompilerServices.Unsafe.dll +0 -0
  16. mindspore/VariantClear.dll +0 -0
  17. mindspore/__init__.py +9 -4
  18. mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
  19. mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
  20. mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
  21. mindspore/_check_jit_forbidden_api.py +102 -0
  22. mindspore/_checkparam.py +1066 -1001
  23. mindspore/_extends/builtin_operations.py +32 -4
  24. mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
  25. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
  26. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
  27. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
  28. mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
  29. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
  30. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
  31. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
  32. mindspore/_extends/parse/__init__.py +5 -3
  33. mindspore/_extends/parse/namespace.py +17 -2
  34. mindspore/_extends/parse/parser.py +193 -34
  35. mindspore/_extends/parse/resources.py +7 -8
  36. mindspore/_extends/parse/standard_method.py +1780 -435
  37. mindspore/_extends/parse/trope.py +3 -1
  38. mindspore/amp.py +53 -58
  39. mindspore/atlprov.dll +0 -0
  40. mindspore/boost/adasum.py +3 -2
  41. mindspore/boost/boost.py +2 -2
  42. mindspore/boost/boost_cell_wrapper.py +46 -26
  43. mindspore/boost/dim_reduce.py +6 -5
  44. mindspore/boost/grad_accumulation.py +2 -1
  45. mindspore/boost/group_loss_scale_manager.py +1 -1
  46. mindspore/c1.dll +0 -0
  47. mindspore/c1xx.dll +0 -0
  48. mindspore/c2.dll +0 -0
  49. mindspore/cfgpersist.dll +0 -0
  50. mindspore/clang_rt.asan_dbg_dynamic-x86_64.dll +0 -0
  51. mindspore/clang_rt.asan_dynamic-x86_64.dll +0 -0
  52. mindspore/common/__init__.py +11 -10
  53. mindspore/common/_decorator.py +2 -0
  54. mindspore/common/_register_for_adapter.py +55 -0
  55. mindspore/common/_stub_tensor.py +201 -0
  56. mindspore/common/_utils.py +57 -0
  57. mindspore/common/api.py +582 -297
  58. mindspore/common/dtype.py +66 -18
  59. mindspore/common/dump.py +2 -2
  60. mindspore/common/initializer.py +38 -1
  61. mindspore/common/jit_config.py +25 -13
  62. mindspore/common/mutable.py +53 -24
  63. mindspore/common/parameter.py +60 -37
  64. mindspore/common/seed.py +8 -24
  65. mindspore/common/sparse_tensor.py +927 -0
  66. mindspore/common/tensor.py +1627 -3900
  67. mindspore/communication/__init__.py +10 -5
  68. mindspore/communication/_comm_helper.py +78 -214
  69. mindspore/communication/_hccl_management.py +2 -1
  70. mindspore/communication/management.py +136 -47
  71. mindspore/config/op_info.config +501 -1008
  72. mindspore/context.py +291 -56
  73. mindspore/d3dcompiler_47.dll +0 -0
  74. mindspore/dataset/__init__.py +12 -8
  75. mindspore/dataset/audio/__init__.py +9 -9
  76. mindspore/dataset/audio/transforms.py +1090 -228
  77. mindspore/dataset/audio/utils.py +87 -39
  78. mindspore/dataset/audio/validators.py +223 -1
  79. mindspore/dataset/callback/ds_callback.py +17 -15
  80. mindspore/dataset/core/config.py +246 -17
  81. mindspore/dataset/core/py_util_helpers.py +4 -3
  82. mindspore/dataset/core/validator_helpers.py +10 -10
  83. mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
  84. mindspore/dataset/debug/debug_hook.py +65 -0
  85. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  86. mindspore/dataset/engine/__init__.py +7 -3
  87. mindspore/dataset/engine/cache_client.py +9 -9
  88. mindspore/dataset/engine/datasets.py +648 -477
  89. mindspore/dataset/engine/datasets_audio.py +165 -167
  90. mindspore/dataset/engine/datasets_standard_format.py +93 -67
  91. mindspore/dataset/engine/datasets_text.py +492 -342
  92. mindspore/dataset/engine/datasets_user_defined.py +85 -50
  93. mindspore/dataset/engine/datasets_vision.py +1224 -699
  94. mindspore/dataset/engine/graphdata.py +134 -69
  95. mindspore/dataset/engine/iterators.py +50 -9
  96. mindspore/dataset/engine/offload.py +52 -31
  97. mindspore/dataset/engine/samplers.py +27 -24
  98. mindspore/dataset/engine/serializer_deserializer.py +14 -15
  99. mindspore/dataset/engine/validators.py +213 -52
  100. mindspore/dataset/text/__init__.py +10 -8
  101. mindspore/dataset/text/transforms.py +152 -57
  102. mindspore/dataset/text/utils.py +98 -49
  103. mindspore/dataset/text/validators.py +25 -0
  104. mindspore/dataset/transforms/__init__.py +4 -2
  105. mindspore/dataset/transforms/c_transforms.py +11 -13
  106. mindspore/dataset/transforms/py_transforms.py +2 -2
  107. mindspore/dataset/transforms/py_transforms_util.py +10 -0
  108. mindspore/dataset/transforms/transforms.py +13 -15
  109. mindspore/dataset/transforms/validators.py +7 -7
  110. mindspore/dataset/utils/__init__.py +2 -1
  111. mindspore/dataset/utils/browse_dataset.py +13 -13
  112. mindspore/dataset/utils/line_reader.py +121 -0
  113. mindspore/dataset/vision/__init__.py +8 -7
  114. mindspore/dataset/vision/c_transforms.py +125 -126
  115. mindspore/dataset/vision/py_transforms.py +37 -37
  116. mindspore/dataset/vision/py_transforms_util.py +23 -20
  117. mindspore/dataset/vision/transforms.py +316 -315
  118. mindspore/dataset/vision/utils.py +313 -17
  119. mindspore/dataset/vision/validators.py +6 -6
  120. mindspore/default_config.py +0 -1
  121. mindspore/dpcmi.dll +0 -0
  122. mindspore/{compression → experimental}/__init__.py +6 -5
  123. mindspore/experimental/map_parameter.py +275 -0
  124. mindspore/include/OWNERS +0 -1
  125. mindspore/include/api/callback/callback.h +9 -13
  126. mindspore/include/api/callback/ckpt_saver.h +2 -2
  127. mindspore/include/api/callback/loss_monitor.h +2 -2
  128. mindspore/include/api/callback/lr_scheduler.h +5 -5
  129. mindspore/include/api/callback/time_monitor.h +2 -2
  130. mindspore/include/api/callback/train_accuracy.h +4 -6
  131. mindspore/include/api/cfg.h +19 -6
  132. mindspore/include/api/context.h +70 -9
  133. mindspore/include/api/delegate.h +8 -1
  134. mindspore/include/api/dual_abi_helper.h +8 -24
  135. mindspore/include/api/metrics/accuracy.h +2 -2
  136. mindspore/include/api/metrics/metrics.h +4 -3
  137. mindspore/include/api/model.h +9 -4
  138. mindspore/include/api/model_group.h +68 -0
  139. mindspore/include/api/model_parallel_runner.h +17 -17
  140. mindspore/include/api/net.h +12 -11
  141. mindspore/include/api/serialization.h +20 -4
  142. mindspore/include/api/status.h +7 -1
  143. mindspore/include/api/types.h +25 -21
  144. mindspore/include/api/visible.h +4 -0
  145. mindspore/include/c_api/model_c.h +5 -0
  146. mindspore/include/c_api/status_c.h +1 -1
  147. mindspore/include/dataset/config.h +1 -1
  148. mindspore/include/dataset/constants.h +14 -0
  149. mindspore/include/dataset/text.h +59 -0
  150. mindspore/include/dataset/vision.h +56 -117
  151. mindspore/include/dataset/vision_lite.h +102 -0
  152. mindspore/jpeg62.dll +0 -0
  153. mindspore/log.py +28 -28
  154. mindspore/mindrecord/common/exceptions.py +2 -4
  155. mindspore/mindrecord/filereader.py +19 -1
  156. mindspore/mindrecord/filewriter.py +250 -88
  157. mindspore/mindrecord/mindpage.py +13 -13
  158. mindspore/mindrecord/shardheader.py +15 -15
  159. mindspore/mindrecord/shardreader.py +9 -0
  160. mindspore/mindrecord/shardwriter.py +29 -29
  161. mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
  162. mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
  163. mindspore/mindrecord/tools/csv_to_mr.py +4 -4
  164. mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
  165. mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
  166. mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
  167. mindspore/{libmindspore_backend.dll → mindspore_backend.dll} +0 -0
  168. mindspore/mindspore_common.dll +0 -0
  169. mindspore/mindspore_core.dll +0 -0
  170. mindspore/mindspore_glog.dll +0 -0
  171. mindspore/mindspore_shared_lib.dll +0 -0
  172. mindspore/msobj140.dll +0 -0
  173. mindspore/mspdb140.dll +0 -0
  174. mindspore/mspdbcore.dll +0 -0
  175. mindspore/mspdbst.dll +0 -0
  176. mindspore/mspft140.dll +0 -0
  177. mindspore/msvcdis140.dll +0 -0
  178. mindspore/msvcp140_1.dll +0 -0
  179. mindspore/msvcp140_2.dll +0 -0
  180. mindspore/msvcp140_atomic_wait.dll +0 -0
  181. mindspore/msvcp140_codecvt_ids.dll +0 -0
  182. mindspore/nn/__init__.py +1 -5
  183. mindspore/nn/cell.py +297 -234
  184. mindspore/nn/dynamic_lr.py +1 -1
  185. mindspore/nn/grad/cell_grad.py +17 -42
  186. mindspore/nn/layer/__init__.py +7 -4
  187. mindspore/nn/layer/activation.py +131 -88
  188. mindspore/nn/layer/basic.py +313 -613
  189. mindspore/nn/layer/channel_shuffle.py +103 -0
  190. mindspore/nn/layer/combined.py +1 -1
  191. mindspore/nn/layer/container.py +52 -6
  192. mindspore/nn/layer/conv.py +112 -43
  193. mindspore/nn/layer/dense.py +10 -9
  194. mindspore/nn/layer/embedding.py +36 -34
  195. mindspore/nn/layer/image.py +123 -27
  196. mindspore/nn/layer/math.py +108 -107
  197. mindspore/nn/layer/normalization.py +212 -366
  198. mindspore/nn/layer/padding.py +370 -42
  199. mindspore/nn/layer/pooling.py +1443 -219
  200. mindspore/nn/layer/rnn_cells.py +11 -16
  201. mindspore/nn/layer/rnns.py +38 -39
  202. mindspore/nn/layer/thor_layer.py +24 -25
  203. mindspore/nn/layer/timedistributed.py +5 -5
  204. mindspore/nn/layer/transformer.py +701 -0
  205. mindspore/nn/learning_rate_schedule.py +8 -8
  206. mindspore/nn/loss/__init__.py +9 -6
  207. mindspore/nn/loss/loss.py +678 -142
  208. mindspore/nn/metrics.py +53 -0
  209. mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
  210. mindspore/nn/optim/ada_grad.py +8 -8
  211. mindspore/nn/optim/adadelta.py +2 -3
  212. mindspore/nn/optim/adafactor.py +18 -14
  213. mindspore/nn/optim/adam.py +429 -87
  214. mindspore/nn/optim/adamax.py +5 -6
  215. mindspore/nn/optim/adasum.py +10 -8
  216. mindspore/nn/optim/asgd.py +7 -7
  217. mindspore/nn/optim/ftrl.py +81 -11
  218. mindspore/nn/optim/lamb.py +7 -8
  219. mindspore/nn/optim/lars.py +4 -4
  220. mindspore/nn/optim/lazyadam.py +82 -7
  221. mindspore/nn/optim/momentum.py +8 -7
  222. mindspore/nn/optim/optimizer.py +19 -10
  223. mindspore/nn/optim/proximal_ada_grad.py +6 -5
  224. mindspore/nn/optim/rmsprop.py +3 -3
  225. mindspore/nn/optim/rprop.py +20 -16
  226. mindspore/nn/optim/sgd.py +21 -15
  227. mindspore/nn/optim/thor.py +23 -21
  228. mindspore/nn/probability/__init__.py +0 -2
  229. mindspore/nn/probability/bijector/bijector.py +7 -6
  230. mindspore/nn/probability/bijector/invert.py +4 -2
  231. mindspore/nn/probability/bijector/softplus.py +2 -2
  232. mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
  233. mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
  234. mindspore/nn/probability/distribution/__init__.py +6 -0
  235. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
  236. mindspore/nn/probability/distribution/_utils/utils.py +11 -17
  237. mindspore/nn/probability/distribution/bernoulli.py +6 -6
  238. mindspore/nn/probability/distribution/beta.py +1 -1
  239. mindspore/nn/probability/distribution/categorical.py +9 -9
  240. mindspore/nn/probability/distribution/cauchy.py +8 -8
  241. mindspore/nn/probability/distribution/distribution.py +12 -6
  242. mindspore/nn/probability/distribution/exponential.py +5 -5
  243. mindspore/nn/probability/distribution/gamma.py +3 -3
  244. mindspore/nn/probability/distribution/geometric.py +6 -5
  245. mindspore/nn/probability/distribution/gumbel.py +5 -5
  246. mindspore/nn/probability/distribution/half_normal.py +133 -0
  247. mindspore/nn/probability/distribution/laplace.py +128 -0
  248. mindspore/nn/probability/distribution/log_normal.py +0 -1
  249. mindspore/nn/probability/distribution/logistic.py +4 -5
  250. mindspore/nn/probability/distribution/normal.py +11 -15
  251. mindspore/nn/probability/distribution/poisson.py +6 -2
  252. mindspore/nn/probability/distribution/student_t.py +150 -0
  253. mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
  254. mindspore/nn/probability/distribution/uniform.py +5 -5
  255. mindspore/nn/reinforcement/_tensors_queue.py +3 -3
  256. mindspore/nn/reinforcement/tensor_array.py +2 -2
  257. mindspore/nn/sparse/sparse.py +8 -1
  258. mindspore/nn/wrap/cell_wrapper.py +55 -27
  259. mindspore/nn/wrap/grad_reducer.py +20 -11
  260. mindspore/nn/wrap/loss_scale.py +47 -30
  261. mindspore/numpy/array_creations.py +33 -22
  262. mindspore/numpy/array_ops.py +46 -42
  263. mindspore/numpy/logic_ops.py +6 -27
  264. mindspore/numpy/math_ops.py +26 -19
  265. mindspore/numpy/utils.py +1 -8
  266. mindspore/numpy/utils_const.py +112 -62
  267. mindspore/opencv_core452.dll +0 -0
  268. mindspore/opencv_imgcodecs452.dll +0 -0
  269. mindspore/opencv_imgproc452.dll +0 -0
  270. mindspore/ops/__init__.py +6 -3
  271. mindspore/ops/_constants.py +0 -6
  272. mindspore/ops/_grad/__init__.py +2 -1
  273. mindspore/ops/_grad/grad_array_ops.py +209 -152
  274. mindspore/ops/_grad/grad_base.py +55 -17
  275. mindspore/ops/_grad/grad_clip_ops.py +11 -3
  276. mindspore/ops/_grad/grad_comm_ops.py +58 -47
  277. mindspore/ops/_grad/grad_implementations.py +21 -61
  278. mindspore/ops/_grad/grad_inner_ops.py +48 -6
  279. mindspore/ops/_grad/grad_math_ops.py +306 -161
  280. mindspore/ops/_grad/grad_nn_ops.py +192 -181
  281. mindspore/ops/_grad/grad_other_ops.py +1 -1
  282. mindspore/ops/_grad/grad_quant_ops.py +5 -5
  283. mindspore/ops/_grad/grad_sequence_ops.py +296 -0
  284. mindspore/ops/_grad/grad_sparse.py +15 -9
  285. mindspore/ops/_grad_experimental/__init__.py +1 -0
  286. mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
  287. mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
  288. mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
  289. mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
  290. mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
  291. mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
  292. mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
  293. mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
  294. mindspore/ops/_op_impl/__init__.py +3 -3
  295. mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
  296. mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
  297. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
  298. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
  299. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
  300. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
  301. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
  302. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
  303. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
  304. mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
  305. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
  306. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
  307. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
  308. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
  309. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
  310. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
  311. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
  312. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
  313. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
  314. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
  315. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
  316. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
  317. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
  318. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
  319. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
  320. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
  321. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
  322. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
  323. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
  324. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
  325. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
  326. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
  327. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
  328. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
  329. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
  330. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
  331. mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
  332. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  333. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  334. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  335. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  336. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  337. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  338. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  339. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  340. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  341. mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
  342. mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
  343. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  344. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  345. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  346. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  347. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  348. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  349. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  350. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  351. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  352. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
  353. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  354. mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
  355. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  356. mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
  357. mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
  358. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  359. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  360. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  361. mindspore/ops/_op_impl/aicpu/conj.py +11 -0
  362. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  363. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  364. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  365. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
  366. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  367. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  368. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  369. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  370. mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
  371. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  372. mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
  373. mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
  374. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  375. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  376. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
  377. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  378. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  379. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  380. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  381. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  382. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  383. mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
  384. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  385. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  386. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  387. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  388. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  389. mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
  390. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  391. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  392. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  393. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  394. mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
  395. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  396. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  397. mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
  398. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  399. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  400. mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
  401. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
  402. mindspore/ops/_op_impl/aicpu/mul.py +3 -1
  403. mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
  404. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  405. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  406. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  407. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  408. mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
  409. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  410. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  411. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  412. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  413. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  414. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  415. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  416. mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
  417. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  418. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  419. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  420. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  421. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  422. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
  423. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
  424. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  425. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  426. mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
  427. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
  428. mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
  429. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  430. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  431. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  432. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  433. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  434. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  435. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  436. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  437. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
  438. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  439. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  440. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  441. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  442. mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
  443. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  444. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  445. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  446. mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
  447. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
  448. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
  449. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  450. mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
  451. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  452. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  453. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  454. mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
  455. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
  456. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
  457. mindspore/ops/_op_impl/cpu/__init__.py +1 -2
  458. mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
  459. mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
  460. mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
  461. mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
  462. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  463. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  464. mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
  465. mindspore/ops/_op_impl/tbe/__init__.py +27 -608
  466. mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
  467. mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
  468. mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
  469. mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
  470. mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
  471. mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
  472. mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
  473. mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
  474. mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
  475. mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
  476. mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
  477. mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
  478. mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
  479. mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
  480. mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
  481. mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
  482. mindspore/ops/_op_impl/tbe/cast.py +0 -2
  483. mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
  484. mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
  485. mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
  486. mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
  487. mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
  488. mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
  489. mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
  490. mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
  491. mindspore/ops/_op_impl/tbe/greater.py +2 -0
  492. mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
  493. mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
  494. mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
  495. mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
  496. mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
  497. mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
  498. mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
  499. mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
  500. mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
  501. mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
  502. mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
  503. mindspore/ops/_op_impl/tbe/slice.py +26 -15
  504. mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
  505. mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
  506. mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
  507. mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
  508. mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
  509. mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
  510. mindspore/ops/_primitive_cache.py +3 -2
  511. mindspore/ops/_register_for_op.py +11 -0
  512. mindspore/ops/_utils/__init__.py +1 -1
  513. mindspore/ops/_utils/utils.py +20 -41
  514. mindspore/ops/_vmap/__init__.py +2 -2
  515. mindspore/ops/_vmap/vmap_array_ops.py +170 -78
  516. mindspore/ops/_vmap/vmap_base.py +24 -10
  517. mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
  518. mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
  519. mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
  520. mindspore/ops/_vmap/vmap_image_ops.py +52 -0
  521. mindspore/ops/_vmap/vmap_math_ops.py +77 -6
  522. mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
  523. mindspore/ops/_vmap/vmap_other_ops.py +3 -1
  524. mindspore/ops/_vmap/vmap_random_ops.py +55 -3
  525. mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
  526. mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
  527. mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
  528. mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
  529. mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
  530. mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
  531. mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
  532. mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
  533. mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
  534. mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
  535. mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
  536. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
  537. mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
  538. mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
  539. mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
  540. mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
  541. mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
  542. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
  543. mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
  544. mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
  545. mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
  546. mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
  547. mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
  548. mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
  549. mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
  550. mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
  551. mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
  552. mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
  553. mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
  554. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
  555. mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
  556. mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
  557. mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
  558. mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
  559. mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
  560. mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
  561. mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
  562. mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
  563. mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
  565. mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
  566. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  567. mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
  568. mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
  569. mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
  570. mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
  571. mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
  572. mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
  573. mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
  574. mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
  576. mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
  577. mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
  578. mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
  579. mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
  580. mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
  581. mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
  582. mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
  583. mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
  584. mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
  585. mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
  586. mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
  587. mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
  588. mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
  589. mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
  590. mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
  591. mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
  592. mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
  593. mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
  594. mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
  595. mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
  596. mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
  597. mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
  598. mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
  599. mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
  600. mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
  601. mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
  602. mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
  603. mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
  604. mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
  605. mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
  606. mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
  607. mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
  608. mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
  609. mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
  610. mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
  611. mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
  612. mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
  613. mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
  614. mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
  615. mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
  616. mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
  617. mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
  618. mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
  619. mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
  620. mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
  621. mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
  622. mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
  623. mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
  624. mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
  625. mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
  626. mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
  627. mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
  628. mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
  629. mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
  630. mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
  631. mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
  632. mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
  633. mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
  634. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
  635. mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
  636. mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
  637. mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
  638. mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
  639. mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
  640. mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
  641. mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
  642. mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
  643. mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
  644. mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
  645. mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  646. mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
  647. mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
  648. mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
  649. mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
  650. mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
  651. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  652. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  653. mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
  654. mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
  655. mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
  656. mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
  657. mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
  658. mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
  659. mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
  660. mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
  661. mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
  662. mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
  663. mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
  664. mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
  665. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
  666. mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
  667. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
  668. mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
  669. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
  670. mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
  671. mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
  672. mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
  673. mindspore/ops/bprop_mindir/__init__.py +1 -4
  674. mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
  675. mindspore/ops/composite/__init__.py +12 -13
  676. mindspore/ops/composite/base.py +261 -254
  677. mindspore/ops/composite/env_ops.py +41 -0
  678. mindspore/ops/composite/math_ops.py +197 -156
  679. mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
  680. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
  681. mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
  682. mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
  683. mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
  684. mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
  685. mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
  686. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
  687. mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
  688. mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
  689. mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
  690. mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
  691. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
  692. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  693. mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
  694. mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
  695. mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
  696. mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
  697. mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
  698. mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
  699. mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
  700. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
  701. mindspore/ops/function/__init__.py +323 -8
  702. mindspore/ops/function/array_func.py +3511 -780
  703. mindspore/ops/function/clip_func.py +329 -0
  704. mindspore/ops/function/debug_func.py +6 -6
  705. mindspore/ops/function/grad/__init__.py +5 -1
  706. mindspore/ops/function/grad/grad_func.py +736 -65
  707. mindspore/ops/function/image_func.py +270 -0
  708. mindspore/ops/function/linalg_func.py +268 -8
  709. mindspore/ops/function/math_func.py +8032 -3164
  710. mindspore/ops/function/nn_func.py +5619 -1855
  711. mindspore/ops/function/other_func.py +115 -0
  712. mindspore/ops/function/parameter_func.py +11 -10
  713. mindspore/ops/function/random_func.py +939 -77
  714. mindspore/ops/function/sparse_func.py +249 -84
  715. mindspore/ops/function/sparse_unary_func.py +2303 -0
  716. mindspore/ops/function/spectral_func.py +146 -0
  717. mindspore/ops/function/vmap_func.py +114 -0
  718. mindspore/ops/functional.py +182 -254
  719. mindspore/ops/op_info_register.py +79 -34
  720. mindspore/ops/operations/__init__.py +210 -118
  721. mindspore/ops/operations/_csr_ops.py +7 -7
  722. mindspore/ops/operations/_embedding_cache_ops.py +25 -15
  723. mindspore/ops/operations/_grad_ops.py +447 -322
  724. mindspore/ops/operations/_inner_ops.py +547 -176
  725. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  726. mindspore/ops/operations/_ms_kernel.py +29 -27
  727. mindspore/ops/operations/_ocr_ops.py +11 -11
  728. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  729. mindspore/ops/operations/_quant_ops.py +186 -101
  730. mindspore/ops/operations/_rl_inner_ops.py +122 -61
  731. mindspore/ops/operations/_scalar_ops.py +466 -0
  732. mindspore/ops/operations/_sequence_ops.py +1047 -0
  733. mindspore/ops/operations/_tensor_array.py +10 -11
  734. mindspore/ops/operations/_thor_ops.py +4 -4
  735. mindspore/ops/operations/array_ops.py +1428 -1226
  736. mindspore/ops/operations/comm_ops.py +180 -117
  737. mindspore/ops/operations/control_ops.py +4 -2
  738. mindspore/ops/operations/custom_ops.py +185 -98
  739. mindspore/ops/operations/debug_ops.py +92 -54
  740. mindspore/ops/operations/image_ops.py +406 -211
  741. mindspore/ops/operations/inner_ops.py +42 -53
  742. mindspore/ops/operations/linalg_ops.py +32 -29
  743. mindspore/ops/operations/math_ops.py +2076 -897
  744. mindspore/ops/operations/nn_ops.py +1282 -1252
  745. mindspore/ops/operations/other_ops.py +124 -278
  746. mindspore/ops/operations/random_ops.py +345 -178
  747. mindspore/ops/operations/rl_ops.py +8 -9
  748. mindspore/ops/operations/sparse_ops.py +502 -157
  749. mindspore/ops/operations/spectral_ops.py +107 -0
  750. mindspore/ops/primitive.py +192 -15
  751. mindspore/ops/vm_impl_registry.py +23 -2
  752. mindspore/parallel/__init__.py +6 -1
  753. mindspore/parallel/_auto_parallel_context.py +199 -92
  754. mindspore/parallel/_cell_wrapper.py +4 -2
  755. mindspore/parallel/_cost_model_context.py +3 -0
  756. mindspore/parallel/_dp_allreduce_fusion.py +2 -1
  757. mindspore/parallel/_offload_context.py +185 -0
  758. mindspore/parallel/_parallel_serialization.py +167 -28
  759. mindspore/parallel/_ps_context.py +9 -5
  760. mindspore/parallel/_recovery_context.py +1 -1
  761. mindspore/parallel/_tensor.py +9 -1
  762. mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
  763. mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
  764. mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
  765. mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
  766. mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
  767. mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
  768. mindspore/parallel/_utils.py +47 -7
  769. mindspore/parallel/algo_parameter_config.py +5 -1
  770. mindspore/parallel/checkpoint_transform.py +329 -0
  771. mindspore/parallel/shard.py +229 -0
  772. mindspore/perf_msvcbuildinsights.dll +0 -0
  773. mindspore/pgodb140.dll +0 -0
  774. mindspore/pgort140.dll +0 -0
  775. mindspore/profiler/__init__.py +2 -1
  776. mindspore/profiler/common/util.py +4 -3
  777. mindspore/profiler/common/validator/validate_path.py +2 -2
  778. mindspore/profiler/envprofiling.py +249 -0
  779. mindspore/profiler/parser/aicpu_data_parser.py +38 -39
  780. mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
  781. mindspore/profiler/parser/base_timeline_generator.py +471 -0
  782. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
  783. mindspore/profiler/parser/framework_parser.py +42 -16
  784. mindspore/profiler/parser/hccl_parser.py +158 -158
  785. mindspore/profiler/parser/hwts_log_parser.py +7 -6
  786. mindspore/profiler/parser/integrator.py +18 -1579
  787. mindspore/profiler/parser/minddata_analyzer.py +8 -8
  788. mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
  789. mindspore/profiler/parser/msadvisor_parser.py +2 -4
  790. mindspore/profiler/parser/optime_parser.py +17 -18
  791. mindspore/profiler/parser/profiler_info.py +108 -0
  792. mindspore/profiler/parser/step_trace_parser.py +1 -1
  793. mindspore/profiler/profiling.py +396 -194
  794. mindspore/rewrite/__init__.py +6 -2
  795. mindspore/rewrite/api/node.py +51 -110
  796. mindspore/rewrite/api/node_type.py +10 -6
  797. mindspore/rewrite/api/pattern_engine.py +51 -7
  798. mindspore/rewrite/api/scoped_value.py +64 -53
  799. mindspore/rewrite/api/symbol_tree.py +108 -61
  800. mindspore/rewrite/api/tree_node_helper.py +2 -3
  801. mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
  802. mindspore/rewrite/ast_helpers/__init__.py +6 -3
  803. mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
  804. mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
  805. mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
  806. mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
  807. mindspore/rewrite/ast_transformers/__init__.py +0 -1
  808. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
  809. mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
  810. mindspore/rewrite/common/__init__.py +2 -0
  811. mindspore/rewrite/common/event.py +1 -1
  812. mindspore/rewrite/common/observable.py +1 -1
  813. mindspore/rewrite/common/observer.py +1 -1
  814. mindspore/rewrite/common/rewrite_elog.py +35 -0
  815. mindspore/rewrite/namer.py +2 -2
  816. mindspore/rewrite/namespace.py +14 -4
  817. mindspore/rewrite/node.py +161 -13
  818. mindspore/rewrite/parser.py +0 -1
  819. mindspore/rewrite/parser_register.py +0 -1
  820. mindspore/rewrite/parsers/arguments_parser.py +3 -2
  821. mindspore/rewrite/parsers/assign_parser.py +267 -67
  822. mindspore/rewrite/parsers/attribute_parser.py +56 -0
  823. mindspore/rewrite/parsers/class_def_parser.py +191 -108
  824. mindspore/rewrite/parsers/constant_parser.py +101 -0
  825. mindspore/rewrite/parsers/container_parser.py +88 -0
  826. mindspore/rewrite/parsers/for_parser.py +28 -15
  827. mindspore/rewrite/parsers/function_def_parser.py +21 -5
  828. mindspore/rewrite/parsers/if_parser.py +11 -28
  829. mindspore/rewrite/parsers/module_parser.py +9 -6
  830. mindspore/rewrite/parsers/return_parser.py +3 -2
  831. mindspore/rewrite/sparsify/__init__.py +0 -0
  832. mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
  833. mindspore/rewrite/sparsify/sparsify.py +109 -0
  834. mindspore/rewrite/sparsify/utils.py +173 -0
  835. mindspore/rewrite/symbol_tree.py +322 -109
  836. mindspore/rewrite/symbol_tree_builder.py +45 -8
  837. mindspore/rewrite/symbol_tree_dumper.py +0 -1
  838. mindspore/rewrite/topological_manager.py +1 -2
  839. mindspore/run_check/_check_version.py +209 -112
  840. mindspore/run_check/run_check.py +2 -1
  841. mindspore/tbbmalloc.dll +0 -0
  842. mindspore/tinyxml2.dll +0 -0
  843. mindspore/train/__init__.py +6 -4
  844. mindspore/train/_utils.py +28 -5
  845. mindspore/train/amp.py +321 -50
  846. mindspore/train/callback/__init__.py +3 -1
  847. mindspore/train/callback/_backup_and_restore.py +120 -0
  848. mindspore/train/callback/_callback.py +8 -8
  849. mindspore/train/callback/_checkpoint.py +12 -9
  850. mindspore/train/callback/_early_stop.py +13 -7
  851. mindspore/train/callback/_history.py +8 -8
  852. mindspore/train/callback/_lambda_callback.py +6 -6
  853. mindspore/train/callback/_landscape.py +36 -38
  854. mindspore/train/callback/_loss_monitor.py +12 -6
  855. mindspore/train/callback/_lr_scheduler_callback.py +2 -4
  856. mindspore/train/callback/_on_request_exit.py +212 -0
  857. mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
  858. mindspore/train/callback/_summary_collector.py +27 -19
  859. mindspore/train/callback/_time_monitor.py +13 -7
  860. mindspore/train/checkpoint_pb2.py +68 -8
  861. mindspore/train/data_sink.py +122 -33
  862. mindspore/train/dataset_helper.py +28 -87
  863. mindspore/train/loss_scale_manager.py +4 -7
  864. mindspore/{nn → train}/metrics/__init__.py +20 -20
  865. mindspore/{nn → train}/metrics/accuracy.py +12 -10
  866. mindspore/{nn → train}/metrics/auc.py +4 -4
  867. mindspore/{nn → train}/metrics/bleu_score.py +4 -4
  868. mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
  869. mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
  870. mindspore/{nn → train}/metrics/dice.py +6 -5
  871. mindspore/{nn → train}/metrics/error.py +7 -5
  872. mindspore/{nn → train}/metrics/fbeta.py +9 -7
  873. mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
  874. mindspore/{nn → train}/metrics/loss.py +4 -3
  875. mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
  876. mindspore/{nn → train}/metrics/metric.py +6 -5
  877. mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
  878. mindspore/{nn → train}/metrics/perplexity.py +5 -4
  879. mindspore/{nn → train}/metrics/precision.py +5 -4
  880. mindspore/{nn → train}/metrics/recall.py +5 -4
  881. mindspore/{nn → train}/metrics/roc.py +7 -6
  882. mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
  883. mindspore/{nn → train}/metrics/topk.py +7 -5
  884. mindspore/train/mind_ir_pb2.py +339 -32
  885. mindspore/train/model.py +113 -84
  886. mindspore/train/serialization.py +547 -167
  887. mindspore/train/summary/_summary_adapter.py +1 -1
  888. mindspore/train/summary/summary_record.py +43 -12
  889. mindspore/train/train_thor/convert_utils.py +7 -1
  890. mindspore/train/train_thor/dataset_helper.py +3 -3
  891. mindspore/train/train_thor/model_thor.py +0 -4
  892. mindspore/turbojpeg.dll +0 -0
  893. mindspore/vcmeta.dll +0 -0
  894. mindspore/vcruntime140.dll +0 -0
  895. mindspore/vcruntime140_1.dll +0 -0
  896. mindspore/version.py +1 -1
  897. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
  898. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +901 -660
  899. mindspore/compression/common/constant.py +0 -124
  900. mindspore/compression/export/__init__.py +0 -19
  901. mindspore/compression/export/quant_export.py +0 -514
  902. mindspore/compression/quant/qat.py +0 -636
  903. mindspore/compression/quant/quant_utils.py +0 -462
  904. mindspore/compression/quant/quantizer.py +0 -68
  905. mindspore/libatomic-1.dll +0 -0
  906. mindspore/libgcc_s_seh-1.dll +0 -0
  907. mindspore/libgfortran-4.dll +0 -0
  908. mindspore/libgomp-1.dll +0 -0
  909. mindspore/libjpeg-62.dll +0 -0
  910. mindspore/libmindspore.dll +0 -0
  911. mindspore/libmindspore_common.dll +0 -0
  912. mindspore/libmindspore_core.dll +0 -0
  913. mindspore/libmindspore_glog.dll +0 -0
  914. mindspore/libnnacl.dll +0 -0
  915. mindspore/libopencv_core452.dll +0 -0
  916. mindspore/libopencv_imgcodecs452.dll +0 -0
  917. mindspore/libopencv_imgproc452.dll +0 -0
  918. mindspore/libquadmath-0.dll +0 -0
  919. mindspore/libsqlite3.dll +0 -0
  920. mindspore/libssp-0.dll +0 -0
  921. mindspore/libstdc++-6.dll +0 -0
  922. mindspore/libtinyxml2.dll +0 -0
  923. mindspore/libturbojpeg.dll +0 -0
  924. mindspore/libwinpthread-1.dll +0 -0
  925. mindspore/nn/layer/quant.py +0 -1868
  926. mindspore/nn/layer/rnn_utils.py +0 -90
  927. mindspore/nn/probability/dpn/__init__.py +0 -22
  928. mindspore/nn/probability/dpn/vae/__init__.py +0 -25
  929. mindspore/nn/probability/dpn/vae/cvae.py +0 -138
  930. mindspore/nn/probability/dpn/vae/vae.py +0 -122
  931. mindspore/nn/probability/infer/__init__.py +0 -22
  932. mindspore/nn/probability/infer/variational/elbo.py +0 -70
  933. mindspore/nn/probability/infer/variational/svi.py +0 -84
  934. mindspore/nn/probability/toolbox/__init__.py +0 -22
  935. mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
  936. mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
  937. mindspore/nn/probability/transforms/__init__.py +0 -22
  938. mindspore/nn/probability/transforms/transform_bnn.py +0 -262
  939. mindspore/nn/probability/zhusuan/__init__.py +0 -18
  940. mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
  941. mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
  942. mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
  943. mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
  944. mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
  945. mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
  946. mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
  947. mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
  948. mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
  949. mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
  950. mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
  951. mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
  952. mindspore/ops/composite/array_ops.py +0 -210
  953. mindspore/ops/composite/clip_ops.py +0 -238
  954. mindspore/ops/composite/random_ops.py +0 -426
  955. mindspore/ops/composite/vmap_ops.py +0 -38
  956. mindspore/ops/operations/sponge_ops.py +0 -3531
  957. mindspore/ops/operations/sponge_update_ops.py +0 -2546
  958. mindspore/parallel/nn/__init__.py +0 -42
  959. mindspore/parallel/nn/loss.py +0 -22
  960. mindspore/parallel/nn/moe.py +0 -21
  961. mindspore/parallel/nn/op_parallel_config.py +0 -22
  962. mindspore/parallel/nn/transformer.py +0 -31
  963. mindspore/run_check/_check_deps_version.py +0 -84
  964. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
  965. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
  966. {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
@@ -32,14 +32,14 @@ import mindspore._c_dataengine as cde
32
32
 
33
33
  from .datasets import VisionBaseDataset, SourceDataset, MappableDataset, Shuffle, Schema
34
34
  from .datasets_user_defined import GeneratorDataset
35
- from .validators import check_imagefolderdataset, check_kittidataset,\
36
- check_mnist_cifar_dataset, check_manifestdataset, check_vocdataset, check_cocodataset, \
37
- check_celebadataset, check_flickr_dataset, check_sb_dataset, check_flowers102dataset, check_cityscapes_dataset, \
38
- check_usps_dataset, check_div2k_dataset, check_random_dataset, \
39
- check_sbu_dataset, check_qmnist_dataset, check_emnist_dataset, check_fake_image_dataset, check_places365_dataset, \
40
- check_photo_tour_dataset, check_svhn_dataset, check_stl10_dataset, check_semeion_dataset, \
41
- check_caltech101_dataset, check_caltech256_dataset, check_wider_face_dataset, check_lfw_dataset, \
42
- check_lsun_dataset, check_omniglotdataset
35
+ from .validators import check_caltech101_dataset, check_caltech256_dataset, check_celebadataset, \
36
+ check_cityscapes_dataset, check_cocodataset, check_div2k_dataset, check_emnist_dataset, check_fake_image_dataset, \
37
+ check_flickr_dataset, check_flowers102dataset, check_food101_dataset, check_imagefolderdataset, \
38
+ check_kittidataset, check_lfw_dataset, check_lsun_dataset, check_manifestdataset, check_mnist_cifar_dataset, \
39
+ check_omniglotdataset, check_photo_tour_dataset, check_places365_dataset, check_qmnist_dataset, \
40
+ check_random_dataset, check_rendered_sst2_dataset, check_sb_dataset, check_sbu_dataset, check_semeion_dataset, \
41
+ check_stl10_dataset, check_sun397_dataset, check_svhn_dataset, check_usps_dataset, check_vocdataset, \
42
+ check_wider_face_dataset
43
43
 
44
44
  from ..core.validator_helpers import replace_none
45
45
 
@@ -108,12 +108,14 @@ class _Caltech101Dataset:
108
108
 
109
109
  class Caltech101Dataset(GeneratorDataset):
110
110
  """
111
- A source dataset that reads and parses Caltech101 dataset.
111
+ Caltech 101 dataset.
112
+
113
+ The columns of the generated dataset depend on the value of `target_type` .
114
+
115
+ - When `target_type` is 'category', the columns are :py:obj:`[image, category]` .
116
+ - When `target_type` is 'annotation', the columns are :py:obj:`[image, annotation]` .
117
+ - When `target_type` is 'all', the columns are :py:obj:`[image, category, annotation]` .
112
118
 
113
- The columns of the generated dataset depend on the value of `target_type`.
114
- When `target_type` is 'category', the columns are :py:obj:`[image, category]`.
115
- When `target_type` is 'annotation', the columns are :py:obj:`[image, annotation]`.
116
- When `target_type` is 'all', the columns are :py:obj:`[image, category, annotation]`.
117
119
  The tensor of column :py:obj:`image` is of the uint8 type.
118
120
  The tensor of column :py:obj:`category` is of the uint32 type.
119
121
  The tensor of column :py:obj:`annotation` is a 2-dimensional ndarray that stores the contour of the image
@@ -125,33 +127,33 @@ class Caltech101Dataset(GeneratorDataset):
125
127
  and the other is called Annotations, which stores annotations.
126
128
  target_type (str, optional): Target of the image. If `target_type` is 'category', return category represents
127
129
  the target class. If `target_type` is 'annotation', return annotation.
128
- If `target_type` is 'all', return category and annotation (default=None, means 'category').
129
- num_samples (int, optional): The number of images to be included in the dataset
130
- (default=None, all images).
131
- num_parallel_workers (int, optional): Number of workers to read the data (default=1).
132
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
133
- (default=None, expected order behavior shown in the table).
134
- decode (bool, optional): Whether or not to decode the images after reading (default=False).
130
+ If `target_type` is 'all', return category and annotation. Default: None, means 'category'.
131
+ num_samples (int, optional): The number of images to be included in the dataset.
132
+ Default: None, all images.
133
+ num_parallel_workers (int, optional): Number of worker subprocesses to read the data. Default: 1.
134
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
135
+ Default: None, expected order behavior shown in the table below.
136
+ decode (bool, optional): Whether or not to decode the images after reading. Default: False.
135
137
  sampler (Sampler, optional): Object used to choose samples from the
136
- dataset (default=None, expected order behavior shown in the table).
138
+ dataset. Default: None, expected order behavior shown in the table below.
137
139
  num_shards (int, optional): Number of shards that the dataset will be divided
138
- into (default=None). When this argument is specified, `num_samples` reflects
140
+ into. Default: None. When this argument is specified, `num_samples` reflects
139
141
  the maximum number of samples per shard.
140
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
142
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
141
143
  argument can only be specified when `num_shards` is also specified.
142
144
 
143
145
  Raises:
144
146
  RuntimeError: If `dataset_dir` does not contain data files.
145
- ValueError: If `target_type` is not set correctly.
146
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
147
147
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
148
148
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
149
149
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
150
150
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
151
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
151
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
152
+ ValueError: If `target_type` is not set correctly.
153
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
152
154
 
153
155
  Note:
154
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
156
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
155
157
  The table below shows what input arguments are allowed and their expected behavior.
156
158
 
157
159
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -191,11 +193,13 @@ class Caltech101Dataset(GeneratorDataset):
191
193
 
192
194
  About Caltech101Dataset:
193
195
 
194
- Pictures of objects belonging to 101 categories. About 40 to 800 images per category.
195
- Most categories have about 50 images. Collected in September 2003 by Fei-Fei Li, Marco Andreetto,
196
- and Marc 'Aurelio Ranzato. The size of each image is roughly 300 x 200 pixels.
196
+ Pictures of objects belonging to 101 categories, about 40 to 800 images per category.
197
+ Most categories have about 50 images. The size of each image is roughly 300 x 200 pixels.
197
198
  The official dataset provides the contour data of each object in each picture, which serves as the annotation.
198
199
 
200
+ Here is the original Caltech101 dataset structure,
201
+ and you can unzip the dataset files into the following directory structure, so that they can be read by the MindSpore API.
202
+
199
203
  .. code-block::
200
204
 
201
205
  .
@@ -276,44 +280,45 @@ class Caltech101Dataset(GeneratorDataset):
276
280
 
277
281
  class Caltech256Dataset(MappableDataset, VisionBaseDataset):
278
282
  """
279
- A source dataset that reads and parses Caltech256 dataset.
283
+ Caltech 256 dataset.
280
284
 
281
- The generated dataset has two columns: :py:obj:`[image, label]`.
285
+ The generated dataset has two columns: :py:obj:`[image, label]` .
282
286
  The tensor of column :py:obj:`image` is of the uint8 type.
283
287
  The tensor of column :py:obj:`label` is of the uint32 type.
284
288
 
285
289
  Args:
286
290
  dataset_dir (str): Path to the root directory that contains the dataset.
287
- num_samples (int, optional): The number of images to be included in the dataset
288
- (default=None, all images).
289
- num_parallel_workers (int, optional): Number of workers to read the data
290
- (default=None, set in the config).
291
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
292
- (default=None, expected order behavior shown in the table).
293
- decode (bool, optional): Whether or not to decode the images after reading (default=False).
291
+ num_samples (int, optional): The number of images to be included in the dataset.
292
+ Default: None, all images.
293
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
294
+ Default: None, will use global default workers(8), it can be set
295
+ by `mindspore.dataset.config.set_num_parallel_workers` .
296
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
297
+ Default: None, expected order behavior shown in the table below.
298
+ decode (bool, optional): Whether or not to decode the images after reading. Default: False.
294
299
  sampler (Sampler, optional): Object used to choose samples from the
295
- dataset (default=None, expected order behavior shown in the table).
300
+ dataset. Default: None, expected order behavior shown in the table below.
296
301
  num_shards (int, optional): Number of shards that the dataset will be divided
297
- into (default=None). When this argument is specified, `num_samples` reflects
302
+ into. Default: None. When this argument is specified, `num_samples` reflects
298
303
  the maximum number of samples per shard.
299
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
304
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
300
305
  argument can only be specified when `num_shards` is also specified.
301
306
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
302
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
303
- (default=None, which means no cache is used).
307
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
308
+ Default: None, which means no cache is used.
304
309
 
305
310
  Raises:
306
311
  RuntimeError: If `dataset_dir` does not contain data files.
307
- ValueError: If `target_type` is not 'category', 'annotation' or 'all'.
308
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
309
312
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
310
313
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
311
314
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
312
315
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
313
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
316
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
317
+ ValueError: If `target_type` is not 'category', 'annotation' or 'all'.
318
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
314
319
 
315
320
  Note:
316
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
321
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
317
322
  The table below shows what input arguments are allowed and their expected behavior.
318
323
 
319
324
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -404,48 +409,49 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
404
409
 
405
410
  class CelebADataset(MappableDataset, VisionBaseDataset):
406
411
  """
407
- A source dataset that reads and parses CelebA dataset.
408
- Only support to read `list_attr_celeba.txt` currently, which is the attribute annotations of the dataset.
412
+ CelebA(CelebFaces Attributes) dataset.
409
413
 
410
- The generated dataset has two columns: :py:obj:`[image, attr]`.
414
+ Currently only reading `list_attr_celeba.txt` is supported, which contains the attribute annotations of the dataset.
415
+ The generated dataset has two columns: :py:obj:`[image, attr]` .
411
416
  The tensor of column :py:obj:`image` is of the uint8 type.
412
417
  The tensor of column :py:obj:`attr` is of the uint32 type and one hot encoded.
413
418
 
414
419
  Args:
415
420
  dataset_dir (str): Path to the root directory that contains the dataset.
416
- num_parallel_workers (int, optional): Number of workers to read the data (default=None, will use value set in
417
- the config).
418
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None).
419
- usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset
420
- (default= 'all', will read all samples).
421
- sampler (Sampler, optional): Object used to choose samples from the dataset (default=None).
422
- decode (bool, optional): Whether to decode the images after reading (default=False).
423
- extensions (list[str], optional): List of file extensions to be included in the dataset (default=None).
424
- num_samples (int, optional): The number of images to be included in the dataset
425
- (default=None, will include all images).
421
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
422
+ Default: None, will use global default workers(8), it can be set
423
+ by `mindspore.dataset.config.set_num_parallel_workers` .
424
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None.
425
+ usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset.
426
+ Default: 'all', will read all samples.
427
+ sampler (Sampler, optional): Object used to choose samples from the dataset. Default: None.
428
+ decode (bool, optional): Whether to decode the images after reading. Default: False.
429
+ extensions (list[str], optional): List of file extensions to be included in the dataset. Default: None.
430
+ num_samples (int, optional): The number of images to be included in the dataset.
431
+ Default: None, will include all images.
426
432
  num_shards (int, optional): Number of shards that the dataset will be divided
427
- into (default=None). When this argument is specified, `num_samples` reflects
433
+ into. Default: None. When this argument is specified, `num_samples` reflects
428
434
  the maximum number of samples per shard.
429
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
435
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
430
436
  argument can only be specified when `num_shards` is also specified.
431
437
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
432
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
433
- (default=None, which means no cache is used).
438
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
439
+ Default: None, which means no cache is used.
434
440
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
435
441
  and returns the decrypted bytes data. Default: None, no decryption.
436
442
 
437
443
  Raises:
438
444
  RuntimeError: If `dataset_dir` does not contain data files.
439
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
440
- ValueError: If `usage` is not 'train', 'valid', 'test' or 'all'.
441
445
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
442
446
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
443
447
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
444
448
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
445
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
449
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
450
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
451
+ ValueError: If `usage` is not 'train', 'valid', 'test' or 'all'.
446
452
 
447
453
  Note:
448
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
454
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
449
455
  The table below shows what input arguments are allowed and their expected behavior.
450
456
 
451
457
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -574,47 +580,48 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
574
580
 
575
581
  class Cifar10Dataset(MappableDataset, VisionBaseDataset):
576
582
  """
577
- A source dataset that reads and parses Cifar10 dataset.
578
- This api only supports parsing Cifar10 file in binary version now.
583
+ CIFAR-10 dataset.
579
584
 
580
- The generated dataset has two columns :py:obj:`[image, label]`.
585
+ This API currently only supports parsing the binary version of the CIFAR-10 files.
586
+ The generated dataset has two columns :py:obj:`[image, label]` .
581
587
  The tensor of column :py:obj:`image` is of the uint8 type.
582
588
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.
583
589
 
584
590
  Args:
585
591
  dataset_dir (str): Path to the root directory that contains the dataset.
586
592
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 50,000
587
- train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples
588
- (default=None, all samples).
589
- num_samples (int, optional): The number of images to be included in the dataset
590
- (default=None, all images).
591
- num_parallel_workers (int, optional): Number of workers to read the data
592
- (default=None, number set in the config).
593
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
594
- order behavior shown in the table).
593
+ train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples.
594
+ Default: None, all samples.
595
+ num_samples (int, optional): The number of images to be included in the dataset.
596
+ Default: None, all images.
597
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
598
+ Default: None, will use global default workers(8), it can be set
599
+ by `mindspore.dataset.config.set_num_parallel_workers` .
600
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
601
+ order behavior shown in the table below.
595
602
  sampler (Sampler, optional): Object used to choose samples from the
596
- dataset (default=None, expected order behavior shown in the table).
603
+ dataset. Default: None, expected order behavior shown in the table below.
597
604
  num_shards (int, optional): Number of shards that the dataset will be divided
598
- into (default=None). When this argument is specified, `num_samples` reflects
605
+ into. Default: None. When this argument is specified, `num_samples` reflects
599
606
  the maximum number of samples per shard.
600
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
607
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
601
608
  argument can only be specified when `num_shards` is also specified.
602
609
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
603
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
604
- (default=None, which means no cache is used).
610
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
611
+ Default: None, which means no cache is used.
605
612
 
606
613
  Raises:
607
614
  RuntimeError: If `dataset_dir` does not contain data files.
608
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
609
- ValueError: If `usage` is not 'train', 'test' or 'all'.
610
615
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
611
616
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
612
617
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
613
618
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
614
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
619
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
620
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
621
+ ValueError: If `usage` is not 'train', 'test' or 'all'.
615
622
 
616
623
  Note:
617
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
624
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
618
625
  The table below shows what input arguments are allowed and their expected behavior.
619
626
 
620
627
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -707,46 +714,47 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
707
714
 
708
715
  class Cifar100Dataset(MappableDataset, VisionBaseDataset):
709
716
  """
710
- A source dataset that reads and parses Cifar100 dataset.
717
+ CIFAR-100 dataset.
711
718
 
712
- The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]`.
719
+ The generated dataset has three columns :py:obj:`[image, coarse_label, fine_label]` .
713
720
  The tensor of column :py:obj:`image` is of the uint8 type.
714
721
  The tensors of columns :py:obj:`coarse_label` and :py:obj:`fine_label` are each a scalar of the uint32 type.
715
722
 
716
723
  Args:
717
724
  dataset_dir (str): Path to the root directory that contains the dataset.
718
725
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 50,000
719
- train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples
720
- (default=None, all samples).
721
- num_samples (int, optional): The number of images to be included in the dataset
722
- (default=None, all images).
723
- num_parallel_workers (int, optional): Number of workers to read the data
724
- (default=None, number set in the config).
725
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
726
- order behavior shown in the table).
726
+ train samples, 'test' will read from 10,000 test samples, 'all' will read from all 60,000 samples.
727
+ Default: None, all samples.
728
+ num_samples (int, optional): The number of images to be included in the dataset.
729
+ Default: None, all images.
730
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
731
+ Default: None, will use global default workers(8), it can be set
732
+ by `mindspore.dataset.config.set_num_parallel_workers` .
733
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
734
+ order behavior shown in the table below.
727
735
  sampler (Sampler, optional): Object used to choose samples from the
728
- dataset (default=None, expected order behavior shown in the table).
736
+ dataset. Default: None, expected order behavior shown in the table below.
729
737
  num_shards (int, optional): Number of shards that the dataset will be divided
730
- into (default=None). When this argument is specified, 'num_samples' reflects
738
+ into. Default: None. When this argument is specified, `num_samples` reflects
731
739
  the maximum number of samples per shard.
732
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
740
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
733
741
  argument can only be specified when `num_shards` is also specified.
734
742
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
735
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
736
- (default=None, which means no cache is used).
743
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
744
+ Default: None, which means no cache is used.
737
745
 
738
746
  Raises:
739
747
  RuntimeError: If `dataset_dir` does not contain data files.
740
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
741
- ValueError: If `usage` is not 'train', 'test' or 'all'.
742
748
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
743
749
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
744
750
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
745
751
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
746
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
752
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
753
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
754
+ ValueError: If `usage` is not 'train', 'test' or 'all'.
747
755
 
748
756
  Note:
749
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
757
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
750
758
  The table below shows what input arguments are allowed and their expected behavior.
751
759
 
752
760
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -833,52 +841,54 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
833
841
 
834
842
  class CityscapesDataset(MappableDataset, VisionBaseDataset):
835
843
  """
836
- A source dataset that reads and parses Cityscapes dataset.
844
+ Cityscapes dataset.
837
845
 
838
- The generated dataset has two columns :py:obj:`[image, task]`.
846
+ The generated dataset has two columns :py:obj:`[image, task]` .
839
847
  The tensor of column :py:obj:`image` is of the uint8 type.
840
848
  The tensor of column :py:obj:`task` is of the uint8 type if task is not 'polygon'; otherwise it is
841
849
  a string tensor with serialized JSON.
842
850
 
843
851
  Args:
844
852
  dataset_dir (str): Path to the root directory that contains the dataset.
845
- usage (str): Acceptable usages include 'train', 'test', 'val' or 'all' if quality_mode is 'fine'
846
- otherwise 'train', 'train_extra', 'val' or 'all' (default= 'train').
847
- quality_mode (str): Acceptable quality_modes include 'fine' or 'coarse' (default= 'fine').
848
- task (str): Acceptable tasks include 'instance', 'semantic', 'polygon' or 'color' (default= 'instance').
853
+ usage (str, optional): Acceptable usages include 'train', 'test', 'val' or 'all' if quality_mode is 'fine'
854
+ otherwise 'train', 'train_extra', 'val' or 'all'. Default: 'train'.
855
+ quality_mode (str, optional): Acceptable quality_modes include 'fine' or 'coarse'. Default: 'fine'.
856
+ task (str, optional): Acceptable tasks include 'instance',
857
+ 'semantic', 'polygon' or 'color'. Default: 'instance'.
849
858
  num_samples (int, optional): The number of images to be included in the dataset.
850
- (default=None, all images).
851
- num_parallel_workers (int, optional): Number of workers to read the data
852
- (default=None, number set in the config).
853
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
854
- order behavior shown in the table).
855
- decode (bool, optional): Decode the images after reading (default=False).
859
+ Default: None, all images.
860
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
861
+ Default: None, will use global default workers(8), it can be set
862
+ by `mindspore.dataset.config.set_num_parallel_workers` .
863
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
864
+ order behavior shown in the table below.
865
+ decode (bool, optional): Decode the images after reading. Default: False.
856
866
  sampler (Sampler, optional): Object used to choose samples from the
857
- dataset (default=None, expected order behavior shown in the table).
867
+ dataset. Default: None, expected order behavior shown in the table below.
858
868
  num_shards (int, optional): Number of shards that the dataset will be divided
859
- into (default=None). When this argument is specified, `num_samples` reflects
869
+ into. Default: None. When this argument is specified, `num_samples` reflects
860
870
  the maximum number of samples per shard.
861
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
871
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
862
872
  argument can only be specified when `num_shards` is also specified.
863
873
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
864
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
865
- (default=None, which means no cache is used).
874
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
875
+ Default: None, which means no cache is used.
866
876
 
867
877
  Raises:
868
878
  RuntimeError: If `dataset_dir` is invalid or does not contain data files.
869
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
870
879
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
871
880
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
872
881
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
873
882
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
883
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
874
884
  ValueError: If `dataset_dir` does not exist.
875
885
  ValueError: If `task` is invalid.
876
886
  ValueError: If `quality_mode` is invalid.
877
887
  ValueError: If `usage` is invalid.
878
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
888
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
879
889
 
880
890
  Note:
881
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
891
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
882
892
  The table below shows what input arguments are allowed and their expected behavior.
883
893
 
884
894
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1004,7 +1014,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
1004
1014
 
1005
1015
  class CocoDataset(MappableDataset, VisionBaseDataset):
1006
1016
  """
1007
- A source dataset that reads and parses COCO dataset.
1017
+ COCO(Common Objects in Context) dataset.
1008
1018
 
1009
1019
  CocoDataset supports five kinds of tasks, which are Object Detection, Keypoint Detection, Stuff Segmentation,
1010
1020
  Panoptic Segmentation and Captioning of 2017 Train/Val/Test dataset.
@@ -1013,26 +1023,27 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
1013
1023
  dataset_dir (str): Path to the root directory that contains the dataset.
1014
1024
  annotation_file (str): Path to the annotation JSON file.
1015
1025
  task (str, optional): Set the task type for reading COCO data. Supported task types:
1016
- 'Detection', 'Stuff', 'Panoptic', 'Keypoint' and 'Captioning' (default='Detection').
1017
- num_samples (int, optional): The number of images to be included in the dataset
1018
- (default=None, all images).
1019
- num_parallel_workers (int, optional): Number of workers to read the data
1020
- (default=None, number set in the configuration file).
1021
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
1022
- order behavior shown in the table).
1023
- decode (bool, optional): Decode the images after reading (default=False).
1024
- sampler (Sampler, optional): Object used to choose samples from the dataset
1025
- (default=None, expected order behavior shown in the table).
1026
+ 'Detection', 'Stuff', 'Panoptic', 'Keypoint' and 'Captioning'. Default: 'Detection'.
1027
+ num_samples (int, optional): The number of images to be included in the dataset.
1028
+ Default: None, all images.
1029
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1030
+ Default: None, will use global default workers(8), it can be set
1031
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1032
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
1033
+ order behavior shown in the table below.
1034
+ decode (bool, optional): Decode the images after reading. Default: False.
1035
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
1036
+ Default: None, expected order behavior shown in the table below.
1026
1037
  num_shards (int, optional): Number of shards that the dataset will be divided
1027
- into (default=None). When this argument is specified, `num_samples` reflects
1038
+ into. Default: None. When this argument is specified, `num_samples` reflects
1028
1039
  the maximum number of samples per shard.
1029
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1040
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1030
1041
  argument can only be specified when `num_shards` is also specified.
1031
1042
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1032
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1033
- (default=None, which means no cache is used).
1043
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1044
+ Default: None, which means no cache is used.
1034
1045
  extra_metadata (bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
1035
- output at the end :py:obj:`[_meta-filename, dtype=string]` (default=False).
1046
+ output at the end :py:obj:`[_meta-filename, dtype=string]` . Default: False.
1036
1047
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
1037
1048
  and returns the decrypted bytes data. Default: None, no decryption.
1038
1049
 
@@ -1083,13 +1094,13 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
1083
1094
  ValueError: If `task` is not in ['Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'].
1084
1095
  ValueError: If `annotation_file` does not exist.
1085
1096
  ValueError: If `dataset_dir` does not exist.
1086
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
1097
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1087
1098
 
1088
1099
  Note:
1089
1100
  - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op is added
1090
1101
  to remove the prefix('_meta-').
1091
- - CocoDataset doesn't support PKSampler.
1092
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
1102
+ - `mindspore.dataset.PKSampler` is not yet supported for the `sampler` parameter.
1103
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
1093
1104
  The table below shows what input arguments are allowed and their expected behavior.
1094
1105
 
1095
1106
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1249,37 +1260,38 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
1249
1260
 
1250
1261
  class DIV2KDataset(MappableDataset, VisionBaseDataset):
1251
1262
  """
1252
- A source dataset that reads and parses DIV2KDataset dataset.
1263
+ DIV2K(DIVerse 2K resolution image) dataset.
1253
1264
 
1254
- The generated dataset has two columns :py:obj:`[hr_image, lr_image]`.
1265
+ The generated dataset has two columns :py:obj:`[hr_image, lr_image]` .
1255
1266
  The tensor of column :py:obj:`hr_image` and the tensor of column :py:obj:`lr_image` are of the uint8 type.
1256
1267
 
1257
1268
  Args:
1258
1269
  dataset_dir (str): Path to the root directory that contains the dataset.
1259
- usage (str, optional): Acceptable usages include 'train', 'valid' or 'all' (default= 'train').
1270
+ usage (str, optional): Acceptable usages include 'train', 'valid' or 'all'. Default: 'train'.
1260
1271
  downgrade (str, optional): Acceptable downgrades include 'bicubic', 'unknown', 'mild', 'difficult' or
1261
- 'wild' (default= 'bicubic').
1262
- scale (str, optional): Acceptable scales include 2, 3, 4 or 8 (default=2).
1272
+ 'wild'. Default: 'bicubic'.
1273
+ scale (int, optional): Acceptable scales include 2, 3, 4 or 8. Default: 2.
1263
1274
  When `downgrade` is 'bicubic', scale can be 2, 3, 4, 8.
1264
1275
  When `downgrade` is 'unknown', scale can only be 2, 3, 4.
1265
1276
  When `downgrade` is 'mild', 'difficult' or 'wild', scale can only be 4.
1266
1277
  num_samples (int, optional): The number of images to be included in the dataset.
1267
- (default=None, all images).
1268
- num_parallel_workers (int, optional): Number of workers to read the data
1269
- (default=None, number set in the config).
1270
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
1271
- order behavior shown in the table).
1272
- decode (bool, optional): Decode the images after reading (default=False).
1278
+ Default: None, all images.
1279
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1280
+ Default: None, will use global default workers(8), it can be set
1281
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1282
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
1283
+ order behavior shown in the table below.
1284
+ decode (bool, optional): Decode the images after reading. Default: False.
1273
1285
  sampler (Sampler, optional): Object used to choose samples from the
1274
- dataset (default=None, expected order behavior shown in the table).
1286
+ dataset. Default: None, expected order behavior shown in the table below.
1275
1287
  num_shards (int, optional): Number of shards that the dataset will be divided
1276
- into (default=None). When this argument is specified, `num_samples` reflects
1288
+ into. Default: None. When this argument is specified, `num_samples` reflects
1277
1289
  the maximum number of samples per shard.
1278
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1290
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1279
1291
  argument can only be specified when `num_shards` is also specified.
1280
1292
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1281
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1282
- (default=None, which means no cache is used).
1293
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1294
+ Default: None, which means no cache is used.
1283
1295
 
1284
1296
  Raises:
1285
1297
  RuntimeError: If `dataset_dir` is invalid or does not contain data files.
@@ -1294,10 +1306,10 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
1294
1306
  ValueError: If `scale` is invalid.
1295
1307
  ValueError: If `scale` equal to 8 and downgrade not equal to 'bicubic'.
1296
1308
  ValueError: If `downgrade` in ['mild', 'difficult', 'wild'] and `scale` not equal to 4.
1297
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
1309
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1298
1310
 
1299
1311
  Note:
1300
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
1312
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
1301
1313
  The table below shows what input arguments are allowed and their expected behavior.
1302
1314
 
1303
1315
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1437,9 +1449,9 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
1437
1449
 
1438
1450
  class EMnistDataset(MappableDataset, VisionBaseDataset):
1439
1451
  """
1440
- A source dataset that reads and parses the EMNIST dataset.
1452
+ EMNIST(Extended MNIST) dataset.
1441
1453
 
1442
- The generated dataset has two columns :py:obj:`[image, label]`.
1454
+ The generated dataset has two columns :py:obj:`[image, label]` .
1443
1455
  The tensor of column :py:obj:`image` is of the uint8 type.
1444
1456
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.
1445
1457
 
@@ -1447,33 +1459,35 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
1447
1459
  dataset_dir (str): Path to the root directory that contains the dataset.
1448
1460
  name (str): Name of splits for this dataset, can be 'byclass', 'bymerge', 'balanced', 'letters', 'digits'
1449
1461
  or 'mnist'.
1450
- usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'.
1451
- (default=None, will read all samples).
1452
- num_samples (int, optional): The number of images to be included in the dataset
1453
- (default=None, will read all images).
1454
- num_parallel_workers (int, optional): Number of workers to read the data
1455
- (default=None, will use value set in the config).
1456
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
1457
- (default=None, expected order behavior shown in the table).
1462
+ usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 60,000
1463
+ train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
1464
+ Default: None, will read all samples.
1465
+ num_samples (int, optional): The number of images to be included in the dataset.
1466
+ Default: None, will read all images.
1467
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1468
+ Default: None, will use global default workers(8), it can be set
1469
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1470
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
1471
+ Default: None, expected order behavior shown in the table below.
1458
1472
  sampler (Sampler, optional): Object used to choose samples from the
1459
- dataset (default=None, expected order behavior shown in the table).
1460
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
1473
+ dataset. Default: None, expected order behavior shown in the table below.
1474
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
1461
1475
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
1462
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1476
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1463
1477
  argument can only be specified when `num_shards` is also specified.
1464
1478
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1465
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1466
- (default=None, which means no cache is used).
1479
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1480
+ Default: None, which means no cache is used.
1467
1481
 
1468
1482
  Raises:
1469
1483
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
1470
1484
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
1471
1485
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
1472
1486
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1473
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
1487
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1474
1488
 
1475
1489
  Note:
1476
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
1490
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
1477
1491
  The table below shows what input arguments are allowed and their expected behavior.
1478
1492
 
1479
1493
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1572,44 +1586,45 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
1572
1586
  """
1573
1587
  A source dataset for generating fake images.
1574
1588
 
1575
- The generated dataset has two columns :py:obj:`[image, label]`.
1589
+ The generated dataset has two columns :py:obj:`[image, label]` .
1576
1590
  The tensor of column :py:obj:`image` is of the uint8 type.
1577
- The tensor of column :py:obj:`label` is a scalar of the uint32 type.
1591
+ The column :py:obj:`label` is a scalar of the uint32 type.
1578
1592
 
1579
1593
  Args:
1580
- num_images (int, optional): Number of images to generate in the dataset (default=1000).
1581
- image_size (tuple, optional): Size of the fake image (default=(224, 224, 3)).
1582
- num_classes (int, optional): Number of classes in the dataset (default=10).
1583
- base_seed (int, optional): Offsets the index-based random seed used to generate each image (default=0).
1584
- num_samples (int, optional): The number of images to be included in the dataset
1585
- (default=None, will read all images).
1586
- num_parallel_workers (int, optional): Number of workers to read the data
1587
- (default=None, will use value set in the config).
1588
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
1589
- (default=None, expected order behavior shown in the table).
1594
+ num_images (int, optional): Number of images to generate in the dataset. Default: 1000.
1595
+ image_size (tuple, optional): Size of the fake image. Default: (224, 224, 3).
1596
+ num_classes (int, optional): Number of classes in the dataset. Default: 10.
1597
+ base_seed (int, optional): Offsets the index-based random seed used to generate each image. Default: 0.
1598
+ num_samples (int, optional): The number of images to be included in the dataset.
1599
+ Default: None, will read all images.
1600
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1601
+ Default: None, will use global default workers(8), it can be set
1602
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1603
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
1604
+ Default: None, expected order behavior shown in the table below.
1590
1605
  sampler (Sampler, optional): Object used to choose samples from the
1591
- dataset (default=None, expected order behavior shown in the table).
1592
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
1606
+ dataset. Default: None, expected order behavior shown in the table below.
1607
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
1593
1608
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
1594
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1609
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1595
1610
  argument can only be specified when `num_shards` is also specified.
1596
1611
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1597
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1598
- (default=None, which means no cache is used).
1612
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1613
+ Default: None, which means no cache is used.
1599
1614
 
1600
1615
  Raises:
1601
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1602
1616
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
1603
1617
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
1604
1618
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
1605
1619
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1606
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
1620
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1621
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1607
1622
 
1608
1623
  Note:
1609
- - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
1624
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
1610
1625
  The table below shows what input arguments are allowed and their expected behavior.
1611
1626
 
1612
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
1627
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
1613
1628
  :widths: 25 25 50
1614
1629
  :header-rows: 1
1615
1630
 
@@ -1639,8 +1654,6 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
1639
1654
  >>> # Read 3 samples from FakeImage dataset
1640
1655
  >>> dataset = ds.FakeImageDataset(num_images=1000, image_size=(224,224,3),
1641
1656
  ... num_classes=10, base_seed=0, num_samples=3)
1642
- >>>
1643
- >>> # Note: In FakeImage dataset, each dictionary has keys "image" and "label"
1644
1657
  """
1645
1658
 
1646
1659
  @check_fake_image_dataset
@@ -1660,44 +1673,45 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
1660
1673
 
1661
1674
  class FashionMnistDataset(MappableDataset, VisionBaseDataset):
1662
1675
  """
1663
- A source dataset that reads and parses the FASHION-MNIST dataset.
1676
+ Fashion-MNIST dataset.
1664
1677
 
1665
- The generated dataset has two columns :py:obj:`[image, label]`.
1678
+ The generated dataset has two columns :py:obj:`[image, label]` .
1666
1679
  The tensor of column :py:obj:`image` is of the uint8 type.
1667
- The tensor of column :py:obj:`label` is a scalar of the uint32 type.
1680
+ The column :py:obj:`label` is a scalar of the uint32 type.
1668
1681
 
1669
1682
  Args:
1670
1683
  dataset_dir (str): Path to the root directory that contains the dataset.
1671
1684
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 60,000
1672
1685
  train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
1673
- (default=None, will read all samples)
1674
- num_samples (int, optional): The number of images to be included in the dataset
1675
- (default=None, will read all images).
1676
- num_parallel_workers (int, optional): Number of workers to read the data
1677
- (default=None, will use value set in the config).
1678
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
1679
- (default=None, expected order behavior shown in the table).
1680
- sampler (Sampler, optional): Object used to choose samples from the
1681
- dataset (default=None, expected order behavior shown in the table).
1682
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
1686
+ Default: None, will read all samples.
1687
+ num_samples (int, optional): The number of images to be included in the dataset.
1688
+ Default: None, will read all images.
1689
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1690
+ Default: None, will use global default workers(8), it can be set
1691
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1692
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
1693
+ Default: None, expected order behavior shown in the table below.
1694
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
1695
+ Default: None, expected order behavior shown in the table below.
1696
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
1683
1697
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
1684
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1698
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1685
1699
  argument can only be specified when `num_shards` is also specified.
1686
1700
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1687
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1688
- (default=None, which means no cache is used).
1701
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1702
+ Default: None, which means no cache is used.
1689
1703
 
1690
1704
  Raises:
1691
1705
  RuntimeError: If `dataset_dir` does not contain data files.
1692
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1693
1706
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
1694
1707
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
1695
1708
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
1696
1709
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1697
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
1710
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1711
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1698
1712
 
1699
1713
  Note:
1700
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
1714
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
1701
1715
  The table below shows what input arguments are allowed and their expected behavior.
1702
1716
 
1703
1717
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -1782,9 +1796,9 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
1782
1796
 
1783
1797
  class FlickrDataset(MappableDataset, VisionBaseDataset):
1784
1798
  """
1785
- A source dataset that reads and parses Flickr8k and Flickr30k dataset.
1799
+ Flickr8k and Flickr30k datasets.
1786
1800
 
1787
- The generated dataset has two columns :py:obj:`[image, annotation]`.
1801
+ The generated dataset has two columns :py:obj:`[image, annotation]` .
1788
1802
  The tensor of column :py:obj:`image` is of the uint8 type.
1789
1803
  The tensor of column :py:obj:`annotation` is a tensor which contains 5 annotations string,
1790
1804
  such as ["a", "b", "c", "d", "e"].
@@ -1793,22 +1807,23 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
1793
1807
  dataset_dir (str): Path to the root directory that contains the dataset.
1794
1808
  annotation_file (str): Path to the root directory that contains the annotation.
1795
1809
  num_samples (int, optional): The number of images to be included in the dataset.
1796
- (default=None, all images).
1797
- num_parallel_workers (int, optional): Number of workers to read the data
1798
- (default=None, number set in the config).
1799
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
1800
- order behavior shown in the table).
1801
- decode (bool, optional): Decode the images after reading (default=None).
1810
+ Default: None, all images.
1811
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1812
+ Default: None, will use global default workers(8), it can be set
1813
+ by `mindspore.dataset.config.set_num_parallel_workers` .
1814
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
1815
+ order behavior shown in the table below.
1816
+ decode (bool, optional): Decode the images after reading. Default: None.
1802
1817
  sampler (Sampler, optional): Object used to choose samples from the
1803
- dataset (default=None, expected order behavior shown in the table).
1818
+ dataset. Default: None, expected order behavior shown in the table below.
1804
1819
  num_shards (int, optional): Number of shards that the dataset will be divided
1805
- into (default=None). When this argument is specified, `num_samples` reflects
1820
+ into. Default: None. When this argument is specified, `num_samples` reflects
1806
1821
  the max sample number of per shard.
1807
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
1822
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
1808
1823
  argument can only be specified when `num_shards` is also specified.
1809
1824
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1810
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
1811
- (default=None, which means no cache is used).
1825
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
1826
+ Default: None, which means no cache is used.
1812
1827
 
1813
1828
  Raises:
1814
1829
  RuntimeError: If `dataset_dir` is not valid or does not contain data files.
@@ -1819,10 +1834,10 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
1819
1834
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
1820
1835
  ValueError: If `dataset_dir` does not exist.
1821
1836
  ValueError: If `annotation_file` does not exist.
1822
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
1837
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1823
1838
 
1824
1839
  Note:
1825
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
1840
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
1826
1841
  The table below shows what input arguments are allowed and their expected behavior.
1827
1842
 
1828
1843
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2026,45 +2041,45 @@ class _Flowers102Dataset:
2026
2041
 
2027
2042
  class Flowers102Dataset(GeneratorDataset):
2028
2043
  """
2029
- A source dataset that reads and parses Flowers102 dataset.
2044
+ Oxford 102 Flower dataset.
2030
2045
 
2031
- The generated dataset has two columns :py:obj:`[image, label]` or three :py:obj:`[image, segmentation, label]`.
2032
- The tensor of column :py:obj:`image` is of the uint8 type.
2033
- The tensor of column :py:obj:`segmentation` is of the uint8 type.
2034
- The tensor of column :py:obj:`label` is a scalar or a tensor of the uint32 type.
2046
+ According to the given `task` configuration, the generated dataset has different output columns:
2047
+ - `task` = 'Classification', output columns: `[image, dtype=uint8]` , `[label, dtype=uint32]` .
2048
+ - `task` = 'Segmentation',
2049
+ output columns: `[image, dtype=uint8]` , `[segmentation, dtype=uint8]` , `[label, dtype=uint32]` .
2035
2050
 
2036
2051
  Args:
2037
2052
  dataset_dir (str): Path to the root directory that contains the dataset.
2038
- task (str): Specify the 'Classification' or 'Segmentation' task (default='Classification').
2039
- usage (str): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset
2040
- (default='all', will read all samples).
2041
- num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images).
2042
- num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1).
2043
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required.
2044
- (default=None, expected order behavior shown in the table).
2045
- decode (bool, optional): Whether or not to decode the images and segmentations after reading (default=False).
2046
- sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset. Random accessible
2047
- input is required (default=None, expected order behavior shown in the table).
2048
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
2049
- Random accessible input is required. When this argument is specified, 'num_samples' reflects the max
2050
- sample number of per shard.
2051
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument must be specified only
2052
- when num_shards is also specified. Random accessible input is required.
2053
+ task (str, optional): Specify the 'Classification' or 'Segmentation' task. Default: 'Classification'.
2054
+ usage (str, optional): Specify the 'train', 'valid', 'test' part or 'all' parts of dataset.
2055
+ Default: 'all', will read all samples.
2056
+ num_samples (int, optional): The number of samples to be included in the dataset. Default: None, all images.
2057
+ num_parallel_workers (int, optional): Number of worker subprocesses used to
2058
+ fetch the dataset in parallel. Default: 1.
2059
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
2060
+ Default: None, expected order behavior shown in the table below.
2061
+ decode (bool, optional): Whether or not to decode the images and segmentations after reading. Default: False.
2062
+ sampler (Union[Sampler, Iterable], optional): Object used to choose samples from the dataset.
2063
+ Default: None, expected order behavior shown in the table below.
2064
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
2065
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
2066
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument must be specified only
2067
+ when `num_shards` is also specified.
2053
2068
 
2054
2069
  Raises:
2055
2070
  RuntimeError: If `dataset_dir` does not contain data files.
2056
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
2057
2071
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
2058
2072
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
2059
2073
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
2060
2074
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
2061
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
2075
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
2076
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
2062
2077
 
2063
2078
  Note:
2064
- - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
2079
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2065
2080
  The table below shows what input arguments are allowed and their expected behavior.
2066
2081
 
2067
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
2082
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
2068
2083
  :widths: 25 25 50
2069
2084
  :header-rows: 1
2070
2085
 
@@ -2187,40 +2202,176 @@ class Flowers102Dataset(GeneratorDataset):
2187
2202
  return class_dict
2188
2203
 
2189
2204
 
2205
+ class Food101Dataset(MappableDataset, VisionBaseDataset):
2206
+ """
2207
+ Food101 dataset.
2208
+
2209
+ The generated dataset has two columns :py:obj:`[image, label]` .
2210
+ The tensor of column :py:obj:`image` is of the uint8 type.
2211
+ The tensor of column :py:obj:`label` is of the string type.
2212
+
2213
+ Args:
2214
+ dataset_dir (str): Path to the root directory that contains the dataset.
2215
+ usage (str, optional): Usage of this dataset, can be 'train', 'test', or 'all'. 'train' will read
2216
+ from 75,750 samples, 'test' will read from 25,250 samples, and 'all' will read all 'train'
2217
+ and 'test' samples. Default: None, will be set to 'all'.
2218
+ num_samples (int, optional): The number of images to be included in the dataset.
2219
+ Default: None, will read all images.
2220
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2221
+ Default: None, will use global default workers(8), it can be set
2222
+ by `mindspore.dataset.config.set_num_parallel_workers` .
2223
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
2224
+ Default: None, expected order behavior shown in the table below.
2225
+ decode (bool, optional): Decode the images after reading. Default: False.
2226
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
2227
+ Default: None, expected order behavior shown in the table below.
2228
+ num_shards (int, optional): Number of shards that the dataset will be divided into. When this argument
2229
+ is specified, `num_samples` reflects the maximum sample number of per shard. Default: None.
2230
+ shard_id (int, optional): The shard ID within `num_shards` . This argument can only be specified
2231
+ when `num_shards` is also specified. Default: None.
2232
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2233
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
2234
+ Default: None, which means no cache is used.
2235
+
2236
+ Raises:
2237
+ RuntimeError: If `dataset_dir` does not contain data files.
2238
+ RuntimeError: If `sampler` and `shuffle` are specified at the same time.
2239
+ RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
2240
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
2241
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
2242
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
2243
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
2244
+ ValueError: If the value of `usage` is not 'train', 'test', or 'all'.
2245
+ ValueError: If `dataset_dir` does not exist.
2246
+
2247
+ Note:
2248
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2249
+ The table below shows what input arguments are allowed and their expected behavior.
2250
+
2251
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
2252
+ :widths: 25 25 50
2253
+ :header-rows: 1
2254
+
2255
+ * - Parameter `sampler`
2256
+ - Parameter `shuffle`
2257
+ - Expected Order Behavior
2258
+ * - None
2259
+ - None
2260
+ - random order
2261
+ * - None
2262
+ - True
2263
+ - random order
2264
+ * - None
2265
+ - False
2266
+ - sequential order
2267
+ * - Sampler object
2268
+ - None
2269
+ - order defined by sampler
2270
+ * - Sampler object
2271
+ - True
2272
+ - not allowed
2273
+ * - Sampler object
2274
+ - False
2275
+ - not allowed
2276
+
2277
+ Examples:
2278
+ >>> food101_dataset_dir = "/path/to/food101_dataset_directory"
2279
+ >>>
2280
+ >>> # Read 3 samples from Food101 dataset
2281
+ >>> dataset = ds.Food101Dataset(dataset_dir=food101_dataset_dir, num_samples=3)
2282
+
2283
+ About Food101 dataset:
2284
+
2285
+ The Food101 is a dataset of 101 food categories, with 101,000 images.
2286
+ There are 250 test images and 750 training images in each class. All images were rescaled
2287
+ to have a maximum side length of 512 pixels.
2288
+
2289
+ The following is the original Food101 dataset structure.
2290
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
2291
+
2292
+ .. code-block::
2293
+
2294
+ .
2295
+ └── food101_dir
2296
+ ├── images
2297
+ │ ├── apple_pie
2298
+ │ │ ├── 1005649.jpg
2299
+ │ │ ├── 1014775.jpg
2300
+ │ │ ├──...
2301
+ │ ├── baby_back_ribs
2302
+ │ │ ├── 1005293.jpg
2303
+ │ │ ├── 1007102.jpg
2304
+ │ │ ├──...
2305
+ │ └──...
2306
+ └── meta
2307
+ ├── train.txt
2308
+ ├── test.txt
2309
+ ├── classes.txt
2310
+ ├── train.json
2311
+ ├── test.json
2312
+ └── labels.txt
2313
+
2314
+ Citation:
2315
+
2316
+ .. code-block::
2317
+
2318
+ @inproceedings{bossard14,
2319
+ title = {Food-101 -- Mining Discriminative Components with Random Forests},
2320
+ author = {Bossard, Lukas and Guillaumin, Matthieu and Van Gool, Luc},
2321
+ booktitle = {European Conference on Computer Vision},
2322
+ year = {2014}
2323
+ }
2324
+ """
2325
+
2326
+ @check_food101_dataset
2327
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
2328
+ decode=False, sampler=None, num_shards=None, shard_id=None, cache=None):
2329
+ super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
2330
+ shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
2331
+
2332
+ self.dataset_dir = dataset_dir
2333
+ self.usage = replace_none(usage, "all")
2334
+ self.decode = replace_none(decode, False)
2335
+
2336
+ def parse(self, children=None):
2337
+ return cde.Food101Node(self.dataset_dir, self.usage, self.decode, self.sampler)
2338
+
2339
+
2190
2340
  class ImageFolderDataset(MappableDataset, VisionBaseDataset):
2191
2341
  """
2192
2342
  A source dataset that reads images from a tree of directories.
2193
2343
  All images within one folder have the same label.
2194
2344
 
2195
- The generated dataset has two columns: :py:obj:`[image, label]`.
2345
+ The generated dataset has two columns: :py:obj:`[image, label]` .
2196
2346
  The tensor of column :py:obj:`image` is of the uint8 type.
2197
2347
  The tensor of column :py:obj:`label` is of a scalar of uint32 type.
2198
2348
 
2199
2349
  Args:
2200
2350
  dataset_dir (str): Path to the root directory that contains the dataset.
2201
- num_samples (int, optional): The number of images to be included in the dataset
2202
- (default=None, all images).
2203
- num_parallel_workers (int, optional): Number of workers to read the data
2204
- (default=None, set in the config).
2205
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
2206
- (default=None, expected order behavior shown in the table).
2351
+ num_samples (int, optional): The number of images to be included in the dataset.
2352
+ Default: None, all images.
2353
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2354
+ Default: None, will use global default workers(8), it can be set
2355
+ by `mindspore.dataset.config.set_num_parallel_workers` .
2356
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
2357
+ Default: None, expected order behavior shown in the table below.
2207
2358
  sampler (Sampler, optional): Object used to choose samples from the
2208
- dataset (default=None, expected order behavior shown in the table).
2359
+ dataset. Default: None, expected order behavior shown in the table below.
2209
2360
  extensions (list[str], optional): List of file extensions to be
2210
- included in the dataset (default=None).
2361
+ included in the dataset. Default: None.
2211
2362
  class_indexing (dict, optional): A str-to-int mapping from folder name to index
2212
- (default=None, the folder names will be sorted
2363
+ Default: None, the folder names will be sorted
2213
2364
  alphabetically and each class will be given a
2214
- unique index starting from 0).
2215
- decode (bool, optional): Decode the images after reading (default=False).
2365
+ unique index starting from 0.
2366
+ decode (bool, optional): Decode the images after reading. Default: False.
2216
2367
  num_shards (int, optional): Number of shards that the dataset will be divided
2217
- into (default=None). When this argument is specified, `num_samples` reflects
2368
+ into. Default: None. When this argument is specified, `num_samples` reflects
2218
2369
  the maximum sample number of per shard.
2219
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
2370
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
2220
2371
  argument can only be specified when `num_shards` is also specified.
2221
2372
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2222
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
2223
- (default=None, which means no cache is used).
2373
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
2374
+ Default: None, which means no cache is used.
2224
2375
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
2225
2376
  and returns the decrypted bytes data. Default: None, no decryption.
2226
2377
 
@@ -2232,11 +2383,11 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
2232
2383
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
2233
2384
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
2234
2385
  RuntimeError: If `class_indexing` is not a dictionary.
2235
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
2386
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
2236
2387
 
2237
2388
  Note:
2238
2389
  - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise.
2239
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
2390
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2240
2391
  The table below shows what input arguments are allowed and their expected behavior.
2241
2392
 
2242
2393
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2322,14 +2473,34 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
2322
2473
  return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing,
2323
2474
  self.decrypt)
2324
2475
 
2476
+ def get_class_indexing(self):
2477
+ """
2478
+ Get the class index.
2479
+
2480
+ Returns:
2481
+ dict, a str-to-int mapping from label name to index.
2482
+
2483
+ Examples:
2484
+ >>> image_folder_dataset_dir = "/path/to/image_folder_dataset_directory"
2485
+ >>>
2486
+ >>> dataset = ds.ImageFolderDataset(dataset_dir=image_folder_dataset_dir)
2487
+ >>> class_indexing = dataset.get_class_indexing()
2488
+ """
2489
+ if self.class_indexing is None or not self.class_indexing:
2490
+ runtime_getter = self._init_tree_getters()
2491
+ _class_indexing = runtime_getter[0].GetClassIndexing()
2492
+ for pair in _class_indexing:
2493
+ self.class_indexing[pair[0]] = pair[1][0]
2494
+ return self.class_indexing
2495
+
2325
2496
 
2326
- class KITTIDataset(MappableDataset):
2497
+ class KITTIDataset(MappableDataset, VisionBaseDataset):
2327
2498
  """
2328
- A source dataset that reads and parses the KITTI dataset.
2499
+ KITTI dataset.
2329
2500
 
2330
- When usage is "train", the generated dataset has multiple columns: :py:obj:`[image, label, truncated,
2331
- occluded, alpha, bbox, dimensions, location, rotation_y]`; When usage is "test", the generated dataset
2332
- has only one column: :py:obj:`[image]`.
2501
+ When `usage` is "train", the generated dataset has multiple columns: :py:obj:`[image, label, truncated,
2502
+ occluded, alpha, bbox, dimensions, location, rotation_y]` ; When `usage` is "test", the generated dataset
2503
+ has only one column: :py:obj:`[image]` .
2333
2504
  The tensor of column :py:obj:`image` is of the uint8 type.
2334
2505
  The tensor of column :py:obj:`label` is of the uint32 type.
2335
2506
  The tensor of column :py:obj:`truncated` is of the float32 type.
@@ -2342,25 +2513,26 @@ class KITTIDataset(MappableDataset):
2342
2513
 
2343
2514
  Args:
2344
2515
  dataset_dir (str): Path to the root directory that contains the dataset.
2345
- usage (str, optional): Usage of this dataset, can be `train` or `test`. `train` will read 7481
2346
- train samples, `test` will read from 7518 test samples without label (default=None, will use `train`).
2347
- num_samples (int, optional): The number of images to be included in the dataset
2348
- (default=None, will include all images).
2349
- num_parallel_workers (int, optional): Number of workers to read the data
2350
- (default=None, number set in the config).
2351
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
2352
- order behavior shown in the table).
2353
- decode (bool, optional): Decode the images after reading (default=False).
2354
- sampler (Sampler, optional): Object used to choose samples from the dataset
2355
- (default=None, expected order behavior shown in the table).
2516
+ usage (str, optional): Usage of this dataset, can be `train` or `test` . `train` will read 7481
2517
+ train samples, `test` will read from 7518 test samples without label. Default: None, will use `train` .
2518
+ num_samples (int, optional): The number of images to be included in the dataset.
2519
+ Default: None, will include all images.
2520
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2521
+ Default: None, will use global default workers(8), it can be set
2522
+ by `mindspore.dataset.config.set_num_parallel_workers` .
2523
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
2524
+ order behavior shown in the table below.
2525
+ decode (bool, optional): Decode the images after reading. Default: False.
2526
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
2527
+ Default: None, expected order behavior shown in the table below.
2356
2528
  num_shards (int, optional): Number of shards that the dataset will be divided
2357
- into (default=None). When this argument is specified, 'num_samples' reflects
2529
+ into. Default: None. When this argument is specified, `num_samples` reflects
2358
2530
  the max sample number of per shard.
2359
- shard_id (int, optional): The shard ID within num_shards (default=None). This
2360
- argument can only be specified when num_shards is also specified.
2531
+ shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
2532
+ argument can only be specified when `num_shards` is also specified.
2361
2533
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2362
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
2363
- (default=None, which means no cache is used).
2534
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
2535
+ Default: None, which means no cache is used.
2364
2536
 
2365
2537
  Raises:
2366
2538
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
@@ -2368,10 +2540,10 @@ class KITTIDataset(MappableDataset):
2368
2540
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
2369
2541
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
2370
2542
  ValueError: If `dataset_dir` does not exist.
2371
- ValueError: If `shard_id` is invalid (< 0 or >= num_shards).
2543
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
2372
2544
 
2373
2545
  Note:
2374
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
2546
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2375
2547
  The table below shows what input arguments are allowed and their expected behavior.
2376
2548
 
2377
2549
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2418,13 +2590,14 @@ class KITTIDataset(MappableDataset):
2418
2590
  and a 3D laser scanner. Despite its popularity, the dataset itself does not contain ground truth for
2419
2591
  semantic segmentation. However, various researchers have manually annotated parts of the dataset to fit
2420
2592
  their necessities. Álvarez et al. generated ground truth for 323 images from the road detection challenge
2421
- with three classes: road, vertical,and sky. Zhang et al. annotated 252 (140 for training and 112 for testing)
2593
+ with three classes: road, vehicles and sky. Zhang et al. annotated 252 (140 for training and 112 for testing)
2422
2594
  acquisitions – RGB and Velodyne scans – from the tracking challenge for ten object categories: building, sky,
2423
2595
  road, vegetation, sidewalk, car, pedestrian, cyclist, sign/pole, and fence.
2424
2596
 
2425
2597
  You can unzip the original KITTI dataset files into this directory structure and read by MindSpore's API.
2426
2598
 
2427
2599
  .. code-block::
2600
+
2428
2601
  .
2429
2602
  └── kitti_dataset_directory
2430
2603
  ├── data_object_image_2
@@ -2472,44 +2645,45 @@ class KITTIDataset(MappableDataset):
2472
2645
 
2473
2646
  class KMnistDataset(MappableDataset, VisionBaseDataset):
2474
2647
  """
2475
- A source dataset that reads and parses the KMNIST dataset.
2648
+ KMNIST(Kuzushiji-MNIST) dataset.
2476
2649
 
2477
- The generated dataset has two columns :py:obj:`[image, label]`.
2650
+ The generated dataset has two columns :py:obj:`[image, label]` .
2478
2651
  The tensor of column :py:obj:`image` is of the uint8 type.
2479
- The tensor of column :py:obj:`label` is a scalar of the uint32 type.
2652
+ The column :py:obj:`label` is a scalar of the uint32 type.
2480
2653
 
2481
2654
  Args:
2482
2655
  dataset_dir (str): Path to the root directory that contains the dataset.
2483
2656
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 60,000
2484
2657
  train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
2485
- (default=None, will read all samples)
2486
- num_samples (int, optional): The number of images to be included in the dataset
2487
- (default=None, will read all images).
2488
- num_parallel_workers (int, optional): Number of workers to read the data
2489
- (default=None, will use value set in the config).
2490
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
2491
- (default=None, expected order behavior shown in the table).
2492
- sampler (Sampler, optional): Object used to choose samples from the
2493
- dataset (default=None, expected order behavior shown in the table).
2494
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
2658
+ Default: None, will read all samples.
2659
+ num_samples (int, optional): The number of images to be included in the dataset.
2660
+ Default: None, will read all images.
2661
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2662
+ Default: None, will use global default workers(8), it can be set
2663
+ by `mindspore.dataset.config.set_num_parallel_workers` .
2664
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
2665
+ Default: None, expected order behavior shown in the table below.
2666
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
2667
+ Default: None, expected order behavior shown in the table below.
2668
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
2495
2669
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
2496
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
2670
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
2497
2671
  argument can only be specified when `num_shards` is also specified.
2498
2672
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2499
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
2500
- (default=None, which means no cache is used).
2673
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
2674
+ Default: None, which means no cache is used.
2501
2675
 
2502
2676
  Raises:
2503
2677
  RuntimeError: If `dataset_dir` does not contain data files.
2504
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
2505
2678
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
2506
2679
  RuntimeError: If `sampler` and sharding are specified at the same time.
2507
2680
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
2508
2681
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
2509
- ValueError: If `shard_id` is invalid (out of range [0, `num_shards`]).
2682
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
2683
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
2510
2684
 
2511
2685
  Note:
2512
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
2686
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2513
2687
  The table below shows what input arguments are allowed and their expected behavior.
2514
2688
 
2515
2689
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2543,8 +2717,6 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
2543
2717
  >>>
2544
2718
  >>> # Read 3 samples from KMNIST dataset
2545
2719
  >>> dataset = ds.KMnistDataset(dataset_dir=kmnist_dataset_dir, num_samples=3)
2546
- >>>
2547
- >>> # Note: In kmnist_dataset dataset, each dictionary has keys "image" and "label"
2548
2720
 
2549
2721
  About KMNIST dataset:
2550
2722
 
@@ -2594,10 +2766,10 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
2594
2766
 
2595
2767
  class LFWDataset(MappableDataset, VisionBaseDataset):
2596
2768
  """
2597
- A source dataset that reads and parses the LFW dataset.
2769
+ LFW(Labeled Faces in the Wild) dataset.
2598
2770
 
2599
- When task is "people", the generated dataset has two columns: :py:obj:`[image, label]`;
2600
- When task is "pairs", the generated dataset has three columns: :py:obj:`[image1, image2, label]`.
2771
+ When `task` is 'people', the generated dataset has two columns: :py:obj:`[image, label]`;
2772
+ When `task` is 'pairs', the generated dataset has three columns: :py:obj:`[image1, image2, label]` .
2601
2773
  The tensor of column :py:obj:`image` is of the uint8 type.
2602
2774
  The tensor of column :py:obj:`image1` is of the uint8 type.
2603
2775
  The tensor of column :py:obj:`image2` is of the uint8 type.
@@ -2605,38 +2777,44 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
2605
2777
 
2606
2778
  Args:
2607
2779
  dataset_dir (str): Path to the root directory that contains the dataset.
2608
- task (str, optional): Set the task type of reading lfw data, support "people" and "pairs"
2609
- (default="people").
2610
- usage (str, optional): The image split to use, support "10fold", "train", "test" and "all"
2611
- (default="all", will read samples including train and test).
2612
- image_set (str, optional): Image set of image funneling to use, support "original", "funneled" or
2613
- "deepfunneled" (default="funneled", will read "funneled" set).
2614
- num_samples (int, optional): The number of images to be included in the dataset
2615
- (default=None, all images).
2616
- num_parallel_workers (int, optional): Number of workers to read the data
2617
- (default=None, set in the config).
2618
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
2619
- (default=None, expected order behavior shown in the table).
2620
- decode (bool, optional): Decode the images after reading (default=False).
2780
+ task (str, optional): Set the task type of reading lfw data, support 'people' and 'pairs'.
2781
+ Default: None, means 'people'.
2782
+ usage (str, optional): The image split to use, support '10fold', 'train', 'test' and 'all'.
2783
+ Default: None, will read samples including train and test.
2784
+ image_set (str, optional): Type of image funneling to use, support 'original', 'funneled' or
2785
+ 'deepfunneled'. Default: None, will use 'funneled'.
2786
+ num_samples (int, optional): The number of images to be included in the dataset.
2787
+ Default: None, all images.
2788
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2789
+ Default: None, will use global default workers(8), it can be set
2790
+ by `mindspore.dataset.config.set_num_parallel_workers` .
2791
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
2792
+ Default: None, expected order behavior shown in the table below.
2793
+ decode (bool, optional): Decode the images after reading. Default: False.
2621
2794
  sampler (Sampler, optional): Object used to choose samples from the
2622
- dataset (default=None, expected order behavior shown in the table).
2795
+ dataset. Default: None, expected order behavior shown in the table below.
2623
2796
  num_shards (int, optional): Number of shards that the dataset will be divided
2624
- into (default=None). When this argument is specified, 'num_samples' reflects
2797
+ into. Default: None. When this argument is specified, `num_samples` reflects
2625
2798
  the max sample number of per shard.
2626
- shard_id (int, optional): The shard ID within num_shards (default=None). This
2627
- argument can only be specified when num_shards is also specified.
2799
+ shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
2800
+ argument can only be specified when `num_shards` is also specified.
2628
2801
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2629
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
2630
- (default=None, which means no cache is used).
2802
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
2803
+ Default: None, which means no cache is used.
2631
2804
 
2632
2805
  Raises:
2806
+ RuntimeError: If `dataset_dir` does not contain data files.
2633
2807
  RuntimeError: If sampler and shuffle are specified at the same time.
2634
2808
  RuntimeError: If sampler and sharding are specified at the same time.
2635
- RuntimeError: If num_shards is specified but shard_id is None.
2636
- RuntimeError: If shard_id is specified but num_shards is None.
2637
- ValueError: If shard_id is invalid (< 0 or >= num_shards).
2809
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
2810
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
2811
+ ValueError: If `shard_id` is invalid (< 0 or >= `num_shards` ).
2638
2812
 
2639
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
2813
+ Note:
2814
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2815
+ The table below shows what input arguments are allowed and their expected behavior.
2816
+
2817
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
2640
2818
  :widths: 25 25 50
2641
2819
  :header-rows: 1
2642
2820
 
@@ -2674,15 +2852,17 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
2674
2852
 
2675
2853
  About LFW dataset:
2676
2854
 
2677
- LFW is a database of photographs designed for studying the problem of
2678
- unconstrained recognition. This database was created and maintained by researchers at the University
2679
- of Massachusetts, Amherst (specific references are in Acknowledgments section). 13,233 images of 5,749
2680
- people were detected and centered by the Viola Jones detector and collected from the web. 1,680 of the
2681
- people pictured have two or more distinct photos in the dataset.
2855
+ LFW (Labeled Faces in the Wild) dataset is one of the most commonly used and widely open datasets in
2856
+ the field of face recognition. It was released by Gary B. Huang and his team at the University of
2857
+ Massachusetts, Amherst in 2007. The dataset includes 13,233 images of 5,749 individuals, which are sourced
2858
+ from various internet platforms and contain diverse environmental factors such as different poses, lighting
2859
+ conditions, and angles. Most of the images in the dataset are frontal and cover a wide range of ages, genders,
2860
+ and ethnicities.
2682
2861
 
2683
2862
  You can unzip the original LFW dataset files into this directory structure and read by MindSpore's API.
2684
2863
 
2685
2864
  .. code-block::
2865
+
2686
2866
  .
2687
2867
  └── lfw_dataset_directory
2688
2868
  ├── lfw
@@ -2749,45 +2929,51 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
2749
2929
 
2750
2930
  class LSUNDataset(MappableDataset, VisionBaseDataset):
2751
2931
  """
2752
- A source dataset that reads and parses the LSUN dataset.
2932
+ LSUN(Large-scale Scene UNderstanding) dataset.
2753
2933
 
2754
- The generated dataset has two columns: :py:obj:`[image, label]`.
2934
+ The generated dataset has two columns: :py:obj:`[image, label]` .
2755
2935
  The tensor of column :py:obj:`image` is of the uint8 type.
2756
2936
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.
2757
2937
 
2758
2938
  Args:
2759
2939
  dataset_dir (str): Path to the root directory that contains the dataset.
2760
- usage (str, optional): Usage of this dataset, can be `train`, `test`, `valid` or `all`
2761
- (default=None, will be set to `all`).
2762
- classes(Union[str, list[str]], optional): Choose the specific classes to load (default=None, means loading
2763
- all classes in root directory).
2764
- num_samples (int, optional): The number of images to be included in the dataset
2765
- (default=None, all images).
2766
- num_parallel_workers (int, optional): Number of workers to read the data
2767
- (default=None, set in the config).
2768
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
2769
- (default=None, expected order behavior shown in the table).
2770
- decode (bool, optional): Decode the images after reading (default=False).
2940
+ usage (str, optional): Usage of this dataset, can be `train` , `test` , `valid` or `all` .
2941
+ Default: None, will be set to `all` .
2942
+ classes (Union[str, list[str]], optional): Choose the specific classes to load. Default: None, means loading
2943
+ all classes in root directory.
2944
+ num_samples (int, optional): The number of images to be included in the dataset.
2945
+ Default: None, all images.
2946
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2947
+ Default: None, will use global default workers(8), it can be set
2948
+ by `mindspore.dataset.config.set_num_parallel_workers` .
2949
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
2950
+ Default: None, expected order behavior shown in the table below.
2951
+ decode (bool, optional): Decode the images after reading. Default: False.
2771
2952
  sampler (Sampler, optional): Object used to choose samples from the
2772
- dataset (default=None, expected order behavior shown in the table).
2953
+ dataset. Default: None, expected order behavior shown in the table below.
2773
2954
  num_shards (int, optional): Number of shards that the dataset will be divided
2774
- into (default=None). When this argument is specified, 'num_samples' reflects
2955
+ into. Default: None. When this argument is specified, `num_samples` reflects
2775
2956
  the max sample number of per shard.
2776
- shard_id (int, optional): The shard ID within num_shards (default=None). This
2777
- argument can only be specified when num_shards is also specified.
2957
+ shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
2958
+ argument can only be specified when `num_shards` is also specified.
2778
2959
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2779
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
2780
- (default=None, which means no cache is used).
2960
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
2961
+ Default: None, which means no cache is used.
2781
2962
 
2782
2963
  Raises:
2783
- RuntimeError: If 'sampler' and 'shuffle' are specified at the same time.
2784
- RuntimeError: If 'sampler' and sharding are specified at the same time.
2785
- RuntimeError: If 'num_shards' is specified but 'shard_id' is None.
2786
- RuntimeError: If 'shard_id' is specified but 'num_shards' is None.
2787
- ValueError: If 'shard_id' is invalid (< 0 or >= num_shards).
2788
- ValueError: If 'usage' or 'classes' is invalid (not in specific types).
2789
-
2790
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
2964
+ RuntimeError: If `dataset_dir` does not contain data files.
2965
+ RuntimeError: If `sampler` and `shuffle` are specified at the same time.
2966
+ RuntimeError: If `sampler` and sharding are specified at the same time.
2967
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
2968
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
2969
+ ValueError: If `shard_id` is invalid (< 0 or >= `num_shards` ).
2970
+ ValueError: If `usage` or `classes` is invalid (not in specific types).
2971
+
2972
+ Note:
2973
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2974
+ The table below shows what input arguments are allowed and their expected behavior.
2975
+
2976
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
2791
2977
  :widths: 25 25 50
2792
2978
  :header-rows: 1
2793
2979
 
@@ -2826,15 +3012,17 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
2826
3012
 
2827
3013
  About LSUN dataset:
2828
3014
 
2829
- The LSUN dataset accesses the effectiveness of this cascading procedure and enables further progress
2830
- in visual recognition research.
3015
+ The LSUN (Large-Scale Scene Understanding) is a large-scale dataset used for indoor scene
3016
+ understanding. It was originally launched by Stanford University in 2015 with the aim of
3017
+ providing a challenging and diverse dataset for research in computer vision and machine
3018
+ learning. The main application of this dataset for research is indoor scene analysis.
2831
3019
 
2832
- The LSUN dataset contains around one million labeled images for each of 10 scene categories
2833
- and 20 object categories. The author experimented with training popular convolutional networks and found
2834
- that they achieved substantial performance gains when trained on this dataset.
3020
+ This dataset contains ten different categories of scenes, including bedrooms, living rooms,
3021
+ restaurants, lounges, studies, kitchens, bathrooms, corridors, children's room, and outdoors.
3022
+ Each category contains tens of thousands of images from different perspectives, and these
3023
+ images are high-quality, high-resolution real-world images.
2835
3024
 
2836
- You can unzip the original LSUN dataset files into this directory structure using official data.py and
2837
- read by MindSpore's API.
3025
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
2838
3026
 
2839
3027
  .. code-block::
2840
3028
 
@@ -2882,33 +3070,34 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
2882
3070
  """
2883
3071
  A source dataset for reading images from a Manifest file.
2884
3072
 
2885
- The generated dataset has two columns: :py:obj:`[image, label]`.
3073
+ The generated dataset has two columns: :py:obj:`[image, label]` .
2886
3074
  The tensor of column :py:obj:`image` is of the uint8 type.
2887
3075
  The tensor of column :py:obj:`label` is a scalar of the uint64 type.
2888
3076
 
2889
3077
  Args:
2890
3078
  dataset_file (str): File to be read.
2891
- usage (str, optional): Acceptable usages include 'train', 'eval' and 'inference' (default= 'train').
3079
+ usage (str, optional): Acceptable usages include 'train', 'eval' and 'inference'. Default: 'train'.
2892
3080
  num_samples (int, optional): The number of images to be included in the dataset.
2893
- (default=None, will include all images).
2894
- num_parallel_workers (int, optional): Number of workers to read the data
2895
- (default=None, will use value set in the config).
2896
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
2897
- order behavior shown in the table).
3081
+ Default: None, will include all images.
3082
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3083
+ Default: None, will use global default workers(8), it can be set
3084
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3085
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
3086
+ order behavior shown in the table below.
2898
3087
  sampler (Sampler, optional): Object used to choose samples from the
2899
- dataset (default=None, expected order behavior shown in the table).
2900
- class_indexing (dict, optional): A str-to-int mapping from label name to index
2901
- (default=None, the folder names will be sorted alphabetically and each
2902
- class will be given a unique index starting from 0).
2903
- decode (bool, optional): decode the images after reading (default=False).
3088
+ dataset. Default: None, expected order behavior shown in the table below.
3089
+ class_indexing (dict, optional): A str-to-int mapping from label name to index.
3090
+ Default: None, the folder names will be sorted alphabetically and each
3091
+ class will be given a unique index starting from 0.
3092
+ decode (bool, optional): decode the images after reading. Default: False.
2904
3093
  num_shards (int, optional): Number of shards that the dataset will be divided
2905
- into (default=None). When this argument is specified, `num_samples` reflects
3094
+ into. Default: None. When this argument is specified, `num_samples` reflects
2906
3095
  the max number of samples per shard.
2907
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
3096
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
2908
3097
  argument can only be specified when `num_shards` is also specified.
2909
3098
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2910
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
2911
- (default=None, which means no cache is used).
3099
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3100
+ Default: None, which means no cache is used.
2912
3101
 
2913
3102
  Raises:
2914
3103
  RuntimeError: If dataset_files are not valid or do not exist.
@@ -2918,11 +3107,11 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
2918
3107
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
2919
3108
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
2920
3109
  RuntimeError: If class_indexing is not a dictionary.
2921
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
3110
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
2922
3111
 
2923
3112
  Note:
2924
3113
  - The shape of the image column is [image_size] if decode flag is False, or [H,W,C] otherwise.
2925
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
3114
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
2926
3115
  The table below shows what input arguments are allowed and their expected behavior.
2927
3116
 
2928
3117
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -2959,6 +3148,26 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
2959
3148
  >>>
2960
3149
  >>> # 2) Read samples (specified in manifest_file.manifest) for shard 0 in a 2-way distributed training setup
2961
3150
  >>> dataset = ds.ManifestDataset(dataset_file=manifest_dataset_dir, num_shards=2, shard_id=0)
3151
+
3152
+ About Manifest dataset:
3153
+
3154
+ A Manifest file contains a list of files included in a dataset, including basic file info such as File name and File
3155
+ ID, along with extended file metadata. Manifest is a data format file supported by Huawei ModelArts. For details,
3156
+ see `Specifications for Importing the Manifest File <https://support.huaweicloud.com/engineers-modelarts/
3157
+ modelarts_23_0009.html>`_ .
3158
+
3159
+ .. code-block::
3160
+
3161
+ .
3162
+ └── manifest_dataset_directory
3163
+ ├── train
3164
+ │ ├── 1.JPEG
3165
+ │ ├── 2.JPEG
3166
+ │ ├── ...
3167
+ ├── eval
3168
+ │ ├── 1.JPEG
3169
+ │ ├── 2.JPEG
3170
+ │ ├── ...
2962
3171
  """
2963
3172
 
2964
3173
  @check_manifestdataset
@@ -3000,9 +3209,9 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
3000
3209
 
3001
3210
  class MnistDataset(MappableDataset, VisionBaseDataset):
3002
3211
  """
3003
- A source dataset that reads and parses the MNIST dataset.
3212
+ MNIST dataset.
3004
3213
 
3005
- The generated dataset has two columns :py:obj:`[image, label]`.
3214
+ The generated dataset has two columns :py:obj:`[image, label]` .
3006
3215
  The tensor of column :py:obj:`image` is of the uint8 type.
3007
3216
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.
3008
3217
 
@@ -3010,22 +3219,23 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
3010
3219
  dataset_dir (str): Path to the root directory that contains the dataset.
3011
3220
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all' . 'train' will read from 60,000
3012
3221
  train samples, 'test' will read from 10,000 test samples, 'all' will read from all 70,000 samples.
3013
- (default=None, will read all samples)
3014
- num_samples (int, optional): The number of images to be included in the dataset
3015
- (default=None, will read all images).
3016
- num_parallel_workers (int, optional): Number of workers to read the data
3017
- (default=None, will use value set in the config).
3018
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3019
- (default=None, expected order behavior shown in the table).
3222
+ Default: None, will read all samples.
3223
+ num_samples (int, optional): The number of images to be included in the dataset.
3224
+ Default: None, will read all images.
3225
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3226
+ Default: None, will use the global default number of workers (8), which can be set
3227
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3228
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3229
+ Default: None, expected order behavior shown in the table below.
3020
3230
  sampler (Sampler, optional): Object used to choose samples from the
3021
- dataset (default=None, expected order behavior shown in the table).
3022
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
3231
+ dataset. Default: None, expected order behavior shown in the table below.
3232
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
3023
3233
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
3024
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
3234
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3025
3235
  argument can only be specified when `num_shards` is also specified.
3026
3236
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3027
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3028
- (default=None, which means no cache is used).
3237
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3238
+ Default: None, which means no cache is used.
3029
3239
 
3030
3240
  Raises:
3031
3241
  RuntimeError: If `dataset_dir` does not contain data files.
@@ -3035,10 +3245,10 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
3035
3245
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3036
3246
  RuntimeError: If `num_shards` is specified but shard_id is None.
3037
3247
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
3038
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
3248
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3039
3249
 
3040
3250
  Note:
3041
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
3251
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
3042
3252
  The table below shows what input arguments are allowed and their expected behavior.
3043
3253
 
3044
3254
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -3120,42 +3330,44 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
3120
3330
  return cde.MnistNode(self.dataset_dir, self.usage, self.sampler)
3121
3331
 
3122
3332
 
3123
- class OmniglotDataset(MappableDataset):
3333
+ class OmniglotDataset(MappableDataset, VisionBaseDataset):
3124
3334
  """
3125
- A source dataset that reads and parses the Omniglot dataset.
3335
+ Omniglot dataset.
3126
3336
 
3127
- The generated dataset has two columns :py:obj:`[image, label]`.
3337
+ The generated dataset has two columns :py:obj:`[image, label]` .
3128
3338
  The tensor of column :py:obj:`image` is of the uint8 type.
3129
3339
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.
3130
3340
 
3131
3341
  Args:
3132
3342
  dataset_dir (str): Path to the root directory that contains the dataset.
3133
- background(bool, optional): Use the background dataset or the evaluation dataset
3134
- (default=None, will use the background dataset).
3135
- num_samples (int, optional): The number of images to be included in the dataset
3136
- (default=None, all images).
3137
- num_parallel_workers (int, optional): Number of workers to read the data
3138
- (default=None, set in the config).
3139
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3140
- (default=None, expected order behavior shown in the table).
3141
- decode (bool, optional): Decode the images after reading (default=False).
3343
+ background (bool, optional): Whether to create dataset from the "background" set.
3344
+ Otherwise create from the "evaluation" set. Default: None, set to True.
3345
+ num_samples (int, optional): The number of images to be included in the dataset.
3346
+ Default: None, all images.
3347
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3348
+ Default: None, will use the global default number of workers (8), which can be set
3349
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3350
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3351
+ Default: None, expected order behavior shown in the table below.
3352
+ decode (bool, optional): Decode the images after reading. Default: False.
3142
3353
  sampler (Sampler, optional): Object used to choose samples from the
3143
- dataset (default=None, expected order behavior shown in the table).
3354
+ dataset. Default: None, expected order behavior shown in the table below.
3144
3355
  num_shards (int, optional): Number of shards that the dataset will be divided
3145
- into (default=None). When this argument is specified, 'num_samples' reflects
3356
+ into. Default: None. When this argument is specified, `num_samples` reflects
3146
3357
  the max sample number of per shard.
3147
- shard_id (int, optional): The shard ID within num_shards (default=None). This
3148
- argument can only be specified when num_shards is also specified.
3358
+ shard_id (int, optional): The shard ID within `num_shards`. Default: None. This
3359
+ argument can only be specified when `num_shards` is also specified.
3149
3360
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3150
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3151
- (default=None, which means no cache is used).
3361
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3362
+ Default: None, which means no cache is used.
3152
3363
 
3153
3364
  Raises:
3365
+ RuntimeError: If `dataset_dir` does not contain data files.
3154
3366
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3155
3367
  RuntimeError: If `sampler` and `sharding` are specified at the same time.
3156
3368
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
3157
3369
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
3158
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
3370
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3159
3371
 
3160
3372
  Note:
3161
3373
  - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive.
@@ -3194,14 +3406,15 @@ class OmniglotDataset(MappableDataset):
3194
3406
 
3195
3407
  About Omniglot dataset:
3196
3408
 
3197
- The Omniglot dataset is designed for developing more human-like learning algorithms. Omniglot is a large dataset
3198
- of hand-written characters with 1623 characters and 20 examples for each character. These characters are collected
3199
- based upon 50 alphabets from different countries. It contains both images and strokes data. Stroke data are
3200
- coordinates with time in milliseconds.
3409
+ The Omniglot dataset is designed for developing more human-like learning algorithms. It contains 1623 different
3410
+ handwritten characters from 50 different alphabets. Each of the 1623 characters was drawn online via Amazon's
3411
+ Mechanical Turk by 20 different people. Each image is paired with stroke data, a sequence of [x, y, t] coordinates
3412
+ with time in milliseconds.
3201
3413
 
3202
3414
  You can unzip the original Omniglot dataset files into this directory structure and read by MindSpore's API.
3203
3415
 
3204
3416
  .. code-block::
3417
+
3205
3418
  .
3206
3419
  └── omniglot_dataset_directory
3207
3420
  ├── images_background/
@@ -3252,43 +3465,41 @@ class OmniglotDataset(MappableDataset):
3252
3465
 
3253
3466
  class PhotoTourDataset(MappableDataset, VisionBaseDataset):
3254
3467
  """
3255
- A source dataset that reads and parses the PhotoTour dataset.
3468
+ PhotoTour dataset.
3256
3469
 
3257
- The generated dataset with different usage has different output columns.
3258
- If train, the generated dataset has one column :py:obj:`[image]`,
3259
- else three columns :py:obj:`[image1, image2, matches]`.
3260
- The tensor of column :py:obj:`image`, :py:obj:`image1` and :py:obj:`image2` is of the uint8 type.
3261
- The tensor of column :py:obj:`matches` is a scalar of the uint32 type.
3470
+ According to the given `usage` configuration, the generated dataset has different output columns:
3471
+ - `usage` = 'train', output columns: `[image, dtype=uint8]` .
3472
+ - `usage` ≠ 'train', output columns: `[image1, dtype=uint8]` , `[image2, dtype=uint8]` , `[matches, dtype=uint32]` .
3262
3473
 
3263
3474
  Args:
3264
3475
  dataset_dir (str): Path to the root directory that contains the dataset.
3265
3476
  name (str): Name of the dataset to load,
3266
3477
  should be one of 'notredame', 'yosemite', 'liberty', 'notredame_harris',
3267
3478
  'yosemite_harris' or 'liberty_harris'.
3268
- usage (str, optional): Usage of the dataset, can be 'train' or 'test' (Default=None, will be set to 'train').
3479
+ usage (str, optional): Usage of the dataset, can be 'train' or 'test'. Default: None, will be set to 'train'.
3269
3480
  When usage is 'train', number of samples for each `name` is
3270
3481
  {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092, 'liberty_harris': 379587,
3271
3482
  'yosemite_harris': 450912, 'notredame_harris': 325295}.
3272
3483
  When usage is 'test', will read 100,000 samples for testing.
3273
- num_samples (int, optional): The number of images to be included in the dataset
3274
- (default=None, will read all images).
3275
- num_parallel_workers (int, optional): Number of workers to read the data
3276
- (default=None, will use value set in the config).
3277
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3278
- (default=None, expected order behavior shown in the table).
3279
- sampler (Sampler, optional): Object used to choose samples from the
3280
- dataset (default=None, expected order behavior shown in the table).
3281
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
3484
+ num_samples (int, optional): The number of images to be included in the dataset.
3485
+ Default: None, will read all images.
3486
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3487
+ Default: None, will use the global default number of workers (8), which can be set
3488
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3489
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3490
+ Default: None, expected order behavior shown in the table below.
3491
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
3492
+ Default: None, expected order behavior shown in the table below.
3493
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
3282
3494
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
3283
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
3495
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3284
3496
  argument can only be specified when `num_shards` is also specified.
3285
3497
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3286
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3287
- (default=None, which means no cache is used).
3498
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3499
+ Default: None, which means no cache is used.
3288
3500
 
3289
3501
  Raises:
3290
3502
  RuntimeError: If `dataset_dir` does not contain data files.
3291
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3292
3503
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3293
3504
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3294
3505
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
@@ -3297,13 +3508,14 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
3297
3508
  ValueError: If `usage` is not in ["train", "test"].
3298
3509
  ValueError: If name is not in ["notredame", "yosemite", "liberty",
3299
3510
  "notredame_harris", "yosemite_harris", "liberty_harris"].
3300
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
3511
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3512
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3301
3513
 
3302
3514
  Note:
3303
- - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive. The table
3515
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive. The table
3304
3516
  below shows what input arguments are allowed and their expected behavior.
3305
3517
 
3306
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
3518
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
3307
3519
  :widths: 64 64 1
3308
3520
  :header-rows: 1
3309
3521
 
@@ -3333,9 +3545,6 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
3333
3545
  >>> # Read 3 samples from PhotoTour dataset.
3334
3546
  >>> dataset = ds.PhotoTourDataset(dataset_dir="/path/to/photo_tour_dataset_directory",
3335
3547
  ... name='liberty', usage='train', num_samples=3)
3336
- >>>
3337
- >>> # In PhotoTourDataset dataset, if usage is 'train', each dictionary has key "image",
3338
- >>> # else has keys "image1" "image2" and "matches".
3339
3548
 
3340
3549
  About PhotoTour dataset:
3341
3550
 
@@ -3407,49 +3616,50 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
3407
3616
 
3408
3617
  class Places365Dataset(MappableDataset, VisionBaseDataset):
3409
3618
  """
3410
- A source dataset that reads and parses the Places365 dataset.
3619
+ Places365 dataset.
3411
3620
 
3412
- The generated dataset has two columns :py:obj:`[image, label]`.
3621
+ The generated dataset has two columns :py:obj:`[image, label]` .
3413
3622
  The tensor of column :py:obj:`image` is of the uint8 type.
3414
- The tensor of column :py:obj:`label` is a scalar of the uint32 type.
3623
+ The tensor of column :py:obj:`label` is of the uint32 type.
3415
3624
 
3416
3625
  Args:
3417
3626
  dataset_dir (str): Path to the root directory that contains the dataset.
3418
- usage (str, optional): Usage of this dataset, can be 'train-standard', 'train-challenge' or 'val'
3419
- (default=None, will be set to 'train-standard').
3420
- small (bool, optional): Use 256 * 256 images (True) or high resolution images (False) (default=False).
3421
- decode (bool, optional): Decode the images after reading (default=True).
3422
- num_samples (int, optional): The number of images to be included in the dataset
3423
- (default=None, will read all images).
3424
- num_parallel_workers (int, optional): Number of workers to read the data
3425
- (default=None, will use value set in the config).
3426
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3427
- (default=None, expected order behavior shown in the table).
3627
+ usage (str, optional): Usage of this dataset, can be 'train-standard', 'train-challenge' or 'val'.
3628
+ Default: None, will be set to 'train-standard'.
3629
+ small (bool, optional): Use 256 * 256 images (True) or high resolution images (False). Default: False.
3630
+ decode (bool, optional): Decode the images after reading. Default: False.
3631
+ num_samples (int, optional): The number of images to be included in the dataset.
3632
+ Default: None, will read all images.
3633
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3634
+ Default: None, will use the global default number of workers (8), which can be set
3635
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3636
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3637
+ Default: None, expected order behavior shown in the table below.
3428
3638
  sampler (Sampler, optional): Object used to choose samples from the
3429
- dataset (default=None, expected order behavior shown in the table).
3430
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
3639
+ dataset. Default: None, expected order behavior shown in the table below.
3640
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
3431
3641
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
3432
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
3642
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3433
3643
  argument can only be specified when `num_shards` is also specified.
3434
3644
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3435
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3436
- (default=None, which means no cache is used).
3645
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3646
+ Default: None, which means no cache is used.
3437
3647
 
3438
3648
  Raises:
3439
3649
  RuntimeError: If `dataset_dir` does not contain data files.
3440
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3441
3650
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3442
3651
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3443
3652
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
3444
3653
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
3445
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
3654
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3655
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3446
3656
  ValueError: If `usage` is not in ["train-standard", "train-challenge", "val"].
3447
3657
 
3448
3658
  Note:
3449
3659
  - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
3450
3660
  The table below shows what input arguments are allowed and their expected behavior.
3451
3661
 
3452
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
3662
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
3453
3663
  :widths: 25 25 50
3454
3664
  :header-rows: 1
3455
3665
 
@@ -3481,8 +3691,6 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
3481
3691
  >>> # Read 3 samples from Places365 dataset
3482
3692
  >>> dataset = ds.Places365Dataset(dataset_dir=place365_dataset_dir, usage='train-standard',
3483
3693
  ... small=True, decode=True, num_samples=3)
3484
- >>>
3485
- >>> # In places365 dataset, each dictionary has keys "image" and "label".
3486
3694
 
3487
3695
  About Places365 dataset:
3488
3696
 
@@ -3549,45 +3757,46 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
3549
3757
 
3550
3758
  class QMnistDataset(MappableDataset, VisionBaseDataset):
3551
3759
  """
3552
- A source dataset that reads and parses the QMNIST dataset.
3760
+ QMNIST dataset.
3553
3761
 
3554
- The generated dataset has two columns :py:obj:`[image, label]`.
3762
+ The generated dataset has two columns :py:obj:`[image, label]` .
3555
3763
  The tensor of column :py:obj:`image` is of the uint8 type.
3556
- The tensor of column :py:obj:`label` is a scalar when `compat` is True else a tensor both of the uint32 type.
3764
+ The tensor of column :py:obj:`label` is of the uint32 type.
3557
3765
 
3558
3766
  Args:
3559
3767
  dataset_dir (str): Path to the root directory that contains the dataset.
3560
3768
  usage (str, optional): Usage of this dataset, can be 'train', 'test', 'test10k', 'test50k', 'nist'
3561
- or 'all' (default=None, will read all samples).
3769
+ or 'all'. Default: None, will read all samples.
3562
3770
  compat (bool, optional): Whether the label for each example is class number (compat=True) or the full QMNIST
3563
- information (compat=False) (default=True).
3564
- num_samples (int, optional): The number of images to be included in the dataset
3565
- (default=None, will read all images).
3566
- num_parallel_workers (int, optional): Number of workers to read the data
3567
- (default=None, will use value set in the config).
3568
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3569
- (default=None, expected order behavior shown in the table).
3771
+ information (compat=False). Default: True.
3772
+ num_samples (int, optional): The number of images to be included in the dataset.
3773
+ Default: None, will read all images.
3774
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3775
+ Default: None, will use the global default number of workers (8), which can be set
3776
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3777
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3778
+ Default: None, expected order behavior shown in the table below.
3570
3779
  sampler (Sampler, optional): Object used to choose samples from the
3571
- dataset (default=None, expected order behavior shown in the table).
3572
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
3780
+ dataset. Default: None, expected order behavior shown in the table below.
3781
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
3573
3782
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
3574
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
3783
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3575
3784
  argument can only be specified when `num_shards` is also specified.
3576
3785
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3577
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3578
- (default=None, which means no cache is used).
3786
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3787
+ Default: None, which means no cache is used.
3579
3788
 
3580
3789
  Raises:
3581
3790
  RuntimeError: If `dataset_dir` does not contain data files.
3582
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3583
3791
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3584
3792
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3585
3793
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
3586
3794
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
3587
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
3795
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3796
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3588
3797
 
3589
3798
  Note:
3590
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
3799
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
3591
3800
  The table below shows what input arguments are allowed and their expected behavior.
3592
3801
 
3593
3802
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -3678,26 +3887,49 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
3678
3887
  A source dataset that generates random data.
3679
3888
 
3680
3889
  Args:
3681
- total_rows (int, optional): Number of samples for the dataset to generate
3682
- (default=None, number of samples is random).
3683
- schema (Union[str, Schema], optional): Path to the JSON schema file or schema object (default=None).
3684
- If the schema is not provided, the random dataset generates a random schema.
3685
- columns_list (list[str], optional): List of column names of the dataset
3686
- (default=None, the columns will be named like this "c0", "c1", "c2" etc).
3687
- num_samples (int, optional): The number of samples to be included in the dataset
3688
- (default=None, all samples).
3689
- num_parallel_workers (int, optional): Number of workers to read the data
3690
- (default=None, number set in the config).
3890
+ total_rows (int, optional): Number of samples for the dataset to generate.
3891
+ Default: None, number of samples is random.
3892
+ schema (Union[str, Schema], optional): Data format policy, which specifies the data types and shapes of the data
3893
+ column to be read. Both JSON file path and objects constructed by mindspore.dataset.Schema are acceptable.
3894
+ Default: None.
3895
+ columns_list (list[str], optional): List of column names of the dataset.
3896
+ Default: None, the columns will be named like this "c0", "c1", "c2" etc.
3897
+ num_samples (int, optional): The number of samples to be included in the dataset.
3898
+ Default: None, all samples.
3899
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3900
+ Default: None, will use the global default number of workers (8), which can be set
3901
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3691
3902
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3692
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3693
- (default=None, which means no cache is used).
3694
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3695
- (default=None, expected order behavior shown in the table).
3903
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3904
+ Default: None, which means no cache is used.
3905
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3906
+ Default: None, expected order behavior shown in the table below.
3696
3907
  num_shards (int, optional): Number of shards that the dataset will be divided
3697
- into (default=None). When this argument is specified, 'num_samples' reflects
3908
+ into. Default: None. When this argument is specified, `num_samples` reflects
3698
3909
  the maximum sample number of per shard.
3699
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
3910
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3700
3911
  argument can only be specified when `num_shards` is also specified.
3912
+
3913
+ Raises:
3914
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
3915
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
3916
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3917
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3918
+ TypeError: If `total_rows` is not of type int.
3919
+ TypeError: If `num_shards` is not of type int.
3920
+ TypeError: If `num_parallel_workers` is not of type int.
3921
+ TypeError: If `shuffle` is not of type bool.
3922
+ TypeError: If `columns_list` is not of type list.
3923
+
3924
+ Examples:
3925
+ >>> from mindspore import dtype as mstype
3926
+ >>> import mindspore.dataset as ds
3927
+ >>>
3928
+ >>> schema = ds.Schema()
3929
+ >>> schema.add_column('image', de_type=mstype.uint8, shape=[2])
3930
+ >>> schema.add_column('label', de_type=mstype.uint8, shape=[1])
3931
+ >>> # apply dataset operations
3932
+ >>> ds1 = ds.RandomDataset(schema=schema, total_rows=50, num_parallel_workers=4)
3701
3933
  """
3702
3934
 
3703
3935
  @check_random_dataset
@@ -3721,6 +3953,159 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
3721
3953
  return cde.RandomNode(self.total_rows, schema, self.columns_list)
3722
3954
 
3723
3955
 
3956
+ class RenderedSST2Dataset(MappableDataset, VisionBaseDataset):
3957
+ """
3958
+ RenderedSST2(Rendered Stanford Sentiment Treebank v2) dataset.
3959
+
3960
+ The generated dataset has two columns: :py:obj:`[image, label]`.
3961
+ The tensor of column :py:obj:`image` is of the uint8 type.
3962
+ The tensor of column :py:obj:`label` is of the uint32 type.
3963
+
3964
+ Args:
3965
+ dataset_dir (str): Path to the root directory that contains the dataset.
3966
+ usage (str, optional): Usage of this dataset, can be 'train', 'val', 'test' or 'all'.
3967
+ Default: None, will read all samples.
3968
+ num_samples (int, optional): The number of images to be included in the dataset.
3969
+ Default: None, will include all images.
3970
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
3971
+ Default: None, will use global default workers(8), it can be set
3972
+ by `mindspore.dataset.config.set_num_parallel_workers` .
3973
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3974
+ Default: None, expected order behavior shown in the table below.
3975
+ decode (bool, optional): Whether or not to decode the images after reading. Default: False.
3976
+ sampler (Sampler, optional): Object used to choose samples from the
3977
+ dataset. Default: None, expected order behavior shown in the table below.
3978
+ num_shards (int, optional): Number of shards that the dataset will be divided
3979
+ into. When this argument is specified, `num_samples` reflects
3980
+ the maximum sample number of per shard. Default: None.
3981
+ shard_id (int, optional): The shard ID within `num_shards` . This
3982
+ argument can only be specified when `num_shards` is also specified. Default: None.
3983
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3984
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
3985
+ Default: None, which means no cache is used.
3986
+
3987
+ Raises:
3988
+ RuntimeError: If `dataset_dir` does not contain data files.
3989
+ ValueError: If `usage` is not 'train', 'test', 'val' or 'all'.
3990
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3991
+ RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3992
+ RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3993
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
3994
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
3995
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3996
+
3997
+ Note:
3998
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
3999
+ The table below shows what input arguments are allowed and their expected behavior.
4000
+
4001
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
4002
+ :widths: 25 25 50
4003
+ :header-rows: 1
4004
+
4005
+ * - Parameter `sampler`
4006
+ - Parameter `shuffle`
4007
+ - Expected Order Behavior
4008
+ * - None
4009
+ - None
4010
+ - random order
4011
+ * - None
4012
+ - True
4013
+ - random order
4014
+ * - None
4015
+ - False
4016
+ - sequential order
4017
+ * - Sampler object
4018
+ - None
4019
+ - order defined by sampler
4020
+ * - Sampler object
4021
+ - True
4022
+ - not allowed
4023
+ * - Sampler object
4024
+ - False
4025
+ - not allowed
4026
+
4027
+ Examples:
4028
+ >>> rendered_sst2_dataset_dir = "/path/to/rendered_sst2_dataset_directory"
4029
+ >>>
4030
+ >>> # 1) Read all samples (image files) in rendered_sst2_dataset_dir with 8 threads
4031
+ >>> dataset = ds.RenderedSST2Dataset(dataset_dir=rendered_sst2_dataset_dir,
4032
+ ... usage="all", num_parallel_workers=8)
4033
+
4034
+ About RenderedSST2Dataset:
4035
+
4036
+ Rendered SST2 is an image classification dataset which was generated by rendering sentences in the Stanford
4037
+ Sentiment Treebank v2 dataset. There are three splits in this dataset and each split contains two classes
4038
+ (positive and negative): a train split containing 6920 images (3610 positive and 3310 negative), a validation
4039
+ split containing 872 images (444 positive and 428 negative), and a test split containing 1821 images
4040
+ (909 positive and 912 negative).
4041
+
4042
+ Here is the original RenderedSST2 dataset structure.
4043
+ You can unzip the dataset files into the following directory structure and read by MindSpore's API.
4044
+
4045
+ .. code-block::
4046
+
4047
+ .
4048
+ └── rendered_sst2_dataset_directory
4049
+ ├── train
4050
+ │ ├── negative
4051
+ │ │ ├── 0001.jpg
4052
+ │ │ ├── 0002.jpg
4053
+ │ │ ...
4054
+ │ └── positive
4055
+ │ ├── 0001.jpg
4056
+ │ ├── 0002.jpg
4057
+ │ ...
4058
+ ├── test
4059
+ │ ├── negative
4060
+ │ │ ├── 0001.jpg
4061
+ │ │ ├── 0002.jpg
4062
+ │ │ ...
4063
+ │ └── positive
4064
+ │ ├── 0001.jpg
4065
+ │ ├── 0002.jpg
4066
+ │ ...
4067
+ └── valid
4068
+ ├── negative
4069
+ │ ├── 0001.jpg
4070
+ │ ├── 0002.jpg
4071
+ │ ...
4072
+ └── positive
4073
+ ├── 0001.jpg
4074
+ ├── 0002.jpg
4075
+ ...
4076
+
4077
+ Citation:
4078
+
4079
+ .. code-block::
4080
+
4081
+ @inproceedings{socher-etal-2013-recursive,
4082
+ title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
4083
+ author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
4084
+ Christopher D. and Ng, Andrew and Potts, Christopher},
4085
+ booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
4086
+ month = oct,
4087
+ year = {2013},
4088
+ address = {Seattle, Washington, USA},
4089
+ publisher = {Association for Computational Linguistics},
4090
+ url = {https://www.aclweb.org/anthology/D13-1170},
4091
+ pages = {1631--1642},
4092
+ }
4093
+ """
4094
+
4095
+ @check_rendered_sst2_dataset
4096
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
4097
+ decode=False, sampler=None, num_shards=None, shard_id=None, cache=None):
4098
+ super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
4099
+ shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
4100
+
4101
+ self.dataset_dir = dataset_dir
4102
+ self.usage = replace_none(usage, "all")
4103
+ self.decode = replace_none(decode, False)
4104
+
4105
+ def parse(self, children=None):
4106
+ return cde.RenderedSST2Node(self.dataset_dir, self.usage, self.decode, self.sampler)
4107
+
4108
+
3724
4109
  class _SBDataset:
3725
4110
  """
3726
4111
  Dealing with the data file with .mat extension, and return one row in tuple (image, task) each time.
@@ -3783,43 +4168,44 @@ class _SBDataset:
3783
4168
 
3784
4169
  class SBDataset(GeneratorDataset):
3785
4170
  """
3786
- A source dataset that reads and parses Semantic Boundaries Dataset.
4171
+ SB(Semantic Boundaries) Dataset.
3787
4172
 
3788
- The generated dataset has two columns: :py:obj:`[image, task]`.
3789
- The tensor of column :py:obj:`image` is of the uint8 type.
3790
- The tensor of column :py:obj:`task` contains 20 images of the uint8 type if `task` is 'Boundaries' otherwise
3791
- contains 1 image of the uint8 type.
4173
+ By configuring the `task` parameter, the generated dataset has different output columns.
4174
+
4175
+ - 'task' = 'Boundaries' , there are two output columns: the 'image' column has the data type uint8 and
4176
+ the 'label' column contains 20 images of the data type uint8.
4177
+ - 'task' = 'Segmentation' , there are two output columns: the 'image' column has the data type uint8 and
4178
+ the 'label' column contains one image of the data type uint8.
3792
4179
 
3793
4180
  Args:
3794
4181
  dataset_dir (str): Path to the root directory that contains the dataset.
3795
- task (str, optional): Acceptable tasks include 'Boundaries' or 'Segmentation' (default= 'Boundaries').
3796
- usage (str, optional): Acceptable usages include 'train', 'val', 'train_noval' and 'all' (default= 'all').
4182
+ task (str, optional): Acceptable tasks include 'Boundaries' or 'Segmentation'. Default: 'Boundaries'.
4183
+ usage (str, optional): Acceptable usages include 'train', 'val', 'train_noval' and 'all'. Default: 'all'.
3797
4184
  num_samples (int, optional): The number of images to be included in the dataset.
3798
- (default=None, all images).
3799
- num_parallel_workers (int, optional): Number of workers to read the data
3800
- (default=None, number set in the config).
3801
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
3802
- order behavior shown in the table).
3803
- decode (bool, optional): Decode the images after reading (default=None).
4185
+ Default: None, all images.
4186
+ num_parallel_workers (int, optional): Number of worker subprocesses to read the data. Default: 1.
4187
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
4188
+ order behavior shown in the table below.
4189
+ decode (bool, optional): Decode the images after reading. Default: None.
3804
4190
  sampler (Sampler, optional): Object used to choose samples from the
3805
- dataset (default=None, expected order behavior shown in the table).
4191
+ dataset. Default: None, expected order behavior shown in the table below.
3806
4192
  num_shards (int, optional): Number of shards that the dataset will be divided
3807
- into (default=None). When this argument is specified, `num_samples` reflects
4193
+ into. Default: None. When this argument is specified, `num_samples` reflects
3808
4194
  the max sample number of per shard.
3809
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
4195
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3810
4196
  argument can only be specified when `num_shards` is also specified.
3811
4197
 
3812
4198
  Raises:
3813
4199
  RuntimeError: If `dataset_dir` is not valid or does not contain data files.
3814
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3815
4200
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3816
4201
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3817
4202
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
3818
4203
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
3819
4204
  ValueError: If `dataset_dir` does not exist.
4205
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3820
4206
  ValueError: If `task` is not in ['Boundaries', 'Segmentation'].
3821
4207
  ValueError: If `usage` is not in ['train', 'val', 'train_noval', 'all'].
3822
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
4208
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3823
4209
 
3824
4210
  Note:
3825
4211
  - This dataset can take in a sampler. `sampler` and `shuffle` are mutually exclusive.
@@ -3869,7 +4255,7 @@ class SBDataset(GeneratorDataset):
3869
4255
 
3870
4256
  The Semantic Boundaries Dataset consists of 11355 color images. There are 8498 image names in the train.txt,
3871
4257
  2857 image names in the val.txt and 5623 image names in the train_noval.txt. The category cls/
3872
- contains the Segmentation and Boundaries results of category-level, the category inst/ catains the
4258
+ contains the Segmentation and Boundaries results of category-level, the category inst/ contains the
3873
4259
  Segmentation and Boundaries results of instance-level.
3874
4260
 
3875
4261
  You can unzip the dataset files into the following structure and read by MindSpore's API:
@@ -3916,45 +4302,46 @@ class SBDataset(GeneratorDataset):
3916
4302
 
3917
4303
  class SBUDataset(MappableDataset, VisionBaseDataset):
3918
4304
  """
3919
- A source dataset that reads and parses the SBU dataset.
4305
+ SBU(SBU Captioned Photo) dataset.
3920
4306
 
3921
- The generated dataset has two columns :py:obj:`[image, caption]`.
4307
+ The generated dataset has two columns :py:obj:`[image, caption]` .
3922
4308
  The tensor of column :py:obj:`image` is of the uint8 type.
3923
4309
  The tensor of column :py:obj:`caption` is of the string type.
3924
4310
 
3925
4311
  Args:
3926
4312
  dataset_dir (str): Path to the root directory that contains the dataset.
3927
- decode (bool, optional): Decode the images after reading (default=False).
3928
- num_samples (int, optional): The number of images to be included in the dataset
3929
- (default=None, will read all images).
3930
- num_parallel_workers (int, optional): Number of workers to read the data
3931
- (default=None, will use value set in the config).
3932
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
3933
- (default=None, expected order behavior shown in the table).
4313
+ num_samples (int, optional): The number of images to be included in the dataset.
4314
+ Default: None, will read all images.
4315
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
4316
+ Default: None, will use global default workers(8), it can be set
4317
+ by `mindspore.dataset.config.set_num_parallel_workers` .
4318
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
4319
+ Default: None, expected order behavior shown in the table below.
4320
+ decode (bool, optional): Decode the images after reading. Default: False.
3934
4321
  sampler (Sampler, optional): Object used to choose samples from the
3935
- dataset (default=None, expected order behavior shown in the table).
3936
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
4322
+ dataset. Default: None, expected order behavior shown in the table below.
4323
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
3937
4324
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
3938
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
4325
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
3939
4326
  argument can only be specified when `num_shards` is also specified.
3940
4327
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3941
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
3942
- (default=None, which means no cache is used).
4328
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
4329
+ Default: None, which means no cache is used.
3943
4330
 
3944
4331
  Raises:
3945
4332
  RuntimeError: If `dataset_dir` does not contain data files.
3946
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
3947
4333
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
3948
4334
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
3949
4335
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
3950
4336
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
3951
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
4337
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4338
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
3952
4339
 
3953
4340
  Note:
3954
4341
  - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
3955
4342
  The table below shows what input arguments are allowed and their expected behavior.
3956
4343
 
3957
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
4344
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
3958
4345
  :widths: 25 25 50
3959
4346
  :header-rows: 1
3960
4347
 
@@ -4031,42 +4418,43 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
4031
4418
 
4032
4419
  class SemeionDataset(MappableDataset, VisionBaseDataset):
4033
4420
  """
4034
- A source dataset that reads and parses Semeion dataset.
4421
+ Semeion dataset.
4035
4422
 
4036
- The generated dataset has two columns :py:obj:`[image, label]`.
4423
+ The generated dataset has two columns :py:obj:`[image, label]` .
4037
4424
  The tensor of column :py:obj:`image` is of the uint8 type.
4038
4425
  The tensor of column :py:obj:`label` is a scalar of the uint32 type.
4039
4426
 
4040
4427
  Args:
4041
4428
  dataset_dir (str): Path to the root directory that contains the dataset.
4042
- num_samples (int, optional): The number of samples to be included in the dataset
4043
- (default=None, will read all images).
4044
- num_parallel_workers (int, optional): Number of workers to read the data
4045
- (default=None, number set in the config).
4046
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
4047
- order behavior shown in the table).
4429
+ num_samples (int, optional): The number of samples to be included in the dataset.
4430
+ Default: None, will read all images.
4431
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
4432
+ Default: None, will use global default workers(8), it can be set
4433
+ by `mindspore.dataset.config.set_num_parallel_workers` .
4434
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
4435
+ order behavior shown in the table below.
4048
4436
  sampler (Sampler, optional): Object used to choose samples from the
4049
- dataset (default=None, expected order behavior shown in the table).
4437
+ dataset. Default: None, expected order behavior shown in the table below.
4050
4438
  num_shards (int, optional): Number of shards that the dataset will be divided
4051
- into (default=None). When this argument is specified, `num_samples` reflects
4439
+ into. Default: None. When this argument is specified, `num_samples` reflects
4052
4440
  the maximum sample number of per shard.
4053
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
4441
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
4054
4442
  argument can only be specified when `num_shards` is also specified.
4055
4443
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4056
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
4057
- (default=None, which means no cache is used).
4444
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
4445
+ Default: None, which means no cache is used.
4058
4446
 
4059
4447
  Raises:
4060
4448
  RuntimeError: If `dataset_dir` does not contain data files.
4061
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4062
4449
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
4063
4450
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
4064
4451
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
4065
4452
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
4066
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
4453
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4454
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4067
4455
 
4068
4456
  Note:
4069
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
4457
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
4070
4458
  The table below shows what input arguments are allowed and their expected behavior.
4071
4459
 
4072
4460
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -4153,9 +4541,9 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
4153
4541
 
4154
4542
  class STL10Dataset(MappableDataset, VisionBaseDataset):
4155
4543
  """
4156
- A source dataset that reads and parses STL10 dataset.
4544
+ STL-10 dataset.
4157
4545
 
4158
- The generated dataset has two columns: :py:obj:`[image, label]`.
4546
+ The generated dataset has two columns: :py:obj:`[image, label]` .
4159
4547
  The tensor of column :py:obj:`image` is of the uint8 type.
4160
4548
  The tensor of column :py:obj:`label` is a scalar of the int32 type.
4161
4549
 
@@ -4166,39 +4554,40 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
4166
4554
  train samples, 'test' will read from 8,000 test samples,
4167
4555
  'unlabeled' will read from all 100,000 samples, and 'train+unlabeled'
4168
4556
  will read from 105000 samples, 'all' will read all the samples.
4169
- (default=None, all samples).
4557
+ Default: None, all samples.
4170
4558
  num_samples (int, optional): The number of images to be included in the dataset.
4171
- (default=None, all images).
4172
- num_parallel_workers (int, optional): Number of workers to read the data
4173
- (default=None, number set in the config).
4174
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
4175
- order behavior shown in the table).
4559
+ Default: None, all images.
4560
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
4561
+ Default: None, will use global default workers(8), it can be set
4562
+ by `mindspore.dataset.config.set_num_parallel_workers` .
4563
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
4564
+ order behavior shown in the table below.
4176
4565
  sampler (Sampler, optional): Object used to choose samples from the
4177
- dataset (default=None, expected order behavior shown in the table).
4566
+ dataset. Default: None, expected order behavior shown in the table below.
4178
4567
  num_shards (int, optional): Number of shards that the dataset will be divided
4179
- into (default=None). When this argument is specified, 'num_samples' reflects
4568
+ into. Default: None. When this argument is specified, `num_samples` reflects
4180
4569
  the max sample number of per shard.
4181
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
4570
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
4182
4571
  argument can only be specified when `num_shards` is also specified.
4183
4572
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4184
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
4185
- (default=None, which means no cache is used).
4573
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
4574
+ Default: None, which means no cache is used.
4186
4575
 
4187
4576
  Raises:
4188
4577
  RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
4189
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4190
4578
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
4191
4579
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
4192
4580
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
4193
4581
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
4194
4582
  ValueError: If `usage` is invalid.
4195
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
4583
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4584
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4196
4585
 
4197
4586
  Note:
4198
4587
  - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
4199
4588
  The table below shows what input arguments are allowed and their expected behavior.
4200
4589
 
4201
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
4590
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
4202
4591
  :widths: 25 25 50
4203
4592
  :header-rows: 1
4204
4593
 
@@ -4239,7 +4628,6 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
4239
4628
  About STL10 dataset:
4240
4629
 
4241
4630
  STL10 dataset consists of 10 classes: airplane, bird, car, cat, deer, dog, horse, monkey, ship, truck.
4242
- STL10 is is inspired by the CIFAR-10 dataset.
4243
4631
  Images are 96x96 pixels, color.
4244
4632
  500 training images, 800 test images per class and 100000 unlabeled images.
4245
4633
  Labels are 0-indexed, and unlabeled images have -1 as their labels.
@@ -4257,7 +4645,7 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
4257
4645
  ├── test_y.bin
4258
4646
  └── unlabeled_X.bin
4259
4647
 
4260
- Citation of STL10 dataset.
4648
+ Citation of STL10 dataset:
4261
4649
 
4262
4650
  .. code-block::
4263
4651
 
@@ -4288,6 +4676,151 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
4288
4676
  return cde.STL10Node(self.dataset_dir, self.usage, self.sampler)
4289
4677
 
4290
4678
 
4679
+ class SUN397Dataset(MappableDataset, VisionBaseDataset):
4680
+ """
4681
+ SUN397(Scene UNderstanding) dataset.
4682
+
4683
+ The generated dataset has two columns: :py:obj:`[image, label]`.
4684
+ The tensor of column :py:obj:`image` is of the uint8 type.
4685
+ The tensor of column :py:obj:`label` is of the uint32 type.
4686
+
4687
+ Args:
4688
+ dataset_dir (str): Path to the root directory that contains the dataset.
4689
+ num_samples (int, optional): The number of images to be included in the dataset.
4690
+ Default: None, all images.
4691
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
4692
+ Default: None, will use global default workers(8), it can be set
4693
+ by `mindspore.dataset.config.set_num_parallel_workers` .
4694
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
4695
+ Default: None, expected order behavior shown in the table below.
4696
+ decode (bool, optional): Whether or not to decode the images after reading. Default: False.
4697
+ sampler (Sampler, optional): Object used to choose samples from the
4698
+ dataset. Default: None, expected order behavior shown in the table below.
4699
+ num_shards (int, optional): Number of shards that the dataset will be divided
4700
+ into. When this argument is specified, `num_samples` reflects
4701
+ the maximum sample number of per shard. Default: None.
4702
+ shard_id (int, optional): The shard ID within `num_shards` . This
4703
+ argument can only be specified when `num_shards` is also specified. Default: None.
4704
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4705
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
4706
+ Default: None, which means no cache is used.
4707
+
4708
+ Raises:
4709
+ RuntimeError: If `dataset_dir` does not contain data files.
4710
+ RuntimeError: If `sampler` and `shuffle` are specified at the same time.
4711
+ RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
4712
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
4713
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
4714
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4715
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4716
+
4717
+ Note:
4718
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
4719
+ The table below shows what input arguments are allowed and their expected behavior.
4720
+
4721
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
4722
+ :widths: 25 25 50
4723
+ :header-rows: 1
4724
+
4725
+ * - Parameter `sampler`
4726
+ - Parameter `shuffle`
4727
+ - Expected Order Behavior
4728
+ * - None
4729
+ - None
4730
+ - random order
4731
+ * - None
4732
+ - True
4733
+ - random order
4734
+ * - None
4735
+ - False
4736
+ - sequential order
4737
+ * - Sampler object
4738
+ - None
4739
+ - order defined by sampler
4740
+ * - Sampler object
4741
+ - True
4742
+ - not allowed
4743
+ * - Sampler object
4744
+ - False
4745
+ - not allowed
4746
+
4747
+ Examples:
4748
+ >>> sun397_dataset_dir = "/path/to/sun397_dataset_directory"
4749
+ >>>
4750
+ >>> # 1) Read all samples (image files) in sun397_dataset_dir with 8 threads
4751
+ >>> dataset = ds.SUN397Dataset(dataset_dir=sun397_dataset_dir, num_parallel_workers=8)
4752
+
4753
+ About SUN397Dataset:
4754
+
4755
+ The SUN397 or Scene UNderstanding (SUN) is a dataset for scene recognition consisting of 397 categories with
4756
+ 108,754 images. The number of images varies across categories, but there are at least 100 images per category.
4757
+ Images are in jpg, png, or gif format.
4758
+
4759
+ Here is the original SUN397 dataset structure.
4760
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
4761
+
4762
+ .. code-block::
4763
+
4764
+ .
4765
+ └── sun397_dataset_directory
4766
+ ├── ClassName.txt
4767
+ ├── README.txt
4768
+ ├── a
4769
+ │ ├── abbey
4770
+ │ │ ├── sun_aaaulhwrhqgejnyt.jpg
4771
+ │ │ ├── sun_aacphuqehdodwawg.jpg
4772
+ │ │ ├── ...
4773
+ │ ├── apartment_building
4774
+ │ │ └── outdoor
4775
+ │ │ ├── sun_aamyhslnsnomjzue.jpg
4776
+ │ │ ├── sun_abbjzfrsalhqivis.jpg
4777
+ │ │ ├── ...
4778
+ │ ├── ...
4779
+ ├── b
4780
+ │ ├── badlands
4781
+ │ │ ├── sun_aabtemlmesogqbbp.jpg
4782
+ │ │ ├── sun_afbsfeexggdhzshd.jpg
4783
+ │ │ ├── ...
4784
+ │ ├── balcony
4785
+ │ │ ├── exterior
4786
+ │ │ │ ├── sun_aaxzaiuznwquburq.jpg
4787
+ │ │ │ ├── sun_baajuldidvlcyzhv.jpg
4788
+ │ │ │ ├── ...
4789
+ │ │ └── interior
4790
+ │ │ ├── sun_babkzjntjfarengi.jpg
4791
+ │ │ ├── sun_bagjvjynskmonnbv.jpg
4792
+ │ │ ├── ...
4793
+ │ └── ...
4794
+ ├── ...
4795
+
4796
+
4797
+ Citation:
4798
+
4799
+ .. code-block::
4800
+
4801
+ @inproceedings{xiao2010sun,
4802
+ title = {Sun database: Large-scale scene recognition from abbey to zoo},
4803
+ author = {Xiao, Jianxiong and Hays, James and Ehinger, Krista A and Oliva, Aude and Torralba, Antonio},
4804
+ booktitle = {2010 IEEE computer society conference on computer vision and pattern recognition},
4805
+ pages = {3485--3492},
4806
+ year = {2010},
4807
+ organization = {IEEE}
4808
+ }
4809
+ """
4810
+
4811
+ @check_sun397_dataset
4812
+ def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, decode=False,
4813
+ sampler=None, num_shards=None, shard_id=None, cache=None):
4814
+ super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
4815
+ shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
4816
+
4817
+ self.dataset_dir = dataset_dir
4818
+ self.decode = replace_none(decode, False)
4819
+
4820
+ def parse(self, children=None):
4821
+ return cde.SUN397Node(self.dataset_dir, self.decode, self.sampler)
4822
+
4823
+
4291
4824
  class _SVHNDataset:
4292
4825
  """
4293
4826
  Mainly for loading SVHN Dataset, and return two rows each time.
@@ -4326,43 +4859,43 @@ class _SVHNDataset:
4326
4859
 
4327
4860
  class SVHNDataset(GeneratorDataset):
4328
4861
  """
4329
- A source dataset that reads and parses SVHN dataset.
4862
+ SVHN (Street View House Numbers) dataset.
4330
4863
 
4331
- The generated dataset has two columns: :py:obj:`[image, label]`.
4864
+ The generated dataset has two columns: :py:obj:`[image, label]` .
4332
4865
  The tensor of column :py:obj:`image` is of the uint8 type.
4333
4866
  The tensor of column :py:obj:`label` is of a scalar of uint32 type.
4334
4867
 
4335
4868
  Args:
4336
4869
  dataset_dir (str): Path to the root directory that contains the dataset.
4337
- usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of dataset
4338
- (default=None, will read all samples).
4339
- num_samples (int, optional): The number of samples to be included in the dataset (default=None, all images).
4340
- num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1).
4341
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required.
4342
- (default=None, expected order behavior shown in the table).
4870
+ usage (str, optional): Specify the 'train', 'test', 'extra' or 'all' parts of dataset.
4871
+ Default: None, will read all samples.
4872
+ num_samples (int, optional): The number of samples to be included in the dataset. Default: None, all images.
4873
+ num_parallel_workers (int, optional): Number of worker subprocesses used to
4874
+ fetch the dataset in parallel. Default: 1.
4875
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
4876
+ Default: None, expected order behavior shown in the table below.
4343
4877
  sampler (Sampler, optional): Object used to choose samples from the dataset. Random accessible
4344
- input is required (default=None, expected order behavior shown in the table).
4345
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
4346
- Random accessible input is required. When this argument is specified, 'num_samples' reflects the max
4347
- sample number of per shard.
4348
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument must be specified only
4349
- when num_shards is also specified. Random accessible input is required.
4878
+ input is required. Default: None, expected order behavior shown in the table below.
4879
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
4880
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
4881
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument must be specified only
4882
+ when `num_shards` is also specified.
4350
4883
 
4351
4884
  Raises:
4352
4885
  RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
4353
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4354
4886
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
4355
4887
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
4356
4888
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
4357
4889
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
4358
4890
  ValueError: If `usage` is invalid.
4359
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
4891
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4892
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4360
4893
 
4361
4894
  Note:
4362
4895
  - This dataset can take in a sampler. 'sampler' and 'shuffle' are mutually exclusive.
4363
4896
  The table below shows what input arguments are allowed and their expected behavior.
4364
4897
 
4365
- .. list-table:: Expected Order Behavior of Using 'sampler' and 'shuffle'
4898
+ .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
4366
4899
  :widths: 25 25 50
4367
4900
  :header-rows: 1
4368
4901
 
@@ -4394,9 +4927,7 @@ class SVHNDataset(GeneratorDataset):
4394
4927
 
4395
4928
  About SVHN dataset:
4396
4929
 
4397
- SVHN dataset consists of 10 digit classes.
4398
- SVHN is obtained from house numbers in Google Street View images.
4399
- 73257 digits for training, 26032 digits for testing, and 531131 additional extra training data.
4930
+ SVHN dataset consists of 10 digit classes and is obtained from house numbers in Google Street View images.
4400
4931
 
4401
4932
  Here is the original SVHN dataset structure.
4402
4933
  You can unzip the dataset files into this directory structure and read by MindSpore's API.
@@ -4438,54 +4969,53 @@ class SVHNDataset(GeneratorDataset):
4438
4969
 
4439
4970
  class USPSDataset(SourceDataset, VisionBaseDataset):
4440
4971
  """
4441
- A source dataset that reads and parses the USPS dataset.
4972
+ USPS (U.S. Postal Service) dataset.
4442
4973
 
4443
- The generated dataset has two columns: :py:obj:`[image, label]`.
4974
+ The generated dataset has two columns: :py:obj:`[image, label]` .
4444
4975
  The tensor of column :py:obj:`image` is of the uint8 type.
4445
- The tensor of column :py:obj:`label` is of a scalar of uint32 type.
4976
+ The tensor of column :py:obj:`label` is of the uint32 type.
4446
4977
 
4447
4978
  Args:
4448
4979
  dataset_dir (str): Path to the root directory that contains the dataset.
4449
4980
  usage (str, optional): Usage of this dataset, can be 'train', 'test' or 'all'. 'train' will read from 7,291
4450
4981
  train samples, 'test' will read from 2,007 test samples, 'all' will read from all 9,298 samples.
4451
- (default=None, will read all samples)
4452
- num_samples (int, optional): The number of images to be included in the dataset
4453
- (default=None, will read all images).
4454
- num_parallel_workers (int, optional): Number of workers to read the data
4455
- (default=None, will use value set in the config).
4456
- shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch
4457
- (default=Shuffle.GLOBAL). Bool type and Shuffle enum are both supported to pass in.
4458
- If shuffle is False, no shuffling will be performed;
4459
- If shuffle is True, the behavior is the same as setting shuffle to be Shuffle.GLOBAL
4460
- Otherwise, there are two levels of shuffling:
4982
+ Default: None, will read all samples.
4983
+ num_samples (int, optional): The number of images to be included in the dataset.
4984
+ Default: None, will read all images.
4985
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
4986
+ Default: None, will use the global default number of workers (8), which can be set
4987
+ by `mindspore.dataset.config.set_num_parallel_workers` .
4988
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
4989
+ Bool type and Shuffle enum are both supported to pass in. Default: `Shuffle.GLOBAL` .
4990
+ If shuffle is False, no shuffling will be performed.
4991
+ If shuffle is True, it is equivalent to setting `shuffle` to mindspore.dataset.Shuffle.GLOBAL.
4992
+ Set the mode of data shuffling by passing in enumeration variables:
4461
4993
 
4462
4994
  - Shuffle.GLOBAL: Shuffle both the files and samples.
4463
4995
 
4464
4996
  - Shuffle.FILES: Shuffle files only.
4465
4997
 
4466
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
4998
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
4467
4999
  When this argument is specified, `num_samples` reflects the max sample number of per shard.
4468
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
5000
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
4469
5001
  argument can only be specified when `num_shards` is also specified.
4470
5002
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4471
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
4472
- (default=None, which means no cache is used).
5003
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
5004
+ Default: None, which means no cache is used.
4473
5005
 
4474
5006
  Raises:
4475
5007
  RuntimeError: If `dataset_dir` is not valid or does not exist or does not contain data files.
4476
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4477
5008
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
4478
5009
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
4479
5010
  ValueError: If `usage` is invalid.
4480
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
5011
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
5012
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4481
5013
 
4482
5014
  Examples:
4483
5015
  >>> usps_dataset_dir = "/path/to/usps_dataset_directory"
4484
5016
  >>>
4485
5017
  >>> # Read 3 samples from USPS dataset
4486
5018
  >>> dataset = ds.USPSDataset(dataset_dir=usps_dataset_dir, num_samples=3)
4487
- >>>
4488
- >>> # Note: In USPS dataset, each dictionary has keys "image" and "label"
4489
5019
 
4490
5020
  About USPS dataset:
4491
5021
 
@@ -4535,52 +5065,53 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
4535
5065
 
4536
5066
  class VOCDataset(MappableDataset, VisionBaseDataset):
4537
5067
  """
4538
- A source dataset that reads and parses VOC dataset.
5068
+ VOC (Visual Object Classes) dataset.
4539
5069
 
4540
5070
  The generated dataset with different task setting has different output columns:
4541
5071
 
4542
- - task = :py:obj:`Detection`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[bbox, dtype=float32]`, \
4543
- :py:obj:`[label, dtype=uint32]`, :py:obj:`[difficult, dtype=uint32]`, :py:obj:`[truncate, dtype=uint32]`.
4544
- - task = :py:obj:`Segmentation`, output columns: :py:obj:`[image, dtype=uint8]`, :py:obj:`[target,dtype=uint8]`.
5072
+ - task = :py:obj:`Detection` , output columns: :py:obj:`[image, dtype=uint8]` , :py:obj:`[bbox, dtype=float32]` , \
5073
+ :py:obj:`[label, dtype=uint32]` , :py:obj:`[difficult, dtype=uint32]` , :py:obj:`[truncate, dtype=uint32]` .
5074
+ - task = :py:obj:`Segmentation` , output columns: :py:obj:`[image, dtype=uint8]` , :py:obj:`[target,dtype=uint8]` .
4545
5075
 
4546
5076
  Args:
4547
5077
  dataset_dir (str): Path to the root directory that contains the dataset.
4548
- task (str, optional): Set the task type of reading voc data, now only support 'Segmentation' or 'Detection'
4549
- (default= 'Segmentation').
4550
- usage (str, optional): Set the task type of ImageSets(default= 'train'). If task is 'Segmentation', image and
5078
+ task (str, optional): Set the task type of reading voc data, now only support 'Segmentation' or 'Detection'.
5079
+ Default: 'Segmentation'.
5080
+ usage (str, optional): Set the task type of ImageSets. Default: 'train'. If task is 'Segmentation', image and
4551
5081
  annotation list will be loaded in ./ImageSets/Segmentation/usage + ".txt"; If task is 'Detection', image and
4552
5082
  annotation list will be loaded in ./ImageSets/Main/usage + ".txt"; if task and usage are not set, image and
4553
5083
  annotation list will be loaded in ./ImageSets/Segmentation/train.txt as default.
4554
5084
  class_indexing (dict, optional): A str-to-int mapping from label name to index, only valid in
4555
- 'Detection' task (default=None, the folder names will be sorted alphabetically and each
4556
- class will be given a unique index starting from 0).
4557
- num_samples (int, optional): The number of images to be included in the dataset
4558
- (default=None, all images).
4559
- num_parallel_workers (int, optional): Number of workers to read the data
4560
- (default=None, number set in the config).
4561
- shuffle (bool, optional): Whether to perform shuffle on the dataset (default=None, expected
4562
- order behavior shown in the table).
4563
- decode (bool, optional): Decode the images after reading (default=False).
4564
- sampler (Sampler, optional): Object used to choose samples from the dataset
4565
- (default=None, expected order behavior shown in the table).
5085
+ 'Detection' task. Default: None, the folder names will be sorted alphabetically and each
5086
+ class will be given a unique index starting from 0.
5087
+ num_samples (int, optional): The number of images to be included in the dataset.
5088
+ Default: None, all images.
5089
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
5090
+ Default: None, will use the global default number of workers (8), which can be set
5091
+ by `mindspore.dataset.config.set_num_parallel_workers` .
5092
+ shuffle (bool, optional): Whether to perform shuffle on the dataset. Default: None, expected
5093
+ order behavior shown in the table below.
5094
+ decode (bool, optional): Decode the images after reading. Default: False.
5095
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
5096
+ Default: None, expected order behavior shown in the table below.
4566
5097
  num_shards (int, optional): Number of shards that the dataset will be divided
4567
- into (default=None). When this argument is specified, `num_samples` reflects
5098
+ into. Default: None. When this argument is specified, `num_samples` reflects
4568
5099
  the maximum sample number of per shard.
4569
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This
5100
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This
4570
5101
  argument can only be specified when `num_shards` is also specified.
4571
5102
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4572
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
4573
- (default=None, which means no cache is used).
5103
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
5104
+ Default: None, which means no cache is used.
4574
5105
  extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
4575
- :py:obj:`[_meta-filename, dtype=string]` will be output at the end (default=False).
5106
+ :py:obj:`[_meta-filename, dtype=string]` will be output at the end. Default: False.
4576
5107
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
4577
5108
  and returns the decrypted bytes data. Default: None, no decryption.
4578
5109
 
4579
5110
  Raises:
4580
5111
  RuntimeError: If `dataset_dir` does not contain data files.
4581
5112
  RuntimeError: If xml of Annotations is an invalid format.
4582
- RuntimeError: If xml of Annotations loss attribution of `object`.
4583
- RuntimeError: If xml of Annotations loss attribution of `bndbox`.
5113
+ RuntimeError: If the xml of Annotations is missing the `object` attribute.
5114
+ RuntimeError: If the xml of Annotations is missing the `bndbox` attribute.
4584
5115
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
4585
5116
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
4586
5117
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
@@ -4589,12 +5120,12 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
4589
5120
  ValueError: If task is neither 'Segmentation' nor 'Detection'.
4590
5121
  ValueError: If task is 'Segmentation' but class_indexing is not None.
4591
5122
  ValueError: If the txt file related to mode does not exist.
4592
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
5123
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4593
5124
 
4594
5125
  Note:
4595
5126
  - Column '[_meta-filename, dtype=string]' won't be output unless an explicit rename dataset op
4596
5127
  is added to remove the prefix('_meta-').
4597
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
5128
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
4598
5129
  The table below shows what input arguments are allowed and their expected behavior.
4599
5130
 
4600
5131
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`
@@ -4643,7 +5174,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
4643
5174
  >>> # In VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target"
4644
5175
  >>> # In VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation"
4645
5176
 
4646
- About VOC dataset.
5177
+ About VOC dataset:
4647
5178
 
4648
5179
  The PASCAL Visual Object Classes (VOC) challenge is a benchmark in visual
4649
5180
  object category recognition and detection, providing the vision and machine
@@ -4741,56 +5272,50 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
4741
5272
 
4742
5273
  class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
4743
5274
  """
4744
- A source dataset that reads and parses WIDERFace dataset.
5275
+ WIDERFace dataset.
4745
5276
 
4746
5277
  When usage is "train", "valid" or "all", the generated dataset has eight columns ["image", "bbox", "blur",
4747
- "expression", "illumination", "occlusion", "pose", "invalid"]. When usage is "test", it only has one column
4748
- ["image"].
4749
- The tensor of column :py:obj:`image` is a vector of the uint8 type.
4750
- The tensor of column :py:obj:`bbox` is a scalar of the uint32 type.
4751
- The tensor of column :py:obj:`blur` is a scalar of the uint32 type.
4752
- The tensor of column :py:obj:`expression` is a scalar of the uint32 type.
4753
- The tensor of column :py:obj:`illumination` is a scalar of the uint32 type.
4754
- The tensor of column :py:obj:`occlusion` is a scalar of the uint32 type.
4755
- The tensor of column :py:obj:`pose` is a scalar of the uint32 type.
4756
- The tensor of column :py:obj:`invalid` is a scalar of the uint32 type.
5278
+ "expression", "illumination", "occlusion", "pose", "invalid"]. The data type of the `image` column is uint8,
5279
+ and all other columns are uint32. When usage is "test", it only has one column
5280
+ ["image"], with uint8 data type.
4757
5281
 
4758
5282
  Args:
4759
5283
  dataset_dir (str): Path to the root directory that contains the dataset.
4760
5284
  usage (str, optional): Usage of this dataset, can be 'train', 'test', 'valid' or 'all'. 'train' will read
4761
5285
  from 12,880 samples, 'test' will read from 16,097 samples, 'valid' will read from 3,226 test samples
4762
- and 'all' will read all 'train' and 'valid' samples (default=None, will be set to 'all').
4763
- num_samples (int, optional): The number of images to be included in the dataset
4764
- (default=None, will read all images).
4765
- num_parallel_workers (int, optional): Number of workers to read the data
4766
- (default=None, will use value set in the config).
4767
- shuffle (bool, optional): Whether or not to perform shuffle on the dataset
4768
- (default=None, expected order behavior shown in the table).
4769
- decode (bool, optional): Decode the images after reading (default=False).
4770
- sampler (Sampler, optional): Object used to choose samples from the dataset
4771
- (default=None, expected order behavior shown in the table).
4772
- num_shards (int, optional): Number of shards that the dataset will be divided into (default=None).
5286
+ and 'all' will read all 'train' and 'valid' samples. Default: None, will be set to 'all'.
5287
+ num_samples (int, optional): The number of images to be included in the dataset.
5288
+ Default: None, will read all images.
5289
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
5290
+ Default: None, will use the global default number of workers (8), which can be set
5291
+ by `mindspore.dataset.config.set_num_parallel_workers` .
5292
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
5293
+ Default: None, expected order behavior shown in the table below.
5294
+ decode (bool, optional): Decode the images after reading. Default: False.
5295
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
5296
+ Default: None, expected order behavior shown in the table below.
5297
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: None.
4773
5298
  When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
4774
- shard_id (int, optional): The shard ID within `num_shards` (default=None). This argument can only be specified
5299
+ shard_id (int, optional): The shard ID within `num_shards` . Default: None. This argument can only be specified
4775
5300
  when `num_shards` is also specified.
4776
5301
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4777
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r1.10/dataset/cache.html>`_
4778
- (default=None, which means no cache is used).
5302
+ `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/r2.0/dataset/cache.html>`_ .
5303
+ Default: None, which means no cache is used.
4779
5304
 
4780
5305
  Raises:
4781
5306
  RuntimeError: If `dataset_dir` does not contain data files.
4782
- ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4783
5307
  RuntimeError: If `sampler` and `shuffle` are specified at the same time.
4784
5308
  RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
4785
5309
  RuntimeError: If `num_shards` is specified but `shard_id` is None.
4786
5310
  RuntimeError: If `shard_id` is specified but `num_shards` is None.
4787
- ValueError: If `shard_id` is invalid (< 0 or >= `num_shards`).
5311
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
4788
5312
  ValueError: If `usage` is not in ['train', 'test', 'valid', 'all'].
5313
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
4789
5314
  ValueError: If `annotation_file` does not exist.
4790
5315
  ValueError: If `dataset_dir` does not exist.
4791
5316
 
4792
5317
  Note:
4793
- - This dataset can take in a `sampler`. `sampler` and `shuffle` are mutually exclusive.
5318
+ - This dataset can take in a `sampler` . `sampler` and `shuffle` are mutually exclusive.
4794
5319
  The table below shows what input arguments are allowed and their expected behavior.
4795
5320
 
4796
5321
  .. list-table:: Expected Order Behavior of Using `sampler` and `shuffle`