mindspore 2.4.0__cp310-cp310-macosx_10_15_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (1387)
  1. mindspore/.commit_id +1 -0
  2. mindspore/__init__.py +53 -0
  3. mindspore/_c_dataengine.cpython-310-darwin.so +0 -0
  4. mindspore/_c_expression.cpython-310-darwin.so +0 -0
  5. mindspore/_c_mindrecord.cpython-310-darwin.so +0 -0
  6. mindspore/_check_jit_forbidden_api.py +106 -0
  7. mindspore/_checkparam.py +1419 -0
  8. mindspore/_extends/__init__.py +23 -0
  9. mindspore/_extends/builtin_operations.py +224 -0
  10. mindspore/_extends/graph_kernel/__init__.py +17 -0
  11. mindspore/_extends/graph_kernel/model/__init__.py +19 -0
  12. mindspore/_extends/graph_kernel/model/graph_parallel.py +311 -0
  13. mindspore/_extends/graph_kernel/model/graph_split.py +1348 -0
  14. mindspore/_extends/graph_kernel/model/model.py +553 -0
  15. mindspore/_extends/graph_kernel/model/model_builder.py +216 -0
  16. mindspore/_extends/graph_kernel/parallel_estimate.py +60 -0
  17. mindspore/_extends/graph_kernel/splitter.py +140 -0
  18. mindspore/_extends/graph_kernel/utils.py +28 -0
  19. mindspore/_extends/parallel_compile/__init__.py +19 -0
  20. mindspore/_extends/parallel_compile/akg_compiler/__init__.py +19 -0
  21. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +269 -0
  22. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +529 -0
  23. mindspore/_extends/parallel_compile/akg_compiler/compiler.py +56 -0
  24. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
  25. mindspore/_extends/parallel_compile/akg_compiler/get_file_path.py +36 -0
  26. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +556 -0
  27. mindspore/_extends/parallel_compile/akg_compiler/util.py +159 -0
  28. mindspore/_extends/parse/__init__.py +49 -0
  29. mindspore/_extends/parse/compile_config.py +299 -0
  30. mindspore/_extends/parse/namespace.py +136 -0
  31. mindspore/_extends/parse/parser.py +1448 -0
  32. mindspore/_extends/parse/resources.py +213 -0
  33. mindspore/_extends/parse/standard_method.py +4475 -0
  34. mindspore/_extends/parse/trope.py +97 -0
  35. mindspore/_extends/pijit/__init__.py +23 -0
  36. mindspore/_extends/pijit/pijit_func_white_list.py +669 -0
  37. mindspore/_extends/remote/__init__.py +19 -0
  38. mindspore/_extends/remote/kernel_build_server.py +199 -0
  39. mindspore/_extends/remote/kernel_build_server_akg.py +55 -0
  40. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  41. mindspore/_extends/remote/kernel_build_server_ascend.py +75 -0
  42. mindspore/_extends/utils.py +68 -0
  43. mindspore/_install_custom.py +43 -0
  44. mindspore/_profiler.py +30 -0
  45. mindspore/amp.py +433 -0
  46. mindspore/boost/__init__.py +42 -0
  47. mindspore/boost/adasum.py +319 -0
  48. mindspore/boost/base.py +535 -0
  49. mindspore/boost/boost.py +400 -0
  50. mindspore/boost/boost_cell_wrapper.py +790 -0
  51. mindspore/boost/dim_reduce.py +323 -0
  52. mindspore/boost/grad_accumulation.py +79 -0
  53. mindspore/boost/grad_freeze.py +382 -0
  54. mindspore/boost/group_loss_scale_manager.py +166 -0
  55. mindspore/boost/less_batch_normalization.py +174 -0
  56. mindspore/common/__init__.py +86 -0
  57. mindspore/common/_auto_dynamic.py +68 -0
  58. mindspore/common/_decorator.py +50 -0
  59. mindspore/common/_jit_fallback_utils.py +110 -0
  60. mindspore/common/_monad.py +25 -0
  61. mindspore/common/_pijit_context.py +190 -0
  62. mindspore/common/_register_for_adapter.py +74 -0
  63. mindspore/common/_register_for_recompute.py +48 -0
  64. mindspore/common/_register_for_tensor.py +46 -0
  65. mindspore/common/_stub_tensor.py +210 -0
  66. mindspore/common/_tensor_overload.py +139 -0
  67. mindspore/common/_utils.py +122 -0
  68. mindspore/common/api.py +2064 -0
  69. mindspore/common/auto_dynamic_shape.py +507 -0
  70. mindspore/common/dtype.py +422 -0
  71. mindspore/common/dump.py +130 -0
  72. mindspore/common/file_system.py +48 -0
  73. mindspore/common/generator.py +254 -0
  74. mindspore/common/hook_handle.py +143 -0
  75. mindspore/common/initializer.py +880 -0
  76. mindspore/common/jit_config.py +98 -0
  77. mindspore/common/lazy_inline.py +240 -0
  78. mindspore/common/mindir_util.py +111 -0
  79. mindspore/common/mutable.py +234 -0
  80. mindspore/common/no_inline.py +54 -0
  81. mindspore/common/np_dtype.py +25 -0
  82. mindspore/common/parameter.py +1081 -0
  83. mindspore/common/recompute.py +292 -0
  84. mindspore/common/seed.py +260 -0
  85. mindspore/common/sparse_tensor.py +1175 -0
  86. mindspore/common/symbol.py +122 -0
  87. mindspore/common/tensor.py +5039 -0
  88. mindspore/communication/__init__.py +37 -0
  89. mindspore/communication/_comm_helper.py +501 -0
  90. mindspore/communication/_hccl_management.py +297 -0
  91. mindspore/communication/comm_func.py +1395 -0
  92. mindspore/communication/management.py +673 -0
  93. mindspore/config/op_info.config +533 -0
  94. mindspore/context.py +2077 -0
  95. mindspore/dataset/__init__.py +90 -0
  96. mindspore/dataset/audio/__init__.py +61 -0
  97. mindspore/dataset/audio/transforms.py +3690 -0
  98. mindspore/dataset/audio/utils.py +386 -0
  99. mindspore/dataset/audio/validators.py +1172 -0
  100. mindspore/dataset/callback/__init__.py +20 -0
  101. mindspore/dataset/callback/ds_callback.py +368 -0
  102. mindspore/dataset/callback/validators.py +32 -0
  103. mindspore/dataset/core/__init__.py +13 -0
  104. mindspore/dataset/core/config.py +1095 -0
  105. mindspore/dataset/core/datatypes.py +101 -0
  106. mindspore/dataset/core/py_util_helpers.py +65 -0
  107. mindspore/dataset/core/validator_helpers.py +781 -0
  108. mindspore/dataset/debug/__init__.py +21 -0
  109. mindspore/dataset/debug/debug_hook.py +97 -0
  110. mindspore/dataset/debug/pre_defined_hook.py +67 -0
  111. mindspore/dataset/engine/__init__.py +124 -0
  112. mindspore/dataset/engine/cache_admin.py +47 -0
  113. mindspore/dataset/engine/cache_client.py +129 -0
  114. mindspore/dataset/engine/datasets.py +4582 -0
  115. mindspore/dataset/engine/datasets_audio.py +911 -0
  116. mindspore/dataset/engine/datasets_standard_format.py +543 -0
  117. mindspore/dataset/engine/datasets_text.py +2161 -0
  118. mindspore/dataset/engine/datasets_user_defined.py +1184 -0
  119. mindspore/dataset/engine/datasets_vision.py +4816 -0
  120. mindspore/dataset/engine/iterators.py +371 -0
  121. mindspore/dataset/engine/obs/__init__.py +23 -0
  122. mindspore/dataset/engine/obs/config_loader.py +68 -0
  123. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +508 -0
  124. mindspore/dataset/engine/obs/util.py +482 -0
  125. mindspore/dataset/engine/offload.py +596 -0
  126. mindspore/dataset/engine/queue.py +304 -0
  127. mindspore/dataset/engine/samplers.py +895 -0
  128. mindspore/dataset/engine/serializer_deserializer.py +159 -0
  129. mindspore/dataset/engine/validators.py +2895 -0
  130. mindspore/dataset/text/__init__.py +51 -0
  131. mindspore/dataset/text/transforms.py +1703 -0
  132. mindspore/dataset/text/utils.py +715 -0
  133. mindspore/dataset/text/validators.py +642 -0
  134. mindspore/dataset/transforms/__init__.py +45 -0
  135. mindspore/dataset/transforms/c_transforms.py +638 -0
  136. mindspore/dataset/transforms/py_transforms.py +393 -0
  137. mindspore/dataset/transforms/py_transforms_util.py +255 -0
  138. mindspore/dataset/transforms/transforms.py +1260 -0
  139. mindspore/dataset/transforms/validators.py +410 -0
  140. mindspore/dataset/utils/__init__.py +19 -0
  141. mindspore/dataset/utils/browse_dataset.py +190 -0
  142. mindspore/dataset/utils/line_reader.py +126 -0
  143. mindspore/dataset/vision/__init__.py +65 -0
  144. mindspore/dataset/vision/c_transforms.py +2641 -0
  145. mindspore/dataset/vision/py_transforms.py +2120 -0
  146. mindspore/dataset/vision/py_transforms_util.py +1660 -0
  147. mindspore/dataset/vision/transforms.py +7295 -0
  148. mindspore/dataset/vision/utils.py +863 -0
  149. mindspore/dataset/vision/validators.py +1483 -0
  150. mindspore/default_config.py +2 -0
  151. mindspore/experimental/__init__.py +20 -0
  152. mindspore/experimental/es/__init__.py +22 -0
  153. mindspore/experimental/es/embedding_service.py +883 -0
  154. mindspore/experimental/es/embedding_service_layer.py +581 -0
  155. mindspore/experimental/llm_boost/__init__.py +21 -0
  156. mindspore/experimental/llm_boost/atb/__init__.py +23 -0
  157. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  158. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  159. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  160. mindspore/experimental/llm_boost/register.py +129 -0
  161. mindspore/experimental/llm_boost/utils.py +31 -0
  162. mindspore/experimental/map_parameter.py +309 -0
  163. mindspore/experimental/optim/__init__.py +40 -0
  164. mindspore/experimental/optim/adadelta.py +161 -0
  165. mindspore/experimental/optim/adagrad.py +168 -0
  166. mindspore/experimental/optim/adam.py +193 -0
  167. mindspore/experimental/optim/adamax.py +170 -0
  168. mindspore/experimental/optim/adamw.py +290 -0
  169. mindspore/experimental/optim/asgd.py +153 -0
  170. mindspore/experimental/optim/lr_scheduler.py +1371 -0
  171. mindspore/experimental/optim/nadam.py +157 -0
  172. mindspore/experimental/optim/optimizer.py +262 -0
  173. mindspore/experimental/optim/radam.py +194 -0
  174. mindspore/experimental/optim/rmsprop.py +154 -0
  175. mindspore/experimental/optim/rprop.py +164 -0
  176. mindspore/experimental/optim/sgd.py +156 -0
  177. mindspore/hal/__init__.py +40 -0
  178. mindspore/hal/_ascend.py +57 -0
  179. mindspore/hal/_base.py +57 -0
  180. mindspore/hal/_cpu.py +56 -0
  181. mindspore/hal/_gpu.py +57 -0
  182. mindspore/hal/contiguous_tensors_handle.py +175 -0
  183. mindspore/hal/device.py +356 -0
  184. mindspore/hal/event.py +179 -0
  185. mindspore/hal/memory.py +326 -0
  186. mindspore/hal/stream.py +357 -0
  187. mindspore/include/OWNERS +7 -0
  188. mindspore/include/api/allocator.h +97 -0
  189. mindspore/include/api/callback/callback.h +93 -0
  190. mindspore/include/api/callback/ckpt_saver.h +41 -0
  191. mindspore/include/api/callback/loss_monitor.h +33 -0
  192. mindspore/include/api/callback/lr_scheduler.h +51 -0
  193. mindspore/include/api/callback/time_monitor.h +34 -0
  194. mindspore/include/api/callback/train_accuracy.h +37 -0
  195. mindspore/include/api/cell.h +90 -0
  196. mindspore/include/api/cfg.h +82 -0
  197. mindspore/include/api/context.h +602 -0
  198. mindspore/include/api/data_type.h +47 -0
  199. mindspore/include/api/delegate.h +178 -0
  200. mindspore/include/api/delegate_api.h +75 -0
  201. mindspore/include/api/dual_abi_helper.h +208 -0
  202. mindspore/include/api/format.h +28 -0
  203. mindspore/include/api/graph.h +46 -0
  204. mindspore/include/api/kernel.h +58 -0
  205. mindspore/include/api/kernel_api.h +168 -0
  206. mindspore/include/api/metrics/accuracy.h +36 -0
  207. mindspore/include/api/metrics/metrics.h +41 -0
  208. mindspore/include/api/model.h +438 -0
  209. mindspore/include/api/model_group.h +91 -0
  210. mindspore/include/api/model_parallel_runner.h +168 -0
  211. mindspore/include/api/serialization.h +185 -0
  212. mindspore/include/api/status.h +192 -0
  213. mindspore/include/api/types.h +431 -0
  214. mindspore/include/api/visible.h +41 -0
  215. mindspore/include/c_api/context_c.h +179 -0
  216. mindspore/include/c_api/data_type_c.h +52 -0
  217. mindspore/include/c_api/format_c.h +46 -0
  218. mindspore/include/c_api/model_c.h +347 -0
  219. mindspore/include/c_api/status_c.h +79 -0
  220. mindspore/include/c_api/tensor_c.h +146 -0
  221. mindspore/include/c_api/types_c.h +67 -0
  222. mindspore/include/dataset/config.h +163 -0
  223. mindspore/include/dataset/constants.h +363 -0
  224. mindspore/include/dataset/execute.h +196 -0
  225. mindspore/include/dataset/text.h +1092 -0
  226. mindspore/include/dataset/transforms.h +638 -0
  227. mindspore/include/dataset/vision.h +2129 -0
  228. mindspore/include/dataset/vision_ascend.h +206 -0
  229. mindspore/include/dataset/vision_lite.h +625 -0
  230. mindspore/lib/libavcodec.59.dylib +0 -0
  231. mindspore/lib/libavdevice.59.dylib +0 -0
  232. mindspore/lib/libavfilter.8.dylib +0 -0
  233. mindspore/lib/libavformat.59.dylib +0 -0
  234. mindspore/lib/libavutil.57.dylib +0 -0
  235. mindspore/lib/libdnnl.2.dylib +0 -0
  236. mindspore/lib/libicudata.69.dylib +0 -0
  237. mindspore/lib/libicui18n.69.dylib +0 -0
  238. mindspore/lib/libicuuc.69.dylib +0 -0
  239. mindspore/lib/libmindspore_address_sorting.15.dylib +0 -0
  240. mindspore/lib/libmindspore_backend.dylib +0 -0
  241. mindspore/lib/libmindspore_common.dylib +0 -0
  242. mindspore/lib/libmindspore_core.dylib +0 -0
  243. mindspore/lib/libmindspore_glog.0.dylib +0 -0
  244. mindspore/lib/libmindspore_gpr.15.dylib +0 -0
  245. mindspore/lib/libmindspore_grpc++.1.dylib +0 -0
  246. mindspore/lib/libmindspore_grpc.15.dylib +0 -0
  247. mindspore/lib/libmindspore_np_dtype.dylib +0 -0
  248. mindspore/lib/libmindspore_ops.dylib +0 -0
  249. mindspore/lib/libmindspore_upb.15.dylib +0 -0
  250. mindspore/lib/libnnacl.dylib +0 -0
  251. mindspore/lib/libopencv_core.4.5.dylib +0 -0
  252. mindspore/lib/libopencv_imgcodecs.4.5.dylib +0 -0
  253. mindspore/lib/libopencv_imgproc.4.5.dylib +0 -0
  254. mindspore/lib/libps_cache.dylib +0 -0
  255. mindspore/lib/libswresample.4.dylib +0 -0
  256. mindspore/lib/libswscale.6.dylib +0 -0
  257. mindspore/lib/libtinyxml2.8.dylib +0 -0
  258. mindspore/log.py +633 -0
  259. mindspore/mindrecord/__init__.py +43 -0
  260. mindspore/mindrecord/common/__init__.py +17 -0
  261. mindspore/mindrecord/common/constant.py +20 -0
  262. mindspore/mindrecord/common/enums.py +44 -0
  263. mindspore/mindrecord/common/exceptions.py +311 -0
  264. mindspore/mindrecord/config.py +809 -0
  265. mindspore/mindrecord/filereader.py +174 -0
  266. mindspore/mindrecord/filewriter.py +722 -0
  267. mindspore/mindrecord/mindpage.py +210 -0
  268. mindspore/mindrecord/shardheader.py +141 -0
  269. mindspore/mindrecord/shardindexgenerator.py +74 -0
  270. mindspore/mindrecord/shardreader.py +117 -0
  271. mindspore/mindrecord/shardsegment.py +128 -0
  272. mindspore/mindrecord/shardutils.py +185 -0
  273. mindspore/mindrecord/shardwriter.py +237 -0
  274. mindspore/mindrecord/tools/__init__.py +17 -0
  275. mindspore/mindrecord/tools/cifar10.py +140 -0
  276. mindspore/mindrecord/tools/cifar100.py +153 -0
  277. mindspore/mindrecord/tools/cifar100_to_mr.py +185 -0
  278. mindspore/mindrecord/tools/cifar10_to_mr.py +177 -0
  279. mindspore/mindrecord/tools/csv_to_mr.py +200 -0
  280. mindspore/mindrecord/tools/imagenet_to_mr.py +206 -0
  281. mindspore/mindrecord/tools/mnist_to_mr.py +259 -0
  282. mindspore/mindrecord/tools/tfrecord_to_mr.py +360 -0
  283. mindspore/mint/__init__.py +1586 -0
  284. mindspore/mint/distributed/__init__.py +31 -0
  285. mindspore/mint/distributed/distributed.py +254 -0
  286. mindspore/mint/linalg/__init__.py +22 -0
  287. mindspore/mint/nn/__init__.py +757 -0
  288. mindspore/mint/nn/functional.py +679 -0
  289. mindspore/mint/nn/layer/__init__.py +39 -0
  290. mindspore/mint/nn/layer/activation.py +133 -0
  291. mindspore/mint/nn/layer/normalization.py +477 -0
  292. mindspore/mint/nn/layer/pooling.py +110 -0
  293. mindspore/mint/optim/__init__.py +24 -0
  294. mindspore/mint/optim/adamw.py +206 -0
  295. mindspore/mint/special/__init__.py +63 -0
  296. mindspore/multiprocessing/__init__.py +73 -0
  297. mindspore/nn/__init__.py +47 -0
  298. mindspore/nn/cell.py +2787 -0
  299. mindspore/nn/dynamic_lr.py +482 -0
  300. mindspore/nn/grad/__init__.py +21 -0
  301. mindspore/nn/grad/cell_grad.py +196 -0
  302. mindspore/nn/layer/__init__.py +63 -0
  303. mindspore/nn/layer/activation.py +1822 -0
  304. mindspore/nn/layer/basic.py +1629 -0
  305. mindspore/nn/layer/channel_shuffle.py +90 -0
  306. mindspore/nn/layer/combined.py +248 -0
  307. mindspore/nn/layer/container.py +734 -0
  308. mindspore/nn/layer/conv.py +1505 -0
  309. mindspore/nn/layer/dense.py +204 -0
  310. mindspore/nn/layer/embedding.py +869 -0
  311. mindspore/nn/layer/image.py +661 -0
  312. mindspore/nn/layer/math.py +1069 -0
  313. mindspore/nn/layer/normalization.py +1273 -0
  314. mindspore/nn/layer/padding.py +880 -0
  315. mindspore/nn/layer/pooling.py +2302 -0
  316. mindspore/nn/layer/rnn_cells.py +388 -0
  317. mindspore/nn/layer/rnns.py +849 -0
  318. mindspore/nn/layer/thor_layer.py +963 -0
  319. mindspore/nn/layer/timedistributed.py +155 -0
  320. mindspore/nn/layer/transformer.py +823 -0
  321. mindspore/nn/learning_rate_schedule.py +512 -0
  322. mindspore/nn/loss/__init__.py +36 -0
  323. mindspore/nn/loss/loss.py +2924 -0
  324. mindspore/nn/metrics.py +53 -0
  325. mindspore/nn/optim/__init__.py +45 -0
  326. mindspore/nn/optim/_dist_optimizer_registry.py +111 -0
  327. mindspore/nn/optim/ada_grad.py +217 -0
  328. mindspore/nn/optim/adadelta.py +206 -0
  329. mindspore/nn/optim/adafactor.py +448 -0
  330. mindspore/nn/optim/adam.py +1297 -0
  331. mindspore/nn/optim/adamax.py +220 -0
  332. mindspore/nn/optim/adasum.py +548 -0
  333. mindspore/nn/optim/asgd.py +216 -0
  334. mindspore/nn/optim/ftrl.py +401 -0
  335. mindspore/nn/optim/lamb.py +296 -0
  336. mindspore/nn/optim/lars.py +202 -0
  337. mindspore/nn/optim/lazyadam.py +533 -0
  338. mindspore/nn/optim/momentum.py +239 -0
  339. mindspore/nn/optim/optimizer.py +1034 -0
  340. mindspore/nn/optim/proximal_ada_grad.py +242 -0
  341. mindspore/nn/optim/rmsprop.py +264 -0
  342. mindspore/nn/optim/rprop.py +251 -0
  343. mindspore/nn/optim/sgd.py +237 -0
  344. mindspore/nn/optim/tft_wrapper.py +127 -0
  345. mindspore/nn/optim/thor.py +1310 -0
  346. mindspore/nn/probability/__init__.py +22 -0
  347. mindspore/nn/probability/bijector/__init__.py +35 -0
  348. mindspore/nn/probability/bijector/bijector.py +337 -0
  349. mindspore/nn/probability/bijector/exp.py +65 -0
  350. mindspore/nn/probability/bijector/gumbel_cdf.py +144 -0
  351. mindspore/nn/probability/bijector/invert.py +126 -0
  352. mindspore/nn/probability/bijector/power_transform.py +196 -0
  353. mindspore/nn/probability/bijector/scalar_affine.py +167 -0
  354. mindspore/nn/probability/bijector/softplus.py +189 -0
  355. mindspore/nn/probability/bnn_layers/__init__.py +29 -0
  356. mindspore/nn/probability/bnn_layers/_util.py +46 -0
  357. mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +112 -0
  358. mindspore/nn/probability/bnn_layers/conv_variational.py +267 -0
  359. mindspore/nn/probability/bnn_layers/dense_variational.py +302 -0
  360. mindspore/nn/probability/bnn_layers/layer_distribution.py +123 -0
  361. mindspore/nn/probability/distribution/__init__.py +56 -0
  362. mindspore/nn/probability/distribution/_utils/__init__.py +34 -0
  363. mindspore/nn/probability/distribution/_utils/custom_ops.py +96 -0
  364. mindspore/nn/probability/distribution/_utils/utils.py +362 -0
  365. mindspore/nn/probability/distribution/bernoulli.py +334 -0
  366. mindspore/nn/probability/distribution/beta.py +391 -0
  367. mindspore/nn/probability/distribution/categorical.py +435 -0
  368. mindspore/nn/probability/distribution/cauchy.py +383 -0
  369. mindspore/nn/probability/distribution/distribution.py +827 -0
  370. mindspore/nn/probability/distribution/exponential.py +350 -0
  371. mindspore/nn/probability/distribution/gamma.py +391 -0
  372. mindspore/nn/probability/distribution/geometric.py +335 -0
  373. mindspore/nn/probability/distribution/gumbel.py +257 -0
  374. mindspore/nn/probability/distribution/half_normal.py +133 -0
  375. mindspore/nn/probability/distribution/laplace.py +128 -0
  376. mindspore/nn/probability/distribution/log_normal.py +272 -0
  377. mindspore/nn/probability/distribution/logistic.py +379 -0
  378. mindspore/nn/probability/distribution/normal.py +336 -0
  379. mindspore/nn/probability/distribution/poisson.py +288 -0
  380. mindspore/nn/probability/distribution/student_t.py +149 -0
  381. mindspore/nn/probability/distribution/transformed_distribution.py +235 -0
  382. mindspore/nn/probability/distribution/uniform.py +375 -0
  383. mindspore/nn/reinforcement/__init__.py +24 -0
  384. mindspore/nn/reinforcement/_batch_read_write.py +142 -0
  385. mindspore/nn/reinforcement/_tensors_queue.py +152 -0
  386. mindspore/nn/reinforcement/tensor_array.py +145 -0
  387. mindspore/nn/sparse/__init__.py +23 -0
  388. mindspore/nn/sparse/sparse.py +147 -0
  389. mindspore/nn/wrap/__init__.py +49 -0
  390. mindspore/nn/wrap/cell_wrapper.py +968 -0
  391. mindspore/nn/wrap/grad_reducer.py +608 -0
  392. mindspore/nn/wrap/loss_scale.py +694 -0
  393. mindspore/numpy/__init__.py +121 -0
  394. mindspore/numpy/array_creations.py +2731 -0
  395. mindspore/numpy/array_ops.py +2629 -0
  396. mindspore/numpy/dtypes.py +185 -0
  397. mindspore/numpy/fft.py +966 -0
  398. mindspore/numpy/logic_ops.py +936 -0
  399. mindspore/numpy/math_ops.py +5911 -0
  400. mindspore/numpy/utils.py +214 -0
  401. mindspore/numpy/utils_const.py +565 -0
  402. mindspore/ops/__init__.py +56 -0
  403. mindspore/ops/_constants.py +30 -0
  404. mindspore/ops/_grad_experimental/__init__.py +31 -0
  405. mindspore/ops/_grad_experimental/grad_array_ops.py +830 -0
  406. mindspore/ops/_grad_experimental/grad_base.py +143 -0
  407. mindspore/ops/_grad_experimental/grad_comm_ops.py +714 -0
  408. mindspore/ops/_grad_experimental/grad_debug_ops.py +31 -0
  409. mindspore/ops/_grad_experimental/grad_implementations.py +203 -0
  410. mindspore/ops/_grad_experimental/grad_inner_ops.py +79 -0
  411. mindspore/ops/_grad_experimental/grad_math_ops.py +802 -0
  412. mindspore/ops/_grad_experimental/grad_nn_ops.py +231 -0
  413. mindspore/ops/_grad_experimental/grad_quant_ops.py +238 -0
  414. mindspore/ops/_grad_experimental/grad_sparse.py +342 -0
  415. mindspore/ops/_grad_experimental/grad_sparse_ops.py +399 -0
  416. mindspore/ops/_grad_experimental/taylor_rule.py +220 -0
  417. mindspore/ops/_op_impl/__init__.py +23 -0
  418. mindspore/ops/_op_impl/_custom_op/__init__.py +39 -0
  419. mindspore/ops/_op_impl/_custom_op/_basic.py +158 -0
  420. mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +279 -0
  421. mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +156 -0
  422. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +109 -0
  423. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +125 -0
  424. mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +105 -0
  425. mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +124 -0
  426. mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +116 -0
  427. mindspore/ops/_op_impl/_custom_op/correction_mul.py +89 -0
  428. mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +196 -0
  429. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +366 -0
  430. mindspore/ops/_op_impl/_custom_op/dsd_impl.py +162 -0
  431. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +136 -0
  432. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +206 -0
  433. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +88 -0
  434. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +128 -0
  435. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +199 -0
  436. mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +88 -0
  437. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +156 -0
  438. mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +184 -0
  439. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +143 -0
  440. mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +169 -0
  441. mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +548 -0
  442. mindspore/ops/_op_impl/_custom_op/img2col_impl.py +881 -0
  443. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +278 -0
  444. mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +200 -0
  445. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +334 -0
  446. mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +255 -0
  447. mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +222 -0
  448. mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +644 -0
  449. mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +488 -0
  450. mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +87 -0
  451. mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +129 -0
  452. mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +121 -0
  453. mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +352 -0
  454. mindspore/ops/_op_impl/aicpu/__init__.py +441 -0
  455. mindspore/ops/_op_impl/aicpu/abs.py +36 -0
  456. mindspore/ops/_op_impl/aicpu/acos.py +32 -0
  457. mindspore/ops/_op_impl/aicpu/acos_grad.py +33 -0
  458. mindspore/ops/_op_impl/aicpu/acosh.py +34 -0
  459. mindspore/ops/_op_impl/aicpu/acosh_grad.py +35 -0
  460. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
  461. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
  462. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
  463. mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
  464. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
  465. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
  466. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
  467. mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
  468. mindspore/ops/_op_impl/aicpu/add.py +43 -0
  469. mindspore/ops/_op_impl/aicpu/add_n.py +41 -0
  470. mindspore/ops/_op_impl/aicpu/add_v2.py +40 -0
  471. mindspore/ops/_op_impl/aicpu/addcdiv.py +41 -0
  472. mindspore/ops/_op_impl/aicpu/addcmul.py +47 -0
  473. mindspore/ops/_op_impl/aicpu/adjust_contrastv2.py +32 -0
  474. mindspore/ops/_op_impl/aicpu/adjust_hue.py +31 -0
  475. mindspore/ops/_op_impl/aicpu/adjust_saturation.py +32 -0
  476. mindspore/ops/_op_impl/aicpu/affine_grid.py +33 -0
  477. mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
  478. mindspore/ops/_op_impl/aicpu/angle.py +31 -0
  479. mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
  480. mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
  481. mindspore/ops/_op_impl/aicpu/argmax_with_value.py +43 -0
  482. mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
  483. mindspore/ops/_op_impl/aicpu/asin.py +32 -0
  484. mindspore/ops/_op_impl/aicpu/asin_grad.py +33 -0
  485. mindspore/ops/_op_impl/aicpu/asinh.py +34 -0
  486. mindspore/ops/_op_impl/aicpu/asinh_grad.py +35 -0
  487. mindspore/ops/_op_impl/aicpu/atanh.py +34 -0
  488. mindspore/ops/_op_impl/aicpu/avgpool_grad_v1.py +37 -0
  489. mindspore/ops/_op_impl/aicpu/avgpool_v1.py +36 -0
  490. mindspore/ops/_op_impl/aicpu/bartlett_window.py +36 -0
  491. mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
  492. mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
  493. mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
  494. mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
  495. mindspore/ops/_op_impl/aicpu/betainc.py +31 -0
  496. mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
  497. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +42 -0
  498. mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
  499. mindspore/ops/_op_impl/aicpu/blackman_window.py +36 -0
  500. mindspore/ops/_op_impl/aicpu/broadcast_to.py +58 -0
  501. mindspore/ops/_op_impl/aicpu/bucketize.py +34 -0
  502. mindspore/ops/_op_impl/aicpu/cache_swap_table.py +102 -0
  503. mindspore/ops/_op_impl/aicpu/cast.py +225 -0
  504. mindspore/ops/_op_impl/aicpu/cauchy.py +33 -0
  505. mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
  506. mindspore/ops/_op_impl/aicpu/check_numerics.py +33 -0
  507. mindspore/ops/_op_impl/aicpu/cholesky.py +32 -0
  508. mindspore/ops/_op_impl/aicpu/cholesky_inverse.py +31 -0
  509. mindspore/ops/_op_impl/aicpu/cholesky_solve.py +33 -0
  510. mindspore/ops/_op_impl/aicpu/choleskygrad.py +32 -0
  511. mindspore/ops/_op_impl/aicpu/coalesce.py +37 -0
  512. mindspore/ops/_op_impl/aicpu/col2im.py +38 -0
  513. mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
  514. mindspore/ops/_op_impl/aicpu/compare_and_bitpack.py +37 -0
  515. mindspore/ops/_op_impl/aicpu/complex.py +32 -0
  516. mindspore/ops/_op_impl/aicpu/complex_abs.py +31 -0
  517. mindspore/ops/_op_impl/aicpu/compute_accidental_hits.py +44 -0
  518. mindspore/ops/_op_impl/aicpu/concat.py +57 -0
  519. mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
  520. mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
  521. mindspore/ops/_op_impl/aicpu/conj.py +42 -0
  522. mindspore/ops/_op_impl/aicpu/conjugate_transpose.py +58 -0
  523. mindspore/ops/_op_impl/aicpu/cos.py +34 -0
  524. mindspore/ops/_op_impl/aicpu/cosh.py +34 -0
  525. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  526. mindspore/ops/_op_impl/aicpu/crop_and_resize.py +69 -0
  527. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_boxes.py +68 -0
  528. mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
  529. mindspore/ops/_op_impl/aicpu/cross.py +42 -0
  530. mindspore/ops/_op_impl/aicpu/csr_sparse_matrix_to_dense.py +48 -0
  531. mindspore/ops/_op_impl/aicpu/csr_sparse_matrix_to_sparse_tensor.py +51 -0
  532. mindspore/ops/_op_impl/aicpu/ctc_greedy_decoder.py +35 -0
  533. mindspore/ops/_op_impl/aicpu/ctc_loss_v2.py +43 -0
  534. mindspore/ops/_op_impl/aicpu/ctc_loss_v2_grad.py +45 -0
  535. mindspore/ops/_op_impl/aicpu/ctcloss.py +38 -0
  536. mindspore/ops/_op_impl/aicpu/cummax.py +41 -0
  537. mindspore/ops/_op_impl/aicpu/cumprod.py +58 -0
  538. mindspore/ops/_op_impl/aicpu/cumsum.py +58 -0
  539. mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
  540. mindspore/ops/_op_impl/aicpu/data_format_vec_permute.py +32 -0
  541. mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
  542. mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +43 -0
  543. mindspore/ops/_op_impl/aicpu/dense_to_csr_sparse_matrix.py +49 -0
  544. mindspore/ops/_op_impl/aicpu/dense_to_dense_set_operation.py +45 -0
  545. mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
  546. mindspore/ops/_op_impl/aicpu/depth_to_space.py +44 -0
  547. mindspore/ops/_op_impl/aicpu/diag.py +36 -0
  548. mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
  549. mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
  550. mindspore/ops/_op_impl/aicpu/digamma.py +31 -0
  551. mindspore/ops/_op_impl/aicpu/div.py +41 -0
  552. mindspore/ops/_op_impl/aicpu/div_no_nan.py +35 -0
  553. mindspore/ops/_op_impl/aicpu/dropout2d.py +42 -0
  554. mindspore/ops/_op_impl/aicpu/dropout3d.py +42 -0
  555. mindspore/ops/_op_impl/aicpu/dropout_genmask.py +41 -0
  556. mindspore/ops/_op_impl/aicpu/dropout_genmask_v3.py +32 -0
  557. mindspore/ops/_op_impl/aicpu/dynamic_stitch.py +42 -0
  558. mindspore/ops/_op_impl/aicpu/edit_distance.py +56 -0
  559. mindspore/ops/_op_impl/aicpu/eig.py +35 -0
  560. mindspore/ops/_op_impl/aicpu/embedding_lookup.py +102 -0
  561. mindspore/ops/_op_impl/aicpu/end_of_sequence.py +30 -0
  562. mindspore/ops/_op_impl/aicpu/environ_create.py +28 -0
  563. mindspore/ops/_op_impl/aicpu/environ_destroy_all.py +28 -0
  564. mindspore/ops/_op_impl/aicpu/environ_get.py +41 -0
  565. mindspore/ops/_op_impl/aicpu/environ_set.py +40 -0
  566. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  567. mindspore/ops/_op_impl/aicpu/equal.py +41 -0
  568. mindspore/ops/_op_impl/aicpu/exp.py +37 -0
  569. mindspore/ops/_op_impl/aicpu/expand.py +45 -0
  570. mindspore/ops/_op_impl/aicpu/expand_dims.py +42 -0
  571. mindspore/ops/_op_impl/aicpu/expm1.py +34 -0
  572. mindspore/ops/_op_impl/aicpu/extract_glimpse.py +35 -0
  573. mindspore/ops/_op_impl/aicpu/eye.py +44 -0
  574. mindspore/ops/_op_impl/aicpu/fft_with_size.py +47 -0
  575. mindspore/ops/_op_impl/aicpu/fill_diagonal.py +39 -0
  576. mindspore/ops/_op_impl/aicpu/fill_v2.py +58 -0
  577. mindspore/ops/_op_impl/aicpu/flatten.py +43 -0
  578. mindspore/ops/_op_impl/aicpu/floor_div.py +38 -0
  579. mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
  580. mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
  581. mindspore/ops/_op_impl/aicpu/fractional_avg_pool.py +41 -0
  582. mindspore/ops/_op_impl/aicpu/fractional_avg_pool_grad.py +41 -0
  583. mindspore/ops/_op_impl/aicpu/fractional_max_pool.py +41 -0
  584. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_grad_with_fixed_ksize.py +43 -0
  585. mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +65 -0
  586. mindspore/ops/_op_impl/aicpu/fractional_max_pool_grad.py +42 -0
  587. mindspore/ops/_op_impl/aicpu/fractional_max_pool_grad_with_fixed_ksize.py +42 -0
  588. mindspore/ops/_op_impl/aicpu/fractional_max_pool_with_fixed_ksize.py +49 -0
  589. mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
  590. mindspore/ops/_op_impl/aicpu/fused_sparse_adam.py +46 -0
  591. mindspore/ops/_op_impl/aicpu/fused_sparse_ftrl.py +41 -0
  592. mindspore/ops/_op_impl/aicpu/fused_sparse_lazy_adam.py +46 -0
  593. mindspore/ops/_op_impl/aicpu/fused_sparse_proximal_adagrad.py +39 -0
  594. mindspore/ops/_op_impl/aicpu/gamma.py +38 -0
  595. mindspore/ops/_op_impl/aicpu/gather.py +46 -0
  596. mindspore/ops/_op_impl/aicpu/gather_d.py +79 -0
  597. mindspore/ops/_op_impl/aicpu/gather_d_grad_v2.py +79 -0
  598. mindspore/ops/_op_impl/aicpu/gather_grad.py +54 -0
  599. mindspore/ops/_op_impl/aicpu/gather_nd.py +56 -0
  600. mindspore/ops/_op_impl/aicpu/gcd.py +32 -0
  601. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
  602. mindspore/ops/_op_impl/aicpu/geqrf.py +32 -0
  603. mindspore/ops/_op_impl/aicpu/get_next.py +39 -0
  604. mindspore/ops/_op_impl/aicpu/glu.py +33 -0
  605. mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
  606. mindspore/ops/_op_impl/aicpu/greater.py +41 -0
  607. mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
  608. mindspore/ops/_op_impl/aicpu/grid_sampler_2d.py +35 -0
  609. mindspore/ops/_op_impl/aicpu/grid_sampler_2d_grad.py +38 -0
  610. mindspore/ops/_op_impl/aicpu/grid_sampler_3d.py +34 -0
  611. mindspore/ops/_op_impl/aicpu/grid_sampler_3d_grad.py +38 -0
  612. mindspore/ops/_op_impl/aicpu/hamming_window.py +57 -0
  613. mindspore/ops/_op_impl/aicpu/hard_sigmoid.py +32 -0
  614. mindspore/ops/_op_impl/aicpu/hard_sigmoid_grad.py +33 -0
  615. mindspore/ops/_op_impl/aicpu/heaviside.py +40 -0
  616. mindspore/ops/_op_impl/aicpu/histogram.py +35 -0
  617. mindspore/ops/_op_impl/aicpu/hsv_to_rgb.py +32 -0
  618. mindspore/ops/_op_impl/aicpu/hypot.py +32 -0
  619. mindspore/ops/_op_impl/aicpu/identity.py +42 -0
  620. mindspore/ops/_op_impl/aicpu/identity_n.py +41 -0
  621. mindspore/ops/_op_impl/aicpu/igamma.py +30 -0
  622. mindspore/ops/_op_impl/aicpu/igammac.py +30 -0
  623. mindspore/ops/_op_impl/aicpu/igammagrada.py +30 -0
  624. mindspore/ops/_op_impl/aicpu/im2col.py +43 -0
  625. mindspore/ops/_op_impl/aicpu/imag.py +31 -0
  626. mindspore/ops/_op_impl/aicpu/index_fill.py +54 -0
  627. mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
  628. mindspore/ops/_op_impl/aicpu/init_data_set_queue.py +27 -0
  629. mindspore/ops/_op_impl/aicpu/inplace_index_add.py +39 -0
  630. mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
  631. mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
  632. mindspore/ops/_op_impl/aicpu/is_finite.py +40 -0
  633. mindspore/ops/_op_impl/aicpu/is_inf.py +31 -0
  634. mindspore/ops/_op_impl/aicpu/is_nan.py +31 -0
  635. mindspore/ops/_op_impl/aicpu/kldivloss.py +34 -0
  636. mindspore/ops/_op_impl/aicpu/kldivlossgrad.py +35 -0
  637. mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
  638. mindspore/ops/_op_impl/aicpu/lcm.py +32 -0
  639. mindspore/ops/_op_impl/aicpu/left_shift.py +38 -0
  640. mindspore/ops/_op_impl/aicpu/less.py +41 -0
  641. mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
  642. mindspore/ops/_op_impl/aicpu/lgamma.py +33 -0
  643. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +57 -0
  644. mindspore/ops/_op_impl/aicpu/linspace.py +33 -0
  645. mindspore/ops/_op_impl/aicpu/list_diff.py +50 -0
  646. mindspore/ops/_op_impl/aicpu/log.py +37 -0
  647. mindspore/ops/_op_impl/aicpu/log1p.py +34 -0
  648. mindspore/ops/_op_impl/aicpu/log_matrix_determinant.py +31 -0
  649. mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
  650. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +37 -0
  651. mindspore/ops/_op_impl/aicpu/logical_xor.py +30 -0
  652. mindspore/ops/_op_impl/aicpu/logit.py +33 -0
  653. mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
  654. mindspore/ops/_op_impl/aicpu/logspace.py +36 -0
  655. mindspore/ops/_op_impl/aicpu/lower_bound.py +47 -0
  656. mindspore/ops/_op_impl/aicpu/lstsq.py +34 -0
  657. mindspore/ops/_op_impl/aicpu/lu.py +39 -0
  658. mindspore/ops/_op_impl/aicpu/lu_solve.py +32 -0
  659. mindspore/ops/_op_impl/aicpu/lu_unpack.py +114 -0
  660. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +49 -0
  661. mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
  662. mindspore/ops/_op_impl/aicpu/masked_scatter.py +40 -0
  663. mindspore/ops/_op_impl/aicpu/masked_select.py +31 -0
  664. mindspore/ops/_op_impl/aicpu/masked_select_grad.py +35 -0
  665. mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
  666. mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
  667. mindspore/ops/_op_impl/aicpu/matrix_determinant.py +30 -0
  668. mindspore/ops/_op_impl/aicpu/matrix_diag_part_v3.py +54 -0
  669. mindspore/ops/_op_impl/aicpu/matrix_diag_v3.py +56 -0
  670. mindspore/ops/_op_impl/aicpu/matrix_exp.py +34 -0
  671. mindspore/ops/_op_impl/aicpu/matrix_inverse.py +31 -0
  672. mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
  673. mindspore/ops/_op_impl/aicpu/matrix_power.py +37 -0
  674. mindspore/ops/_op_impl/aicpu/matrix_set_diag_v3.py +54 -0
  675. mindspore/ops/_op_impl/aicpu/matrix_solve.py +35 -0
  676. mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
  677. mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
  678. mindspore/ops/_op_impl/aicpu/max_pool3d_grad_with_argmax.py +60 -0
  679. mindspore/ops/_op_impl/aicpu/max_pool3d_with_argmax.py +59 -0
  680. mindspore/ops/_op_impl/aicpu/max_unpool2d.py +57 -0
  681. mindspore/ops/_op_impl/aicpu/max_unpool2d_grad.py +58 -0
  682. mindspore/ops/_op_impl/aicpu/max_unpool3d.py +57 -0
  683. mindspore/ops/_op_impl/aicpu/max_unpool3d_grad.py +58 -0
  684. mindspore/ops/_op_impl/aicpu/maximum_grad_grad.py +40 -0
  685. mindspore/ops/_op_impl/aicpu/maxpool_grad_v1.py +46 -0
  686. mindspore/ops/_op_impl/aicpu/maxpool_v1.py +42 -0
  687. mindspore/ops/_op_impl/aicpu/median.py +39 -0
  688. mindspore/ops/_op_impl/aicpu/median_grad.py +45 -0
  689. mindspore/ops/_op_impl/aicpu/meshgrid.py +41 -0
  690. mindspore/ops/_op_impl/aicpu/minimum_grad_grad.py +40 -0
  691. mindspore/ops/_op_impl/aicpu/mirror_pad.py +50 -0
  692. mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +48 -0
  693. mindspore/ops/_op_impl/aicpu/mul.py +43 -0
  694. mindspore/ops/_op_impl/aicpu/mul_no_nan.py +42 -0
  695. mindspore/ops/_op_impl/aicpu/multi_margin_loss.py +37 -0
  696. mindspore/ops/_op_impl/aicpu/multi_margin_loss_grad.py +41 -0
  697. mindspore/ops/_op_impl/aicpu/multilabel_margin_loss_grad.py +37 -0
  698. mindspore/ops/_op_impl/aicpu/multinomial.py +47 -0
  699. mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
  700. mindspore/ops/_op_impl/aicpu/mvlgamma.py +32 -0
  701. mindspore/ops/_op_impl/aicpu/mvlgamma_grad.py +33 -0
  702. mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
  703. mindspore/ops/_op_impl/aicpu/neg.py +36 -0
  704. mindspore/ops/_op_impl/aicpu/nextafter.py +32 -0
  705. mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
  706. mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
  707. mindspore/ops/_op_impl/aicpu/no_repeat_ngram.py +34 -0
  708. mindspore/ops/_op_impl/aicpu/non_deterministic_ints.py +33 -0
  709. mindspore/ops/_op_impl/aicpu/non_max_suppression.py +36 -0
  710. mindspore/ops/_op_impl/aicpu/non_max_suppression_with_overlaps.py +35 -0
  711. mindspore/ops/_op_impl/aicpu/non_zero.py +43 -0
  712. mindspore/ops/_op_impl/aicpu/not_equal.py +39 -0
  713. mindspore/ops/_op_impl/aicpu/nth_element.py +39 -0
  714. mindspore/ops/_op_impl/aicpu/nuclear_norm.py +33 -0
  715. mindspore/ops/_op_impl/aicpu/one_hot.py +116 -0
  716. mindspore/ops/_op_impl/aicpu/ones_like.py +39 -0
  717. mindspore/ops/_op_impl/aicpu/orgqr.py +34 -0
  718. mindspore/ops/_op_impl/aicpu/pad_and_shift.py +33 -0
  719. mindspore/ops/_op_impl/aicpu/pad_v3.py +61 -0
  720. mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +59 -0
  721. mindspore/ops/_op_impl/aicpu/padding.py +41 -0
  722. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +54 -0
  723. mindspore/ops/_op_impl/aicpu/pdist_grad.py +33 -0
  724. mindspore/ops/_op_impl/aicpu/poisson.py +37 -0
  725. mindspore/ops/_op_impl/aicpu/polar.py +32 -0
  726. mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
  727. mindspore/ops/_op_impl/aicpu/pow.py +39 -0
  728. mindspore/ops/_op_impl/aicpu/print_tensor.py +39 -0
  729. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +113 -0
  730. mindspore/ops/_op_impl/aicpu/qr.py +36 -0
  731. mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
  732. mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
  733. mindspore/ops/_op_impl/aicpu/ragged_range.py +49 -0
  734. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
  735. mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
  736. mindspore/ops/_op_impl/aicpu/random_categorical.py +68 -0
  737. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +36 -0
  738. mindspore/ops/_op_impl/aicpu/random_gamma.py +38 -0
  739. mindspore/ops/_op_impl/aicpu/random_poisson.py +134 -0
  740. mindspore/ops/_op_impl/aicpu/random_shuffle.py +47 -0
  741. mindspore/ops/_op_impl/aicpu/randperm.py +38 -0
  742. mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
  743. mindspore/ops/_op_impl/aicpu/range.py +36 -0
  744. mindspore/ops/_op_impl/aicpu/range_v2.py +35 -0
  745. mindspore/ops/_op_impl/aicpu/real.py +31 -0
  746. mindspore/ops/_op_impl/aicpu/real_div.py +40 -0
  747. mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
  748. mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
  749. mindspore/ops/_op_impl/aicpu/reduce_mean.py +57 -0
  750. mindspore/ops/_op_impl/aicpu/reduce_prod.py +57 -0
  751. mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
  752. mindspore/ops/_op_impl/aicpu/relu_grad_v3.py +41 -0
  753. mindspore/ops/_op_impl/aicpu/relu_v3.py +38 -0
  754. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +96 -0
  755. mindspore/ops/_op_impl/aicpu/reshape.py +42 -0
  756. mindspore/ops/_op_impl/aicpu/resize_area.py +40 -0
  757. mindspore/ops/_op_impl/aicpu/resize_bicubic.py +20 -0
  758. mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +19 -0
  759. mindspore/ops/_op_impl/aicpu/resize_bilinear.py +32 -0
  760. mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +32 -0
  761. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +36 -0
  762. mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +35 -0
  763. mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
  764. mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
  765. mindspore/ops/_op_impl/aicpu/reverse_sequence.py +55 -0
  766. mindspore/ops/_op_impl/aicpu/reversev2.py +54 -0
  767. mindspore/ops/_op_impl/aicpu/rgb_to_hsv.py +32 -0
  768. mindspore/ops/_op_impl/aicpu/right_shift.py +38 -0
  769. mindspore/ops/_op_impl/aicpu/rnnt_loss.py +35 -0
  770. mindspore/ops/_op_impl/aicpu/round.py +34 -0
  771. mindspore/ops/_op_impl/aicpu/rsqrt.py +33 -0
  772. mindspore/ops/_op_impl/aicpu/rsqrt_grad.py +36 -0
  773. mindspore/ops/_op_impl/aicpu/sample_distorted_bounding_box_v2.py +49 -0
  774. mindspore/ops/_op_impl/aicpu/scale_and_translate.py +52 -0
  775. mindspore/ops/_op_impl/aicpu/scale_and_translate_grad.py +36 -0
  776. mindspore/ops/_op_impl/aicpu/scatter.py +79 -0
  777. mindspore/ops/_op_impl/aicpu/scatter_add_with_axis.py +53 -0
  778. mindspore/ops/_op_impl/aicpu/scatter_elements.py +39 -0
  779. mindspore/ops/_op_impl/aicpu/scatter_nd.py +59 -0
  780. mindspore/ops/_op_impl/aicpu/scatter_nd_max.py +54 -0
  781. mindspore/ops/_op_impl/aicpu/scatter_nd_min.py +54 -0
  782. mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +59 -0
  783. mindspore/ops/_op_impl/aicpu/search_sorted.py +44 -0
  784. mindspore/ops/_op_impl/aicpu/segment_max.py +52 -0
  785. mindspore/ops/_op_impl/aicpu/segment_mean.py +56 -0
  786. mindspore/ops/_op_impl/aicpu/segment_min.py +52 -0
  787. mindspore/ops/_op_impl/aicpu/segment_prod.py +56 -0
  788. mindspore/ops/_op_impl/aicpu/segment_sum.py +56 -0
  789. mindspore/ops/_op_impl/aicpu/select.py +45 -0
  790. mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
  791. mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
  792. mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
  793. mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
  794. mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
  795. mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
  796. mindspore/ops/_op_impl/aicpu/set_size.py +38 -0
  797. mindspore/ops/_op_impl/aicpu/sign.py +36 -0
  798. mindspore/ops/_op_impl/aicpu/sin.py +34 -0
  799. mindspore/ops/_op_impl/aicpu/sinc.py +43 -0
  800. mindspore/ops/_op_impl/aicpu/sinh.py +34 -0
  801. mindspore/ops/_op_impl/aicpu/slice.py +59 -0
  802. mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
  803. mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
  804. mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
  805. mindspore/ops/_op_impl/aicpu/sort.py +39 -0
  806. mindspore/ops/_op_impl/aicpu/space_to_depth.py +44 -0
  807. mindspore/ops/_op_impl/aicpu/sparse_addmm.py +87 -0
  808. mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +80 -0
  809. mindspore/ops/_op_impl/aicpu/sparse_apply_centered_rms_prop.py +105 -0
  810. mindspore/ops/_op_impl/aicpu/sparse_apply_momentum.py +80 -0
  811. mindspore/ops/_op_impl/aicpu/sparse_apply_proximal_gradient_descent.py +79 -0
  812. mindspore/ops/_op_impl/aicpu/sparse_concat.py +59 -0
  813. mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
  814. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_add.py +58 -0
  815. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_div.py +58 -0
  816. mindspore/ops/_op_impl/aicpu/sparse_dense_cwise_mul.py +58 -0
  817. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
  818. mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
  819. mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
  820. mindspore/ops/_op_impl/aicpu/sparse_matrix_nnz.py +81 -0
  821. mindspore/ops/_op_impl/aicpu/sparse_matrix_transpose.py +116 -0
  822. mindspore/ops/_op_impl/aicpu/sparse_reorder.py +56 -0
  823. mindspore/ops/_op_impl/aicpu/sparse_reshape.py +34 -0
  824. mindspore/ops/_op_impl/aicpu/sparse_segment_mean_grad.py +36 -0
  825. mindspore/ops/_op_impl/aicpu/sparse_segment_mean_with_num_segments.py +44 -0
  826. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n.py +43 -0
  827. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n_grad.py +38 -0
  828. mindspore/ops/_op_impl/aicpu/sparse_segment_sqrt_n_with_num_segments.py +44 -0
  829. mindspore/ops/_op_impl/aicpu/sparse_segment_sum.py +49 -0
  830. mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
  831. mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
  832. mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
  833. mindspore/ops/_op_impl/aicpu/sparse_softmax.py +33 -0
  834. mindspore/ops/_op_impl/aicpu/sparse_softmax_cross_entropy_with_logits_v2.py +35 -0
  835. mindspore/ops/_op_impl/aicpu/sparse_sparse_maximum.py +53 -0
  836. mindspore/ops/_op_impl/aicpu/sparse_sparse_minimum.py +53 -0
  837. mindspore/ops/_op_impl/aicpu/sparse_tensor_dense_add.py +84 -0
  838. mindspore/ops/_op_impl/aicpu/sparse_tensor_dense_mat_mul.py +190 -0
  839. mindspore/ops/_op_impl/aicpu/sparse_tensor_to_csr_sparse_matrix.py +51 -0
  840. mindspore/ops/_op_impl/aicpu/sparse_to_dense_v2.py +73 -0
  841. mindspore/ops/_op_impl/aicpu/split.py +45 -0
  842. mindspore/ops/_op_impl/aicpu/sqrt.py +34 -0
  843. mindspore/ops/_op_impl/aicpu/sqrt_grad.py +35 -0
  844. mindspore/ops/_op_impl/aicpu/square.py +35 -0
  845. mindspore/ops/_op_impl/aicpu/squared_difference.py +37 -0
  846. mindspore/ops/_op_impl/aicpu/squeeze.py +42 -0
  847. mindspore/ops/_op_impl/aicpu/sspaddmm.py +97 -0
  848. mindspore/ops/_op_impl/aicpu/stack.py +45 -0
  849. mindspore/ops/_op_impl/aicpu/stack_push_pop.py +87 -0
  850. mindspore/ops/_op_impl/aicpu/standard_laplace.py +34 -0
  851. mindspore/ops/_op_impl/aicpu/standard_normal.py +34 -0
  852. mindspore/ops/_op_impl/aicpu/stateless_dropout_genmask.py +37 -0
  853. mindspore/ops/_op_impl/aicpu/stft.py +70 -0
  854. mindspore/ops/_op_impl/aicpu/strided_slice.py +43 -0
  855. mindspore/ops/_op_impl/aicpu/strided_slice_grad.py +50 -0
  856. mindspore/ops/_op_impl/aicpu/sub.py +41 -0
  857. mindspore/ops/_op_impl/aicpu/sub_and_filter.py +36 -0
  858. mindspore/ops/_op_impl/aicpu/tan.py +34 -0
  859. mindspore/ops/_op_impl/aicpu/tanh.py +34 -0
  860. mindspore/ops/_op_impl/aicpu/tanh_grad.py +35 -0
  861. mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
  862. mindspore/ops/_op_impl/aicpu/tile.py +56 -0
  863. mindspore/ops/_op_impl/aicpu/topk.py +34 -0
  864. mindspore/ops/_op_impl/aicpu/trace.py +40 -0
  865. mindspore/ops/_op_impl/aicpu/tracegrad.py +41 -0
  866. mindspore/ops/_op_impl/aicpu/trans_data.py +35 -0
  867. mindspore/ops/_op_impl/aicpu/transpose.py +58 -0
  868. mindspore/ops/_op_impl/aicpu/tridiagonal_matmul.py +42 -0
  869. mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
  870. mindspore/ops/_op_impl/aicpu/tril.py +42 -0
  871. mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
  872. mindspore/ops/_op_impl/aicpu/triplet_margin_loss.py +62 -0
  873. mindspore/ops/_op_impl/aicpu/triu.py +43 -0
  874. mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
  875. mindspore/ops/_op_impl/aicpu/truncated_normal.py +39 -0
  876. mindspore/ops/_op_impl/aicpu/uniform.py +36 -0
  877. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +41 -0
  878. mindspore/ops/_op_impl/aicpu/uniform_int.py +36 -0
  879. mindspore/ops/_op_impl/aicpu/uniform_real.py +33 -0
  880. mindspore/ops/_op_impl/aicpu/unique.py +31 -0
  881. mindspore/ops/_op_impl/aicpu/unique_consecutive.py +47 -0
  882. mindspore/ops/_op_impl/aicpu/unique_with_pad.py +32 -0
  883. mindspore/ops/_op_impl/aicpu/unravel_index.py +32 -0
  884. mindspore/ops/_op_impl/aicpu/unsorted_segment_prod.py +53 -0
  885. mindspore/ops/_op_impl/aicpu/unsorted_segment_sum.py +57 -0
  886. mindspore/ops/_op_impl/aicpu/unstack.py +45 -0
  887. mindspore/ops/_op_impl/aicpu/update_cache.py +44 -0
  888. mindspore/ops/_op_impl/aicpu/upper_bound.py +47 -0
  889. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +42 -0
  890. mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +49 -0
  891. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +40 -0
  892. mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +50 -0
  893. mindspore/ops/_op_impl/aicpu/xdivy.py +35 -0
  894. mindspore/ops/_op_impl/aicpu/xlogy.py +33 -0
  895. mindspore/ops/_op_impl/aicpu/zeros_like.py +42 -0
  896. mindspore/ops/_op_impl/aicpu/zeta.py +31 -0
  897. mindspore/ops/_op_impl/akg/__init__.py +19 -0
  898. mindspore/ops/_op_impl/akg/ascend/__init__.py +48 -0
  899. mindspore/ops/_op_impl/akg/ascend/abs.py +35 -0
  900. mindspore/ops/_op_impl/akg/ascend/add.py +42 -0
  901. mindspore/ops/_op_impl/akg/ascend/add_n.py +37 -0
  902. mindspore/ops/_op_impl/akg/ascend/batchmatmul.py +33 -0
  903. mindspore/ops/_op_impl/akg/ascend/cast.py +46 -0
  904. mindspore/ops/_op_impl/akg/ascend/equal.py +35 -0
  905. mindspore/ops/_op_impl/akg/ascend/exp.py +35 -0
  906. mindspore/ops/_op_impl/akg/ascend/expand_dims.py +33 -0
  907. mindspore/ops/_op_impl/akg/ascend/greater.py +34 -0
  908. mindspore/ops/_op_impl/akg/ascend/greater_equal.py +35 -0
  909. mindspore/ops/_op_impl/akg/ascend/less.py +31 -0
  910. mindspore/ops/_op_impl/akg/ascend/less_equal.py +35 -0
  911. mindspore/ops/_op_impl/akg/ascend/load_im2col.py +33 -0
  912. mindspore/ops/_op_impl/akg/ascend/log.py +34 -0
  913. mindspore/ops/_op_impl/akg/ascend/maximum.py +36 -0
  914. mindspore/ops/_op_impl/akg/ascend/minimum.py +39 -0
  915. mindspore/ops/_op_impl/akg/ascend/mul.py +41 -0
  916. mindspore/ops/_op_impl/akg/ascend/neg.py +37 -0
  917. mindspore/ops/_op_impl/akg/ascend/pow.py +35 -0
  918. mindspore/ops/_op_impl/akg/ascend/prod_force_se_a.py +33 -0
  919. mindspore/ops/_op_impl/akg/ascend/real_div.py +36 -0
  920. mindspore/ops/_op_impl/akg/ascend/reciprocal.py +32 -0
  921. mindspore/ops/_op_impl/akg/ascend/reduce_max.py +32 -0
  922. mindspore/ops/_op_impl/akg/ascend/reduce_min.py +32 -0
  923. mindspore/ops/_op_impl/akg/ascend/reduce_sum.py +37 -0
  924. mindspore/ops/_op_impl/akg/ascend/rsqrt.py +35 -0
  925. mindspore/ops/_op_impl/akg/ascend/select.py +37 -0
  926. mindspore/ops/_op_impl/akg/ascend/sqrt.py +35 -0
  927. mindspore/ops/_op_impl/akg/ascend/square.py +35 -0
  928. mindspore/ops/_op_impl/akg/ascend/sub.py +42 -0
  929. mindspore/ops/_op_impl/akg/cpu/__init__.py +23 -0
  930. mindspore/ops/_op_impl/akg/cpu/coo2csr.py +29 -0
  931. mindspore/ops/_op_impl/akg/cpu/csr2coo.py +29 -0
  932. mindspore/ops/_op_impl/akg/cpu/csr_gather.py +33 -0
  933. mindspore/ops/_op_impl/akg/cpu/csr_mm.py +34 -0
  934. mindspore/ops/_op_impl/akg/cpu/csr_mul.py +33 -0
  935. mindspore/ops/_op_impl/akg/cpu/csr_mv.py +33 -0
  936. mindspore/ops/_op_impl/akg/cpu/csr_reduce_sum.py +31 -0
  937. mindspore/ops/_op_impl/akg/gpu/__init__.py +24 -0
  938. mindspore/ops/_op_impl/akg/gpu/coo2csr.py +29 -0
  939. mindspore/ops/_op_impl/akg/gpu/csr2coo.py +29 -0
  940. mindspore/ops/_op_impl/akg/gpu/csr_div.py +36 -0
  941. mindspore/ops/_op_impl/akg/gpu/csr_gather.py +33 -0
  942. mindspore/ops/_op_impl/akg/gpu/csr_mm.py +37 -0
  943. mindspore/ops/_op_impl/akg/gpu/csr_mul.py +36 -0
  944. mindspore/ops/_op_impl/akg/gpu/csr_mv.py +36 -0
  945. mindspore/ops/_op_impl/akg/gpu/csr_reduce_sum.py +33 -0
  946. mindspore/ops/_op_impl/cpu/__init__.py +78 -0
  947. mindspore/ops/_op_impl/cpu/adam.py +49 -0
  948. mindspore/ops/_op_impl/cpu/adam_weight_decay.py +47 -0
  949. mindspore/ops/_op_impl/cpu/arg_max.py +30 -0
  950. mindspore/ops/_op_impl/cpu/arg_max_with_value.py +31 -0
  951. mindspore/ops/_op_impl/cpu/arg_min_with_value.py +31 -0
  952. mindspore/ops/_op_impl/cpu/buffer_append.py +28 -0
  953. mindspore/ops/_op_impl/cpu/buffer_get.py +28 -0
  954. mindspore/ops/_op_impl/cpu/buffer_sample.py +28 -0
  955. mindspore/ops/_op_impl/cpu/cast.py +171 -0
  956. mindspore/ops/_op_impl/cpu/concat_offset.py +38 -0
  957. mindspore/ops/_op_impl/cpu/conv2d.py +30 -0
  958. mindspore/ops/_op_impl/cpu/conv3d.py +30 -0
  959. mindspore/ops/_op_impl/cpu/div.py +32 -0
  960. mindspore/ops/_op_impl/cpu/dropout.py +31 -0
  961. mindspore/ops/_op_impl/cpu/dropout_grad.py +30 -0
  962. mindspore/ops/_op_impl/cpu/dynamic_shape.py +42 -0
  963. mindspore/ops/_op_impl/cpu/dynamic_stitch.py +41 -0
  964. mindspore/ops/_op_impl/cpu/equal_count.py +30 -0
  965. mindspore/ops/_op_impl/cpu/gather_d.py +49 -0
  966. mindspore/ops/_op_impl/cpu/gather_d_grad.py +38 -0
  967. mindspore/ops/_op_impl/cpu/gather_d_grad_v2.py +40 -0
  968. mindspore/ops/_op_impl/cpu/gather_v2.py +40 -0
  969. mindspore/ops/_op_impl/cpu/hsigmoid.py +33 -0
  970. mindspore/ops/_op_impl/cpu/hsigmoid_grad.py +34 -0
  971. mindspore/ops/_op_impl/cpu/hswish.py +32 -0
  972. mindspore/ops/_op_impl/cpu/hswish_grad.py +33 -0
  973. mindspore/ops/_op_impl/cpu/identity_n.py +40 -0
  974. mindspore/ops/_op_impl/cpu/is_finite.py +39 -0
  975. mindspore/ops/_op_impl/cpu/l2loss.py +30 -0
  976. mindspore/ops/_op_impl/cpu/layer_norm.py +36 -0
  977. mindspore/ops/_op_impl/cpu/layer_norm_grad.py +38 -0
  978. mindspore/ops/_op_impl/cpu/maximum.py +35 -0
  979. mindspore/ops/_op_impl/cpu/maximum_grad.py +47 -0
  980. mindspore/ops/_op_impl/cpu/minimum.py +40 -0
  981. mindspore/ops/_op_impl/cpu/minimum_grad.py +51 -0
  982. mindspore/ops/_op_impl/cpu/mirror_pad.py +36 -0
  983. mindspore/ops/_op_impl/cpu/mirror_pad_grad.py +36 -0
  984. mindspore/ops/_op_impl/cpu/mul.py +32 -0
  985. mindspore/ops/_op_impl/cpu/one_hot.py +31 -0
  986. mindspore/ops/_op_impl/cpu/pad.py +32 -0
  987. mindspore/ops/_op_impl/cpu/pow.py +32 -0
  988. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +42 -0
  989. mindspore/ops/_op_impl/cpu/pyexecute.py +29 -0
  990. mindspore/ops/_op_impl/cpu/pyfunc.py +29 -0
  991. mindspore/ops/_op_impl/cpu/range.py +34 -0
  992. mindspore/ops/_op_impl/cpu/real_div.py +33 -0
  993. mindspore/ops/_op_impl/cpu/reduce_all.py +29 -0
  994. mindspore/ops/_op_impl/cpu/reduce_any.py +29 -0
  995. mindspore/ops/_op_impl/cpu/reduce_max.py +32 -0
  996. mindspore/ops/_op_impl/cpu/reduce_mean.py +40 -0
  997. mindspore/ops/_op_impl/cpu/reduce_min.py +32 -0
  998. mindspore/ops/_op_impl/cpu/reduce_prod.py +40 -0
  999. mindspore/ops/_op_impl/cpu/reduce_std.py +31 -0
  1000. mindspore/ops/_op_impl/cpu/reduce_sum.py +41 -0
  1001. mindspore/ops/_op_impl/cpu/space_to_batch_nd.py +38 -0
  1002. mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
  1003. mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
  1004. mindspore/ops/_op_impl/cpu/split.py +34 -0
  1005. mindspore/ops/_op_impl/cpu/sspaddmm.py +95 -0
  1006. mindspore/ops/_op_impl/cpu/stack.py +38 -0
  1007. mindspore/ops/_op_impl/cpu/sub.py +32 -0
  1008. mindspore/ops/_op_impl/cpu/tensor_copy_slices.py +41 -0
  1009. mindspore/ops/_op_impl/cpu/tile.py +37 -0
  1010. mindspore/ops/_op_impl/cpu/top_k.py +31 -0
  1011. mindspore/ops/_op_impl/cpu/transpose.py +39 -0
  1012. mindspore/ops/_primitive_cache.py +90 -0
  1013. mindspore/ops/_register_for_op.py +73 -0
  1014. mindspore/ops/_utils/__init__.py +20 -0
  1015. mindspore/ops/_utils/utils.py +147 -0
  1016. mindspore/ops/_vmap/__init__.py +25 -0
  1017. mindspore/ops/_vmap/vmap_array_ops.py +2149 -0
  1018. mindspore/ops/_vmap/vmap_base.py +533 -0
  1019. mindspore/ops/_vmap/vmap_convolution_ops.py +441 -0
  1020. mindspore/ops/_vmap/vmap_debug_ops.py +50 -0
  1021. mindspore/ops/_vmap/vmap_grad_math_ops.py +274 -0
  1022. mindspore/ops/_vmap/vmap_grad_nn_ops.py +806 -0
  1023. mindspore/ops/_vmap/vmap_image_ops.py +194 -0
  1024. mindspore/ops/_vmap/vmap_math_ops.py +993 -0
  1025. mindspore/ops/_vmap/vmap_nn_ops.py +2250 -0
  1026. mindspore/ops/_vmap/vmap_other_ops.py +105 -0
  1027. mindspore/ops/_vmap/vmap_random_ops.py +122 -0
  1028. mindspore/ops/_vmap/vmap_sparse_ops.py +89 -0
  1029. mindspore/ops/auto_generate/__init__.py +31 -0
  1030. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +309 -0
  1031. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +252 -0
  1032. mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
  1033. mindspore/ops/auto_generate/gen_extend_func.py +1701 -0
  1034. mindspore/ops/auto_generate/gen_ops_def.py +8482 -0
  1035. mindspore/ops/auto_generate/gen_ops_prim.py +16704 -0
  1036. mindspore/ops/auto_generate/pyboost_inner_prim.py +549 -0
  1037. mindspore/ops/composite/__init__.py +71 -0
  1038. mindspore/ops/composite/base.py +1318 -0
  1039. mindspore/ops/composite/env_ops.py +41 -0
  1040. mindspore/ops/composite/math_ops.py +125 -0
  1041. mindspore/ops/composite/multitype_ops/__init__.py +77 -0
  1042. mindspore/ops/composite/multitype_ops/_compile_utils.py +1459 -0
  1043. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +897 -0
  1044. mindspore/ops/composite/multitype_ops/add_impl.py +606 -0
  1045. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +56 -0
  1046. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +56 -0
  1047. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +56 -0
  1048. mindspore/ops/composite/multitype_ops/div_impl.py +189 -0
  1049. mindspore/ops/composite/multitype_ops/equal_impl.py +335 -0
  1050. mindspore/ops/composite/multitype_ops/floordiv_impl.py +88 -0
  1051. mindspore/ops/composite/multitype_ops/getitem_impl.py +400 -0
  1052. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +109 -0
  1053. mindspore/ops/composite/multitype_ops/greater_impl.py +110 -0
  1054. mindspore/ops/composite/multitype_ops/in_impl.py +196 -0
  1055. mindspore/ops/composite/multitype_ops/left_shift_impl.py +37 -0
  1056. mindspore/ops/composite/multitype_ops/less_equal_impl.py +111 -0
  1057. mindspore/ops/composite/multitype_ops/less_impl.py +112 -0
  1058. mindspore/ops/composite/multitype_ops/logic_not_impl.py +113 -0
  1059. mindspore/ops/composite/multitype_ops/logical_and_impl.py +60 -0
  1060. mindspore/ops/composite/multitype_ops/logical_or_impl.py +61 -0
  1061. mindspore/ops/composite/multitype_ops/mod_impl.py +86 -0
  1062. mindspore/ops/composite/multitype_ops/mul_impl.py +294 -0
  1063. mindspore/ops/composite/multitype_ops/negative_impl.py +79 -0
  1064. mindspore/ops/composite/multitype_ops/not_equal_impl.py +290 -0
  1065. mindspore/ops/composite/multitype_ops/not_in_impl.py +196 -0
  1066. mindspore/ops/composite/multitype_ops/ones_like_impl.py +96 -0
  1067. mindspore/ops/composite/multitype_ops/pow_impl.py +87 -0
  1068. mindspore/ops/composite/multitype_ops/right_shift_impl.py +37 -0
  1069. mindspore/ops/composite/multitype_ops/setitem_impl.py +884 -0
  1070. mindspore/ops/composite/multitype_ops/sub_impl.py +116 -0
  1071. mindspore/ops/composite/multitype_ops/uadd_impl.py +29 -0
  1072. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +228 -0
  1073. mindspore/ops/deprecated.py +315 -0
  1074. mindspore/ops/function/__init__.py +782 -0
  1075. mindspore/ops/function/array_func.py +7226 -0
  1076. mindspore/ops/function/clip_func.py +384 -0
  1077. mindspore/ops/function/debug_func.py +181 -0
  1078. mindspore/ops/function/fft_func.py +44 -0
  1079. mindspore/ops/function/grad/__init__.py +34 -0
  1080. mindspore/ops/function/grad/grad_func.py +1425 -0
  1081. mindspore/ops/function/image_func.py +292 -0
  1082. mindspore/ops/function/linalg_func.py +416 -0
  1083. mindspore/ops/function/math_func.py +12228 -0
  1084. mindspore/ops/function/nn_func.py +8609 -0
  1085. mindspore/ops/function/other_func.py +115 -0
  1086. mindspore/ops/function/parameter_func.py +134 -0
  1087. mindspore/ops/function/random_func.py +1715 -0
  1088. mindspore/ops/function/reshard_func.py +104 -0
  1089. mindspore/ops/function/sparse_func.py +884 -0
  1090. mindspore/ops/function/sparse_unary_func.py +2422 -0
  1091. mindspore/ops/function/spectral_func.py +150 -0
  1092. mindspore/ops/function/vmap_func.py +117 -0
  1093. mindspore/ops/functional.py +464 -0
  1094. mindspore/ops/op_info_register.py +1572 -0
  1095. mindspore/ops/operations/__init__.py +722 -0
  1096. mindspore/ops/operations/_csr_ops.py +403 -0
  1097. mindspore/ops/operations/_custom_grad.py +181 -0
  1098. mindspore/ops/operations/_embedding_cache_ops.py +307 -0
  1099. mindspore/ops/operations/_grad_ops.py +2978 -0
  1100. mindspore/ops/operations/_infer_ops.py +19 -0
  1101. mindspore/ops/operations/_inner_ops.py +2544 -0
  1102. mindspore/ops/operations/_map_tensor_ops.py +112 -0
  1103. mindspore/ops/operations/_ms_kernel.py +601 -0
  1104. mindspore/ops/operations/_ocr_ops.py +379 -0
  1105. mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
  1106. mindspore/ops/operations/_pyfunc_registry.py +58 -0
  1107. mindspore/ops/operations/_quant_ops.py +1844 -0
  1108. mindspore/ops/operations/_rl_inner_ops.py +1231 -0
  1109. mindspore/ops/operations/_scalar_ops.py +106 -0
  1110. mindspore/ops/operations/_sequence_ops.py +1155 -0
  1111. mindspore/ops/operations/_sparse_grad_ops.py +56 -0
  1112. mindspore/ops/operations/_tensor_array.py +359 -0
  1113. mindspore/ops/operations/_thor_ops.py +807 -0
  1114. mindspore/ops/operations/array_ops.py +6124 -0
  1115. mindspore/ops/operations/comm_ops.py +1985 -0
  1116. mindspore/ops/operations/control_ops.py +127 -0
  1117. mindspore/ops/operations/custom_ops.py +1129 -0
  1118. mindspore/ops/operations/debug_ops.py +678 -0
  1119. mindspore/ops/operations/image_ops.py +1041 -0
  1120. mindspore/ops/operations/inner_ops.py +697 -0
  1121. mindspore/ops/operations/linalg_ops.py +95 -0
  1122. mindspore/ops/operations/manually_defined/__init__.py +24 -0
  1123. mindspore/ops/operations/manually_defined/_inner.py +73 -0
  1124. mindspore/ops/operations/manually_defined/ops_def.py +2271 -0
  1125. mindspore/ops/operations/math_ops.py +5095 -0
  1126. mindspore/ops/operations/nn_ops.py +9575 -0
  1127. mindspore/ops/operations/other_ops.py +874 -0
  1128. mindspore/ops/operations/random_ops.py +1288 -0
  1129. mindspore/ops/operations/reshard_ops.py +53 -0
  1130. mindspore/ops/operations/rl_ops.py +288 -0
  1131. mindspore/ops/operations/sparse_ops.py +2753 -0
  1132. mindspore/ops/operations/spectral_ops.py +111 -0
  1133. mindspore/ops/primitive.py +1046 -0
  1134. mindspore/ops/signature.py +54 -0
  1135. mindspore/ops/vm_impl_registry.py +91 -0
  1136. mindspore/ops_generate/__init__.py +27 -0
  1137. mindspore/ops_generate/arg_dtype_cast.py +252 -0
  1138. mindspore/ops_generate/arg_handler.py +197 -0
  1139. mindspore/ops_generate/gen_aclnn_implement.py +263 -0
  1140. mindspore/ops_generate/gen_constants.py +36 -0
  1141. mindspore/ops_generate/gen_ops.py +1099 -0
  1142. mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
  1143. mindspore/ops_generate/gen_pyboost_func.py +1052 -0
  1144. mindspore/ops_generate/gen_utils.py +209 -0
  1145. mindspore/ops_generate/op_proto.py +145 -0
  1146. mindspore/ops_generate/pyboost_utils.py +367 -0
  1147. mindspore/ops_generate/template.py +261 -0
  1148. mindspore/parallel/__init__.py +30 -0
  1149. mindspore/parallel/_auto_parallel_context.py +1486 -0
  1150. mindspore/parallel/_cell_wrapper.py +174 -0
  1151. mindspore/parallel/_cost_model_context.py +700 -0
  1152. mindspore/parallel/_dp_allreduce_fusion.py +159 -0
  1153. mindspore/parallel/_offload_context.py +275 -0
  1154. mindspore/parallel/_parallel_serialization.py +561 -0
  1155. mindspore/parallel/_ps_context.py +242 -0
  1156. mindspore/parallel/_recovery_context.py +110 -0
  1157. mindspore/parallel/_tensor.py +730 -0
  1158. mindspore/parallel/_transformer/__init__.py +35 -0
  1159. mindspore/parallel/_transformer/layers.py +765 -0
  1160. mindspore/parallel/_transformer/loss.py +251 -0
  1161. mindspore/parallel/_transformer/moe.py +693 -0
  1162. mindspore/parallel/_transformer/op_parallel_config.py +222 -0
  1163. mindspore/parallel/_transformer/transformer.py +3119 -0
  1164. mindspore/parallel/_utils.py +612 -0
  1165. mindspore/parallel/algo_parameter_config.py +400 -0
  1166. mindspore/parallel/checkpoint_transform.py +650 -0
  1167. mindspore/parallel/cluster/__init__.py +15 -0
  1168. mindspore/parallel/cluster/process_entity/__init__.py +18 -0
  1169. mindspore/parallel/cluster/process_entity/_api.py +352 -0
  1170. mindspore/parallel/cluster/process_entity/_utils.py +101 -0
  1171. mindspore/parallel/cluster/run.py +136 -0
  1172. mindspore/parallel/mpi/__init__.py +14 -0
  1173. mindspore/parallel/mpi/_mpi_config.py +116 -0
  1174. mindspore/parallel/parameter_broadcast.py +151 -0
  1175. mindspore/parallel/shard.py +481 -0
  1176. mindspore/parallel/transform_safetensors.py +993 -0
  1177. mindspore/profiler/__init__.py +28 -0
  1178. mindspore/profiler/common/__init__.py +14 -0
  1179. mindspore/profiler/common/constant.py +29 -0
  1180. mindspore/profiler/common/exceptions/__init__.py +14 -0
  1181. mindspore/profiler/common/exceptions/error_code.py +83 -0
  1182. mindspore/profiler/common/exceptions/exceptions.py +286 -0
  1183. mindspore/profiler/common/process_pool.py +41 -0
  1184. mindspore/profiler/common/registry.py +47 -0
  1185. mindspore/profiler/common/singleton.py +28 -0
  1186. mindspore/profiler/common/struct_type.py +118 -0
  1187. mindspore/profiler/common/util.py +472 -0
  1188. mindspore/profiler/common/validator/__init__.py +14 -0
  1189. mindspore/profiler/common/validator/validate_path.py +84 -0
  1190. mindspore/profiler/dynamic_profiler.py +694 -0
  1191. mindspore/profiler/envprofiling.py +254 -0
  1192. mindspore/profiler/parser/__init__.py +14 -0
  1193. mindspore/profiler/parser/aicpu_data_parser.py +272 -0
  1194. mindspore/profiler/parser/ascend_analysis/__init__.py +14 -0
  1195. mindspore/profiler/parser/ascend_analysis/constant.py +71 -0
  1196. mindspore/profiler/parser/ascend_analysis/file_manager.py +180 -0
  1197. mindspore/profiler/parser/ascend_analysis/function_event.py +185 -0
  1198. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +136 -0
  1199. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +131 -0
  1200. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +104 -0
  1201. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  1202. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +123 -0
  1203. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
  1204. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +75 -0
  1205. mindspore/profiler/parser/ascend_cluster_generator.py +116 -0
  1206. mindspore/profiler/parser/ascend_communicate_generator.py +314 -0
  1207. mindspore/profiler/parser/ascend_flops_generator.py +116 -0
  1208. mindspore/profiler/parser/ascend_fpbp_generator.py +82 -0
  1209. mindspore/profiler/parser/ascend_hccl_generator.py +271 -0
  1210. mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
  1211. mindspore/profiler/parser/ascend_memory_generator.py +185 -0
  1212. mindspore/profiler/parser/ascend_msprof_exporter.py +282 -0
  1213. mindspore/profiler/parser/ascend_msprof_generator.py +187 -0
  1214. mindspore/profiler/parser/ascend_op_generator.py +334 -0
  1215. mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
  1216. mindspore/profiler/parser/ascend_timeline_generator.py +545 -0
  1217. mindspore/profiler/parser/base_timeline_generator.py +483 -0
  1218. mindspore/profiler/parser/container.py +229 -0
  1219. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +697 -0
  1220. mindspore/profiler/parser/flops_parser.py +531 -0
  1221. mindspore/profiler/parser/framework_enum.py +111 -0
  1222. mindspore/profiler/parser/framework_parser.py +464 -0
  1223. mindspore/profiler/parser/framework_struct.py +61 -0
  1224. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  1225. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  1226. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  1227. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  1228. mindspore/profiler/parser/hccl_parser.py +573 -0
  1229. mindspore/profiler/parser/hwts_log_parser.py +122 -0
  1230. mindspore/profiler/parser/integrator.py +526 -0
  1231. mindspore/profiler/parser/memory_usage_parser.py +277 -0
  1232. mindspore/profiler/parser/minddata_analyzer.py +800 -0
  1233. mindspore/profiler/parser/minddata_parser.py +186 -0
  1234. mindspore/profiler/parser/minddata_pipeline_parser.py +299 -0
  1235. mindspore/profiler/parser/op_intermediate_parser.py +149 -0
  1236. mindspore/profiler/parser/optime_parser.py +250 -0
  1237. mindspore/profiler/parser/profiler_info.py +213 -0
  1238. mindspore/profiler/parser/step_trace_parser.py +666 -0
  1239. mindspore/profiler/profiler.py +153 -0
  1240. mindspore/profiler/profiling.py +1922 -0
  1241. mindspore/rewrite/__init__.py +28 -0
  1242. mindspore/rewrite/api/__init__.py +17 -0
  1243. mindspore/rewrite/api/node.py +519 -0
  1244. mindspore/rewrite/api/node_type.py +53 -0
  1245. mindspore/rewrite/api/pattern_engine.py +490 -0
  1246. mindspore/rewrite/api/scoped_value.py +181 -0
  1247. mindspore/rewrite/api/symbol_tree.py +497 -0
  1248. mindspore/rewrite/ast_helpers/__init__.py +25 -0
  1249. mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
  1250. mindspore/rewrite/ast_helpers/ast_finder.py +404 -0
  1251. mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
  1252. mindspore/rewrite/ast_helpers/ast_modifier.py +605 -0
  1253. mindspore/rewrite/ast_helpers/ast_replacer.py +79 -0
  1254. mindspore/rewrite/common/__init__.py +19 -0
  1255. mindspore/rewrite/common/config.py +24 -0
  1256. mindspore/rewrite/common/error_log.py +39 -0
  1257. mindspore/rewrite/common/event.py +28 -0
  1258. mindspore/rewrite/common/namer.py +271 -0
  1259. mindspore/rewrite/common/namespace.py +118 -0
  1260. mindspore/rewrite/common/observable.py +44 -0
  1261. mindspore/rewrite/common/observer.py +54 -0
  1262. mindspore/rewrite/node/__init__.py +22 -0
  1263. mindspore/rewrite/node/call_function.py +95 -0
  1264. mindspore/rewrite/node/cell_container.py +139 -0
  1265. mindspore/rewrite/node/control_flow.py +113 -0
  1266. mindspore/rewrite/node/node.py +1428 -0
  1267. mindspore/rewrite/node/node_manager.py +283 -0
  1268. mindspore/rewrite/node/node_topological_manager.py +223 -0
  1269. mindspore/rewrite/parsers/__init__.py +29 -0
  1270. mindspore/rewrite/parsers/arguments_parser.py +63 -0
  1271. mindspore/rewrite/parsers/assign_parser.py +852 -0
  1272. mindspore/rewrite/parsers/attribute_parser.py +57 -0
  1273. mindspore/rewrite/parsers/class_def_parser.py +289 -0
  1274. mindspore/rewrite/parsers/constant_parser.py +104 -0
  1275. mindspore/rewrite/parsers/container_parser.py +88 -0
  1276. mindspore/rewrite/parsers/expr_parser.py +55 -0
  1277. mindspore/rewrite/parsers/for_parser.py +61 -0
  1278. mindspore/rewrite/parsers/function_def_parser.py +84 -0
  1279. mindspore/rewrite/parsers/if_parser.py +85 -0
  1280. mindspore/rewrite/parsers/module_parser.py +117 -0
  1281. mindspore/rewrite/parsers/parser.py +43 -0
  1282. mindspore/rewrite/parsers/parser_register.py +86 -0
  1283. mindspore/rewrite/parsers/return_parser.py +37 -0
  1284. mindspore/rewrite/parsers/while_parser.py +59 -0
  1285. mindspore/rewrite/sparsify/__init__.py +0 -0
  1286. mindspore/rewrite/sparsify/sparse_transformer.py +457 -0
  1287. mindspore/rewrite/sparsify/sparsify.py +112 -0
  1288. mindspore/rewrite/sparsify/utils.py +179 -0
  1289. mindspore/rewrite/symbol_tree/__init__.py +20 -0
  1290. mindspore/rewrite/symbol_tree/symbol_tree.py +1819 -0
  1291. mindspore/rewrite/symbol_tree/symbol_tree_builder.py +76 -0
  1292. mindspore/rewrite/symbol_tree/symbol_tree_dumper.py +142 -0
  1293. mindspore/run_check/__init__.py +20 -0
  1294. mindspore/run_check/_check_version.py +507 -0
  1295. mindspore/run_check/run_check.py +66 -0
  1296. mindspore/safeguard/__init__.py +18 -0
  1297. mindspore/safeguard/rewrite_obfuscation.py +875 -0
  1298. mindspore/scipy/__init__.py +18 -0
  1299. mindspore/scipy/fft.py +264 -0
  1300. mindspore/scipy/linalg.py +919 -0
  1301. mindspore/scipy/ops.py +165 -0
  1302. mindspore/scipy/ops_grad.py +115 -0
  1303. mindspore/scipy/ops_wrapper.py +74 -0
  1304. mindspore/scipy/optimize/__init__.py +20 -0
  1305. mindspore/scipy/optimize/_bfgs.py +230 -0
  1306. mindspore/scipy/optimize/_lagrange.py +201 -0
  1307. mindspore/scipy/optimize/_lbfgs.py +146 -0
  1308. mindspore/scipy/optimize/gradient_optimization_algorithm.py +168 -0
  1309. mindspore/scipy/optimize/line_search.py +370 -0
  1310. mindspore/scipy/optimize/linear_sum_assignment.py +78 -0
  1311. mindspore/scipy/optimize/minimize.py +200 -0
  1312. mindspore/scipy/utils.py +156 -0
  1313. mindspore/scipy/utils_const.py +246 -0
  1314. mindspore/train/__init__.py +48 -0
  1315. mindspore/train/_utils.py +465 -0
  1316. mindspore/train/amp.py +935 -0
  1317. mindspore/train/anf_ir_pb2.py +1517 -0
  1318. mindspore/train/callback/__init__.py +44 -0
  1319. mindspore/train/callback/_backup_and_restore.py +117 -0
  1320. mindspore/train/callback/_callback.py +613 -0
  1321. mindspore/train/callback/_checkpoint.py +814 -0
  1322. mindspore/train/callback/_cluster_monitor.py +201 -0
  1323. mindspore/train/callback/_dataset_graph.py +150 -0
  1324. mindspore/train/callback/_early_stop.py +239 -0
  1325. mindspore/train/callback/_flops_collector.py +239 -0
  1326. mindspore/train/callback/_history.py +92 -0
  1327. mindspore/train/callback/_lambda_callback.py +80 -0
  1328. mindspore/train/callback/_landscape.py +1049 -0
  1329. mindspore/train/callback/_loss_monitor.py +107 -0
  1330. mindspore/train/callback/_lr_scheduler_callback.py +76 -0
  1331. mindspore/train/callback/_on_request_exit.py +298 -0
  1332. mindspore/train/callback/_reduce_lr_on_plateau.py +226 -0
  1333. mindspore/train/callback/_summary_collector.py +1184 -0
  1334. mindspore/train/callback/_tft_register.py +352 -0
  1335. mindspore/train/callback/_time_monitor.py +141 -0
  1336. mindspore/train/checkpoint_pb2.py +233 -0
  1337. mindspore/train/data_sink.py +219 -0
  1338. mindspore/train/dataset_helper.py +692 -0
  1339. mindspore/train/lineage_pb2.py +1260 -0
  1340. mindspore/train/loss_scale_manager.py +213 -0
  1341. mindspore/train/memory_profiling_pb2.py +298 -0
  1342. mindspore/train/metrics/__init__.py +175 -0
  1343. mindspore/train/metrics/accuracy.py +133 -0
  1344. mindspore/train/metrics/auc.py +129 -0
  1345. mindspore/train/metrics/bleu_score.py +170 -0
  1346. mindspore/train/metrics/confusion_matrix.py +700 -0
  1347. mindspore/train/metrics/cosine_similarity.py +109 -0
  1348. mindspore/train/metrics/dice.py +116 -0
  1349. mindspore/train/metrics/error.py +175 -0
  1350. mindspore/train/metrics/fbeta.py +167 -0
  1351. mindspore/train/metrics/hausdorff_distance.py +333 -0
  1352. mindspore/train/metrics/loss.py +97 -0
  1353. mindspore/train/metrics/mean_surface_distance.py +189 -0
  1354. mindspore/train/metrics/metric.py +373 -0
  1355. mindspore/train/metrics/occlusion_sensitivity.py +225 -0
  1356. mindspore/train/metrics/perplexity.py +133 -0
  1357. mindspore/train/metrics/precision.py +160 -0
  1358. mindspore/train/metrics/recall.py +159 -0
  1359. mindspore/train/metrics/roc.py +223 -0
  1360. mindspore/train/metrics/root_mean_square_surface_distance.py +191 -0
  1361. mindspore/train/metrics/topk.py +167 -0
  1362. mindspore/train/mind_ir_pb2.py +1908 -0
  1363. mindspore/train/model.py +2252 -0
  1364. mindspore/train/node_strategy_pb2.py +653 -0
  1365. mindspore/train/print_pb2.py +184 -0
  1366. mindspore/train/profiling_parallel_pb2.py +151 -0
  1367. mindspore/train/serialization.py +3325 -0
  1368. mindspore/train/summary/__init__.py +23 -0
  1369. mindspore/train/summary/_lineage_adapter.py +41 -0
  1370. mindspore/train/summary/_summary_adapter.py +496 -0
  1371. mindspore/train/summary/_writer_pool.py +207 -0
  1372. mindspore/train/summary/enums.py +56 -0
  1373. mindspore/train/summary/summary_record.py +581 -0
  1374. mindspore/train/summary/writer.py +167 -0
  1375. mindspore/train/summary_pb2.py +1165 -0
  1376. mindspore/train/train_thor/__init__.py +20 -0
  1377. mindspore/train/train_thor/convert_utils.py +268 -0
  1378. mindspore/train/train_thor/dataset_helper.py +192 -0
  1379. mindspore/train/train_thor/model_thor.py +257 -0
  1380. mindspore/utils/__init__.py +21 -0
  1381. mindspore/utils/utils.py +60 -0
  1382. mindspore/version.py +1 -0
  1383. mindspore-2.4.0.dist-info/METADATA +352 -0
  1384. mindspore-2.4.0.dist-info/RECORD +1387 -0
  1385. mindspore-2.4.0.dist-info/WHEEL +5 -0
  1386. mindspore-2.4.0.dist-info/entry_points.txt +3 -0
  1387. mindspore-2.4.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2161 @@
1
+ # Copyright 2019-2023 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """
16
+ This file contains specific text dataset loading classes. You can easily use
17
+ these classes to load the prepared dataset. For example:
18
+ IMDBDataset: which is IMDB dataset.
19
+ WikiTextDataset: which is Wiki text dataset.
20
+ CLUEDataset: which is CLUE dataset.
21
+ YelpReviewDataset: which is yelp review dataset.
22
+ ...
23
+ After declaring the dataset object, you can further apply dataset operations
24
+ (e.g. filter, skip, concat, map, batch) on it.
25
+ """
26
+ import mindspore._c_dataengine as cde
27
+
28
+ from .datasets import TextBaseDataset, SourceDataset, MappableDataset, Shuffle
29
+ from .validators import check_imdb_dataset, check_iwslt2016_dataset, check_iwslt2017_dataset, \
30
+ check_penn_treebank_dataset, check_ag_news_dataset, check_amazon_review_dataset, check_udpos_dataset, \
31
+ check_wiki_text_dataset, check_conll2000_dataset, check_cluedataset, \
32
+ check_sogou_news_dataset, check_textfiledataset, check_dbpedia_dataset, check_yelp_review_dataset, \
33
+ check_en_wik9_dataset, check_yahoo_answers_dataset, check_multi30k_dataset, check_squad_dataset, \
34
+ check_sst2_dataset
35
+
36
+ from ..core.validator_helpers import replace_none
37
+
38
+
39
class AGNewsDataset(SourceDataset, TextBaseDataset):
    """
    AG News dataset.

    The generated dataset has three columns: :py:obj:`[index, title, description]` ,
    and all three columns are of string type.

    Args:
        dataset_dir (str): Path to the root directory that contains the dataset.
        usage (str, optional): Part of the dataset to read, one of ``'train'`` , ``'test'`` and ``'all'`` .
            Default: ``None`` , all samples.
        num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` ,
            reads the full dataset.
        num_parallel_workers (int, optional): Number of worker threads to read the data.
            Default: ``None`` , will use global default workers(8), it can be set
            by :func:`mindspore.dataset.config.set_num_parallel_workers` .
        shuffle (Union[bool, Shuffle], optional): Whether to reshuffle the data every epoch.
            Both bool values and Shuffle enum members are accepted.
            Default: ``Shuffle.GLOBAL`` .
            If `shuffle` is ``False``, no shuffling will be performed.
            If `shuffle` is ``True``, it is equivalent to setting `shuffle` to
            ``mindspore.dataset.Shuffle.GLOBAL`` .
            The shuffling mode can be selected by passing in an enumeration member:

            - ``Shuffle.GLOBAL``: Shuffle both the files and samples.

            - ``Shuffle.FILES``: Shuffle files only.

        num_shards (int, optional): Number of shards that the dataset will be divided into.
            Default: ``None``. When this argument is specified, `num_samples` reflects the
            maximum number of samples per shard.
        shard_id (int, optional): The shard ID within `num_shards` . This
            argument can only be specified when `num_shards` is also specified. Default: ``None``.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
            Default: ``None``, which means no cache is used.

    Raises:
        RuntimeError: If `dataset_dir` does not contain data files.
        RuntimeError: If `num_shards` is specified but `shard_id` is None.
        RuntimeError: If `shard_id` is specified but `num_shards` is None.
        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

    Tutorial Examples:
        - `Load & Process Data With Dataset Pipeline
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_

    Examples:
        >>> import mindspore.dataset as ds
        >>> ag_news_dataset_dir = "/path/to/ag_news_dataset_file"
        >>> dataset = ds.AGNewsDataset(dataset_dir=ag_news_dataset_dir, usage='all')

    About AGNews dataset:

    AG is a collection of over 1 million news articles. The news articles were collected
    by ComeToMyHead from over 2,000 news sources in over 1 year of activity. ComeToMyHead
    is an academic news search engine that has been in operation since July 2004.
    The dataset is provided by academics for research purposes such as data mining
    (clustering, classification, etc.), information retrieval (ranking, searching, etc.),
    xml, data compression, data streaming, and any other non-commercial activities.
    AG's news topic classification dataset was constructed by selecting the four largest
    classes from the original corpus. Each class contains 30,000 training samples and
    1,900 test samples. The total number of training samples in train.csv is 120,000
    and the number of test samples in test.csv is 7,600.

    You can unzip the dataset files into the following structure and read them with MindSpore's API:

    .. code-block::

        .
        └── ag_news_dataset_dir
            ├── classes.txt
            ├── train.csv
            ├── test.csv
            └── readme.txt

    Citation:

    .. code-block::

        @misc{zhang2015characterlevel,
        title={Character-level Convolutional Networks for Text Classification},
        author={Xiang Zhang and Junbo Zhao and Yann LeCun},
        year={2015},
        eprint={1509.01626},
        archivePrefix={arXiv},
        primaryClass={cs.LG}
        }
    """

    @check_ag_news_dataset
    def __init__(self, dataset_dir, usage=None, num_samples=None,
                 num_parallel_workers=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None):
        # Everything except the directory and the usage is forwarded, by keyword,
        # to the base-class initializer.
        base_options = {
            "num_parallel_workers": num_parallel_workers,
            "num_samples": num_samples,
            "shuffle": shuffle,
            "num_shards": num_shards,
            "shard_id": shard_id,
            "cache": cache,
        }
        super().__init__(**base_options)
        self.dataset_dir = dataset_dir
        # ``replace_none`` supplies "all" as the fallback for an unspecified usage.
        self.usage = replace_none(usage, "all")

    def parse(self, children=None):
        """Create the ``cde.AGNewsNode`` for this dataset; `children` is not used here."""
        node_args = (
            self.dataset_dir,
            self.usage,
            self.num_samples,
            self.shuffle_flag,
            self.num_shards,
            self.shard_id,
        )
        return cde.AGNewsNode(*node_args)
140
+
141
+
142
class AmazonReviewDataset(SourceDataset, TextBaseDataset):
    """
    Amazon Review Polarity and Amazon Review Full datasets.

    The generated dataset has three columns: :py:obj:`[label, title, content]` ,
    and all three columns are of string type.

    Args:
        dataset_dir (str): Path to the root directory that contains the Amazon Review Polarity dataset
            or the Amazon Review Full dataset.
        usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
            For Polarity dataset, ``'train'`` will read from 3,600,000 train samples,
            ``'test'`` will read from 400,000 test samples,
            ``'all'`` will read from all 4,000,000 samples.
            For Full dataset, ``'train'`` will read from 3,000,000 train samples,
            ``'test'`` will read from 650,000 test samples,
            ``'all'`` will read from all 3,650,000 samples. Default: ``None``, all samples.
        num_samples (int, optional): Number of samples (rows) to be read. Default: ``None``,
            reads the full dataset.
        num_parallel_workers (int, optional): Number of worker threads to read the data.
            Default: ``None`` , will use global default workers(8), it can be set
            by :func:`mindspore.dataset.config.set_num_parallel_workers` .
        shuffle (Union[bool, Shuffle], optional): Whether to reshuffle the data every epoch.
            Both bool values and Shuffle enum members are accepted.
            Default: ``Shuffle.GLOBAL`` .
            If `shuffle` is ``False``, no shuffling will be performed.
            If `shuffle` is ``True``, it is equivalent to setting `shuffle` to
            ``mindspore.dataset.Shuffle.GLOBAL``.
            The shuffling mode can be selected by passing in an enumeration member:

            - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.

            - ``Shuffle.FILES`` : Shuffle files only.

        num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
            When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
        shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
            argument can only be specified when `num_shards` is also specified.
        cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
            `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
            Default: ``None`` , which means no cache is used.

    Raises:
        RuntimeError: If `dataset_dir` does not contain data files.
        RuntimeError: If `num_shards` is specified but `shard_id` is None.
        RuntimeError: If `shard_id` is specified but `num_shards` is None.
        ValueError: If `num_parallel_workers` exceeds the max thread numbers.

    Tutorial Examples:
        - `Load & Process Data With Dataset Pipeline
          <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_

    Examples:
        >>> import mindspore.dataset as ds
        >>> amazon_review_dataset_dir = "/path/to/amazon_review_dataset_dir"
        >>> dataset = ds.AmazonReviewDataset(dataset_dir=amazon_review_dataset_dir, usage='all')

    About AmazonReview Dataset:

    The Amazon reviews full dataset consists of reviews from Amazon. The data span a period of 18 years, including ~35
    million reviews up to March 2013. Reviews include product and user information, ratings, and a plaintext review.
    The dataset is mainly used for text classification: given the content and title, predict the correct star rating.

    The Amazon reviews polarity dataset is constructed by taking review score 1 and 2 as negative, 4 and 5 as positive.
    Samples of score 3 are ignored.

    The Amazon Reviews Polarity and Amazon Reviews Full datasets have the same directory structures.
    You can unzip the dataset files into the following structure and read them with MindSpore's API:

    .. code-block::

        .
        └── amazon_review_dir
            ├── train.csv
            ├── test.csv
            └── readme.txt

    Citation:

    .. code-block::

        @article{zhang2015character,
        title={Character-level convolutional networks for text classification},
        author={Zhang, Xiang and Zhao, Junbo and LeCun, Yann},
        journal={Advances in neural information processing systems},
        volume={28},
        pages={649--657},
        year={2015}
        }
    """

    @check_amazon_review_dataset
    def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
                 num_shards=None, shard_id=None, cache=None):
        # Everything except the directory and the usage is forwarded, by keyword,
        # to the base-class initializer.
        base_options = {
            "num_parallel_workers": num_parallel_workers,
            "num_samples": num_samples,
            "shuffle": shuffle,
            "num_shards": num_shards,
            "shard_id": shard_id,
            "cache": cache,
        }
        super().__init__(**base_options)
        self.dataset_dir = dataset_dir
        # ``replace_none`` supplies 'all' as the fallback for an unspecified usage.
        self.usage = replace_none(usage, 'all')

    def parse(self, children=None):
        """Create the ``cde.AmazonReviewNode`` for this dataset; `children` is not used here."""
        node_args = (
            self.dataset_dir,
            self.usage,
            self.num_samples,
            self.shuffle_flag,
            self.num_shards,
            self.shard_id,
        )
        return cde.AmazonReviewNode(*node_args)
244
+
245
+
246
+ class CLUEDataset(SourceDataset, TextBaseDataset):
247
+ """
248
+ CLUE(Chinese Language Understanding Evaluation) dataset.
249
+ Supported CLUE classification tasks: ``'AFQMC'`` , ``'TNEWS'``, ``'IFLYTEK'``, ``'CMNLI'``,
250
+ ``'WSC'`` and ``'CSL'``.
251
+
252
+ Args:
253
+ dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for
254
+ a pattern of files. The list will be sorted in a lexicographical order.
255
+ task (str, optional): The kind of task, one of ``'AFQMC'`` , ``'TNEWS'``, ``'IFLYTEK'``, ``'CMNLI'``,
256
+ ``'WSC'`` and ``'CSL'``. Default: ``'AFQMC'`` .
257
+ usage (str, optional): Specify the ``'train'``, ``'test'`` or ``'eval'`` part of dataset.
258
+ Default: ``'train'``.
259
+ num_samples (int, optional): The number of samples to be included in the dataset.
260
+ Default: ``None`` , will include all samples.
261
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
262
+ Default: ``None`` , will use global default workers(8), it can be set
263
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
264
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
265
+ Default: ``Shuffle.GLOBAL`` . Bool type and Shuffle enum are both supported to pass in.
266
+ If `shuffle` is ``False``, no shuffling will be performed.
267
+ If `shuffle` is ``True``, performs global shuffle.
268
+ There are three levels of shuffling, desired shuffle enum defined by :class:`mindspore.dataset.Shuffle` .
269
+
270
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting `shuffle` to ``True``.
271
+
272
+ - ``Shuffle.FILES`` : Shuffle files only.
273
+
274
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
275
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
276
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
277
+ argument can only be specified when `num_shards` is also specified.
278
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
279
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
280
+ Default: ``None`` , which means no cache is used.
281
+
282
+ The generated dataset with different task setting has different output columns:
283
+
284
+ +-------------------------+------------------------------+-----------------------------+
285
+ | `task` | `usage` | Output column |
286
+ +=========================+==============================+=============================+
287
+ | AFQMC | train | [sentence1, dtype=string] |
288
+ | | | |
289
+ | | | [sentence2, dtype=string] |
290
+ | | | |
291
+ | | | [label, dtype=string] |
292
+ | +------------------------------+-----------------------------+
293
+ | | test | [id, dtype=uint32] |
294
+ | | | |
295
+ | | | [sentence1, dtype=string] |
296
+ | | | |
297
+ | | | [sentence2, dtype=string] |
298
+ | +------------------------------+-----------------------------+
299
+ | | eval | [sentence1, dtype=string] |
300
+ | | | |
301
+ | | | [sentence2, dtype=string] |
302
+ | | | |
303
+ | | | [label, dtype=string] |
304
+ +-------------------------+------------------------------+-----------------------------+
305
+ | TNEWS | train | [label, dtype=string] |
306
+ | | | |
307
+ | | | [label_des, dtype=string] |
308
+ | | | |
309
+ | | | [sentence, dtype=string] |
310
+ | | | |
311
+ | | | [keywords, dtype=string] |
312
+ | +------------------------------+-----------------------------+
313
+ | | test | [label, dtype=uint32] |
314
+ | | | |
315
+ | | | [keywords, dtype=string] |
316
+ | | | |
317
+ | | | [sentence, dtype=string] |
318
+ | +------------------------------+-----------------------------+
319
+ | | eval | [label, dtype=string] |
320
+ | | | |
321
+ | | | [label_des, dtype=string] |
322
+ | | | |
323
+ | | | [sentence, dtype=string] |
324
+ | | | |
325
+ | | | [keywords, dtype=string] |
326
+ +-------------------------+------------------------------+-----------------------------+
327
+ | IFLYTEK | train | [label, dtype=string] |
328
+ | | | |
329
+ | | | [label_des, dtype=string] |
330
+ | | | |
331
+ | | | [sentence, dtype=string] |
332
+ | +------------------------------+-----------------------------+
333
+ | | test | [id, dtype=uint32] |
334
+ | | | |
335
+ | | | [sentence, dtype=string] |
336
+ | +------------------------------+-----------------------------+
337
+ | | eval | [label, dtype=string] |
338
+ | | | |
339
+ | | | [label_des, dtype=string] |
340
+ | | | |
341
+ | | | [sentence, dtype=string] |
342
+ +-------------------------+------------------------------+-----------------------------+
343
+ | CMNLI | train | [sentence1, dtype=string] |
344
+ | | | |
345
+ | | | [sentence2, dtype=string] |
346
+ | | | |
347
+ | | | [label, dtype=string] |
348
+ | +------------------------------+-----------------------------+
349
+ | | test | [id, dtype=uint32] |
350
+ | | | |
351
+ | | | [sentence1, dtype=string] |
352
+ | | | |
353
+ | | | [sentence2, dtype=string] |
354
+ | +------------------------------+-----------------------------+
355
+ | | eval | [sentence1, dtype=string] |
356
+ | | | |
357
+ | | | [sentence2, dtype=string] |
358
+ | | | |
359
+ | | | [label, dtype=string] |
360
+ +-------------------------+------------------------------+-----------------------------+
361
+ | WSC | train | [span1_index, dtype=uint32]|
362
+ | | | |
363
+ | | | [span2_index, dtype=uint32]|
364
+ | | | |
365
+ | | | [span1_text, dtype=string] |
366
+ | | | |
367
+ | | | [span2_text, dtype=string] |
368
+ | | | |
369
+ | | | [idx, dtype=uint32] |
370
+ | | | |
371
+ | | | [text, dtype=string] |
372
+ | | | |
373
+ | | | [label, dtype=string] |
374
+ | +------------------------------+-----------------------------+
375
+ | | test | [span1_index, dtype=uint32]|
376
+ | | | |
377
+ | | | [span2_index, dtype=uint32]|
378
+ | | | |
379
+ | | | [span1_text, dtype=string] |
380
+ | | | |
381
+ | | | [span2_text, dtype=string] |
382
+ | | | |
383
+ | | | [idx, dtype=uint32] |
384
+ | | | |
385
+ | | | [text, dtype=string] |
386
+ | +------------------------------+-----------------------------+
387
+ | | eval | [span1_index, dtype=uint32]|
388
+ | | | |
389
+ | | | [span2_index, dtype=uint32]|
390
+ | | | |
391
+ | | | [span1_text, dtype=string] |
392
+ | | | |
393
+ | | | [span2_text, dtype=string] |
394
+ | | | |
395
+ | | | [idx, dtype=uint32] |
396
+ | | | |
397
+ | | | [text, dtype=string] |
398
+ | | | |
399
+ | | | [label, dtype=string] |
400
+ +-------------------------+------------------------------+-----------------------------+
401
+ | CSL | train | [id, dtype=uint32] |
402
+ | | | |
403
+ | | | [abst, dtype=string] |
404
+ | | | |
405
+ | | | [keyword, dtype=string] |
406
+ | | | |
407
+ | | | [label, dtype=string] |
408
+ | +------------------------------+-----------------------------+
409
+ | | test | [id, dtype=uint32] |
410
+ | | | |
411
+ | | | [abst, dtype=string] |
412
+ | | | |
413
+ | | | [keyword, dtype=string] |
414
+ | +------------------------------+-----------------------------+
415
+ | | eval | [id, dtype=uint32] |
416
+ | | | |
417
+ | | | [abst, dtype=string] |
418
+ | | | |
419
+ | | | [keyword, dtype=string] |
420
+ | | | |
421
+ | | | [label, dtype=string] |
422
+ +-------------------------+------------------------------+-----------------------------+
423
+
424
+ Raises:
425
+ ValueError: If dataset_files are not valid or do not exist.
426
+ ValueError: If `task` is not in ``'AFQMC'`` , ``'TNEWS'``, ``'IFLYTEK'``, ``'CMNLI'``, ``'WSC'``
427
+ or ``'CSL'``.
428
+ ValueError: If `usage` is not in ``'train'``, ``'test'`` or ``'eval'``.
429
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
430
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
431
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
432
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
433
+
434
+ Tutorial Examples:
435
+ - `Load & Process Data With Dataset Pipeline
436
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
437
+
438
+ Examples:
439
+ >>> import mindspore.dataset as ds
440
+ >>> clue_dataset_dir = ["/path/to/clue_dataset_file"] # contains 1 or multiple clue files
441
+ >>> dataset = ds.CLUEDataset(dataset_files=clue_dataset_dir, task='AFQMC', usage='train')
442
+
443
+ About CLUE dataset:
444
+
445
+ CLUE is a Chinese Language Understanding Evaluation benchmark. It contains multiple
446
+ tasks, including single-sentence classification, sentence pair classification, and machine
447
+ reading comprehension.
448
+
449
+ You can unzip the dataset files into the following structure and read by MindSpore's API,
450
+ such as afqmc dataset:
451
+
452
+ .. code-block::
453
+
454
+ .
455
+ └── afqmc_public
456
+ ├── train.json
457
+ ├── test.json
458
+ └── dev.json
459
+
460
+ Citation:
461
+
462
+ .. code-block::
463
+
464
+ @article{CLUEbenchmark,
465
+ title = {CLUE: A Chinese Language Understanding Evaluation Benchmark},
466
+ author = {Liang Xu, Xuanwei Zhang, Lu Li, Hai Hu, Chenjie Cao, Weitang Liu, Junyi Li, Yudong Li,
467
+ Kai Sun, Yechen Xu, Yiming Cui, Cong Yu, Qianqian Dong, Yin Tian, Dian Yu, Bo Shi, Jun Zeng,
468
+ Rongzhao Wang, Weijian Xie, Yanting Li, Yina Patterson, Zuoyu Tian, Yiwen Zhang, He Zhou,
469
+ Shaoweihua Liu, Qipeng Zhao, Cong Yue, Xinrui Zhang, Zhengliang Yang, Zhenzhong Lan},
470
+ journal = {arXiv preprint arXiv:2004.05986},
471
+ year = {2020},
472
+ howpublished = {https://github.com/CLUEbenchmark/CLUE}
473
+ }
474
+ """
475
+
476
+ @check_cluedataset
477
+ def __init__(self, dataset_files, task='AFQMC', usage='train', num_samples=None, num_parallel_workers=None,
478
+ shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None):
479
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
480
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
481
+ self.dataset_files = self._find_files(dataset_files)
482
+ self.usage = replace_none(usage, 'train')
483
+ self.task = replace_none(task, 'AFQMC')
484
+
485
+ def parse(self, children=None):
486
+ return cde.CLUENode(self.dataset_files, self.task, self.usage, self.num_samples, self.shuffle_flag,
487
+ self.num_shards, self.shard_id)
488
+
489
+
490
+ class CoNLL2000Dataset(SourceDataset, TextBaseDataset):
491
+ """
492
+ CoNLL-2000(Conference on Computational Natural Language Learning) chunking dataset.
493
+
494
+ The generated dataset has three columns: :py:obj:`[word, pos_tag, chunk_tag]` .
495
+ The tensors of column :py:obj:`word` , column :py:obj:`pos_tag` ,
496
+ and column :py:obj:`chunk_tag` are of the string type.
497
+
498
+ Args:
499
+ dataset_dir (str): Path to the root directory that contains the CoNLL2000 chunking dataset.
500
+ usage (str, optional): Usage of dataset, can be ``'train'`` , ``'test'`` , or ``'all'`` .
501
+ For dataset, ``'train'`` will read from 8,936 train samples,
502
+ ``'test'`` will read from 2,012 test samples,
503
+ ``'all'`` will read from all 10,948 samples. Default: ``None`` , read all samples.
504
+ num_samples (int, optional): Number of samples (rows) to be read. Default: ``None`` ,
505
+ read the full dataset.
506
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
507
+ Default: ``Shuffle.GLOBAL`` .
508
+ If `shuffle` is ``False`` , no shuffling will be performed.
509
+ If `shuffle` is ``True`` , performs global shuffle.
510
+ There are three levels of shuffling, desired shuffle enum defined by
511
+ :class:`mindspore.dataset.Shuffle` .
512
+
513
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting `shuffle` to ``True``.
514
+ - ``Shuffle.FILES`` : Shuffle files only.
515
+
516
+ num_shards (int, optional): Number of shards that the dataset will be divided into.
517
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
518
+ Default: ``None`` .
519
+ shard_id (int, optional): The shard ID within `num_shards` . This
520
+ argument can only be specified when `num_shards` is also specified. Default: ``None`` .
521
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
522
+ Default: ``None`` , will use global default workers(8), it can be set
523
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
524
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
525
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
526
+ Default: ``None`` , which means no cache is used.
527
+
528
+ Raises:
529
+ RuntimeError: If `dataset_dir` does not contain data files.
530
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
531
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
532
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
533
+
534
+ Tutorial Examples:
535
+ - `Load & Process Data With Dataset Pipeline
536
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
537
+
538
+ Examples:
539
+ >>> import mindspore.dataset as ds
540
+ >>> conll2000_dataset_dir = "/path/to/conll2000_dataset_dir"
541
+ >>> dataset = ds.CoNLL2000Dataset(dataset_dir=conll2000_dataset_dir, usage='all')
542
+
543
+ About CoNLL2000 Dataset:
544
+
545
+ The CoNLL2000 chunking dataset consists of the text from sections 15-20 of the Wall Street Journal corpus.
546
+ Texts are chunked using IOB notation, and the chunk types include NP, VP, PP, ADJP and ADVP.
547
+ The dataset consists of three columns separated by spaces. The first column contains the current word,
548
+ the second is part-of-speech tag as derived by the Brill tagger and the third is chunk tag as derived from
549
+ the WSJ corpus. Text chunking consists of dividing a text in syntactically correlated parts of words.
550
+
551
+ You can unzip the dataset files into the following structure and read by MindSpore's API:
552
+
553
+ .. code-block::
554
+
555
+ .
556
+ └── conll2000_dataset_dir
557
+ ├── train.txt
558
+ ├── test.txt
559
+ └── readme.txt
560
+
561
+ Citation:
562
+
563
+ .. code-block::
564
+
565
+ @inproceedings{tksbuchholz2000conll,
566
+ author = {Tjong Kim Sang, Erik F. and Sabine Buchholz},
567
+ title = {Introduction to the CoNLL-2000 Shared Task: Chunking},
568
+ editor = {Claire Cardie and Walter Daelemans and Claire Nedellec and Tjong Kim Sang, Erik},
569
+ booktitle = {Proceedings of CoNLL-2000 and LLL-2000},
570
+ publisher = {Lisbon, Portugal},
571
+ pages = {127--132},
572
+ year = {2000}
573
+ }
574
+ """
575
+
576
+ @check_conll2000_dataset
577
+ def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
578
+ shard_id=None, num_parallel_workers=None, cache=None):
579
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
580
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
581
+ self.dataset_dir = dataset_dir
582
+ self.usage = replace_none(usage, 'all')
583
+
584
+ def parse(self, children=None):
585
+ return cde.CoNLL2000Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards,
586
+ self.shard_id)
587
+
588
+
589
+ class DBpediaDataset(SourceDataset, TextBaseDataset):
590
+ """
591
+ DBpedia dataset.
592
+
593
+ The generated dataset has three columns :py:obj:`[class, title, content]` ,
594
+ and the data type of three columns is string.
595
+
596
+ Args:
597
+ dataset_dir (str): Path to the root directory that contains the dataset.
598
+ usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
599
+ ``'train'`` will read from 560,000 train samples,
600
+ ``'test'`` will read from 70,000 test samples,
601
+ ``'all'`` will read from all 630,000 samples. Default: ``None`` , all samples.
602
+ num_samples (int, optional): The number of samples to be included in the dataset.
603
+ Default: ``None`` , will include all text.
604
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
605
+ Default: ``None`` , will use global default workers(8), it can be set
606
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
607
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
608
+ Bool type and Shuffle enum are both supported to pass in.
609
+ Default: ``Shuffle.GLOBAL`` .
610
+ If `shuffle` is ``False`` , no shuffling will be performed.
611
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
612
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
613
+ Set the mode of data shuffling by passing in enumeration variables:
614
+
615
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
616
+
617
+ - ``Shuffle.FILES`` : Shuffle files only.
618
+
619
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
620
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
621
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
622
+ argument can only be specified when `num_shards` is also specified.
623
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
624
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
625
+ Default: ``None`` , which means no cache is used.
626
+
627
+ Raises:
628
+ RuntimeError: If `dataset_dir` does not contain data files.
629
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
630
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
631
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
632
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
633
+
634
+ Tutorial Examples:
635
+ - `Load & Process Data With Dataset Pipeline
636
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
637
+
638
+ Examples:
639
+ >>> import mindspore.dataset as ds
640
+ >>> dbpedia_dataset_dir = "/path/to/dbpedia_dataset_directory"
641
+ >>>
642
+ >>> # 1) Read 3 samples from DBpedia dataset
643
+ >>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dataset_dir, num_samples=3)
644
+ >>>
645
+ >>> # 2) Read train samples from DBpedia dataset
646
+ >>> dataset = ds.DBpediaDataset(dataset_dir=dbpedia_dataset_dir, usage="train")
647
+
648
+ About DBpedia dataset:
649
+
650
+ The DBpedia dataset consists of 630,000 text samples in 14 classes, there are 560,000 samples in the train.csv
651
+ and 70,000 samples in the test.csv.
652
+ The 14 different classes represent Company, EducationalInstitution, Artist, Athlete, OfficeHolder,
653
+ MeanOfTransportation, Building, NaturalPlace, Village, Animal, Plant, Album, Film, WrittenWork.
654
+
655
+ Here is the original DBpedia dataset structure.
656
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
657
+
658
+ .. code-block::
659
+
660
+ .
661
+ └── dbpedia_dataset_dir
662
+ ├── train.csv
663
+ ├── test.csv
664
+ ├── classes.txt
665
+ └── readme.txt
666
+
667
+ Citation:
668
+
669
+ .. code-block::
670
+
671
+ @article{DBpedia,
672
+ title = {DBPedia Ontology Classification Dataset},
673
+ author = {Jens Lehmann, Robert Isele, Max Jakob, Anja Jentzsch, Dimitris Kontokostas,
674
+ Pablo N. Mendes, Sebastian Hellmann, Mohamed Morsey, Patrick van Kleef,
675
+ Sören Auer, Christian Bizer},
676
+ year = {2015},
677
+ howpublished = {http://dbpedia.org}
678
+ }
679
+ """
680
+
681
+ @check_dbpedia_dataset
682
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
683
+ num_shards=None, shard_id=None, cache=None):
684
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
685
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
686
+ self.dataset_dir = dataset_dir
687
+ self.usage = replace_none(usage, "all")
688
+
689
+ def parse(self, children=None):
690
+ return cde.DBpediaNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards,
691
+ self.shard_id)
692
+
693
+
694
+ class EnWik9Dataset(SourceDataset, TextBaseDataset):
695
+ """
696
+ EnWik9 dataset.
697
+
698
+ The generated dataset has one column :py:obj:`[text]` with type string.
699
+
700
+ Args:
701
+ dataset_dir (str): Path to the root directory that contains the dataset.
702
+ num_samples (int, optional): The number of samples to be included in the dataset.
703
+ Default: ``None`` , will include all samples.
704
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
705
+ Default: ``None`` , will use global default workers(8), it can be set
706
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
707
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
708
+ Bool type and Shuffle enum are both supported to pass in. Default: ``True``.
709
+ If `shuffle` is ``False`` , no shuffling will be performed.
710
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
711
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
712
+ Set the mode of data shuffling by passing in enumeration variables:
713
+
714
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
715
+
716
+ - ``Shuffle.FILES`` : Shuffle files only.
717
+
718
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
719
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
720
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
721
+ argument can only be specified when `num_shards` is also specified.
722
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
723
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
724
+ Default: ``None`` , which means no cache is used.
725
+
726
+ Raises:
727
+ RuntimeError: If `dataset_dir` does not contain data files.
728
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
729
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
730
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
731
+
732
+ Tutorial Examples:
733
+ - `Load & Process Data With Dataset Pipeline
734
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
735
+
736
+ Examples:
737
+ >>> import mindspore.dataset as ds
738
+ >>> en_wik9_dataset_dir = "/path/to/en_wik9_dataset"
739
+ >>> dataset2 = ds.EnWik9Dataset(dataset_dir=en_wik9_dataset_dir, num_samples=2,
740
+ ... shuffle=True)
741
+
742
+ About EnWik9 dataset:
743
+
744
+ The data of EnWik9 is UTF-8 encoded XML consisting primarily of English text. It contains 243,426 article titles,
745
+ of which 85,560 are #REDIRECT to fix broken links, and the rest are regular articles.
746
+
747
+ The data is UTF-8 clean. All characters are in the range U'0000 to U'10FFFF with valid encodings of 1 to
748
+ 4 bytes. The byte values 0xC0, 0xC1, and 0xF5-0xFF never occur. Also, in the Wikipedia dumps,
749
+ there are no control characters in the range 0x00-0x1F except for 0x09 (tab) and 0x0A (linefeed).
750
+ Linebreaks occur only on paragraph boundaries, so they always have a semantic purpose.
751
+
752
+ You can unzip the dataset files into the following directory structure and read by MindSpore's API.
753
+
754
+ .. code-block::
755
+
756
+ .
757
+ └── EnWik9
758
+ ├── enwik9
759
+
760
+ Citation:
761
+
762
+ .. code-block::
763
+
764
+ @NetworkResource{Hutter_prize,
765
+ author = {English Wikipedia},
766
+ url = "https://mattmahoney.net/dc/textdata.html",
767
+ month = {March},
768
+ year = {2006}
769
+ }
770
+ """
771
+
772
+ @check_en_wik9_dataset
773
+ def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=True,
774
+ num_shards=None, shard_id=None, cache=None):
775
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
776
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
777
+ self.dataset_dir = dataset_dir
778
+
779
+ def parse(self, children=None):
780
+ return cde.EnWik9Node(self.dataset_dir, self.num_samples, self.shuffle_flag, self.num_shards,
781
+ self.shard_id)
782
+
783
+
784
+ class IMDBDataset(MappableDataset, TextBaseDataset):
785
+ """
786
+ IMDb(Internet Movie Database) dataset.
787
+
788
+ The generated dataset has two columns: :py:obj:`[text, label]` .
789
+ The tensor of column :py:obj:`text` is of the string type.
790
+ The column :py:obj:`label` is of a scalar of uint32 type.
791
+
792
+ Args:
793
+ dataset_dir (str): Path to the root directory that contains the dataset.
794
+ usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
795
+ Default: ``None`` , will read all samples.
796
+ num_samples (int, optional): The number of samples to be included in the dataset.
797
+ Default: ``None`` , will include all samples.
798
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
799
+ Default: ``None`` , will use global default workers(8), it can be set
800
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
801
+ shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
802
+ Default: ``None`` , expected order behavior shown in the table below.
803
+ sampler (Sampler, optional): Object used to choose samples from the dataset.
804
+ Default: ``None`` , expected order behavior shown in the table below.
805
+ num_shards (int, optional): Number of shards that the dataset will be divided
806
+ into. Default: ``None`` . When this argument is specified, `num_samples` reflects
807
+ the maximum number of samples per shard.
808
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
809
+ argument can only be specified when `num_shards` is also specified.
810
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
811
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
812
+ Default: ``None`` , which means no cache is used.
813
+
814
+ Raises:
815
+ RuntimeError: If `dataset_dir` does not contain data files.
816
+ RuntimeError: If `sampler` and `shuffle` are specified at the same time.
817
+ RuntimeError: If `sampler` and `num_shards`/`shard_id` are specified at the same time.
818
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
819
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
820
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
821
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
822
+
823
+ Tutorial Examples:
824
+ - `Load & Process Data With Dataset Pipeline
825
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
826
+
827
+ Note:
828
+ - The shape of the test column.
829
+ - The parameters `num_samples` , `shuffle` , `num_shards` , `shard_id` can be used to control the sampler
830
+ used in the dataset, and their effects when combined with parameter `sampler` are as follows.
831
+
832
+ .. include:: mindspore.dataset.sampler.txt
833
+
834
+ Examples:
835
+ >>> import mindspore.dataset as ds
836
+ >>> imdb_dataset_dir = "/path/to/imdb_dataset_directory"
837
+ >>>
838
+ >>> # 1) Read all samples (text files) in imdb_dataset_dir with 8 threads
839
+ >>> dataset = ds.IMDBDataset(dataset_dir=imdb_dataset_dir, num_parallel_workers=8)
840
+ >>>
841
+ >>> # 2) Read train samples (text files).
842
+ >>> dataset = ds.IMDBDataset(dataset_dir=imdb_dataset_dir, usage="train")
843
+
844
+ About IMDBDataset:
845
+
846
+ The IMDB dataset contains 50,000 highly polarized reviews from the Internet Movie Database (IMDB). The dataset
847
+ was divided into 25,000 comments for training and 25,000 comments for testing, with both the training set and test
848
+ set containing 50% positive and 50% negative comments. Train labels and test labels are all lists of 0 and 1, where
849
+ 0 stands for negative and 1 for positive.
850
+
851
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
852
+
853
+ .. code-block::
854
+
855
+ .
856
+ └── imdb_dataset_directory
857
+ ├── train
858
+ │ ├── pos
859
+ │ │ ├── 0_9.txt
860
+ │ │ ├── 1_7.txt
861
+ │ │ ├── ...
862
+ │ ├── neg
863
+ │ │ ├── 0_3.txt
864
+ │ │ ├── 1_1.txt
865
+ │ │ ├── ...
866
+ ├── test
867
+ │ ├── pos
868
+ │ │ ├── 0_10.txt
869
+ │ │ ├── 1_10.txt
870
+ │ │ ├── ...
871
+ │ ├── neg
872
+ │ │ ├── 0_2.txt
873
+ │ │ ├── 1_3.txt
874
+ │ │ ├── ...
875
+
876
+ Citation:
877
+
878
+ .. code-block::
879
+
880
+ @InProceedings{maas-EtAl:2011:ACL-HLT2011,
881
+ author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan
882
+ and Ng, Andrew Y. and Potts, Christopher},
883
+ title = {Learning Word Vectors for Sentiment Analysis},
884
+ booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics:
885
+ Human Language Technologies},
886
+ month = {June},
887
+ year = {2011},
888
+ address = {Portland, Oregon, USA},
889
+ publisher = {Association for Computational Linguistics},
890
+ pages = {142--150},
891
+ url = {http://www.aclweb.org/anthology/P11-1015}
892
+ }
893
+ """
894
+
895
+ @check_imdb_dataset
896
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None,
897
+ num_shards=None, shard_id=None, cache=None):
898
+ super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
899
+ shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
900
+
901
+ self.dataset_dir = dataset_dir
902
+ self.usage = replace_none(usage, "all")
903
+
904
+ def parse(self, children=None):
905
+ return cde.IMDBNode(self.dataset_dir, self.usage, self.sampler)
906
+
907
+
908
+ class IWSLT2016Dataset(SourceDataset, TextBaseDataset):
909
+ """
910
+ IWSLT2016(International Workshop on Spoken Language Translation) dataset.
911
+
912
+ The generated dataset has two columns: :py:obj:`[text, translation]` .
913
+ The tensor of column :py:obj:`text` is of the string type.
914
+ The column :py:obj:`translation` is of the string type.
915
+
916
+ Args:
917
+ dataset_dir (str): Path to the root directory that contains the dataset.
918
+ usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: ``None`` ,
919
+ all samples.
920
+ language_pair (sequence, optional): Sequence containing source and target language, supported values are
921
+ ``('en', 'fr')``, ``('en', 'de')``, ``('en', 'cs')``, ``('en', 'ar')``, ``('fr', 'en')``,
922
+ ``('de', 'en')``, ``('cs', 'en')``, ``('ar', 'en')``. Default: ``None``, set to ``('de', 'en')``.
923
+ valid_set (str, optional): A string to identify validation set, when usage is valid or all, the validation set
924
+ of `valid_set` type will be read, supported values are ``'dev2010'``, ``'tst2010'``, ``'tst2011'``,
925
+ ``'tst2012'``, ``'tst2013'`` and ``'tst2014'``. Default: ``None``, set to ``'tst2013'``.
926
+ test_set (str, optional): A string to identify test set, when usage is test or all, the test set of `test_set`
927
+ type will be read, supported values are ``'dev2010'``, ``'tst2010'``, ``'tst2011'``, ``'tst2012'``,
928
+ ``'tst2013'`` and ``'tst2014'``. Default: ``None``, set to ``'tst2014'``.
929
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
930
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
931
+ Bool type and Shuffle enum are both supported to pass in.
932
+ Default: ``Shuffle.GLOBAL`` .
933
+ If `shuffle` is ``False``, no shuffling will be performed.
934
+ If `shuffle` is ``True``, it is equivalent to setting `shuffle` to
935
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
936
+ Set the mode of data shuffling by passing in enumeration variables:
937
+
938
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
939
+
940
+ - ``Shuffle.FILES`` : Shuffle files only.
941
+
942
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
943
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
944
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
945
+ argument can only be specified when `num_shards` is also specified.
946
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
947
+ Default: ``None`` , will use global default workers(8), it can be set
948
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
949
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
950
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
951
+ Default: ``None`` , which means no cache is used.
952
+
953
+ Raises:
954
+ RuntimeError: If `dataset_dir` does not contain data files.
955
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
956
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
957
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
958
+
959
+ Tutorial Examples:
960
+ - `Load & Process Data With Dataset Pipeline
961
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
962
+
963
+ Examples:
964
+ >>> import mindspore.dataset as ds
965
+ >>> iwslt2016_dataset_dir = "/path/to/iwslt2016_dataset_dir"
966
+ >>> dataset = ds.IWSLT2016Dataset(dataset_dir=iwslt2016_dataset_dir, usage='all',
967
+ ... language_pair=('de', 'en'), valid_set='tst2013', test_set='tst2014')
968
+
969
+ About IWSLT2016 dataset:
970
+
971
+ IWSLT is the International Workshop on Spoken Language Translation, a major annual scientific conference dedicated to all aspects
971
+ of spoken language translation. The MT task of the IWSLT evaluation activity constitutes a dataset, which can be publicly
973
+ obtained through the WIT3 website `wit3 <https://wit3.fbk.eu>`_ . The IWSLT2016 dataset includes translations from
974
+ English to Arabic, Czech, French, and German, and translations from Arabic, Czech, French, and German to English.
975
+
976
+ You can unzip the original IWSLT2016 dataset files into this directory structure and read by MindSpore's API. After
977
+ decompression, you also need to decompress the dataset to be read in the specified folder. For example, if you want
978
+ to read the dataset of de-en, you need to unzip the tgz file in the de/en directory, the dataset is in the
979
+ unzipped folder.
980
+
981
+ .. code-block::
982
+
983
+ .
984
+ └── iwslt2016_dataset_directory
985
+ ├── subeval_files
986
+ └── texts
987
+ ├── ar
988
+ │ └── en
989
+ │ └── ar-en
990
+ ├── cs
991
+ │ └── en
992
+ │ └── cs-en
993
+ ├── de
994
+ │ └── en
995
+ │ └── de-en
996
+ │ ├── IWSLT16.TED.dev2010.de-en.de.xml
997
+ │ ├── train.tags.de-en.de
998
+ │ ├── ...
999
+ ├── en
1000
+ │ ├── ar
1001
+ │ │ └── en-ar
1002
+ │ ├── cs
1003
+ │ │ └── en-cs
1004
+ │ ├── de
1005
+ │ │ └── en-de
1006
+ │ └── fr
1007
+ │ └── en-fr
1008
+ └── fr
1009
+ └── en
1010
+ └── fr-en
1011
+
1012
+ Citation:
1013
+
1014
+ .. code-block::
1015
+
1016
+ @inproceedings{cettoloEtAl:EAMT2012,
1017
+ Address = {Trento, Italy},
1018
+ Author = {Mauro Cettolo and Christian Girardi and Marcello Federico},
1019
+ Booktitle = {Proceedings of the 16$^{th}$ Conference of the European Association for Machine Translation
1020
+ (EAMT)},
1021
+ Date = {28-30},
1022
+ Month = {May},
1023
+ Pages = {261--268},
1024
+ Title = {WIT$^3$: Web Inventory of Transcribed and Translated Talks},
1025
+ Year = {2012}}
1026
+ """
1027
+
1028
@check_iwslt2016_dataset
def __init__(self, dataset_dir, usage=None, language_pair=None, valid_set=None, test_set=None,
             num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, num_parallel_workers=None,
             cache=None):
    """
    Record the IWSLT2016 dataset configuration on the instance.

    `num_parallel_workers`, `num_samples`, `shuffle`, `num_shards`, `shard_id` and `cache` are
    handed to the parent initializer as keyword arguments. The remaining arguments are stored as
    attributes, which `parse` later reads.
    """
    super().__init__(
        num_parallel_workers=num_parallel_workers,
        num_samples=num_samples,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )

    self.dataset_dir = dataset_dir
    # NOTE(review): replace_none presumably substitutes the second argument when the
    # first one is None - confirm against its definition.
    self.usage = replace_none(usage, 'all')
    self.language_pair = replace_none(language_pair, ["de", "en"])
    self.valid_set = replace_none(valid_set, 'tst2013')
    self.test_set = replace_none(test_set, 'tst2014')
1039
+
1040
def parse(self, children=None):
    """
    Build and return the `cde.IWSLT2016Node` described by this dataset's attributes.

    `children` is accepted for interface compatibility but is not used in this method.
    """
    node_args = (
        self.dataset_dir,
        self.usage,
        self.language_pair,
        self.valid_set,
        self.test_set,
        self.num_samples,
        self.shuffle_flag,
        self.num_shards,
        self.shard_id,
    )
    return cde.IWSLT2016Node(*node_args)
1043
+
1044
+
1045
+ class IWSLT2017Dataset(SourceDataset, TextBaseDataset):
1046
+ """
1047
+ IWSLT2017(International Workshop on Spoken Language Translation) dataset.
1048
+
1049
+ The generated dataset has two columns: :py:obj:`[text, translation]` .
1050
+ The tensors of columns :py:obj:`text` and :py:obj:`translation` are of the string type.
1051
+
1052
+ Args:
1053
+ dataset_dir (str): Path to the root directory that contains the dataset.
1054
+ usage (str, optional): Acceptable usages include 'train', 'valid', 'test' and 'all'. Default: ``None`` ,
1055
+ all samples.
1056
+ language_pair (sequence, optional): List containing src and tgt language, supported values are ``('en', 'nl')``,
1057
+ ``('en', 'de')``, ``('en', 'it')``, ``('en', 'ro')``, ``('nl', 'en')``, ``('nl', 'de')``, ``('nl', 'it')``,
1058
+ ``('nl', 'ro')``, ``('de', 'en')``, ``('de', 'nl')``, ``('de', 'it')``, ``('de', 'ro')``, ``('it', 'en')``,
1059
+ ``('it', 'nl')``, ``('it', 'de')``, ``('it', 'ro')``, ``('ro', 'en')``, ``('ro', 'nl')``, ``('ro', 'de')``,
1060
+ ``('ro', 'it')``. Default: ``None``, set to ``('de', 'en')``.
1061
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
1062
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1063
+ Bool type and Shuffle enum are both supported to pass in.
1064
+ Default: ``Shuffle.GLOBAL`` .
1065
+ If `shuffle` is ``False`` , no shuffling will be performed.
1066
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
1067
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1068
+ Set the mode of data shuffling by passing in enumeration variables:
1069
+
1070
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1071
+
1072
+ - ``Shuffle.FILES`` : Shuffle files only.
1073
+
1074
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1075
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
1076
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1077
+ argument can only be specified when `num_shards` is also specified.
1078
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1079
+ Default: ``None`` , will use global default workers(8), it can be set
1080
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1081
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1082
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1083
+ Default: ``None`` , which means no cache is used.
1084
+
1085
+ Raises:
1086
+ RuntimeError: If `dataset_dir` does not contain data files.
1087
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1088
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1089
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1090
+
1091
+ Tutorial Examples:
1092
+ - `Load & Process Data With Dataset Pipeline
1093
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1094
+
1095
+ Examples:
1096
+ >>> import mindspore.dataset as ds
1097
+ >>> iwslt2017_dataset_dir = "/path/to/iwslt2017_dataset_dir"
1098
+ >>> dataset = ds.IWSLT2017Dataset(dataset_dir=iwslt2017_dataset_dir, usage='all', language_pair=('de', 'en'))
1099
+
1100
+ About IWSLT2017 dataset:
1101
+
1102
+ IWSLT is an international oral translation conference, a major annual scientific conference dedicated to all aspects
1103
+ of oral translation. The MT task of the IWSLT evaluation activity constitutes a dataset, which can be publicly
1104
+ obtained through the WIT3 website `wit3 <https://wit3.fbk.eu>`_ . The IWSLT2017 dataset involves German, English,
1105
+ Italian, Dutch, and Romanian. The dataset includes translations in any two different languages.
1106
+
1107
+ You can unzip the original IWSLT2017 dataset files into this directory structure and read by MindSpore's API. You
1108
+ need to decompress the dataset package in texts/DeEnItNlRo/DeEnItNlRo directory to get the DeEnItNlRo-DeEnItNlRo
1109
+ subdirectory.
1110
+
1111
+ .. code-block::
1112
+
1113
+ .
1114
+ └── iwslt2017_dataset_directory
1115
+ └── DeEnItNlRo
1116
+ └── DeEnItNlRo
1117
+ └── DeEnItNlRo-DeEnItNlRo
1118
+ ├── IWSLT17.TED.dev2010.de-en.de.xml
1119
+ ├── train.tags.de-en.de
1120
+ ├── ...
1121
+
1122
+ Citation:
1123
+
1124
+ .. code-block::
1125
+
1126
+ @inproceedings{cettoloEtAl:EAMT2012,
1127
+ Address = {Trento, Italy},
1128
+ Author = {Mauro Cettolo and Christian Girardi and Marcello Federico},
1129
+ Booktitle = {Proceedings of the 16$^{th}$ Conference of the European Association for Machine Translation
1130
+ (EAMT)},
1131
+ Date = {28-30},
1132
+ Month = {May},
1133
+ Pages = {261--268},
1134
+ Title = {WIT$^3$: Web Inventory of Transcribed and Translated Talks},
1135
+ Year = {2012}}
1136
+ """
1137
+
1138
@check_iwslt2017_dataset
def __init__(self, dataset_dir, usage=None, language_pair=None, num_samples=None, shuffle=Shuffle.GLOBAL,
             num_shards=None, shard_id=None, num_parallel_workers=None, cache=None):
    """
    Record the IWSLT2017 dataset configuration on the instance.

    `num_parallel_workers`, `num_samples`, `shuffle`, `num_shards`, `shard_id` and `cache` are
    handed to the parent initializer as keyword arguments. `dataset_dir`, `usage` and
    `language_pair` are stored as attributes, which `parse` later reads.
    """
    super().__init__(
        num_parallel_workers=num_parallel_workers,
        num_samples=num_samples,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )

    self.dataset_dir = dataset_dir
    # Both options are passed through replace_none together with the fallback value
    # ('all' samples, the de -> en pair) that the class documentation names for None.
    self.usage = replace_none(usage, 'all')
    self.language_pair = replace_none(language_pair, ["de", "en"])
1146
+
1147
def parse(self, children=None):
    """
    Build and return the `cde.IWSLT2017Node` described by this dataset's attributes.

    `children` is accepted for interface compatibility but is not used in this method.
    """
    node_args = (
        self.dataset_dir,
        self.usage,
        self.language_pair,
        self.num_samples,
        self.shuffle_flag,
        self.num_shards,
        self.shard_id,
    )
    return cde.IWSLT2017Node(*node_args)
1150
+
1151
+
1152
+ class Multi30kDataset(SourceDataset, TextBaseDataset):
1153
+ """
1154
+ Multi30k dataset.
1155
+
1156
+ The generated dataset has two columns :py:obj:`[text, translation]` .
1157
+ The tensor of column :py:obj:`text` is of the string type.
1158
+ The tensor of column :py:obj:`translation` is of the string type.
1159
+
1160
+ Args:
1161
+ dataset_dir (str): Path to the root directory that contains the dataset.
1162
+ usage (str, optional): Acceptable usages include ``'train'``, ``'test'``, ``'valid'`` or ``'all'``.
1163
+ Default: ``None`` , will read all samples.
1164
+ language_pair (Sequence[str, str], optional): Acceptable language_pair include ``['en', 'de']``,
1165
+ ``['de', 'en']``. Default: ``None`` , means ``['en', 'de']``.
1166
+ num_samples (int, optional): The number of samples to be included in the dataset.
1167
+ Default: ``None`` , will read all samples.
1168
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1169
+ Default: ``None`` , will use global default workers(8), it can be set
1170
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1171
+ shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset. Default: ``None`` ,
1172
+ means ``mindspore.dataset.Shuffle.GLOBAL`` .
1173
+ If ``False`` is provided, no shuffling will be performed.
1174
+ If ``True`` is provided, it is the same as setting to
1175
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1176
+ If Shuffle is provided, the effect is as follows:
1177
+
1178
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1179
+ - ``Shuffle.FILES`` : Shuffle files only.
1180
+
1181
+ num_shards (int, optional): Number of shards that the dataset will be divided
1182
+ into. Default: ``None`` . When this argument is specified, `num_samples` reflects
1183
+ the max sample number of per shard.
1184
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1185
+ argument can only be specified when `num_shards` is also specified.
1186
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1187
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1188
+ Default: ``None`` , which means no cache is used.
1189
+
1190
+ Raises:
1191
+ RuntimeError: If `dataset_dir` does not contain data files.
1192
+ ValueError: If `usage` is not ``'train'``, ``'test'``, ``'valid'`` or ``'all'``.
1193
+ TypeError: If `language_pair` is not of type Sequence[str, str].
1194
+ RuntimeError: If `num_samples` is less than 0.
1195
+ RuntimeError: If `num_parallel_workers` exceeds the max thread numbers.
1196
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1197
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1198
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1199
+
1200
+ Tutorial Examples:
1201
+ - `Load & Process Data With Dataset Pipeline
1202
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1203
+
1204
+ Examples:
1205
+ >>> import mindspore.dataset as ds
1206
+ >>> multi30k_dataset_dir = "/path/to/multi30k_dataset_directory"
1207
+ >>> data = ds.Multi30kDataset(dataset_dir=multi30k_dataset_dir, usage='all', language_pair=['de', 'en'])
1208
+
1209
+ About Multi30k dataset:
1210
+
1211
+ Multi30K is a multilingual dataset that features approximately 31,000 standardized images
1212
+ described in multiple languages. The images are sourced from Flickr and each image comes
1213
+ with sentence descriptions in both English and German, as well as descriptions in other
1214
+ languages. Multi30k is used primarily for training and testing in tasks such as image
1215
+ captioning, machine translation, and visual question answering.
1216
+
1217
+ You can unzip the dataset files into the following directory structure and read by MindSpore's API.
1218
+
1219
+ .. code-block::
1220
+
1221
+ └── multi30k_dataset_directory
1222
+ ├── training
1223
+ │ ├── train.de
1224
+ │ └── train.en
1225
+ ├── validation
1226
+ │ ├── val.de
1227
+ │ └── val.en
1228
+ └── mmt16_task1_test
1229
+ ├── test.de
1230
+ └── test.en
1231
+
1232
+ Citation:
1233
+
1234
+ .. code-block::
1235
+
1236
+ @article{elliott-EtAl:2016:VL16,
1237
+ author = {{Elliott}, D. and {Frank}, S. and {Sima'an}, K. and {Specia}, L.},
1238
+ title = {Multi30K: Multilingual English-German Image Descriptions},
1239
+ booktitle = {Proceedings of the 5th Workshop on Vision and Language},
1240
+ year = {2016},
1241
+ pages = {70--74},
1242
+ year = 2016
1243
+ }
1244
+ """
1245
+
1246
@check_multi30k_dataset
def __init__(self, dataset_dir, usage=None, language_pair=None, num_samples=None,
             num_parallel_workers=None, shuffle=None, num_shards=None, shard_id=None, cache=None):
    """
    Record the Multi30k dataset configuration on the instance.

    `num_parallel_workers`, `num_samples`, `shuffle`, `num_shards`, `shard_id` and `cache` are
    handed to the parent initializer as keyword arguments (`shuffle` is forwarded as received,
    including ``None``). `dataset_dir`, `usage`, `language_pair` and `shuffle` are then stored
    as attributes.
    """
    super().__init__(
        num_parallel_workers=num_parallel_workers,
        num_samples=num_samples,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )

    self.dataset_dir = dataset_dir
    # Both options are passed through replace_none together with the fallback value
    # ('all' samples, the en -> de pair) that the class documentation names for None.
    self.usage = replace_none(usage, 'all')
    self.language_pair = replace_none(language_pair, ["en", "de"])
    # NOTE(review): parse() forwards self.shuffle_flag, not self.shuffle; shuffle_flag is
    # not assigned in this method (presumably by the parent initializer) - confirm whether
    # this attribute is still needed.
    self.shuffle = replace_none(shuffle, Shuffle.GLOBAL)
1255
+
1256
def parse(self, children=None):
    """
    Build and return the `cde.Multi30kNode` described by this dataset's attributes.

    `children` is accepted for interface compatibility but is not used in this method.
    """
    node_args = (
        self.dataset_dir,
        self.usage,
        self.language_pair,
        self.num_samples,
        self.shuffle_flag,
        self.num_shards,
        self.shard_id,
    )
    return cde.Multi30kNode(*node_args)
1259
+
1260
+
1261
+ class PennTreebankDataset(SourceDataset, TextBaseDataset):
1262
+ """
1263
+ PennTreebank dataset.
1264
+
1265
+ The generated dataset has one column :py:obj:`[text]` .
1266
+ The tensor of column :py:obj:`text` is of the string type.
1267
+
1268
+ Args:
1269
+ dataset_dir (str): Path to the root directory that contains the dataset.
1270
+ usage (str, optional): Acceptable usages include ``'train'``, ``'test'``, ``'valid'`` and ``'all'``.
1271
+ ``'train'`` will read from 42,068 train samples of string type,
1272
+ ``'test'`` will read from 3,761 test samples of string type,
1273
+ ``'valid'`` will read from 3,370 valid samples of string type,
1274
+ ``'all'`` will read from all 49,199 samples of string type. Default: ``None`` , all samples.
1275
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
1276
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1277
+ Default: ``None`` , will use global default workers(8), it can be set
1278
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1279
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1280
+ Bool type and Shuffle enum are both supported to pass in.
1281
+ Default: ``Shuffle.GLOBAL`` .
1282
+ If `shuffle` is ``False`` , no shuffling will be performed.
1283
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
1284
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1285
+ Set the mode of data shuffling by passing in enumeration variables:
1286
+
1287
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1288
+
1289
+ - ``Shuffle.FILES`` : Shuffle files only.
1290
+
1291
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1292
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
1293
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1294
+ argument can only be specified when `num_shards` is also specified.
1295
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1296
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1297
+ Default: ``None`` , which means no cache is used.
1298
+
1299
+ Raises:
1300
+ RuntimeError: If `dataset_dir` does not contain data files.
1301
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1302
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1303
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1304
+
1305
+ Tutorial Examples:
1306
+ - `Load & Process Data With Dataset Pipeline
1307
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1308
+
1309
+ Examples:
1310
+ >>> import mindspore.dataset as ds
1311
+ >>> penn_treebank_dataset_dir = "/path/to/penn_treebank_dataset_directory"
1312
+ >>> dataset = ds.PennTreebankDataset(dataset_dir=penn_treebank_dataset_dir, usage='all')
1313
+
1314
+ About PennTreebank dataset:
1315
+
1316
+ Penn Treebank (PTB) dataset, is widely used in machine learning for NLP (Natural Language Processing)
1317
+ research. Word-level PTB does not contain capital letters, numbers, and punctuations, and the vocabulary
1318
+ is capped at 10k unique words, which is relatively small in comparison to most modern datasets which
1319
+ can result in a larger number of out of vocabulary tokens.
1320
+
1321
+ Here is the original PennTreebank dataset structure.
1322
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
1323
+
1324
+ .. code-block::
1325
+
1326
+ .
1327
+ └── PennTreebank_dataset_dir
1328
+ ├── ptb.test.txt
1329
+ ├── ptb.train.txt
1330
+ └── ptb.valid.txt
1331
+
1332
+ Citation:
1333
+
1334
+ .. code-block::
1335
+
1336
+ @techreport{Santorini1990,
1337
+ added-at = {2014-03-26T23:25:56.000+0100},
1338
+ author = {Santorini, Beatrice},
1339
+ biburl = {https://www.bibsonomy.org/bibtex/234cdf6ddadd89376090e7dada2fc18ec/butonic},
1340
+ file = {:Santorini - Penn Treebank tag definitions.pdf:PDF},
1341
+ institution = {Department of Computer and Information Science, University of Pennsylvania},
1342
+ interhash = {818e72efd9e4b5fae3e51e88848100a0},
1343
+ intrahash = {34cdf6ddadd89376090e7dada2fc18ec},
1344
+ keywords = {dis pos tagging treebank},
1345
+ number = {MS-CIS-90-47},
1346
+ timestamp = {2014-03-26T23:25:56.000+0100},
1347
+ title = {Part-of-speech tagging guidelines for the {P}enn {T}reebank {P}roject},
1348
+ url = {ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz},
1349
+ year = 1990
1350
+ }
1351
+ """
1352
+
1353
@check_penn_treebank_dataset
def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
             num_shards=None, shard_id=None, cache=None):
    """
    Record the PennTreebank dataset configuration on the instance.

    `num_parallel_workers`, `num_samples`, `shuffle`, `num_shards`, `shard_id` and `cache` are
    handed to the parent initializer as keyword arguments. `dataset_dir` and `usage` are stored
    as attributes, which `parse` later reads.
    """
    super().__init__(
        num_parallel_workers=num_parallel_workers,
        num_samples=num_samples,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )

    self.dataset_dir = dataset_dir
    # `usage` is passed through replace_none together with "all", the fallback the class
    # documentation names for None.
    self.usage = replace_none(usage, "all")
1360
+
1361
def parse(self, children=None):
    """
    Build and return the `cde.PennTreebankNode` described by this dataset's attributes.

    `children` is accepted for interface compatibility but is not used in this method.
    """
    node_args = (
        self.dataset_dir,
        self.usage,
        self.num_samples,
        self.shuffle_flag,
        self.num_shards,
        self.shard_id,
    )
    return cde.PennTreebankNode(*node_args)
1364
+
1365
+
1366
+ class SogouNewsDataset(SourceDataset, TextBaseDataset):
1367
+ r"""
1368
+ Sogou News dataset.
1369
+
1370
+ The generated dataset has three columns: :py:obj:`[index, title, content]` ,
1371
+ and the data type of three columns is string.
1372
+
1373
+ Args:
1374
+ dataset_dir (str): Path to the root directory that contains the dataset.
1375
+ usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
1376
+ ``'train'`` will read from 450,000 train samples, ``'test'`` will read from 60,000 test samples,
1377
+ ``'all'`` will read from all 510,000 samples. Default: ``None`` , all samples.
1378
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , read all samples.
1379
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1380
+ Bool type and Shuffle enum are both supported to pass in.
1381
+ Default: ``Shuffle.GLOBAL`` .
1382
+ If `shuffle` is ``False`` , no shuffling will be performed.
1383
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
1384
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1385
+ Set the mode of data shuffling by passing in enumeration variables:
1386
+
1387
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting shuffle to True.
1388
+
1389
+ - ``Shuffle.FILES`` : Shuffle files only.
1390
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1391
+ When this argument is specified, `num_samples` reflects the max sample number of per shard.
1392
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1393
+ argument can only be specified when `num_shards` is also specified.
1394
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1395
+ Default: ``None`` , will use global default workers(8), it can be set
1396
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1397
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1398
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1399
+ Default: ``None`` , which means no cache is used.
1400
+
1401
+ Raises:
1402
+ RuntimeError: If `dataset_dir` does not contain data files.
1403
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1404
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1405
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1406
+
1407
+ Tutorial Examples:
1408
+ - `Load & Process Data With Dataset Pipeline
1409
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1410
+
1411
+ Examples:
1412
+ >>> import mindspore.dataset as ds
1413
+ >>> sogou_news_dataset_dir = "/path/to/sogou_news_dataset_dir"
1414
+ >>> dataset = ds.SogouNewsDataset(dataset_dir=sogou_news_dataset_dir, usage='all')
1415
+
1416
+ About SogouNews Dataset:
1417
+
1418
+ SogouNews dataset includes 3 columns, corresponding to class index (1 to 5), title and content. The title and
1419
+ content are escaped using double quotes ("), and any internal double quote is escaped by 2 double quotes ("").
1420
+ New lines are escaped by a backslash followed with an "n" character, that is "\n".
1421
+
1422
+ You can unzip the dataset files into the following structure and read by MindSpore's API:
1423
+
1424
+ .. code-block::
1425
+
1426
+ .
1427
+ └── sogou_news_dir
1428
+ ├── classes.txt
1429
+ ├── readme.txt
1430
+ ├── test.csv
1431
+ └── train.csv
1432
+
1433
+ Citation:
1434
+
1435
+ .. code-block::
1436
+
1437
+ @misc{zhang2015characterlevel,
1438
+ title={Character-level Convolutional Networks for Text Classification},
1439
+ author={Xiang Zhang and Junbo Zhao and Yann LeCun},
1440
+ year={2015},
1441
+ eprint={1509.01626},
1442
+ archivePrefix={arXiv},
1443
+ primaryClass={cs.LG}
1444
+ }
1445
+ """
1446
+
1447
@check_sogou_news_dataset
def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
             shard_id=None, num_parallel_workers=None, cache=None):
    """
    Record the Sogou News dataset configuration on the instance.

    `num_parallel_workers`, `num_samples`, `shuffle`, `num_shards`, `shard_id` and `cache` are
    handed to the parent initializer as keyword arguments. `dataset_dir` and `usage` are stored
    as attributes, which `parse` later reads.
    """
    super().__init__(
        num_parallel_workers=num_parallel_workers,
        num_samples=num_samples,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )

    self.dataset_dir = dataset_dir
    # `usage` is passed through replace_none together with 'all', the fallback the class
    # documentation names for None.
    self.usage = replace_none(usage, 'all')
1454
+
1455
def parse(self, children=None):
    """
    Build and return the `cde.SogouNewsNode` described by this dataset's attributes.

    `children` is accepted for interface compatibility but is not used in this method.
    """
    node_args = (
        self.dataset_dir,
        self.usage,
        self.num_samples,
        self.shuffle_flag,
        self.num_shards,
        self.shard_id,
    )
    return cde.SogouNewsNode(*node_args)
1458
+
1459
+
1460
+ class SQuADDataset(SourceDataset, TextBaseDataset):
1461
+ """
1462
+ SQuAD 1.1 and SQuAD 2.0 datasets.
1463
+
1464
+ The generated dataset with different versions and usages has the same output columns:
1465
+ :py:obj:`[context, question, text, answer_start]` .
1466
+ The tensor of column :py:obj:`context` is of the string type.
1467
+ The tensor of column :py:obj:`question` is of the string type.
1468
+ The tensor of column :py:obj:`text` is the answer in the context of the string type.
1469
+ The tensor of column :py:obj:`answer_start` is the start index of answer in context,
1470
+ which is of the uint32 type.
1471
+
1472
+ Args:
1473
+ dataset_dir (str): Path to the root directory that contains the dataset.
1474
+ usage (str, optional): Specify the ``'train'``, ``'dev'`` or ``'all'`` part of dataset.
1475
+ Default: ``None`` , all samples.
1476
+ num_samples (int, optional): The number of samples to be included in the dataset.
1477
+ Default: ``None`` , will include all samples.
1478
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1479
+ Default: ``None`` , will use global default workers(8), it can be set
1480
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1481
+ shuffle (Union[bool, Shuffle], optional): Whether to shuffle the dataset.
1482
+ Default: ``Shuffle.GLOBAL`` .
1483
+ If ``False`` is provided, no shuffling will be performed.
1484
+ If ``True`` is provided, it is the same as setting to
1485
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1486
+ If Shuffle is provided, the effect is as follows:
1487
+
1488
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1489
+ - ``Shuffle.FILES`` : Shuffle files only.
1490
+
1491
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1492
+ When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
1493
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1494
+ argument can only be specified when `num_shards` is also specified.
1495
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1496
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1497
+ Default: ``None`` , which means no cache is used.
1498
+
1499
+ Raises:
1500
+ RuntimeError: If `dataset_dir` does not contain data files.
1501
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1502
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1503
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1504
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1505
+
1506
+ Tutorial Examples:
1507
+ - `Load & Process Data With Dataset Pipeline
1508
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1509
+
1510
+ Examples:
1511
+ >>> import mindspore.dataset as ds
1512
+ >>> squad_dataset_dir = "/path/to/squad_dataset_file"
1513
+ >>> dataset = ds.SQuADDataset(dataset_dir=squad_dataset_dir, usage='all')
1514
+
1515
+ About SQuAD dataset:
1516
+
1517
+ SQuAD (Stanford Question Answering Dataset) is a reading comprehension dataset, consisting of questions posed by
1518
+ crowdworkers on a set of Wikipedia articles, where the answer to every question is a segment of text, or span,
1519
+ from the corresponding reading passage, or the question might be unanswerable.
1520
+
1521
+ SQuAD 1.1, the previous version of the SQuAD dataset, contains 100,000+ question-answer pairs on 500+ articles.
1522
+ SQuAD 2.0 combines the 100,000 questions in SQuAD 1.1 with over 50,000 unanswerable questions written adversarially
1523
+ by crowdworkers to look similar to answerable ones. To do well on SQuAD 2.0, systems must not only answer questions
1524
+ when possible, but also determine when no answer is supported by the paragraph and abstain from answering.
1525
+
1526
+ You can organize the dataset files into the following structure and read them by MindSpore's API.
1527
+
1528
+ For SQuAD 1.1:
1529
+
1530
+ .. code-block::
1531
+
1532
+ .
1533
+ └── SQuAD1
1534
+ ├── train-v1.1.json
1535
+ └── dev-v1.1.json
1536
+
1537
+ For SQuAD 2.0:
1538
+
1539
+ .. code-block::
1540
+
1541
+ .
1542
+ └── SQuAD2
1543
+ ├── train-v2.0.json
1544
+ └── dev-v2.0.json
1545
+
1546
+ Citation:
1547
+
1548
+ .. code-block::
1549
+
1550
+ @misc{rajpurkar2016squad,
1551
+ title = {SQuAD: 100,000+ Questions for Machine Comprehension of Text},
1552
+ author = {Pranav Rajpurkar and Jian Zhang and Konstantin Lopyrev and Percy Liang},
1553
+ year = {2016},
1554
+ eprint = {1606.05250},
1555
+ archivePrefix = {arXiv},
1556
+ primaryClass = {cs.CL}
1557
+ }
1558
+
1559
+ @misc{rajpurkar2018know,
1560
+ title = {Know What You Don't Know: Unanswerable Questions for SQuAD},
1561
+ author = {Pranav Rajpurkar and Robin Jia and Percy Liang},
1562
+ year = {2018},
1563
+ eprint = {1806.03822},
1564
+ archivePrefix = {arXiv},
1565
+ primaryClass = {cs.CL}
1566
+ }
1567
+ """
1568
+
1569
@check_squad_dataset
def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None,
             shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, cache=None):
    """
    Record the SQuAD dataset configuration on the instance.

    `num_parallel_workers`, `num_samples`, `shuffle`, `num_shards`, `shard_id` and `cache` are
    handed to the parent initializer as keyword arguments. `dataset_dir` and `usage` are stored
    as attributes, which `parse` later reads.
    """
    super().__init__(
        num_parallel_workers=num_parallel_workers,
        num_samples=num_samples,
        shuffle=shuffle,
        num_shards=num_shards,
        shard_id=shard_id,
        cache=cache,
    )

    self.dataset_dir = dataset_dir
    # `usage` is passed through replace_none together with 'all', the fallback the class
    # documentation names for None.
    self.usage = replace_none(usage, 'all')
1576
+
1577
def parse(self, children=None):
    """
    Build and return the `cde.SQuADNode` described by this dataset's attributes.

    `children` is accepted for interface compatibility but is not used in this method.
    """
    node_args = (
        self.dataset_dir,
        self.usage,
        self.num_samples,
        self.shuffle_flag,
        self.num_shards,
        self.shard_id,
    )
    return cde.SQuADNode(*node_args)
1580
+
1581
+
1582
+ class SST2Dataset(SourceDataset, TextBaseDataset):
1583
+ """
1584
+ SST2(Stanford Sentiment Treebank v2) dataset.
1585
+
1586
+ The generated dataset's train.tsv and dev.tsv have two columns :py:obj:`[sentence, label]` .
1587
+ The generated dataset's test.tsv has one column :py:obj:`[sentence]` .
1588
+ The tensors of columns :py:obj:`sentence` and :py:obj:`label` are of the string type.
1589
+
1590
+ Args:
1591
+ dataset_dir (str): Path to the root directory that contains the dataset.
1592
+ usage (str, optional): Usage of this dataset, can be ``"train"``, ``"test"`` or ``"dev"``.
1593
+ ``"train"`` will read from 67,349 train samples, ``"test"`` will read from 1,821 test samples,
1594
+ ``"dev"`` will read from all 872 samples. Default: ``None`` , will read train samples.
1595
+ num_samples (int, optional): The number of samples to be included in the dataset.
1596
+ Default: ``None`` , will include all text.
1597
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1598
+ Default: ``None`` , will use global default workers(8), it can be set
1599
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1600
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1601
+ Bool type and Shuffle enum are both supported to pass in.
1602
+ Default: ``Shuffle.GLOBAL`` .
1603
+ If `shuffle` is ``False`` , no shuffling will be performed;
1604
+ If `shuffle` is ``True`` , the behavior is the same as setting `shuffle` to ``Shuffle.GLOBAL`` .
1605
+ Set the mode of data shuffling by passing in enumeration variables:
1606
+
1607
+ - ``Shuffle.GLOBAL`` : Shuffle the samples.
1608
+
1609
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1610
+ When this argument is specified, `num_samples` reflects the maximum sample number of per shard.
1611
+ shard_id (int, optional): The shard ID within `num_shards`. This argument can only be specified when
1612
+ `num_shards` is also specified. Default: ``None`` .
1613
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1614
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1615
+ Default: ``None`` , which means no cache is used.
1616
+
1617
+ Raises:
1618
+ RuntimeError: If `dataset_dir` does not contain data files.
1619
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1620
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1621
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1622
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1623
+
1624
+ Tutorial Examples:
1625
+ - `Load & Process Data With Dataset Pipeline
1626
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1627
+
1628
+ Examples:
1629
+ >>> import mindspore.dataset as ds
1630
+ >>> sst2_dataset_dir = "/path/to/sst2_dataset_directory"
1631
+ >>>
1632
+ >>> # 1) Read 3 samples from SST2 dataset
1633
+ >>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, num_samples=3)
1634
+ >>>
1635
+ >>> # 2) Read train samples from SST2 dataset
1636
+ >>> dataset = ds.SST2Dataset(dataset_dir=sst2_dataset_dir, usage="train")
1637
+
1638
+ About SST2 dataset:
1639
+ The Stanford Sentiment Treebank is a corpus with fully labeled parse trees that allows for a complete
1640
+ analysis of the compositional effects of sentiment in language. The corpus is based on the dataset introduced
1641
+ by Pang and Lee (2005) and consists of 11,855 single sentences extracted from movie reviews. It was parsed
1642
+ with the Stanford parser and includes a total of 215,154 unique phrases from those parse trees, each
1643
+ annotated by 3 human judges.
1644
+
1645
+ Here is the original SST2 dataset structure.
1646
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
1647
+
1648
+ .. code-block::
1649
+
1650
+ .
1651
+ └── sst2_dataset_dir
1652
+ ├── train.tsv
1653
+ ├── test.tsv
1654
+ ├── dev.tsv
1655
+ └── original
1656
+
1657
+ Citation:
1658
+
1659
+ .. code-block::
1660
+
1661
+ @inproceedings{socher-etal-2013-recursive,
1662
+ title = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
1663
+ author = {Socher, Richard and Perelygin, Alex and Wu, Jean and Chuang, Jason and Manning,
1664
+ Christopher D. and Ng, Andrew and Potts, Christopher},
1665
+ booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
1666
+ month = oct,
1667
+ year = {2013},
1668
+ address = {Seattle, Washington, USA},
1669
+ publisher = {Association for Computational Linguistics},
1670
+ url = {https://www.aclweb.org/anthology/D13-1170},
1671
+ pages = {1631--1642},
1672
+ }
1673
+ """
1674
+
1675
+ @check_sst2_dataset
1676
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
1677
+ num_shards=None, shard_id=None, cache=None):
1678
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
1679
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
1680
+ self.dataset_dir = dataset_dir
1681
+ self.usage = replace_none(usage, "train")
1682
+
1683
+ def parse(self, children=None):
1684
+ return cde.SST2Node(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
1685
+ self.num_shards, self.shard_id)
1686
+
1687
+
1688
+ class TextFileDataset(SourceDataset, TextBaseDataset):
1689
+ """
1690
+ A source dataset that reads and parses datasets stored on disk in text format.
1691
+ The generated dataset has one column :py:obj:`[text]` with type string.
1692
+
1693
+ Args:
1694
+ dataset_files (Union[str, list[str]]): String or list of files to be read or glob strings to search for a
1695
+ pattern of files. The list will be sorted in a lexicographical order.
1696
+ num_samples (int, optional): The number of samples to be included in the dataset.
1697
+ Default: ``None`` , will include all samples.
1698
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1699
+ Default: ``None`` , will use global default workers(8), it can be set
1700
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1701
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1702
+ Default: ``Shuffle.GLOBAL`` .
1703
+ Bool type and Shuffle enum are both supported to pass in.
1704
+ If `shuffle` is ``False`` , no shuffling will be performed.
1705
+ If `shuffle` is ``True`` , performs global shuffle.
1706
+ There are three levels of shuffling, desired shuffle enum defined by :class:`mindspore.dataset.Shuffle` .
1707
+
1708
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples, same as setting shuffle to True.
1709
+
1710
+ - ``Shuffle.FILES`` : Shuffle files only.
1711
+
1712
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1713
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
1714
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1715
+ argument can only be specified when `num_shards` is also specified.
1716
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1717
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1718
+ Default: ``None`` , which means no cache is used.
1719
+
1720
+ Raises:
1721
+ ValueError: If `dataset_files` are not valid or do not exist.
1722
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1723
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1724
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1725
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1726
+
1727
+ Tutorial Examples:
1728
+ - `Load & Process Data With Dataset Pipeline
1729
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1730
+
1731
+ Examples:
1732
+ >>> import mindspore.dataset as ds
1733
+ >>> text_file_list = ["/path/to/text_file_dataset_file"] # contains 1 or multiple text files
1734
+ >>> dataset = ds.TextFileDataset(dataset_files=text_file_list)
1735
+ """
1736
+
1737
+ @check_textfiledataset
1738
+ def __init__(self, dataset_files, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
1739
+ num_shards=None, shard_id=None, cache=None):
1740
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
1741
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
1742
+ self.dataset_files = self._find_files(dataset_files)
1743
+ self.dataset_files.sort()
1744
+
1745
+ def parse(self, children=None):
1746
+ return cde.TextFileNode(self.dataset_files, self.num_samples, self.shuffle_flag, self.num_shards,
1747
+ self.shard_id)
1748
+
1749
+
1750
+ class UDPOSDataset(SourceDataset, TextBaseDataset):
1751
+ """
1752
+ UDPOS(Universal Dependencies dataset for Part of Speech) dataset.
1753
+
1754
+ The generated dataset has three columns: :py:obj:`[word, universal, stanford]` ,
1755
+ and the data type of three columns is string.
1756
+
1757
+ Args:
1758
+ dataset_dir (str): Path to the root directory that contains the dataset.
1759
+ usage (str, optional): Usage of this dataset, can be ``'train'``, ``'test'``, ``'valid'`` or ``'all'``.
1760
+ ``'train'`` will read from 12,543 train samples, ``'test'`` will read from 2,077 test samples,
1761
+ ``'valid'`` will read from 2,002 validation samples, ``'all'`` will read from all 16,622 samples.
1762
+ Default: ``None`` , all samples.
1763
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
1764
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1765
+ Bool type and Shuffle enum are both supported to pass in.
1766
+ Default: ``Shuffle.GLOBAL`` .
1767
+ If `shuffle` is ``False`` , no shuffling will be performed.
1768
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
1769
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1770
+ Set the mode of data shuffling by passing in enumeration variables:
1771
+
1772
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1773
+
1774
+ - ``Shuffle.FILES`` : Shuffle files only.
1775
+
1776
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1777
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
1778
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1779
+ argument can only be specified when `num_shards` is also specified.
1780
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1781
+ Default: ``None`` , will use global default workers(8), it can be set
1782
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1783
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1784
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1785
+ Default: ``None`` , which means no cache is used.
1786
+
1787
+ Raises:
1788
+ RuntimeError: If `dataset_dir` does not contain data files.
1789
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1790
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1791
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1792
+
1793
+ Tutorial Examples:
1794
+ - `Load & Process Data With Dataset Pipeline
1795
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1796
+
1797
+ Examples:
1798
+ >>> import mindspore.dataset as ds
1799
+ >>> udpos_dataset_dir = "/path/to/udpos_dataset_dir"
1800
+ >>> dataset = ds.UDPOSDataset(dataset_dir=udpos_dataset_dir, usage='all')
1801
+
1802
+ About UDPOS dataset:
1803
+
1804
+ Text corpus dataset that clarifies syntactic or semantic sentence structure.
1805
+ The corpus comprises 254,830 words and 16,622 sentences, taken from various web media including
1806
+ weblogs, newsgroups, emails and reviews.
1807
+
1808
+ Citation:
1809
+
1810
+ .. code-block::
1811
+
1812
+ @inproceedings{silveira14gold,
1813
+ year = {2014},
1814
+ author = {Natalia Silveira and Timothy Dozat and Marie-Catherine de Marneffe and Samuel Bowman
1815
+ and Miriam Connor and John Bauer and Christopher D. Manning},
1816
+ title = {A Gold Standard Dependency Corpus for {E}nglish},
1817
+ booktitle = {Proceedings of the Ninth International Conference on Language
1818
+ Resources and Evaluation (LREC-2014)}
1819
+ }
1820
+ """
1821
+
1822
+ @check_udpos_dataset
1823
+ def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
1824
+ shard_id=None, num_parallel_workers=None, cache=None):
1825
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
1826
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
1827
+ self.dataset_dir = dataset_dir
1828
+ self.usage = replace_none(usage, 'all')
1829
+
1830
+ def parse(self, children=None):
1831
+ return cde.UDPOSNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards,
1832
+ self.shard_id)
1833
+
1834
+
1835
+ class WikiTextDataset(SourceDataset, TextBaseDataset):
1836
+ """
1837
+ WikiText2 and WikiText103 datasets.
1838
+
1839
+ The generated dataset has one column :py:obj:`[text]` , and
1840
+ the tensor of column `text` is of the string type.
1841
+
1842
+ Args:
1843
+ dataset_dir (str): Path to the root directory that contains the dataset.
1844
+ usage (str, optional): Acceptable usages include ``'train'``, ``'test'``, ``'valid'`` and ``'all'``.
1845
+ Default: ``None`` , all samples.
1846
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads the full dataset.
1847
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1848
+ Default: ``None`` , will use global default workers(8), it can be set
1849
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1850
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1851
+ Bool type and Shuffle enum are both supported to pass in.
1852
+ Default: ``Shuffle.GLOBAL`` .
1853
+ If `shuffle` is ``False`` , no shuffling will be performed.
1854
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
1855
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1856
+ Set the mode of data shuffling by passing in enumeration variables:
1857
+
1858
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1859
+
1860
+ - ``Shuffle.FILES`` : Shuffle files only.
1861
+
1862
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1863
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
1864
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1865
+ argument can only be specified when `num_shards` is also specified.
1866
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1867
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1868
+ Default: ``None`` , which means no cache is used.
1869
+
1870
+ Raises:
1871
+ RuntimeError: If `dataset_dir` does not contain data files or is invalid.
1872
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1873
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1874
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1875
+ ValueError: If `num_samples` is invalid (< 0).
1876
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1877
+
1878
+ Tutorial Examples:
1879
+ - `Load & Process Data With Dataset Pipeline
1880
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1881
+
1882
+ About WikiText dataset:
1883
+
1884
+ The WikiText Long Term Dependency Language Modeling Dataset is an English lexicon containing 100 million words.
1885
+ These terms are drawn from Wikipedia's premium and benchmark articles, including versions of Wikitext2 and
1886
+ Wikitext103. For WikiText2, it has 36718 lines in wiki.train.tokens, 4358 lines in wiki.test.tokens and
1887
+ 3760 lines in wiki.valid.tokens. For WikiText103, it has 1801350 lines in wiki.train.tokens, 4358 lines in
1888
+ wiki.test.tokens and 3760 lines in wiki.valid.tokens.
1889
+
1890
+ Here is the original WikiText dataset structure.
1891
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
1892
+
1893
+ .. code-block::
1894
+
1895
+ .
1896
+ └── WikiText2/WikiText103
1897
+ ├── wiki.train.tokens
1898
+ ├── wiki.test.tokens
1899
+ ├── wiki.valid.tokens
1900
+
1901
+ Citation:
1902
+
1903
+ .. code-block::
1904
+
1905
+ @article{merity2016pointer,
1906
+ title={Pointer sentinel mixture models},
1907
+ author={Merity, Stephen and Xiong, Caiming and Bradbury, James and Socher, Richard},
1908
+ journal={arXiv preprint arXiv:1609.07843},
1909
+ year={2016}
1910
+ }
1911
+
1912
+ Examples:
1913
+ >>> import mindspore.dataset as ds
1914
+ >>> wiki_text_dataset_dir = "/path/to/wiki_text_dataset_directory"
1915
+ >>> dataset = ds.WikiTextDataset(dataset_dir=wiki_text_dataset_dir, usage='all')
1916
+ """
1917
+
1918
+ @check_wiki_text_dataset
1919
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
1920
+ num_shards=None, shard_id=None, cache=None):
1921
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
1922
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
1923
+ self.dataset_dir = dataset_dir
1924
+ self.usage = replace_none(usage, "all")
1925
+
1926
+ def parse(self, children=None):
1927
+ return cde.WikiTextNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag, self.num_shards,
1928
+ self.shard_id)
1929
+
1930
+
1931
+ class YahooAnswersDataset(SourceDataset, TextBaseDataset):
1932
+ """
1933
+ YahooAnswers dataset.
1934
+
1935
+ The generated dataset has four columns :py:obj:`[class, title, content, answer]` , whose data type is string.
1936
+
1937
+ Args:
1938
+ dataset_dir (str): Path to the root directory that contains the dataset.
1939
+ usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
1940
+ ``'train'`` will read from 1,400,000 train samples, ``'test'`` will read from 60,000 test
1941
+ samples, ``'all'`` will read from all 1,460,000 samples. Default: ``None`` , all samples.
1942
+ num_samples (int, optional): The number of samples to be included in the dataset.
1943
+ Default: ``None`` , will include all text.
1944
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
1945
+ Default: ``None`` , will use global default workers(8), it can be set
1946
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
1947
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
1948
+ Bool type and Shuffle enum are both supported to pass in.
1949
+ Default: ``Shuffle.GLOBAL`` .
1950
+ If `shuffle` is ``False`` , no shuffling will be performed.
1951
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
1952
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
1953
+ Set the mode of data shuffling by passing in enumeration variables:
1954
+
1955
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
1956
+
1957
+ - ``Shuffle.FILES`` : Shuffle files only.
1958
+
1959
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
1960
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
1961
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1962
+ argument can only be specified when `num_shards` is also specified.
1963
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1964
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1965
+ Default: ``None`` , which means no cache is used.
1966
+
1967
+ Raises:
1968
+ RuntimeError: If `dataset_dir` does not contain data files.
1969
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
1970
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
1971
+ ValueError: If `shard_id` is not in range of [0, `num_shards` ).
1972
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
1973
+
1974
+ Tutorial Examples:
1975
+ - `Load & Process Data With Dataset Pipeline
1976
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
1977
+
1978
+ Examples:
1979
+ >>> import mindspore.dataset as ds
1980
+ >>> yahoo_answers_dataset_dir = "/path/to/yahoo_answers_dataset_directory"
1981
+ >>>
1982
+ >>> # 1) Read 3 samples from YahooAnswers dataset
1983
+ >>> dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, num_samples=3)
1984
+ >>>
1985
+ >>> # 2) Read train samples from YahooAnswers dataset
1986
+ >>> dataset = ds.YahooAnswersDataset(dataset_dir=yahoo_answers_dataset_dir, usage="train")
1987
+
1988
+ About YahooAnswers dataset:
1989
+
1990
+ The YahooAnswers dataset consists of 1,460,000 text samples in 10 classes.
1991
+ There are 1,400,000 samples in the train.csv and 60,000 samples in the test.csv.
1992
+ The 10 different classes represent Society & Culture, Science & Mathematics, Health, Education & Reference,
1993
+ Computers & Internet, Sports, Business & Finance, Entertainment & Music, Family & Relationships,
1994
+ Politics & Government.
1995
+
1996
+ Here is the original YahooAnswers dataset structure.
1997
+ You can unzip the dataset files into this directory structure and read by MindSpore's API.
1998
+
1999
+ .. code-block::
2000
+
2001
+ .
2002
+ └── yahoo_answers_dataset_dir
2003
+ ├── train.csv
2004
+ ├── test.csv
2005
+ ├── classes.txt
2006
+ └── readme.txt
2007
+
2008
+ Citation:
2009
+
2010
+ .. code-block::
2011
+
2012
+ @article{YahooAnswers,
2013
+ title = {Yahoo! Answers Topic Classification Dataset},
2014
+ author = {Xiang Zhang},
2015
+ year = {2015},
2016
+ howpublished = {}
2017
+ }
2018
+ """
2019
+
2020
+ @check_yahoo_answers_dataset
2021
+ def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=Shuffle.GLOBAL,
2022
+ num_shards=None, shard_id=None, cache=None):
2023
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
2024
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
2025
+ self.dataset_dir = dataset_dir
2026
+ self.usage = replace_none(usage, "all")
2027
+
2028
+ def parse(self, children=None):
2029
+ return cde.YahooAnswersNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
2030
+ self.num_shards, self.shard_id)
2031
+
2032
+
2033
+ class YelpReviewDataset(SourceDataset, TextBaseDataset):
2034
+ """
2035
+ Yelp Review Polarity and Yelp Review Full datasets.
2036
+
2037
+ The generated dataset has two columns: :py:obj:`[label, text]` , and the data type of two columns is string.
2038
+
2039
+ Args:
2040
+ dataset_dir (str): Path to the root directory that contains the dataset.
2041
+ usage (str, optional): Usage of this dataset, can be ``'train'`` , ``'test'`` or ``'all'`` .
2042
+ For Polarity, ``'train'`` will read from 560,000 train samples,
2043
+ ``'test'`` will read from 38,000 test samples,
2044
+ ``'all'`` will read from all 598,000 samples.
2045
+ For Full, ``'train'`` will read from 650,000 train samples, ``'test'`` will read from 50,000 test samples,
2046
+ ``'all'`` will read from all 700,000 samples. Default: ``None`` , all samples.
2047
+ num_samples (int, optional): Number of samples (rows) to read. Default: ``None`` , reads all samples.
2048
+ shuffle (Union[bool, Shuffle], optional): Perform reshuffling of the data every epoch.
2049
+ Bool type and Shuffle enum are both supported to pass in.
2050
+ Default: ``Shuffle.GLOBAL`` .
2051
+ If `shuffle` is ``False`` , no shuffling will be performed.
2052
+ If `shuffle` is ``True`` , it is equivalent to setting `shuffle` to
2053
+ ``mindspore.dataset.Shuffle.GLOBAL`` .
2054
+ Set the mode of data shuffling by passing in enumeration variables:
2055
+
2056
+ - ``Shuffle.GLOBAL`` : Shuffle both the files and samples.
2057
+
2058
+ - ``Shuffle.FILES`` : Shuffle files only.
2059
+ num_shards (int, optional): Number of shards that the dataset will be divided into. Default: ``None`` .
2060
+ When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
2061
+ shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
2062
+ argument can only be specified when `num_shards` is also specified.
2063
+ num_parallel_workers (int, optional): Number of worker threads to read the data.
2064
+ Default: ``None`` , will use global default workers(8), it can be set
2065
+ by :func:`mindspore.dataset.config.set_num_parallel_workers` .
2066
+ cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2067
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2068
+ Default: ``None`` , which means no cache is used.
2069
+
2070
+ Raises:
2071
+ RuntimeError: If `dataset_dir` does not contain data files.
2072
+ RuntimeError: If `num_shards` is specified but `shard_id` is None.
2073
+ RuntimeError: If `shard_id` is specified but `num_shards` is None.
2074
+ ValueError: If `num_parallel_workers` exceeds the max thread numbers.
2075
+
2076
+ Tutorial Examples:
2077
+ - `Load & Process Data With Dataset Pipeline
2078
+ <https://www.mindspore.cn/docs/en/master/api_python/samples/dataset/dataset_gallery.html>`_
2079
+
2080
+ Examples:
2081
+ >>> import mindspore.dataset as ds
2082
+ >>> yelp_review_dataset_dir = "/path/to/yelp_review_dataset_dir"
2083
+ >>> dataset = ds.YelpReviewDataset(dataset_dir=yelp_review_dataset_dir, usage='all')
2084
+
2085
+ About YelpReview Dataset:
2086
+
2087
+ The Yelp Review Full dataset consists of reviews from Yelp. It is extracted from the Yelp Dataset Challenge 2015
2088
+ data, and it is mainly used for text classification.
2089
+
2090
+ The Yelp Review Polarity dataset is constructed from the above dataset, by considering stars 1 and 2 negative, and 3
2091
+ and 4 positive.
2092
+
2093
+ The directory structures of these two datasets are the same.
2094
+ You can unzip the dataset files into the following structure and read by MindSpore's API:
2095
+
2096
+ .. code-block::
2097
+
2098
+ .
2099
+ └── yelp_review_dir
2100
+ ├── train.csv
2101
+ ├── test.csv
2102
+ └── readme.txt
2103
+
2104
+ Citation:
2105
+
2106
+ For Yelp Review Polarity:
2107
+
2108
+ .. code-block::
2109
+
2110
+ @article{zhangCharacterlevelConvolutionalNetworks2015,
2111
+ archivePrefix = {arXiv},
2112
+ eprinttype = {arxiv},
2113
+ eprint = {1509.01626},
2114
+ primaryClass = {cs},
2115
+ title = {Character-Level {{Convolutional Networks}} for {{Text Classification}}},
2116
+ abstract = {This article offers an empirical exploration on the use of character-level convolutional networks
2117
+ (ConvNets) for text classification. We constructed several large-scale datasets to show that
2118
+ character-level convolutional networks could achieve state-of-the-art or competitive results.
2119
+ Comparisons are offered against traditional models such as bag of words, n-grams and their TFIDF
2120
+ variants, and deep learning models such as word-based ConvNets and recurrent neural networks.},
2121
+ journal = {arXiv:1509.01626 [cs]},
2122
+ author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann},
2123
+ month = sep,
2124
+ year = {2015},
2125
+ }
2126
+
2127
+ Citation:
2128
+
2129
+ For Yelp Review Full:
2130
+
2131
+ .. code-block::
2132
+
2133
+ @article{zhangCharacterlevelConvolutionalNetworks2015,
2134
+ archivePrefix = {arXiv},
2135
+ eprinttype = {arxiv},
2136
+ eprint = {1509.01626},
2137
+ primaryClass = {cs},
2138
+ title = {Character-Level {{Convolutional Networks}} for {{Text Classification}}},
2139
+ abstract = {This article offers an empirical exploration on the use of character-level convolutional networks
2140
+ (ConvNets) for text classification. We constructed several large-scale datasets to show that
2141
+ character-level convolutional networks could achieve state-of-the-art or competitive results.
2142
+ Comparisons are offered against traditional models such as bag of words, n-grams and their TFIDF
2143
+ variants, and deep learning models such as word-based ConvNets and recurrent neural networks.},
2144
+ journal = {arXiv:1509.01626 [cs]},
2145
+ author = {Zhang, Xiang and Zhao, Junbo and LeCun, Yann},
2146
+ month = sep,
2147
+ year = {2015},
2148
+ }
2149
+ """
2150
+
2151
+ @check_yelp_review_dataset
2152
+ def __init__(self, dataset_dir, usage=None, num_samples=None, shuffle=Shuffle.GLOBAL, num_shards=None,
2153
+ shard_id=None, num_parallel_workers=None, cache=None):
2154
+ super().__init__(num_parallel_workers=num_parallel_workers, num_samples=num_samples, shuffle=shuffle,
2155
+ num_shards=num_shards, shard_id=shard_id, cache=cache)
2156
+ self.dataset_dir = dataset_dir
2157
+ self.usage = replace_none(usage, 'all')
2158
+
2159
+ def parse(self, children=None):
2160
+ return cde.YelpReviewNode(self.dataset_dir, self.usage, self.num_samples, self.shuffle_flag,
2161
+ self.num_shards, self.shard_id)