mindspore 2.3.0__cp310-cp310-win_amd64.whl → 2.4.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore has been flagged as potentially problematic; further details were linked from the original page.

Files changed (308)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +3 -1
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +50 -9
  9. mindspore/_extends/parse/compile_config.py +41 -0
  10. mindspore/_extends/parse/parser.py +9 -7
  11. mindspore/_extends/parse/standard_method.py +52 -14
  12. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  13. mindspore/amp.py +24 -10
  14. mindspore/atlprov.dll +0 -0
  15. mindspore/avcodec-59.dll +0 -0
  16. mindspore/avdevice-59.dll +0 -0
  17. mindspore/avfilter-8.dll +0 -0
  18. mindspore/avformat-59.dll +0 -0
  19. mindspore/avutil-57.dll +0 -0
  20. mindspore/c1.dll +0 -0
  21. mindspore/c1xx.dll +0 -0
  22. mindspore/c2.dll +0 -0
  23. mindspore/common/__init__.py +6 -4
  24. mindspore/common/_pijit_context.py +190 -0
  25. mindspore/common/_register_for_tensor.py +2 -1
  26. mindspore/common/_tensor_overload.py +139 -0
  27. mindspore/common/api.py +102 -87
  28. mindspore/common/dump.py +5 -6
  29. mindspore/common/generator.py +1 -7
  30. mindspore/common/hook_handle.py +14 -26
  31. mindspore/common/mindir_util.py +2 -2
  32. mindspore/common/parameter.py +46 -13
  33. mindspore/common/recompute.py +39 -9
  34. mindspore/common/sparse_tensor.py +7 -3
  35. mindspore/common/tensor.py +209 -29
  36. mindspore/communication/__init__.py +1 -1
  37. mindspore/communication/_comm_helper.py +38 -3
  38. mindspore/communication/comm_func.py +310 -55
  39. mindspore/communication/management.py +14 -14
  40. mindspore/context.py +123 -22
  41. mindspore/dataset/__init__.py +1 -1
  42. mindspore/dataset/audio/__init__.py +1 -1
  43. mindspore/dataset/core/config.py +7 -0
  44. mindspore/dataset/core/validator_helpers.py +7 -0
  45. mindspore/dataset/engine/cache_client.py +1 -1
  46. mindspore/dataset/engine/datasets.py +72 -44
  47. mindspore/dataset/engine/datasets_audio.py +7 -7
  48. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  49. mindspore/dataset/engine/datasets_text.py +20 -20
  50. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  51. mindspore/dataset/engine/datasets_vision.py +33 -33
  52. mindspore/dataset/engine/iterators.py +29 -0
  53. mindspore/dataset/engine/obs/util.py +7 -0
  54. mindspore/dataset/engine/queue.py +114 -60
  55. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  56. mindspore/dataset/engine/validators.py +34 -14
  57. mindspore/dataset/text/__init__.py +1 -4
  58. mindspore/dataset/transforms/__init__.py +0 -3
  59. mindspore/dataset/utils/line_reader.py +2 -0
  60. mindspore/dataset/vision/__init__.py +1 -4
  61. mindspore/dataset/vision/utils.py +1 -1
  62. mindspore/dataset/vision/validators.py +2 -1
  63. mindspore/dnnl.dll +0 -0
  64. mindspore/dpcmi.dll +0 -0
  65. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  66. mindspore/experimental/es/embedding_service.py +883 -0
  67. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  68. mindspore/experimental/llm_boost/__init__.py +21 -0
  69. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  70. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  71. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  72. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  73. mindspore/experimental/llm_boost/register.py +129 -0
  74. mindspore/experimental/llm_boost/utils.py +31 -0
  75. mindspore/experimental/optim/adamw.py +85 -0
  76. mindspore/experimental/optim/optimizer.py +3 -0
  77. mindspore/hal/__init__.py +3 -3
  78. mindspore/hal/contiguous_tensors_handle.py +175 -0
  79. mindspore/hal/stream.py +18 -0
  80. mindspore/include/api/model_group.h +13 -1
  81. mindspore/include/api/types.h +10 -10
  82. mindspore/include/dataset/config.h +2 -2
  83. mindspore/include/dataset/constants.h +2 -2
  84. mindspore/include/dataset/execute.h +2 -2
  85. mindspore/include/dataset/vision.h +4 -0
  86. mindspore/jpeg62.dll +0 -0
  87. mindspore/log.py +1 -1
  88. mindspore/mindrecord/filewriter.py +68 -51
  89. mindspore/mindspore_backend.dll +0 -0
  90. mindspore/mindspore_common.dll +0 -0
  91. mindspore/mindspore_core.dll +0 -0
  92. mindspore/mindspore_glog.dll +0 -0
  93. mindspore/mindspore_np_dtype.dll +0 -0
  94. mindspore/mindspore_ops.dll +0 -0
  95. mindspore/mint/__init__.py +495 -46
  96. mindspore/mint/distributed/__init__.py +31 -0
  97. mindspore/mint/distributed/distributed.py +254 -0
  98. mindspore/mint/nn/__init__.py +266 -21
  99. mindspore/mint/nn/functional.py +125 -19
  100. mindspore/mint/nn/layer/__init__.py +39 -0
  101. mindspore/mint/nn/layer/activation.py +133 -0
  102. mindspore/mint/nn/layer/normalization.py +477 -0
  103. mindspore/mint/nn/layer/pooling.py +110 -0
  104. mindspore/mint/optim/adamw.py +28 -7
  105. mindspore/mint/special/__init__.py +63 -0
  106. mindspore/msobj140.dll +0 -0
  107. mindspore/mspdb140.dll +0 -0
  108. mindspore/mspdbcore.dll +0 -0
  109. mindspore/mspdbst.dll +0 -0
  110. mindspore/mspft140.dll +0 -0
  111. mindspore/msvcdis140.dll +0 -0
  112. mindspore/msvcp140_1.dll +0 -0
  113. mindspore/msvcp140_2.dll +0 -0
  114. mindspore/msvcp140_atomic_wait.dll +0 -0
  115. mindspore/msvcp140_codecvt_ids.dll +0 -0
  116. mindspore/multiprocessing/__init__.py +2 -1
  117. mindspore/nn/__init__.py +0 -1
  118. mindspore/nn/cell.py +275 -93
  119. mindspore/nn/layer/activation.py +211 -44
  120. mindspore/nn/layer/basic.py +113 -3
  121. mindspore/nn/layer/embedding.py +120 -2
  122. mindspore/nn/layer/normalization.py +101 -5
  123. mindspore/nn/layer/padding.py +34 -48
  124. mindspore/nn/layer/pooling.py +161 -7
  125. mindspore/nn/layer/transformer.py +3 -3
  126. mindspore/nn/loss/__init__.py +2 -2
  127. mindspore/nn/loss/loss.py +84 -6
  128. mindspore/nn/optim/__init__.py +2 -1
  129. mindspore/nn/optim/adadelta.py +1 -1
  130. mindspore/nn/optim/adam.py +1 -1
  131. mindspore/nn/optim/lamb.py +1 -1
  132. mindspore/nn/optim/tft_wrapper.py +127 -0
  133. mindspore/nn/wrap/cell_wrapper.py +12 -23
  134. mindspore/nn/wrap/grad_reducer.py +5 -5
  135. mindspore/nn/wrap/loss_scale.py +17 -3
  136. mindspore/numpy/__init__.py +1 -1
  137. mindspore/numpy/array_creations.py +65 -68
  138. mindspore/numpy/array_ops.py +64 -60
  139. mindspore/numpy/fft.py +610 -75
  140. mindspore/numpy/logic_ops.py +11 -10
  141. mindspore/numpy/math_ops.py +85 -84
  142. mindspore/numpy/utils_const.py +4 -4
  143. mindspore/opencv_core452.dll +0 -0
  144. mindspore/opencv_imgcodecs452.dll +0 -0
  145. mindspore/opencv_imgproc452.dll +0 -0
  146. mindspore/ops/__init__.py +6 -4
  147. mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
  148. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  149. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  150. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  151. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  152. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
  153. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  154. mindspore/ops/auto_generate/gen_extend_func.py +734 -13
  155. mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
  156. mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
  157. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  158. mindspore/ops/composite/base.py +85 -48
  159. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  160. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  161. mindspore/ops/function/__init__.py +22 -0
  162. mindspore/ops/function/array_func.py +490 -153
  163. mindspore/ops/function/debug_func.py +113 -1
  164. mindspore/ops/function/fft_func.py +15 -2
  165. mindspore/ops/function/grad/grad_func.py +3 -2
  166. mindspore/ops/function/math_func.py +558 -207
  167. mindspore/ops/function/nn_func.py +817 -383
  168. mindspore/ops/function/other_func.py +3 -2
  169. mindspore/ops/function/random_func.py +184 -8
  170. mindspore/ops/function/reshard_func.py +13 -11
  171. mindspore/ops/function/sparse_unary_func.py +1 -1
  172. mindspore/ops/function/vmap_func.py +3 -2
  173. mindspore/ops/functional.py +24 -14
  174. mindspore/ops/op_info_register.py +3 -3
  175. mindspore/ops/operations/__init__.py +6 -1
  176. mindspore/ops/operations/_grad_ops.py +2 -76
  177. mindspore/ops/operations/_infer_ops.py +1 -1
  178. mindspore/ops/operations/_inner_ops.py +71 -94
  179. mindspore/ops/operations/array_ops.py +12 -146
  180. mindspore/ops/operations/comm_ops.py +42 -53
  181. mindspore/ops/operations/custom_ops.py +83 -19
  182. mindspore/ops/operations/debug_ops.py +42 -10
  183. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  184. mindspore/ops/operations/manually_defined/ops_def.py +265 -10
  185. mindspore/ops/operations/math_ops.py +12 -223
  186. mindspore/ops/operations/nn_ops.py +20 -114
  187. mindspore/ops/operations/other_ops.py +7 -4
  188. mindspore/ops/operations/random_ops.py +46 -1
  189. mindspore/ops/primitive.py +18 -6
  190. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  191. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  192. mindspore/ops_generate/gen_constants.py +36 -0
  193. mindspore/ops_generate/gen_ops.py +67 -52
  194. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  195. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  196. mindspore/ops_generate/op_proto.py +10 -3
  197. mindspore/ops_generate/pyboost_utils.py +14 -1
  198. mindspore/ops_generate/template.py +43 -21
  199. mindspore/parallel/__init__.py +3 -1
  200. mindspore/parallel/_auto_parallel_context.py +28 -8
  201. mindspore/parallel/_cell_wrapper.py +83 -0
  202. mindspore/parallel/_parallel_serialization.py +47 -19
  203. mindspore/parallel/_tensor.py +81 -11
  204. mindspore/parallel/_utils.py +13 -1
  205. mindspore/parallel/algo_parameter_config.py +5 -5
  206. mindspore/parallel/checkpoint_transform.py +46 -39
  207. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  208. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  209. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  210. mindspore/parallel/parameter_broadcast.py +3 -4
  211. mindspore/parallel/shard.py +162 -31
  212. mindspore/parallel/transform_safetensors.py +993 -0
  213. mindspore/pgodb140.dll +0 -0
  214. mindspore/pgort140.dll +0 -0
  215. mindspore/profiler/__init__.py +2 -1
  216. mindspore/profiler/common/constant.py +29 -0
  217. mindspore/profiler/common/registry.py +47 -0
  218. mindspore/profiler/common/util.py +28 -0
  219. mindspore/profiler/dynamic_profiler.py +694 -0
  220. mindspore/profiler/envprofiling.py +17 -19
  221. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  222. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  223. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  224. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  225. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  226. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  227. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  228. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  229. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  230. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  231. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  232. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  233. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  234. mindspore/profiler/parser/framework_parser.py +1 -391
  235. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  236. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  237. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  238. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  239. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  240. mindspore/profiler/parser/profiler_info.py +78 -6
  241. mindspore/profiler/profiler.py +153 -0
  242. mindspore/profiler/profiling.py +280 -412
  243. mindspore/rewrite/__init__.py +1 -2
  244. mindspore/rewrite/common/namespace.py +4 -4
  245. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  246. mindspore/run_check/_check_version.py +36 -103
  247. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  248. mindspore/swresample-4.dll +0 -0
  249. mindspore/swscale-6.dll +0 -0
  250. mindspore/tbbmalloc.dll +0 -0
  251. mindspore/tinyxml2.dll +0 -0
  252. mindspore/train/__init__.py +4 -3
  253. mindspore/train/_utils.py +28 -2
  254. mindspore/train/amp.py +171 -53
  255. mindspore/train/callback/__init__.py +2 -2
  256. mindspore/train/callback/_callback.py +4 -4
  257. mindspore/train/callback/_checkpoint.py +85 -22
  258. mindspore/train/callback/_cluster_monitor.py +1 -1
  259. mindspore/train/callback/_flops_collector.py +1 -0
  260. mindspore/train/callback/_loss_monitor.py +3 -3
  261. mindspore/train/callback/_on_request_exit.py +134 -31
  262. mindspore/train/callback/_summary_collector.py +5 -5
  263. mindspore/train/callback/_tft_register.py +352 -0
  264. mindspore/train/dataset_helper.py +7 -3
  265. mindspore/train/metrics/metric.py +3 -3
  266. mindspore/train/metrics/roc.py +4 -4
  267. mindspore/train/mind_ir_pb2.py +44 -39
  268. mindspore/train/model.py +134 -58
  269. mindspore/train/serialization.py +336 -112
  270. mindspore/turbojpeg.dll +0 -0
  271. mindspore/utils/__init__.py +21 -0
  272. mindspore/utils/utils.py +60 -0
  273. mindspore/vcmeta.dll +0 -0
  274. mindspore/vcruntime140.dll +0 -0
  275. mindspore/vcruntime140_1.dll +0 -0
  276. mindspore/version.py +1 -1
  277. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
  278. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +281 -275
  279. mindspore/include/c_api/ms/abstract.h +0 -67
  280. mindspore/include/c_api/ms/attribute.h +0 -197
  281. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  282. mindspore/include/c_api/ms/base/macros.h +0 -32
  283. mindspore/include/c_api/ms/base/status.h +0 -33
  284. mindspore/include/c_api/ms/base/types.h +0 -283
  285. mindspore/include/c_api/ms/context.h +0 -102
  286. mindspore/include/c_api/ms/graph.h +0 -160
  287. mindspore/include/c_api/ms/node.h +0 -606
  288. mindspore/include/c_api/ms/tensor.h +0 -161
  289. mindspore/include/c_api/ms/value.h +0 -84
  290. mindspore/mindspore_shared_lib.dll +0 -0
  291. mindspore/nn/extend/basic.py +0 -140
  292. mindspore/nn/extend/embedding.py +0 -143
  293. mindspore/nn/extend/layer/normalization.py +0 -109
  294. mindspore/nn/extend/pooling.py +0 -117
  295. mindspore/nn/layer/embedding_service.py +0 -531
  296. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  297. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  298. mindspore/ops/extend/__init__.py +0 -53
  299. mindspore/ops/extend/array_func.py +0 -218
  300. mindspore/ops/extend/math_func.py +0 -76
  301. mindspore/ops/extend/nn_func.py +0 -308
  302. mindspore/ops/silent_check.py +0 -162
  303. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  304. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  305. mindspore/train/callback/_mindio_ttp.py +0 -443
  306. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
  307. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
  308. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
@@ -287,7 +287,7 @@ class Caltech256Dataset(MappableDataset, VisionBaseDataset):
287
287
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
288
288
  argument can only be specified when `num_shards` is also specified.
289
289
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
290
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
290
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
291
291
  Default: ``None`` , which means no cache is used.
292
292
 
293
293
  Raises:
@@ -399,7 +399,7 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
399
399
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
400
400
  argument can only be specified when `num_shards` is also specified.
401
401
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
402
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
402
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
403
403
  Default: ``None`` , which means no cache is used.
404
404
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
405
405
  and returns the decrypted bytes data. Default: ``None`` , no decryption.
@@ -552,7 +552,7 @@ class Cifar10Dataset(MappableDataset, VisionBaseDataset):
552
552
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
553
553
  argument can only be specified when `num_shards` is also specified.
554
554
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
555
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
555
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
556
556
  Default: ``None`` , which means no cache is used.
557
557
 
558
558
  Raises:
@@ -666,7 +666,7 @@ class Cifar100Dataset(MappableDataset, VisionBaseDataset):
666
666
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
667
667
  argument can only be specified when `num_shards` is also specified.
668
668
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
669
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
669
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
670
670
  Default: ``None`` , which means no cache is used.
671
671
 
672
672
  Raises:
@@ -780,7 +780,7 @@ class CityscapesDataset(MappableDataset, VisionBaseDataset):
780
780
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
781
781
  argument can only be specified when `num_shards` is also specified.
782
782
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
783
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
783
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
784
784
  Default: ``None`` , which means no cache is used.
785
785
 
786
786
  Raises:
@@ -931,7 +931,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
931
931
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
932
932
  argument can only be specified when `num_shards` is also specified.
933
933
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
934
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
934
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
935
935
  Default: ``None`` , which means no cache is used.
936
936
  extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
937
937
  output at the end :py:obj:`[_meta-filename, dtype=string]` . Default: ``False``.
@@ -1173,7 +1173,7 @@ class DIV2KDataset(MappableDataset, VisionBaseDataset):
1173
1173
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1174
1174
  argument can only be specified when `num_shards` is also specified.
1175
1175
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1176
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
1176
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1177
1177
  Default: ``None`` , which means no cache is used.
1178
1178
 
1179
1179
  Raises:
@@ -1341,7 +1341,7 @@ class EMnistDataset(MappableDataset, VisionBaseDataset):
1341
1341
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1342
1342
  argument can only be specified when `num_shards` is also specified.
1343
1343
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1344
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
1344
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1345
1345
  Default: ``None`` , which means no cache is used.
1346
1346
 
1347
1347
  Raises:
@@ -1456,7 +1456,7 @@ class FakeImageDataset(MappableDataset, VisionBaseDataset):
1456
1456
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1457
1457
  argument can only be specified when `num_shards` is also specified.
1458
1458
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1459
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
1459
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1460
1460
  Default: ``None`` , which means no cache is used.
1461
1461
 
1462
1462
  Raises:
@@ -1527,7 +1527,7 @@ class FashionMnistDataset(MappableDataset, VisionBaseDataset):
1527
1527
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1528
1528
  argument can only be specified when `num_shards` is also specified.
1529
1529
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1530
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
1530
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1531
1531
  Default: ``None`` , which means no cache is used.
1532
1532
 
1533
1533
  Raises:
@@ -1632,7 +1632,7 @@ class FlickrDataset(MappableDataset, VisionBaseDataset):
1632
1632
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
1633
1633
  argument can only be specified when `num_shards` is also specified.
1634
1634
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
1635
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
1635
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
1636
1636
  Default: ``None`` , which means no cache is used.
1637
1637
 
1638
1638
  Raises:
@@ -2005,7 +2005,7 @@ class Food101Dataset(MappableDataset, VisionBaseDataset):
2005
2005
  shard_id (int, optional): The shard ID within `num_shards` . This argument can only be specified
2006
2006
  when `num_shards` is also specified. Default: ``None`` .
2007
2007
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2008
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2008
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2009
2009
  Default: ``None`` , which means no cache is used.
2010
2010
 
2011
2011
  Raises:
@@ -2126,7 +2126,7 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
2126
2126
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
2127
2127
  argument can only be specified when `num_shards` is also specified.
2128
2128
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2129
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2129
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2130
2130
  Default: ``None`` , which means no cache is used.
2131
2131
  decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
2132
2132
  and returns the decrypted bytes data. Default: ``None`` , no decryption.
@@ -2270,7 +2270,7 @@ class KITTIDataset(MappableDataset, VisionBaseDataset):
2270
2270
  shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
2271
2271
  argument can only be specified when `num_shards` is also specified.
2272
2272
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2273
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2273
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2274
2274
  Default: ``None`` , which means no cache is used.
2275
2275
 
2276
2276
  Raises:
@@ -2390,7 +2390,7 @@ class KMnistDataset(MappableDataset, VisionBaseDataset):
2390
2390
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
2391
2391
  argument can only be specified when `num_shards` is also specified.
2392
2392
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2393
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2393
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2394
2394
  Default: ``None`` , which means no cache is used.
2395
2395
 
2396
2396
  Raises:
@@ -2500,7 +2500,7 @@ class LFWDataset(MappableDataset, VisionBaseDataset):
2500
2500
  shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
2501
2501
  argument can only be specified when `num_shards` is also specified.
2502
2502
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2503
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2503
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2504
2504
  Default: ``None`` , which means no cache is used.
2505
2505
 
2506
2506
  Raises:
@@ -2639,7 +2639,7 @@ class LSUNDataset(MappableDataset, VisionBaseDataset):
2639
2639
  shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
2640
2640
  argument can only be specified when `num_shards` is also specified.
2641
2641
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2642
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2642
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2643
2643
  Default: ``None`` , which means no cache is used.
2644
2644
 
2645
2645
  Raises:
@@ -2760,7 +2760,7 @@ class ManifestDataset(MappableDataset, VisionBaseDataset):
2760
2760
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
2761
2761
  argument can only be specified when `num_shards` is also specified.
2762
2762
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2763
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2763
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2764
2764
  Default: ``None`` , which means no cache is used.
2765
2765
 
2766
2766
  Raises:
@@ -2881,7 +2881,7 @@ class MnistDataset(MappableDataset, VisionBaseDataset):
2881
2881
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
2882
2882
  argument can only be specified when `num_shards` is also specified.
2883
2883
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2884
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2884
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2885
2885
  Default: ``None`` , which means no cache is used.
2886
2886
 
2887
2887
  Raises:
@@ -2986,7 +2986,7 @@ class OmniglotDataset(MappableDataset, VisionBaseDataset):
2986
2986
  shard_id (int, optional): The shard ID within `num_shards`. Default: ``None`` . This
2987
2987
  argument can only be specified when `num_shards` is also specified.
2988
2988
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
2989
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
2989
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
2990
2990
  Default: ``None`` , which means no cache is used.
2991
2991
 
2992
2992
  Raises:
@@ -3106,7 +3106,7 @@ class PhotoTourDataset(MappableDataset, VisionBaseDataset):
3106
3106
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
3107
3107
  argument can only be specified when `num_shards` is also specified.
3108
3108
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3109
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3109
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3110
3110
  Default: ``None`` , which means no cache is used.
3111
3111
 
3112
3112
  Raises:
@@ -3234,7 +3234,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
3234
3234
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
3235
3235
  argument can only be specified when `num_shards` is also specified.
3236
3236
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3237
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3237
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3238
3238
  Default: ``None`` , which means no cache is used.
3239
3239
 
3240
3240
  Raises:
@@ -3319,7 +3319,7 @@ class Places365Dataset(MappableDataset, VisionBaseDataset):
3319
3319
  super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
3320
3320
  shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
3321
3321
 
3322
- self.dataset_dir = os.path.abspath(dataset_dir)
3322
+ self.dataset_dir = os.path.realpath(dataset_dir)
3323
3323
  self.usage = replace_none(usage, "train-standard")
3324
3324
  self.small = small
3325
3325
  self.decode = decode
@@ -3356,7 +3356,7 @@ class QMnistDataset(MappableDataset, VisionBaseDataset):
3356
3356
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
3357
3357
  argument can only be specified when `num_shards` is also specified.
3358
3358
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3359
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3359
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3360
3360
  Default: ``None`` , which means no cache is used.
3361
3361
 
3362
3362
  Raises:
@@ -3454,7 +3454,7 @@ class RandomDataset(SourceDataset, VisionBaseDataset):
3454
3454
  Default: ``None`` , will use global default workers(8), it can be set
3455
3455
  by :func:`mindspore.dataset.config.set_num_parallel_workers` .
3456
3456
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3457
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3457
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3458
3458
  Default: ``None`` , which means no cache is used.
3459
3459
  shuffle (bool, optional): Whether or not to perform shuffle on the dataset.
3460
3460
  Default: ``None`` , expected order behavior shown in the table below.
@@ -3539,7 +3539,7 @@ class RenderedSST2Dataset(MappableDataset, VisionBaseDataset):
3539
3539
  shard_id (int, optional): The shard ID within `num_shards` . This
3540
3540
  argument can only be specified when `num_shards` is also specified. Default: ``None`` .
3541
3541
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3542
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3542
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3543
3543
  Default: ``None`` , which means no cache is used.
3544
3544
 
3545
3545
  Raises:
@@ -3847,7 +3847,7 @@ class SBUDataset(MappableDataset, VisionBaseDataset):
3847
3847
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
3848
3848
  argument can only be specified when `num_shards` is also specified.
3849
3849
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3850
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3850
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3851
3851
  Default: ``None`` , which means no cache is used.
3852
3852
 
3853
3853
  Raises:
@@ -3944,7 +3944,7 @@ class SemeionDataset(MappableDataset, VisionBaseDataset):
3944
3944
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
3945
3945
  argument can only be specified when `num_shards` is also specified.
3946
3946
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
3947
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
3947
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
3948
3948
  Default: ``None`` , which means no cache is used.
3949
3949
 
3950
3950
  Raises:
@@ -4054,7 +4054,7 @@ class STL10Dataset(MappableDataset, VisionBaseDataset):
4054
4054
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
4055
4055
  argument can only be specified when `num_shards` is also specified.
4056
4056
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4057
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
4057
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
4058
4058
  Default: ``None`` , which means no cache is used.
4059
4059
 
4060
4060
  Raises:
@@ -4167,7 +4167,7 @@ class SUN397Dataset(MappableDataset, VisionBaseDataset):
4167
4167
  shard_id (int, optional): The shard ID within `num_shards` . This
4168
4168
  argument can only be specified when `num_shards` is also specified. Default: ``None`` .
4169
4169
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4170
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
4170
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
4171
4171
  Default: ``None`` , which means no cache is used.
4172
4172
 
4173
4173
  Raises:
@@ -4428,7 +4428,7 @@ class USPSDataset(SourceDataset, VisionBaseDataset):
4428
4428
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
4429
4429
  argument can only be specified when `num_shards` is also specified.
4430
4430
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4431
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
4431
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
4432
4432
  Default: ``None`` , which means no cache is used.
4433
4433
 
4434
4434
  Raises:
@@ -4536,7 +4536,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
4536
4536
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` . This
4537
4537
  argument can only be specified when `num_shards` is also specified.
4538
4538
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4539
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
4539
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
4540
4540
  Default: ``None`` , which means no cache is used.
4541
4541
  extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
4542
4542
  :py:obj:`[_meta-filename, dtype=string]` will be output at the end. Default: ``False``.
@@ -4718,7 +4718,7 @@ class WIDERFaceDataset(MappableDataset, VisionBaseDataset):
4718
4718
  shard_id (int, optional): The shard ID within `num_shards` . Default: ``None`` .
4719
4719
  This argument can only be specified when `num_shards` is also specified.
4720
4720
  cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing. More details:
4721
- `Single-Node Data Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
4721
+ `Single-Node Data Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .
4722
4722
  Default: ``None`` , which means no cache is used.
4723
4723
 
4724
4724
  Raises:
@@ -19,6 +19,7 @@ import json
19
19
  import os
20
20
  import signal
21
21
  import weakref
22
+ from functools import wraps
22
23
  import numpy as np
23
24
 
24
25
  import mindspore._c_dataengine as cde
@@ -58,6 +59,34 @@ def _cleanup():
58
59
  itr.release()
59
60
 
60
61
 
62
+ def _cleanup_the_iterators_if_created(method):
63
+ """Release the iterators which is new created by the method"""
64
+
65
+ @wraps(method)
66
+ def wrapper(self, *args, **kwargs):
67
+ original_iterators = deepcopy(ITERATORS_LIST)
68
+
69
+ result = method(self, *args, **kwargs)
70
+
71
+ # it is used to attribute function like: dataset_size / output_shapes / output_types and
72
+ # it is a GeneratorDataset with two stage pipeline. The first pipeline will create a new iterator
73
+ # which need to be released after dataset_size / output_shapes / output_types end.
74
+ # 1. find the iterators which are started by dataset_size / output_shapes / output_types with two stage pipeline
75
+ iterators_to_be_released = []
76
+ for index, item in enumerate(ITERATORS_LIST):
77
+ if item not in original_iterators:
78
+ iterators_to_be_released.append(index)
79
+
80
+ # 2. release the iterators
81
+ for index in reversed(iterators_to_be_released):
82
+ itr = ITERATORS_LIST[index]()
83
+ if itr is not None:
84
+ itr.release()
85
+
86
+ return result
87
+ return wrapper
88
+
89
+
61
90
  class Iterator:
62
91
  """
63
92
  General Iterator over a dataset.
@@ -47,6 +47,13 @@ def get_used_disk_per():
47
47
  os.makedirs(config.WORKING_PATH)
48
48
  except FileExistsError:
49
49
  pass
50
+ except PermissionError as e:
51
+ err_msg =\
52
+ str(e) + ". Suggestion: " \
53
+ "1) It is recommended to manually create the {} directory and add read/write permissions to it. " \
54
+ "2) If you can't create, we suggest you download MindRecord manually and "\
55
+ "read it using the MindDataset interface.".format(config.WORKING_PATH)
56
+ raise RuntimeError(err_msg)
50
57
 
51
58
  total, used, _ = shutil.disk_usage(config.WORKING_PATH)
52
59
  return used / total
@@ -1,4 +1,4 @@
1
- # Copyright 2021 Huawei Technologies Co., Ltd
1
+ # Copyright 2021-2024 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -31,18 +31,28 @@ import mindspore._c_dataengine as cde
31
31
  from ..transforms.py_transforms_util import ExceptionHandler
32
32
 
33
33
 
34
+ def get_total_size(data):
35
+ """Calculate the total size of numpy arrays."""
36
+ total_size = 0
37
+ for column in data:
38
+ if isinstance(column, np.ndarray):
39
+ total_size += column.nbytes
40
+ return total_size
41
+
42
+
34
43
  class _SharedQueue(multiprocessing.queues.Queue):
35
44
  """
36
45
  Class to implement a queue using shared memory for better performance.
37
46
  Args:
38
47
  size: Number of elements in the queue.
39
48
  count: Shared variable to suppress log printing.
40
- copy_out: Flag to indidcate whether an extra copy should be done before returning. If data will immediately be
41
- copied before returning, then this can be set to False.
42
- max_rowsize: Maximum size of any element in the Queue in MB.
49
+ copy_out: Whether to copy the data from shared memory to process virtual memory. Default: ``True``.
50
+ max_rowsize: Maximum size of row in MB that is used for shared memory allocation to copy
51
+ data between processes. If set to -1, shared memory will be dynamically allocated with
52
+ the actual size of data. Default: -1.
43
53
  """
44
54
 
45
- def __init__(self, size, count, copy_out=False, max_rowsize=6):
55
+ def __init__(self, size, count, copy_out=True, max_rowsize=-1):
46
56
  super().__init__(size, ctx=multiprocessing.get_context())
47
57
 
48
58
  self.copy_out = copy_out
@@ -55,28 +65,29 @@ class _SharedQueue(multiprocessing.queues.Queue):
55
65
  self.data_shared = 1
56
66
  self.count = count
57
67
  self.print_error = True
68
+ self.shm_list = []
69
+ self.seg_pos = 0
70
+ # num_seg has to be 2 more than the queue size. We can have remote worker filling a buffer, main process
71
+ # reading a buffer and also have a full queue of buffers in the meta-data queue
72
+ self.num_seg = size + 2
58
73
 
59
74
  if platform.system().lower() != 'windows' and max_rowsize == -1:
60
75
  self.dynamic_shm = True
76
+ self.fd_list = []
61
77
  else:
62
78
  self.dynamic_shm = False
63
79
  # change max_rowsize in MB into bytes
64
80
  self.seg_size = max_rowsize * 1024 * 1024
65
- self.shm_list = []
66
- self.seg_pos = 0
67
- # num_seg has to be 2 more than the queue size. We can have remote worker filling a buffer, main process
68
- # reading a buffer and also have a full queue of buffers in the meta-data queue
69
- self.num_seg = size + 2
70
81
  for _ in range(self.num_seg):
71
82
  try:
72
- a = multiprocessing.Array("b", self.seg_size)
83
+ shared_array = multiprocessing.Array("b", self.seg_size)
73
84
  except OSError as e:
74
85
  if e.errno == errno.ENOMEM:
75
86
  raise RuntimeError("Failed to allocate shared memory for {0} elements of {1}MB: {2}"
76
87
  .format(self.num_seg, self.seg_size / 1024 / 1024, e))
77
88
  raise
78
89
  else:
79
- self.shm_list.append(a)
90
+ self.shm_list.append(shared_array)
80
91
 
81
92
  def put_until(self, data, timeout=None, exit_signal=None):
82
93
  """Put data into the queue. Block until timeout is reached or exit_signal is set."""
@@ -102,32 +113,36 @@ class _SharedQueue(multiprocessing.queues.Queue):
102
113
  if isinstance(data, np.ndarray):
103
114
  name_list.append((self.data_immediate, np.array(data)))
104
115
  else:
105
- for r in data:
106
- # the map:pyfunc is a yield generator which can't be serialize
107
- if isinstance(r, types.GeneratorType):
116
+ if self.dynamic_shm:
117
+ total_size = get_total_size(data)
118
+ if total_size > 0:
119
+ self.check_and_create_shm(total_size)
120
+ for column in data:
121
+ # the map:pyfunc is a yield generator which can't be serialized
122
+ if isinstance(column, types.GeneratorType):
108
123
  raise TypeError("Cannot pickle {} object, please verify pyfunc return with numpy array"
109
- .format(type(r)))
110
- if isinstance(r, np.ndarray) and self.dynamic_shm:
111
- byte = r.nbytes
112
- shm = cde.SharedMemory(None, True, -1, byte)
113
- dest = np.ndarray(r.shape, r.dtype, buffer=shm.buf())
114
- np.copyto(dest, r)
115
- fd = shm.fd()
116
- df = multiprocessing.reduction.DupFd(fd)
117
- name_list.append((self.data_shared, r.dtype, r.shape, shm.name(), df, shm.size()))
118
- elif (isinstance(r, np.ndarray) and r.size > self.min_shared_mem
119
- and start_bytes + r.nbytes < self.seg_size):
124
+ .format(type(column)))
125
+ if self.dynamic_shm and isinstance(column, np.ndarray) and column.nbytes > 0:
126
+ shm = self.shm_list[self.seg_pos]
127
+ fd = self.fd_list[self.seg_pos]
128
+ dest = np.ndarray(column.shape, column.dtype, buffer=shm.buf(), offset=start_bytes)
129
+ np.copyto(dest, column)
130
+ start_bytes += column.nbytes
131
+ shm_metadata = (shm.name(), fd, total_size)
132
+ name_list.append((self.data_shared, self.seg_pos, column.dtype, column.shape, shm_metadata))
133
+ elif (isinstance(column, np.ndarray) and column.size > self.min_shared_mem
134
+ and start_bytes + column.nbytes < self.seg_size):
120
135
  # need to convert start_bytes to offset in array
121
136
  start_offset = start_bytes
122
- dest = np.ndarray(r.shape, r.dtype, buffer=self.shm_list[self.seg_pos].get_obj(),
123
- offset=start_offset)
124
- np.copyto(dest, r)
125
- byte = r.nbytes
137
+ shm = self.shm_list[self.seg_pos]
138
+ dest = np.ndarray(column.shape, column.dtype, buffer=shm.get_obj(), offset=start_offset)
139
+ np.copyto(dest, column)
140
+ byte = column.nbytes
126
141
  byte = 8 * ((byte + 7) // 8)
127
142
  start_bytes += byte
128
- name_list.append((self.data_shared, self.seg_pos, byte, r.dtype, r.shape))
143
+ name_list.append((self.data_shared, self.seg_pos, byte, column.dtype, column.shape))
129
144
  else:
130
- if isinstance(r, np.ndarray) and r.size > self.min_shared_mem:
145
+ if isinstance(column, np.ndarray) and column.size > self.min_shared_mem:
131
146
  # Only print out error the first time it happens
132
147
  if self.count.value == 0 and self.print_error:
133
148
  logger.warning(
@@ -135,12 +150,12 @@ class _SharedQueue(multiprocessing.queues.Queue):
135
150
  + "max_rowsize: "
136
151
  + str(self.seg_size / 1024 / 1024)
137
152
  + "MB, current rowsize: "
138
- + str((start_bytes + r.nbytes) / 1024 / 1024)
153
+ + str((start_bytes + column.nbytes) / 1024 / 1024)
139
154
  + "MB."
140
155
  )
141
156
  self.print_error = False
142
157
  self.count.value += 1
143
- name_list.append((self.data_immediate, r))
158
+ name_list.append((self.data_immediate, column))
144
159
  super().put(name_list, timeout=timeout)
145
160
  # note above could generate a queue full exception. It will be handled by teh caller
146
161
  # only increment seg_pos after successfully adding to metadata queue
@@ -152,52 +167,91 @@ class _SharedQueue(multiprocessing.queues.Queue):
152
167
  """Get data from the queue. Block until timeout is reached or exit_signal is set."""
153
168
  while True:
154
169
  try:
155
- r = self.get(timeout=timeout)
170
+ result = self.get(timeout=timeout)
156
171
  except queue.Empty as e:
157
172
  if exit_signal is None:
158
173
  raise e
159
174
  if exit_signal.is_set():
160
175
  return None
161
176
  continue
162
- if r is None:
177
+ if result is None:
163
178
  # receive finish signal
164
179
  return None
165
180
  if exit_signal.is_set():
166
181
  # loop until the queue becomes empty
167
182
  continue
168
- return r
183
+ return result
169
184
 
170
185
  def get(self, timeout=None):
171
- result = super().get(timeout=timeout)
172
- if isinstance(result, ExceptionHandler):
173
- return result
174
- r = []
186
+ raw_data = super().get(timeout=timeout)
187
+ if isinstance(raw_data, ExceptionHandler):
188
+ return raw_data
189
+ result = []
175
190
  start_bytes = 0
176
- for x in result:
177
- if x[0] == self.data_shared:
191
+ for column in raw_data:
192
+ if column[0] == self.data_shared:
178
193
  if self.dynamic_shm:
179
- dtype, shape, shm_name, df, buf_size = x[1:]
180
- fd = df.detach()
181
- shm = cde.SharedMemory(shm_name, False, fd, buf_size)
182
- data = np.ndarray(shape, dtype, buffer=shm.buf())
183
- dest = np.copy(data)
184
- r.append(dest)
194
+ seg_pos, dtype, shape, shm_metadata = column[1:]
195
+ if start_bytes == 0:
196
+ # only need to check once since all the columns are stored in the same shared memory
197
+ self.check_and_attach_shm(seg_pos, shm_metadata)
198
+ shm = self.shm_list[seg_pos]
199
+ array = np.ndarray(shape, dtype, buffer=shm.buf(), offset=start_bytes)
200
+ start_bytes += array.nbytes
185
201
  else:
186
- seg_pos, byte, dtype, shape = x[1:]
202
+ seg_pos, byte, dtype, shape = column[1:]
187
203
  start_offset = start_bytes
188
- b = self.shm_list[seg_pos]
189
- data = np.ndarray(shape, dtype, buffer=b.get_obj(), offset=start_offset)
204
+ shm = self.shm_list[seg_pos]
205
+ array = np.ndarray(shape, dtype, buffer=shm.get_obj(), offset=start_offset)
190
206
  start_bytes += byte
191
- if self.copy_out:
192
- dest = np.copy(data)
193
- r.append(dest)
194
- else:
195
- r.append(data)
196
- elif x[0] == self.data_immediate:
197
- r.append(x[1])
207
+ if self.copy_out:
208
+ result.append(np.copy(array))
209
+ else:
210
+ result.append(array)
211
+ elif column[0] == self.data_immediate:
212
+ result.append(column[1])
198
213
  else:
199
214
  raise RuntimeError("SharedQueue, invalid entry in metadata.")
200
- return tuple(r)
215
+ return tuple(result)
216
+
217
+ def check_and_create_shm(self, size):
218
+ """Check if the shared memory is initialized and of sufficient size."""
219
+ if len(self.shm_list) == self.seg_pos:
220
+ # shared memory has not been created and appended to the cache list
221
+ shm = cde.SharedMemory(None, True, -1, size)
222
+ shared_fd = multiprocessing.reduction.DupFd(shm.fd())
223
+ self.shm_list.append(shm)
224
+ self.fd_list.append(shared_fd)
225
+ elif len(self.shm_list) > self.seg_pos:
226
+ if self.shm_list[self.seg_pos].size() < size:
227
+ # shared memory is not big enough to hold the data
228
+ shm = cde.SharedMemory(None, True, -1, size)
229
+ shared_fd = multiprocessing.reduction.DupFd(shm.fd())
230
+ self.shm_list[self.seg_pos] = shm
231
+ self.fd_list[self.seg_pos] = shared_fd
232
+ else:
233
+ raise RuntimeError("The shared memory index is larger than the length of shared memory list. "
234
+ "Uninitialized shared memory may exist.")
235
+
236
+ def check_and_attach_shm(self, shm_index, shm_metadata):
237
+ """Check if the shared memory is initialized and is the same as the current one."""
238
+ shm_name, fd, size = shm_metadata
239
+ if len(self.shm_list) == shm_index:
240
+ # shared memory has not been created and appended to the cache list
241
+ fd = fd.detach()
242
+ shm = cde.SharedMemory(shm_name, False, fd, size)
243
+ self.shm_list.append(shm)
244
+ self.fd_list.append(fd)
245
+ elif len(self.shm_list) > shm_index:
246
+ if self.shm_list[shm_index].name() != shm_name:
247
+ # shared memory has changed
248
+ fd = fd.detach()
249
+ shm = cde.SharedMemory(shm_name, False, fd, size)
250
+ self.shm_list[shm_index] = shm
251
+ self.fd_list[shm_index] = fd
252
+ else:
253
+ raise RuntimeError("The shared memory index is larger than the length of shared memory list. "
254
+ "Uninitialized shared memory may exist.")
201
255
 
202
256
  def __del__(self):
203
257
  if not self.dynamic_shm:
@@ -107,9 +107,9 @@ def deserialize(input_dict=None, json_filepath=None):
107
107
  def expand_path(node_repr, key, val):
108
108
  """Convert relative to absolute path."""
109
109
  if isinstance(val, list):
110
- node_repr[key] = [os.path.abspath(file) for file in val]
110
+ node_repr[key] = [os.path.realpath(file) for file in val]
111
111
  else:
112
- node_repr[key] = os.path.abspath(val)
112
+ node_repr[key] = os.path.realpath(val)
113
113
 
114
114
 
115
115
  def show(dataset, indentation=2):