maxframe 1.3.1__cp39-cp39-win32.whl → 2.0.0__cp39-cp39-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (640)
  1. maxframe/_utils.cp39-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp39-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +9 -8
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +65 -3
  242. maxframe/dataframe/reduction/core.py +3 -1
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/models.py +38 -9
  279. maxframe/learn/contrib/utils.py +55 -0
  280. maxframe/learn/contrib/xgboost/callback.py +86 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  282. maxframe/learn/contrib/xgboost/core.py +54 -42
  283. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  284. maxframe/learn/contrib/xgboost/predict.py +16 -9
  285. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  286. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  287. maxframe/learn/contrib/xgboost/train.py +59 -16
  288. maxframe/learn/core.py +252 -0
  289. maxframe/learn/datasets/__init__.py +20 -0
  290. maxframe/learn/datasets/samples_generator.py +628 -0
  291. maxframe/learn/linear_model/__init__.py +15 -0
  292. maxframe/learn/linear_model/_base.py +163 -0
  293. maxframe/learn/linear_model/_lin_reg.py +175 -0
  294. maxframe/learn/metrics/__init__.py +25 -0
  295. maxframe/learn/metrics/_check_targets.py +95 -0
  296. maxframe/learn/metrics/_classification.py +1121 -0
  297. maxframe/learn/metrics/_regression.py +256 -0
  298. maxframe/learn/model_selection/__init__.py +15 -0
  299. maxframe/learn/model_selection/_split.py +451 -0
  300. maxframe/learn/model_selection/tests/__init__.py +13 -0
  301. maxframe/learn/model_selection/tests/test_split.py +156 -0
  302. maxframe/learn/preprocessing/__init__.py +16 -0
  303. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  304. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  305. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  306. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  307. maxframe/learn/preprocessing/_data/utils.py +79 -0
  308. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  309. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  310. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  311. maxframe/learn/utils/__init__.py +4 -0
  312. maxframe/learn/utils/_encode.py +314 -0
  313. maxframe/learn/utils/checks.py +161 -0
  314. maxframe/learn/utils/core.py +33 -0
  315. maxframe/learn/utils/extmath.py +176 -0
  316. maxframe/learn/utils/multiclass.py +292 -0
  317. maxframe/learn/utils/shuffle.py +114 -0
  318. maxframe/learn/utils/sparsefuncs.py +87 -0
  319. maxframe/learn/utils/validation.py +775 -0
  320. maxframe/lib/__init__.py +0 -2
  321. maxframe/lib/compat.py +145 -0
  322. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  323. maxframe/lib/mmh3.cp39-win32.pyd +0 -0
  324. maxframe/lib/sparse/__init__.py +10 -15
  325. maxframe/lib/sparse/array.py +45 -33
  326. maxframe/lib/sparse/core.py +0 -2
  327. maxframe/lib/sparse/linalg.py +31 -0
  328. maxframe/lib/sparse/matrix.py +5 -2
  329. maxframe/lib/sparse/tests/__init__.py +0 -2
  330. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  331. maxframe/lib/sparse/vector.py +0 -2
  332. maxframe/mixin.py +59 -2
  333. maxframe/opcodes.py +13 -5
  334. maxframe/protocol.py +67 -14
  335. maxframe/remote/core.py +16 -14
  336. maxframe/remote/run_script.py +6 -3
  337. maxframe/serialization/__init__.py +2 -0
  338. maxframe/serialization/core.cp39-win32.pyd +0 -0
  339. maxframe/serialization/core.pxd +3 -0
  340. maxframe/serialization/core.pyi +3 -1
  341. maxframe/serialization/core.pyx +82 -4
  342. maxframe/serialization/pandas.py +5 -1
  343. maxframe/serialization/serializables/core.py +6 -5
  344. maxframe/serialization/serializables/field.py +2 -2
  345. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  346. maxframe/serialization/tests/test_serial.py +27 -0
  347. maxframe/session.py +4 -71
  348. maxframe/sperunner.py +165 -0
  349. maxframe/tensor/__init__.py +35 -2
  350. maxframe/tensor/arithmetic/__init__.py +2 -4
  351. maxframe/tensor/arithmetic/abs.py +0 -2
  352. maxframe/tensor/arithmetic/absolute.py +0 -2
  353. maxframe/tensor/arithmetic/add.py +34 -4
  354. maxframe/tensor/arithmetic/angle.py +0 -2
  355. maxframe/tensor/arithmetic/arccos.py +1 -4
  356. maxframe/tensor/arithmetic/arccosh.py +1 -3
  357. maxframe/tensor/arithmetic/arcsin.py +0 -2
  358. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  359. maxframe/tensor/arithmetic/arctan.py +0 -2
  360. maxframe/tensor/arithmetic/arctan2.py +0 -2
  361. maxframe/tensor/arithmetic/arctanh.py +0 -2
  362. maxframe/tensor/arithmetic/around.py +0 -2
  363. maxframe/tensor/arithmetic/bitand.py +0 -2
  364. maxframe/tensor/arithmetic/bitor.py +1 -3
  365. maxframe/tensor/arithmetic/bitxor.py +1 -3
  366. maxframe/tensor/arithmetic/cbrt.py +0 -2
  367. maxframe/tensor/arithmetic/ceil.py +0 -2
  368. maxframe/tensor/arithmetic/clip.py +13 -13
  369. maxframe/tensor/arithmetic/conj.py +0 -2
  370. maxframe/tensor/arithmetic/copysign.py +0 -2
  371. maxframe/tensor/arithmetic/core.py +47 -39
  372. maxframe/tensor/arithmetic/cos.py +1 -3
  373. maxframe/tensor/arithmetic/cosh.py +0 -2
  374. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  375. maxframe/tensor/arithmetic/degrees.py +0 -2
  376. maxframe/tensor/arithmetic/divide.py +0 -2
  377. maxframe/tensor/arithmetic/equal.py +0 -2
  378. maxframe/tensor/arithmetic/exp.py +1 -3
  379. maxframe/tensor/arithmetic/exp2.py +0 -2
  380. maxframe/tensor/arithmetic/expm1.py +0 -2
  381. maxframe/tensor/arithmetic/fabs.py +0 -2
  382. maxframe/tensor/arithmetic/fix.py +0 -2
  383. maxframe/tensor/arithmetic/float_power.py +0 -2
  384. maxframe/tensor/arithmetic/floor.py +0 -2
  385. maxframe/tensor/arithmetic/floordiv.py +0 -2
  386. maxframe/tensor/arithmetic/fmax.py +0 -2
  387. maxframe/tensor/arithmetic/fmin.py +0 -2
  388. maxframe/tensor/arithmetic/fmod.py +0 -2
  389. maxframe/tensor/arithmetic/frexp.py +6 -2
  390. maxframe/tensor/arithmetic/greater.py +0 -2
  391. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  392. maxframe/tensor/arithmetic/hypot.py +0 -2
  393. maxframe/tensor/arithmetic/i0.py +1 -3
  394. maxframe/tensor/arithmetic/imag.py +0 -2
  395. maxframe/tensor/arithmetic/invert.py +1 -3
  396. maxframe/tensor/arithmetic/isclose.py +0 -2
  397. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  398. maxframe/tensor/arithmetic/isfinite.py +1 -3
  399. maxframe/tensor/arithmetic/isinf.py +0 -2
  400. maxframe/tensor/arithmetic/isnan.py +0 -2
  401. maxframe/tensor/arithmetic/isreal.py +0 -2
  402. maxframe/tensor/arithmetic/ldexp.py +0 -2
  403. maxframe/tensor/arithmetic/less.py +0 -2
  404. maxframe/tensor/arithmetic/less_equal.py +0 -2
  405. maxframe/tensor/arithmetic/log.py +1 -3
  406. maxframe/tensor/arithmetic/log10.py +1 -3
  407. maxframe/tensor/arithmetic/log1p.py +1 -3
  408. maxframe/tensor/arithmetic/log2.py +1 -3
  409. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  410. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  411. maxframe/tensor/arithmetic/logical_and.py +0 -2
  412. maxframe/tensor/arithmetic/logical_not.py +1 -3
  413. maxframe/tensor/arithmetic/logical_or.py +0 -2
  414. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  415. maxframe/tensor/arithmetic/lshift.py +0 -2
  416. maxframe/tensor/arithmetic/maximum.py +0 -2
  417. maxframe/tensor/arithmetic/minimum.py +0 -2
  418. maxframe/tensor/arithmetic/mod.py +0 -2
  419. maxframe/tensor/arithmetic/modf.py +6 -2
  420. maxframe/tensor/arithmetic/multiply.py +37 -4
  421. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  422. maxframe/tensor/arithmetic/negative.py +0 -2
  423. maxframe/tensor/arithmetic/nextafter.py +0 -2
  424. maxframe/tensor/arithmetic/not_equal.py +0 -2
  425. maxframe/tensor/arithmetic/positive.py +0 -2
  426. maxframe/tensor/arithmetic/power.py +0 -2
  427. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  428. maxframe/tensor/arithmetic/radians.py +0 -2
  429. maxframe/tensor/arithmetic/real.py +0 -2
  430. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  431. maxframe/tensor/arithmetic/rint.py +1 -3
  432. maxframe/tensor/arithmetic/rshift.py +0 -2
  433. maxframe/tensor/arithmetic/setimag.py +0 -2
  434. maxframe/tensor/arithmetic/setreal.py +0 -2
  435. maxframe/tensor/arithmetic/sign.py +0 -2
  436. maxframe/tensor/arithmetic/signbit.py +0 -2
  437. maxframe/tensor/arithmetic/sin.py +0 -2
  438. maxframe/tensor/arithmetic/sinc.py +1 -3
  439. maxframe/tensor/arithmetic/sinh.py +0 -2
  440. maxframe/tensor/arithmetic/spacing.py +0 -2
  441. maxframe/tensor/arithmetic/sqrt.py +0 -2
  442. maxframe/tensor/arithmetic/square.py +0 -2
  443. maxframe/tensor/arithmetic/subtract.py +4 -2
  444. maxframe/tensor/arithmetic/tan.py +0 -2
  445. maxframe/tensor/arithmetic/tanh.py +0 -2
  446. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  447. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  448. maxframe/tensor/arithmetic/truediv.py +0 -2
  449. maxframe/tensor/arithmetic/trunc.py +0 -2
  450. maxframe/tensor/arithmetic/utils.py +32 -6
  451. maxframe/tensor/array_utils.py +3 -25
  452. maxframe/tensor/core.py +6 -6
  453. maxframe/tensor/datasource/__init__.py +10 -2
  454. maxframe/tensor/datasource/arange.py +0 -2
  455. maxframe/tensor/datasource/array.py +3 -22
  456. maxframe/tensor/datasource/core.py +15 -10
  457. maxframe/tensor/datasource/diag.py +140 -0
  458. maxframe/tensor/datasource/diagflat.py +69 -0
  459. maxframe/tensor/datasource/empty.py +0 -2
  460. maxframe/tensor/datasource/eye.py +95 -0
  461. maxframe/tensor/datasource/from_dataframe.py +0 -2
  462. maxframe/tensor/datasource/from_dense.py +0 -17
  463. maxframe/tensor/datasource/from_sparse.py +0 -2
  464. maxframe/tensor/datasource/full.py +0 -2
  465. maxframe/tensor/datasource/identity.py +54 -0
  466. maxframe/tensor/datasource/indices.py +115 -0
  467. maxframe/tensor/datasource/linspace.py +140 -0
  468. maxframe/tensor/datasource/meshgrid.py +135 -0
  469. maxframe/tensor/datasource/ones.py +8 -3
  470. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  471. maxframe/tensor/datasource/tri_array.py +107 -0
  472. maxframe/tensor/datasource/zeros.py +7 -3
  473. maxframe/tensor/extensions/__init__.py +31 -0
  474. maxframe/tensor/extensions/accessor.py +25 -0
  475. maxframe/tensor/extensions/apply_chunk.py +137 -0
  476. maxframe/tensor/indexing/__init__.py +1 -1
  477. maxframe/tensor/indexing/choose.py +8 -6
  478. maxframe/tensor/indexing/compress.py +0 -2
  479. maxframe/tensor/indexing/extract.py +0 -2
  480. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  481. maxframe/tensor/indexing/flatnonzero.py +1 -3
  482. maxframe/tensor/indexing/getitem.py +10 -43
  483. maxframe/tensor/indexing/nonzero.py +2 -4
  484. maxframe/tensor/indexing/setitem.py +19 -9
  485. maxframe/tensor/indexing/slice.py +6 -3
  486. maxframe/tensor/indexing/take.py +0 -2
  487. maxframe/tensor/indexing/tests/__init__.py +0 -2
  488. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  489. maxframe/tensor/indexing/unravel_index.py +6 -6
  490. maxframe/tensor/lib/__init__.py +16 -0
  491. maxframe/tensor/lib/index_tricks.py +404 -0
  492. maxframe/tensor/linalg/__init__.py +36 -0
  493. maxframe/tensor/linalg/dot.py +145 -0
  494. maxframe/tensor/linalg/inner.py +36 -0
  495. maxframe/tensor/linalg/inv.py +83 -0
  496. maxframe/tensor/linalg/lu.py +115 -0
  497. maxframe/tensor/linalg/matmul.py +225 -0
  498. maxframe/tensor/linalg/qr.py +124 -0
  499. maxframe/tensor/linalg/solve_triangular.py +103 -0
  500. maxframe/tensor/linalg/svd.py +167 -0
  501. maxframe/tensor/linalg/tensordot.py +213 -0
  502. maxframe/tensor/linalg/vdot.py +73 -0
  503. maxframe/tensor/merge/__init__.py +4 -0
  504. maxframe/tensor/merge/append.py +74 -0
  505. maxframe/tensor/merge/column_stack.py +63 -0
  506. maxframe/tensor/merge/concatenate.py +3 -2
  507. maxframe/tensor/merge/dstack.py +71 -0
  508. maxframe/tensor/merge/hstack.py +70 -0
  509. maxframe/tensor/merge/stack.py +0 -2
  510. maxframe/tensor/merge/tests/test_merge.py +0 -2
  511. maxframe/tensor/misc/__init__.py +18 -5
  512. maxframe/tensor/misc/astype.py +10 -8
  513. maxframe/tensor/misc/broadcast_to.py +1 -1
  514. maxframe/tensor/misc/copy.py +64 -0
  515. maxframe/tensor/misc/diff.py +115 -0
  516. maxframe/tensor/misc/flatten.py +63 -0
  517. maxframe/tensor/misc/in1d.py +94 -0
  518. maxframe/tensor/misc/isin.py +130 -0
  519. maxframe/tensor/misc/ndim.py +53 -0
  520. maxframe/tensor/misc/ravel.py +0 -2
  521. maxframe/tensor/misc/repeat.py +129 -0
  522. maxframe/tensor/misc/searchsorted.py +147 -0
  523. maxframe/tensor/misc/setdiff1d.py +58 -0
  524. maxframe/tensor/misc/squeeze.py +117 -0
  525. maxframe/tensor/misc/swapaxes.py +113 -0
  526. maxframe/tensor/misc/tests/test_misc.py +0 -2
  527. maxframe/tensor/misc/transpose.py +8 -4
  528. maxframe/tensor/misc/trapezoid.py +123 -0
  529. maxframe/tensor/misc/unique.py +0 -1
  530. maxframe/tensor/misc/where.py +10 -8
  531. maxframe/tensor/operators.py +0 -34
  532. maxframe/tensor/random/__init__.py +3 -5
  533. maxframe/tensor/random/binomial.py +0 -2
  534. maxframe/tensor/random/bytes.py +0 -2
  535. maxframe/tensor/random/chisquare.py +0 -2
  536. maxframe/tensor/random/choice.py +9 -8
  537. maxframe/tensor/random/core.py +20 -5
  538. maxframe/tensor/random/dirichlet.py +0 -2
  539. maxframe/tensor/random/exponential.py +0 -2
  540. maxframe/tensor/random/f.py +2 -4
  541. maxframe/tensor/random/gamma.py +0 -2
  542. maxframe/tensor/random/geometric.py +0 -2
  543. maxframe/tensor/random/gumbel.py +0 -2
  544. maxframe/tensor/random/hypergeometric.py +0 -2
  545. maxframe/tensor/random/laplace.py +2 -4
  546. maxframe/tensor/random/logistic.py +0 -2
  547. maxframe/tensor/random/lognormal.py +0 -2
  548. maxframe/tensor/random/logseries.py +0 -2
  549. maxframe/tensor/random/multinomial.py +0 -2
  550. maxframe/tensor/random/multivariate_normal.py +0 -2
  551. maxframe/tensor/random/negative_binomial.py +0 -2
  552. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  553. maxframe/tensor/random/noncentral_f.py +1 -3
  554. maxframe/tensor/random/normal.py +0 -2
  555. maxframe/tensor/random/pareto.py +0 -2
  556. maxframe/tensor/random/permutation.py +6 -3
  557. maxframe/tensor/random/poisson.py +0 -2
  558. maxframe/tensor/random/power.py +0 -2
  559. maxframe/tensor/random/rand.py +0 -2
  560. maxframe/tensor/random/randint.py +0 -2
  561. maxframe/tensor/random/randn.py +0 -2
  562. maxframe/tensor/random/random_integers.py +0 -2
  563. maxframe/tensor/random/random_sample.py +0 -2
  564. maxframe/tensor/random/rayleigh.py +0 -2
  565. maxframe/tensor/random/standard_cauchy.py +0 -2
  566. maxframe/tensor/random/standard_exponential.py +0 -2
  567. maxframe/tensor/random/standard_gamma.py +0 -2
  568. maxframe/tensor/random/standard_normal.py +0 -2
  569. maxframe/tensor/random/standard_t.py +0 -2
  570. maxframe/tensor/random/tests/__init__.py +0 -2
  571. maxframe/tensor/random/tests/test_random.py +0 -2
  572. maxframe/tensor/random/triangular.py +0 -2
  573. maxframe/tensor/random/uniform.py +0 -2
  574. maxframe/tensor/random/vonmises.py +0 -2
  575. maxframe/tensor/random/wald.py +0 -2
  576. maxframe/tensor/random/weibull.py +0 -2
  577. maxframe/tensor/random/zipf.py +0 -2
  578. maxframe/tensor/reduction/__init__.py +0 -2
  579. maxframe/tensor/reduction/all.py +0 -2
  580. maxframe/tensor/reduction/allclose.py +0 -2
  581. maxframe/tensor/reduction/any.py +0 -2
  582. maxframe/tensor/reduction/argmax.py +1 -3
  583. maxframe/tensor/reduction/argmin.py +1 -3
  584. maxframe/tensor/reduction/array_equal.py +0 -2
  585. maxframe/tensor/reduction/core.py +0 -2
  586. maxframe/tensor/reduction/count_nonzero.py +0 -2
  587. maxframe/tensor/reduction/cumprod.py +0 -2
  588. maxframe/tensor/reduction/cumsum.py +0 -2
  589. maxframe/tensor/reduction/max.py +0 -2
  590. maxframe/tensor/reduction/mean.py +0 -2
  591. maxframe/tensor/reduction/min.py +0 -2
  592. maxframe/tensor/reduction/nanargmax.py +0 -2
  593. maxframe/tensor/reduction/nanargmin.py +0 -2
  594. maxframe/tensor/reduction/nancumprod.py +0 -2
  595. maxframe/tensor/reduction/nancumsum.py +0 -2
  596. maxframe/tensor/reduction/nanmax.py +0 -2
  597. maxframe/tensor/reduction/nanmean.py +0 -2
  598. maxframe/tensor/reduction/nanmin.py +0 -2
  599. maxframe/tensor/reduction/nanprod.py +0 -2
  600. maxframe/tensor/reduction/nanstd.py +0 -2
  601. maxframe/tensor/reduction/nansum.py +0 -2
  602. maxframe/tensor/reduction/nanvar.py +0 -2
  603. maxframe/tensor/reduction/prod.py +0 -2
  604. maxframe/tensor/reduction/std.py +0 -2
  605. maxframe/tensor/reduction/sum.py +0 -2
  606. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  607. maxframe/tensor/reduction/var.py +0 -2
  608. maxframe/tensor/reshape/__init__.py +0 -2
  609. maxframe/tensor/reshape/reshape.py +6 -5
  610. maxframe/tensor/reshape/tests/__init__.py +0 -2
  611. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  612. maxframe/tensor/sort/__init__.py +16 -0
  613. maxframe/tensor/sort/argsort.py +150 -0
  614. maxframe/tensor/sort/sort.py +295 -0
  615. maxframe/tensor/special/__init__.py +37 -0
  616. maxframe/tensor/special/core.py +38 -0
  617. maxframe/tensor/special/misc.py +142 -0
  618. maxframe/tensor/special/statistical.py +56 -0
  619. maxframe/tensor/statistics/__init__.py +5 -0
  620. maxframe/tensor/statistics/average.py +143 -0
  621. maxframe/tensor/statistics/bincount.py +133 -0
  622. maxframe/tensor/statistics/quantile.py +10 -8
  623. maxframe/tensor/ufunc/__init__.py +0 -2
  624. maxframe/tensor/ufunc/ufunc.py +0 -2
  625. maxframe/tensor/utils.py +21 -3
  626. maxframe/tests/test_protocol.py +3 -3
  627. maxframe/tests/test_utils.py +210 -1
  628. maxframe/tests/utils.py +59 -1
  629. maxframe/udf.py +76 -6
  630. maxframe/utils.py +418 -17
  631. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/METADATA +4 -1
  632. maxframe-2.0.0.dist-info/RECORD +939 -0
  633. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
  634. maxframe_client/clients/framedriver.py +19 -3
  635. maxframe_client/fetcher.py +113 -6
  636. maxframe_client/session/odps.py +173 -38
  637. maxframe_client/session/task.py +3 -1
  638. maxframe_client/tests/test_session.py +41 -5
  639. maxframe-1.3.1.dist-info/RECORD +0 -705
  640. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import itertools
15
16
  import struct
16
17
  from io import BytesIO
17
18
  from typing import Any, Dict
@@ -19,36 +20,65 @@ from typing import Any, Dict
19
20
  import msgpack
20
21
  import numpy as np
21
22
 
23
+ from ...config import options
24
+ from ...core import OutputType
22
25
  from ...lib import wrapped_pickle as pickle
23
- from ...tensor.core import TensorData
26
+ from ...lib.version import parse as parse_version
24
27
  from ...typing_ import SlicesType, TileableType
25
28
  from ..odpsio import ODPSVolumeReader, ODPSVolumeWriter
26
29
  from .core import AbstractObjectIOHandler, register_object_io_handler
27
30
 
31
+ try:
32
+ from maxframe import __version__ as mf_version
33
+ except ImportError:
34
+ mf_version = None
28
35
 
29
- @register_object_io_handler(TensorData)
36
+
37
+ @register_object_io_handler(OutputType.tensor)
30
38
  class TensorIOHandler(AbstractObjectIOHandler):
39
+ def _prepare_meta_for_serial(self, tileable: TileableType) -> Dict[str, Any]:
40
+ meta = super()._prepare_meta_for_serial(tileable)
41
+ meta["nsplits"] = (
42
+ getattr(tileable, "nsplits", None) or (np.nan,) * tileable.ndim
43
+ )
44
+ return meta
45
+
31
46
  def write_object_meta(
32
47
  self,
33
48
  writer: ODPSVolumeWriter,
34
49
  tileable: TileableType,
35
50
  extra_meta: Dict[str, Any] = None,
36
51
  ):
37
- # fixme upload in real slices when tensors are supported in DPE
38
52
  extra_meta = extra_meta or dict()
39
- extra_meta["nsplits"] = ((np.nan,),)
40
-
53
+ extra_meta["nsplits"] = (
54
+ getattr(tileable, "nsplits", None) or ((np.nan,),) * tileable.ndim
55
+ )
41
56
  super().write_object_meta(writer, tileable, extra_meta=extra_meta)
42
57
 
43
- def _read_object_body(
44
- self,
45
- reader: ODPSVolumeReader,
46
- tileable: TileableType,
47
- meta: Dict[str, Any],
48
- slices: SlicesType = None,
58
+ @staticmethod
59
+ def _get_chunk_file_name(params: Dict[str, Any]) -> str:
60
+ # fixme remove this when all v1.0 clients removed
61
+ mf_ver = options.session.client_version or mf_version
62
+ if mf_ver:
63
+ client_major = parse_version(
64
+ options.session.client_version or mf_version
65
+ ).major
66
+ else:
67
+ client_major = None
68
+
69
+ if client_major == 1:
70
+ # returns v1.0 file name as we do not support tiled tensors
71
+ return "0,0.dat"
72
+
73
+ ndim = len(params.get("shape") or (0,))
74
+ data_index = params.get("index") or (0,) * ndim
75
+ return ",".join(str(ix) for ix in data_index) + ".dat"
76
+
77
+ def _read_single_chunk(
78
+ self, reader: ODPSVolumeReader, params: Dict[str, Any]
49
79
  ) -> Any:
50
- # fixme read data with slices when tensors are supported in DPE
51
- body = reader.read_file("0,0.dat")
80
+ file_name = self._get_chunk_file_name(params)
81
+ body = reader.read_file(file_name)
52
82
  bio = BytesIO(body)
53
83
  (header_len,) = struct.unpack("<I", bio.read(4))
54
84
  header_data = msgpack.loads(bio.read(header_len))
@@ -57,10 +87,35 @@ class TensorIOHandler(AbstractObjectIOHandler):
57
87
  bufs = [bio.read(size) for size in header_data[1:]]
58
88
  return pickle.loads(pickled, buffers=bufs)
59
89
 
60
- def _write_object_body(
61
- self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
90
+ def read_object_body(
91
+ self,
92
+ reader: ODPSVolumeReader,
93
+ params: Dict[str, Any],
94
+ extra_params: Dict[str, Any] = None,
95
+ slices: SlicesType = None,
96
+ ) -> Any:
97
+ if "index" in params:
98
+ return self._read_single_chunk(reader, params)
99
+
100
+ tileable_params = params.copy()
101
+ tileable_params.update(extra_params or {})
102
+
103
+ # todo implements slices argument for head and tail fetching
104
+ assert slices is None
105
+
106
+ chunk_shape = tuple(len(x) for x in params["nsplits"])
107
+ block_array = np.empty(shape=chunk_shape, dtype="O")
108
+ for idx in itertools.product(*(range(sp) for sp in chunk_shape)):
109
+ chunk_params = tileable_params.copy()
110
+ chunk_params.pop("nsplits")
111
+ chunk_params["index"] = idx
112
+ block_array[idx] = self._read_single_chunk(reader, chunk_params)
113
+
114
+ return np.block(block_array.tolist())
115
+
116
+ def write_object_body(
117
+ self, writer: ODPSVolumeWriter, params: Dict[str, Any], value: Any
62
118
  ):
63
- # fixme upload in real slices when tensors are supported in DPE
64
119
  def data_gen():
65
120
  bufs = []
66
121
  pickled = pickle.dumps(value, buffer_callback=bufs.append)
@@ -73,4 +128,5 @@ class TensorIOHandler(AbstractObjectIOHandler):
73
128
  for buf in bufs:
74
129
  yield buf
75
130
 
76
- writer.write_file("0,0.dat", data_gen())
131
+ file_name = self._get_chunk_file_name(params)
132
+ writer.write_file(file_name, data_gen())
@@ -19,7 +19,7 @@ from odps import ODPS
19
19
  from ....core import OutputType
20
20
  from ....core.operator import ObjectOperatorMixin, Operator
21
21
  from ....tensor.datasource import ArrayDataSource
22
- from ....tests.utils import tn
22
+ from ....tests.utils import create_test_volume, get_test_unique_name, tn
23
23
  from ...odpsio import ODPSVolumeReader, ODPSVolumeWriter
24
24
  from ..core import get_object_io_handler
25
25
 
@@ -31,61 +31,11 @@ class TestObjectOp(Operator, ObjectOperatorMixin):
31
31
 
32
32
 
33
33
  @pytest.fixture(scope="module")
34
- def create_volume(request, oss_config):
35
- test_vol_name = tn("test_object_io_volume")
36
- odps_entry = ODPS.from_environments()
37
-
38
- try:
39
- odps_entry.delete_volume(test_vol_name, auto_remove_dir=True, recursive=True)
40
- except:
41
- pass
42
-
43
- oss_test_dir_name = tn("test_oss_directory")
44
- if oss_config is None:
45
- pytest.skip("Need oss and its config to run this test")
46
- (
47
- oss_access_id,
48
- oss_secret_access_key,
49
- oss_bucket_name,
50
- oss_endpoint,
51
- ) = oss_config.oss_config
52
-
53
- if "test" in oss_endpoint:
54
- # offline config
55
- test_location = "oss://%s:%s@%s/%s/%s" % (
56
- oss_access_id,
57
- oss_secret_access_key,
58
- oss_endpoint,
59
- oss_bucket_name,
60
- oss_test_dir_name,
61
- )
62
- rolearn = None
63
- else:
64
- # online config
65
- endpoint_parts = oss_endpoint.split(".", 1)
66
- if "-internal" not in endpoint_parts[0]:
67
- endpoint_parts[0] += "-internal"
68
- test_location = "oss://%s/%s/%s" % (
69
- ".".join(endpoint_parts),
70
- oss_bucket_name,
71
- oss_test_dir_name,
72
- )
73
- rolearn = oss_config.oss_rolearn
74
-
75
- oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
76
- odps_entry.create_external_volume(
77
- test_vol_name, location=test_location, rolearn=rolearn
78
- )
79
-
80
- try:
34
+ def create_volume(oss_config):
35
+ with create_test_volume(
36
+ tn("test_object_io_vol_" + get_test_unique_name(5)), oss_config
37
+ ) as test_vol_name:
81
38
  yield test_vol_name
82
- finally:
83
- try:
84
- odps_entry.delete_volume(
85
- test_vol_name, auto_remove_dir=True, recursive=True
86
- )
87
- except:
88
- pass
89
39
 
90
40
 
91
41
  def test_simple_object_io(create_volume):
@@ -119,6 +69,11 @@ def test_tensor_object_io(create_volume):
119
69
  odps_entry, create_volume, obj.key, replace_internal_host=True
120
70
  )
121
71
 
72
+ # test write and read full object
122
73
  handler = get_object_io_handler(obj)()
123
74
  handler.write_object(writer, obj, data)
124
75
  np.testing.assert_equal(data, handler.read_object(reader, obj))
76
+
77
+ # test read single chunk
78
+ params = {"index": (0, 0)}
79
+ np.testing.assert_equal(data, handler.read_object_body(reader, params))
@@ -68,6 +68,8 @@ def arrow_to_pandas(
68
68
  return _rebuild_dataframe(df, table_meta)
69
69
  elif table_meta.type == OutputType.index:
70
70
  return _rebuild_index(df, table_meta)
71
+ elif table_meta.type == OutputType.tensor:
72
+ return _rebuild_index(df, table_meta).to_numpy()
71
73
  elif table_meta.type == OutputType.scalar:
72
74
  return _rebuild_index(df, table_meta)[0]
73
75
  else: # this could never happen # pragma: no cover
@@ -107,9 +109,9 @@ def pandas_to_arrow(
107
109
  else:
108
110
  table_datetime_cols = {"_idx_0"}
109
111
  df = df.to_frame(name=names[0] if len(names) == 1 else names)
110
- elif table_meta.type == OutputType.scalar:
112
+ elif table_meta.type in (OutputType.scalar, OutputType.tensor):
111
113
  names = ["_idx_0"]
112
- if isinstance(df, TENSOR_TYPE):
114
+ if isinstance(df, (TENSOR_TYPE, np.ndarray)):
113
115
  df = pd.DataFrame([], columns=names).astype({names[0]: df.dtype})
114
116
  else:
115
117
  df = pd.DataFrame([[df]], columns=names)
@@ -145,4 +147,15 @@ def pandas_to_arrow(
145
147
  col_data = pa_table.column(idx).cast(pa.timestamp("ms"))
146
148
  col_datas.append(col_data)
147
149
  pa_table = pa.Table.from_arrays(col_datas, names=col_names)
150
+
151
+ new_names, new_dtypes = [], []
152
+ for table_col, (pd_col, pd_dtype) in zip(
153
+ table_meta.table_column_names, table_meta.pd_column_dtypes.items()
154
+ ):
155
+ new_names.append(pd_col)
156
+ if table_col not in table_datetime_cols:
157
+ new_dtypes.append(pd_dtype)
158
+ else:
159
+ new_dtypes.append(np.dtype("datetime64[ms]"))
160
+ table_meta.pd_column_dtypes = pd.Series(new_dtypes, index=new_names)
148
161
  return pa_table, table_meta
@@ -14,7 +14,7 @@
14
14
 
15
15
  import string
16
16
  from collections import defaultdict
17
- from typing import Any, Dict, Tuple, Union
17
+ from typing import Any, Dict, Tuple
18
18
 
19
19
  import numpy as np
20
20
  import pandas as pd
@@ -23,9 +23,11 @@ from odps import types as odps_types
23
23
  from pandas.api import types as pd_types
24
24
 
25
25
  from ...core import TILEABLE_TYPE, OutputType
26
+ from ...dataframe.core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
26
27
  from ...lib.dtypes_extension import ArrowDtype
27
28
  from ...protocol import DataFrameTableMeta
28
29
  from ...tensor.core import TENSOR_TYPE
30
+ from ...utils import build_temp_table_name
29
31
 
30
32
  _TEMP_TABLE_PREFIX = "tmp_mf_"
31
33
  DEFAULT_SINGLE_INDEX_NAME = "_idx_0"
@@ -63,15 +65,15 @@ _odps_type_to_arrow = {
63
65
  odps_types.timestamp_ntz: pa.timestamp("ns"),
64
66
  }
65
67
 
66
- _based_for_pandas_pa_dtypes = Union[pa.MapType]
68
+ _based_for_pandas_pa_types = (pa.ListType, pa.MapType)
67
69
 
68
70
 
69
- def is_based_for_pandas_dtype(dtype: pa.DataType) -> bool:
71
+ def is_based_for_pandas_dtype(arrow_type: pa.DataType) -> bool:
70
72
  """
71
73
  Check whether the arrow type is based for one pandas data type.
72
74
  If true, we should make sure the environment support ArrowDtype.
73
75
  """
74
- if not isinstance(dtype, _based_for_pandas_pa_dtypes):
76
+ if not isinstance(arrow_type, _based_for_pandas_pa_types):
75
77
  return False
76
78
 
77
79
  if ArrowDtype is None:
@@ -243,12 +245,37 @@ def pandas_dataframe_to_arrow_table(df: pd.DataFrame, nthreads=1) -> pa.Table:
243
245
  )
244
246
 
245
247
 
248
+ def pandas_dtypes_to_arrow_schema(dtypes, unknown_as_string: bool = False) -> pa.Schema:
249
+ if unknown_as_string:
250
+ dt_list = [dt if dt != np.dtype("O") else pd.StringDtype() for dt in dtypes]
251
+ dtypes = pd.Series(dt_list, index=dtypes.index)
252
+ schema = pandas_types_to_arrow_schema(
253
+ pd.DataFrame([], columns=dtypes.index).astype(dtypes)
254
+ )
255
+ return schema
256
+
257
+
258
+ def pandas_dtype_to_arrow_type(dtype, unknown_as_string: bool = False) -> pa.DataType:
259
+ if unknown_as_string and dtype == np.dtype("O"):
260
+ dtype = pd.StringDtype()
261
+ schema = pandas_types_to_arrow_schema(
262
+ pd.DataFrame([], columns=["a"]).astype({"a": dtype})
263
+ )
264
+ return schema.types[0]
265
+
266
+
246
267
  def is_scalar_object(df_obj: Any) -> bool:
247
268
  return (
248
269
  isinstance(df_obj, TENSOR_TYPE) and df_obj.shape == ()
249
270
  ) or pd_types.is_scalar(df_obj)
250
271
 
251
272
 
273
+ def is_tensor_object(df_obj: Any) -> bool:
274
+ return (
275
+ isinstance(df_obj, TENSOR_TYPE) or isinstance(df_obj, np.ndarray)
276
+ ) and df_obj.ndim <= 1
277
+
278
+
252
279
  def _scalar_as_index(df_obj: Any) -> pd.Index:
253
280
  if isinstance(df_obj, TILEABLE_TYPE):
254
281
  return pd.Index([], dtype=df_obj.dtype)
@@ -264,7 +291,7 @@ def pandas_to_odps_schema(
264
291
  from ... import dataframe as md
265
292
  from .arrow import pandas_to_arrow
266
293
 
267
- if is_scalar_object(df_obj):
294
+ if is_scalar_object(df_obj) or is_tensor_object(df_obj):
268
295
  empty_index = None
269
296
  elif hasattr(df_obj, "index_value"):
270
297
  empty_index = df_obj.index_value.to_pandas()[:0]
@@ -368,25 +395,25 @@ def build_table_column_name(
368
395
 
369
396
 
370
397
  def build_dataframe_table_meta(
371
- df_obj: Any, ignore_index: bool = False
398
+ df_obj: Any, ignore_index: bool = False, session_id: str = None
372
399
  ) -> DataFrameTableMeta:
373
- from ... import dataframe as md
374
-
375
400
  col_to_count = defaultdict(lambda: 0)
376
401
  col_to_idx = defaultdict(lambda: 0)
377
402
  pd_col_to_col_name = dict()
378
- if isinstance(df_obj, (md.DataFrame, pd.DataFrame)):
403
+ if isinstance(df_obj, (DATAFRAME_TYPE, pd.DataFrame)):
379
404
  obj_type = OutputType.dataframe
380
- elif isinstance(df_obj, (md.Series, pd.Series)):
405
+ elif isinstance(df_obj, (SERIES_TYPE, pd.Series)):
381
406
  obj_type = OutputType.series
382
- elif isinstance(df_obj, (md.Index, pd.Index)):
407
+ elif isinstance(df_obj, (INDEX_TYPE, pd.Index)):
383
408
  obj_type = OutputType.index
409
+ elif is_tensor_object(df_obj) and df_obj.ndim == 1:
410
+ obj_type = OutputType.tensor
384
411
  elif is_scalar_object(df_obj):
385
412
  obj_type = OutputType.scalar
386
413
  else: # pragma: no cover
387
414
  raise TypeError(f"Cannot accept type {type(df_obj)}")
388
415
 
389
- if obj_type == OutputType.scalar:
416
+ if obj_type in (OutputType.scalar, OutputType.tensor):
390
417
  pd_dtypes = pd.Series([])
391
418
  column_index_names = []
392
419
  index_obj = _scalar_as_index(df_obj)
@@ -404,7 +431,10 @@ def build_dataframe_table_meta(
404
431
  index_obj = df_obj.index
405
432
 
406
433
  if isinstance(df_obj, TILEABLE_TYPE):
407
- table_name = _TEMP_TABLE_PREFIX + str(df_obj.key)
434
+ if not session_id:
435
+ table_name = _TEMP_TABLE_PREFIX + str(df_obj.key)
436
+ else:
437
+ table_name = build_temp_table_name(session_id, df_obj.key)
408
438
  else:
409
439
  table_name = None
410
440
 
@@ -26,9 +26,9 @@ from odps.apis.storage_api import (
26
26
  TableBatchScanResponse,
27
27
  TableBatchWriteResponse,
28
28
  )
29
+ from odps.errors import TableModified
29
30
  from odps.tunnel import TableDownloadSession, TableDownloadStatus, TableTunnel
30
31
  from odps.types import OdpsSchema, PartitionSpec, timestamp_ntz
31
- from odps.utils import call_with_retry
32
32
 
33
33
  try:
34
34
  import pyarrow.compute as pac
@@ -37,7 +37,7 @@ except ImportError:
37
37
 
38
38
  from ...config import options
39
39
  from ...env import ODPS_STORAGE_API_ENDPOINT
40
- from ...utils import is_empty, sync_pyodps_options
40
+ from ...utils import call_with_retry, is_empty, sync_pyodps_options
41
41
  from .schema import odps_schema_to_arrow_schema
42
42
 
43
43
  PartitionsType = Union[List[str], str, None]
@@ -154,6 +154,32 @@ class TunnelMultiPartitionReader:
154
154
  return None
155
155
  return self._count
156
156
 
157
+ def _open_table_reader(self, partition: Optional[str], columns: List[str]):
158
+ attempts = 2
159
+ for trial in range(attempts):
160
+ try:
161
+ return self._table.open_reader(
162
+ partition,
163
+ columns=columns,
164
+ arrow=True,
165
+ download_id=self._partition_to_download_ids.get(partition),
166
+ append_partitions=True,
167
+ )
168
+ except TableModified:
169
+ if trial == attempts - 1:
170
+ raise
171
+ pt_to_session = TunnelTableIO.create_download_sessions(
172
+ self._odps_entry,
173
+ self._table.full_table_name,
174
+ partition,
175
+ reopen=True,
176
+ )
177
+ assert partition in pt_to_session
178
+ self._partition_to_download_ids[partition] = pt_to_session[partition].id
179
+ raise RuntimeError(
180
+ "Unexpected condition: all trial of open reader done and not raised"
181
+ )
182
+
157
183
  def _open_next_reader(self):
158
184
  if self._cur_reader is not None:
159
185
  self._reader_start_pos += self._cur_reader.count
@@ -170,12 +196,8 @@ class TunnelMultiPartitionReader:
170
196
  part_str = self._partitions[self._cur_partition_id]
171
197
  req_columns = self._schema.names
172
198
  with sync_pyodps_options():
173
- self._cur_reader = self._table.open_reader(
174
- part_str,
175
- columns=req_columns,
176
- arrow=True,
177
- download_id=self._partition_to_download_ids.get(part_str),
178
- append_partitions=True,
199
+ self._cur_reader = self._open_table_reader(
200
+ part_str, columns=req_columns
179
201
  )
180
202
  if self._cur_reader.count + self._reader_start_pos > self._start:
181
203
  start = self._start - self._reader_start_pos
@@ -193,13 +215,27 @@ class TunnelMultiPartitionReader:
193
215
 
194
216
  def read(self):
195
217
  with sync_pyodps_options():
218
+ is_first_batch = False
196
219
  if self._cur_reader is None:
220
+ is_first_batch = True
197
221
  self._open_next_reader()
198
222
  if self._cur_reader is None:
199
223
  return None
200
224
  while self._cur_reader is not None:
201
225
  try:
202
- batch = next(self._reader_iter)
226
+ try:
227
+ batch = next(self._reader_iter)
228
+ except TableModified:
229
+ if not is_first_batch:
230
+ raise
231
+ # clear download id cache to create new sessions
232
+ self._partition_to_download_ids = dict()
233
+ self._cur_reader = None
234
+ self._open_next_reader()
235
+ if self._cur_reader is None:
236
+ return None
237
+ batch = next(self._reader_iter)
238
+
203
239
  if batch is not None:
204
240
  if self._row_left is not None:
205
241
  self._row_left -= batch.num_rows
@@ -222,6 +258,14 @@ class TunnelMultiPartitionReader:
222
258
 
223
259
  class TunnelTableIO(ODPSTableIO):
224
260
  _down_session_ids = OrderedDict()
261
+ _down_modified_time = dict()
262
+
263
+ @classmethod
264
+ def _get_modified_time(cls, odps_entry: ODPS, full_table_name, partition):
265
+ data_src = odps_entry.get_table(full_table_name)
266
+ if partition is not None:
267
+ data_src = data_src.partitions[partition]
268
+ return data_src.last_data_modified_time
225
269
 
226
270
  @classmethod
227
271
  def create_download_sessions(
@@ -229,6 +273,7 @@ class TunnelTableIO(ODPSTableIO):
229
273
  odps_entry: ODPS,
230
274
  full_table_name: str,
231
275
  partitions: List[Optional[str]] = None,
276
+ reopen: bool = False,
232
277
  ) -> Dict[Optional[str], TableDownloadSession]:
233
278
  table = odps_entry.get_table(full_table_name)
234
279
  tunnel = TableTunnel(odps_entry, quota_name=options.tunnel_quota_name)
@@ -240,9 +285,14 @@ class TunnelTableIO(ODPSTableIO):
240
285
  part_to_session = dict()
241
286
  for part in parts:
242
287
  part_key = (full_table_name, part)
288
+ modified_time = cls._get_modified_time(odps_entry, full_table_name, part)
243
289
  down_session = None
244
290
 
245
- if part_key in cls._down_session_ids:
291
+ if (
292
+ not reopen
293
+ and part_key in cls._down_session_ids
294
+ and cls._down_modified_time.get(part_key) == modified_time
295
+ ):
246
296
  down_id = cls._down_session_ids[part_key]
247
297
  down_session = tunnel.create_download_session(
248
298
  table, async_mode=True, partition_spec=part, download_id=down_id
@@ -256,8 +306,10 @@ class TunnelTableIO(ODPSTableIO):
256
306
  )
257
307
 
258
308
  while len(cls._down_session_ids) >= _DOWNLOAD_ID_CACHE_SIZE:
259
- cls._down_session_ids.popitem(False)
309
+ k, _ = cls._down_session_ids.popitem(False)
310
+ cls._down_modified_time.pop(k)
260
311
  cls._down_session_ids[part_key] = down_session.id
312
+ cls._down_modified_time[part_key] = modified_time
261
313
  part_to_session[part] = down_session
262
314
  return part_to_session
263
315
 
@@ -17,8 +17,7 @@ import pandas as pd
17
17
  import pyarrow as pa
18
18
  import pytest
19
19
 
20
- from maxframe.lib.dtypes_extension import dict_
21
-
20
+ from ....lib.dtypes_extension import dict_
22
21
  from ..arrow import arrow_to_pandas, pandas_to_arrow
23
22
 
24
23