maxframe 0.1.0b5__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647) hide show
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-310-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,837 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import inspect
17
+ from collections import OrderedDict
18
+ from typing import Any, Dict, List, NamedTuple, Optional, Tuple
19
+
20
+ import msgpack
21
+ import numpy as np
22
+ import pandas as pd
23
+
24
+ from ...core import ENTITY_TYPE, enter_mode, is_build_mode, is_kernel_mode
25
+ from ...serialization.serializables import (
26
+ AnyField,
27
+ BoolField,
28
+ DataTypeField,
29
+ Int32Field,
30
+ StringField,
31
+ )
32
+ from ...typing_ import TileableType
33
+ from ...utils import get_item_if_scalar, pd_release_version, tokenize
34
+ from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
35
+ from ..utils import (
36
+ build_df,
37
+ build_empty_df,
38
+ build_empty_series,
39
+ build_series,
40
+ parse_index,
41
+ validate_axis,
42
+ )
43
+
44
# pandas releases older than 1.3 do not drop object columns when min / max
# is aggregated over index levels with numeric_only=True
_level_reduction_keep_object = (1, 3) > pd_release_version[:2]
# set for every pandas release line other than 1.2: reducing a DataFrame
# into a Series with a mixture of float and bool results gives object dtype
# (the original note says this holds for pandas>=1.3)
_reduce_bool_as_object = not (pd_release_version[:2] == (1, 2))
50
+
51
+
52
class DataFrameReductionOperator(DataFrameOperator):
    """Operator base carrying the options shared by reduction operators.

    The serializable fields mirror the keyword arguments of pandas reduction
    methods (``axis``, ``skipna``, ``level``, ``numeric_only``, ``bool_only``,
    ``min_count``) plus ``method`` and the output ``dtype``.
    """

    axis = AnyField("axis", default=None)
    skipna = BoolField("skipna", default=True)
    level = AnyField("level", default=None)
    numeric_only = BoolField("numeric_only", default=None)
    bool_only = BoolField("bool_only", default=None)
    min_count = Int32Field("min_count", default=None)
    method = StringField("method", default=None)

    dtype = DataTypeField("dtype", default=None)

    def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
        # ``output_types`` is forwarded under the private field name
        super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)

    @property
    def is_atomic(self):
        """Always False on this base class."""
        return False

    def get_reduction_args(self, axis=None):
        """Collect keyword arguments for the reduction call.

        ``axis`` is only included when the first input has more than one
        dimension. Options whose value is None are left out of the result.
        """
        candidates = [("skipna", self.skipna)]
        if self.inputs and self.inputs[0].ndim > 1:
            candidates.append(("axis", axis))
        candidates.append(("numeric_only", self.numeric_only))
        candidates.append(("bool_only", self.bool_only))
        return {name: val for name, val in candidates if val is not None}
79
+
80
+
81
class DataFrameCumReductionOperator(DataFrameOperator):
    """Operator base carrying the options of cumulative reductions."""

    # serializable options; both default to None
    axis = AnyField("axis", default=None)
    skipna = BoolField("skipna", default=None)

    # dtype of the result
    dtype = DataTypeField("dtype", default=None)

    def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
        # ``output_types`` is forwarded under the private field name
        super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
89
+
90
+
91
+ def _default_agg_fun(value, func_name=None, **kw):
92
+ if value.ndim == 1:
93
+ kw.pop("bool_only", None)
94
+ kw.pop("numeric_only", None)
95
+ return getattr(value, func_name)(**kw)
96
+ else:
97
+ return getattr(value, func_name)(**kw)
98
+
99
+
100
@functools.lru_cache(100)
def _get_series_reduction_dtype(
    dtype,
    func_name,
    axis=None,
    bool_only=False,
    skipna=True,
    numeric_only=False,
):
    """Infer the dtype produced by reducing a Series of ``dtype``.

    A sample Series is built via ``build_series`` and reduced with pandas;
    the dtype of the result wrapped into a ``pd.Series`` is returned.
    Results are memoized by ``functools.lru_cache``.
    """
    sample = build_series(dtype=dtype, ensure_string=True)
    if func_name in ("count", "nunique"):
        # these two are called without any keyword arguments
        outcome = getattr(sample, func_name)()
    elif func_name in ("all", "any"):
        outcome = getattr(sample, func_name)(axis=axis, bool_only=bool_only)
    elif func_name == "size":
        outcome = sample.size
    elif func_name == "str_concat":
        outcome = pd.Series([sample.str.cat()])
    else:
        reducer = getattr(sample, func_name)
        outcome = reducer(axis=axis, skipna=skipna, numeric_only=numeric_only)
    return pd.Series(outcome).dtype
125
+
126
+
127
@functools.lru_cache(100)
def _get_df_reduction_dtype(
    dtype, func_name, axis=None, bool_only=False, skipna=False, numeric_only=False
):
    """Infer the dtype produced by reducing a one-column DataFrame.

    The column is a sample Series of ``dtype`` built via ``build_series``.
    Returns None when the reduction result is empty, otherwise the dtype of
    the result. Results are memoized by ``functools.lru_cache``.
    """
    sample_df = build_series(dtype=dtype, ensure_string=True).to_frame()
    if func_name == "str_concat":
        outcome = sample_df.apply(lambda s: s.str.cat(), axis=axis)
    else:
        # pick the keyword arguments accepted for each kind of reduction
        if func_name == "count":
            call_kw = dict(axis=axis, numeric_only=numeric_only)
        elif func_name == "nunique":
            call_kw = dict(axis=axis)
        elif func_name in ("all", "any"):
            call_kw = dict(axis=axis, bool_only=bool_only)
        else:
            call_kw = dict(axis=axis, skipna=skipna, numeric_only=numeric_only)
        outcome = getattr(sample_df, func_name)(**call_kw)
    if len(outcome) == 0:
        return None
    return outcome.dtype
147
+
148
+
149
class DataFrameReductionMixin(DataFrameOperatorMixin):
    """Call-time logic for reduction operators on DataFrames and Series.

    The mixin reads reduction options (``axis``, ``level``, ``skipna``,
    ``numeric_only``, ``bool_only``, ``method``) and the ``_func_name``
    attribute from the operator it is mixed into.
    """

    @classmethod
    def get_reduction_callable(cls, op):
        """Build a callable applying the reduction named by ``op._func_name``.

        Options of ``op`` that are None are not bound. The returned partial
        has its ``__name__`` set to the function name.
        """
        func_name = getattr(op, "_func_name")
        kw = dict(
            skipna=op.skipna, numeric_only=op.numeric_only, bool_only=op.bool_only
        )
        kw = {k: v for k, v in kw.items() if v is not None}
        fun = functools.partial(_default_agg_fun, func_name=func_name, **kw)
        fun.__name__ = func_name
        return fun

    def _call_groupby_level(self, df, level):
        """Express a level-wise reduction as ``groupby(level=...).agg(...)``."""
        return df.groupby(level=level).agg(
            self.get_reduction_callable(self), method=self.method
        )

    def _call_dataframe(self, df):
        """Create the output of a DataFrame reduction.

        Raises:
            NotImplementedError: when ``level`` is given together with axis 1.
        """
        axis = getattr(self, "axis", None) or 0
        level = getattr(self, "level", None)
        skipna = getattr(self, "skipna", True)
        numeric_only = getattr(self, "numeric_only", None)
        bool_only = getattr(self, "bool_only", None)
        self.axis = axis = validate_axis(axis, df)
        func_name = getattr(self, "_func_name")

        if level is not None and axis == 1:
            raise NotImplementedError("Not support specify level for axis==1")

        if func_name == "size":
            reduced = pd.Series(
                np.zeros(df.shape[1 - axis]),
                index=df.dtypes.index if axis == 0 else None,
            )
            reduced_cols = list(reduced.index)
            reduced_dtype = reduced.dtype
        elif func_name == "custom_reduction":
            empty_df = build_df(df, ensure_string=True)
            reduced = getattr(self, "custom_reduction").__call_agg__(empty_df)
            reduced_cols = list(reduced.index)
            reduced_dtype = reduced.dtype
        else:
            reduced_cols, dtypes = [], []
            for col, src_dt in df.dtypes.items():
                dt = _get_df_reduction_dtype(
                    src_dt,
                    func_name,
                    axis=axis,
                    bool_only=bool_only,
                    skipna=skipna,
                    numeric_only=numeric_only,
                )
                if dt is not None:
                    reduced_cols.append(col)
                    dtypes.append(dt)
                elif (
                    _level_reduction_keep_object
                    and numeric_only
                    and level is not None
                    and func_name in ("min", "max")
                    and src_dt == np.dtype(object)
                ):  # pragma: no cover
                    reduced_cols.append(col)
                    dtypes.append(np.dtype(object))
            if len(dtypes) == 0:
                reduced_dtype = np.dtype("O")
            elif all(dt == dtypes[0] for dt in dtypes):
                reduced_dtype = dtypes[0]
            else:
                # as we already bypassed dtypes with same values,
                # when has_mixed_bool is True, there are other dtypes
                # other than bool.
                has_mixed_bool = any(dt == np.dtype(bool) for dt in dtypes)
                if _reduce_bool_as_object and has_mixed_bool:
                    reduced_dtype = np.dtype("O")
                elif not all(isinstance(dt, np.dtype) for dt in dtypes):
                    # todo currently we return mixed dtypes as np.dtype('O').
                    #  handle pandas Dtypes in the future more carefully.
                    reduced_dtype = np.dtype("O")
                else:
                    # np.find_common_type was deprecated in NumPy 1.25 and
                    # removed in NumPy 2.0; np.result_type is the documented
                    # replacement. It raises TypeError for dtypes that cannot
                    # be promoted, in which case object dtype is used.
                    try:
                        reduced_dtype = np.result_type(*dtypes)
                    except TypeError:
                        reduced_dtype = np.dtype("O")

        if level is not None:
            return self._call_groupby_level(df[reduced_cols], level)

        if axis == 0:
            reduced_shape = (len(reduced_cols),)
            reduced_index_value = parse_index(pd.Index(reduced_cols), store_data=True)
        else:
            reduced_shape = (df.shape[0],)
            reduced_index_value = parse_index(pd.RangeIndex(-1))

        return self.new_series(
            [df],
            shape=reduced_shape,
            dtype=reduced_dtype,
            index_value=reduced_index_value,
        )

    def _call_series(self, series):
        """Create the scalar output of a Series reduction.

        When ``level`` is set the call is delegated to the groupby path.
        """
        level = getattr(self, "level", None)
        axis = getattr(self, "axis", None)
        skipna = getattr(self, "skipna", True)
        numeric_only = getattr(self, "numeric_only", None)
        bool_only = getattr(self, "bool_only", None)
        self.axis = axis = validate_axis(axis or 0, series)
        func_name = getattr(self, "_func_name")

        if level is not None:
            return self._call_groupby_level(series, level)

        if func_name == "custom_reduction":
            # run the custom reduction on a sample series to learn the dtype
            empty_series = build_series(series, ensure_string=True)
            result_scalar = getattr(self, "custom_reduction").__call_agg__(empty_series)
            if hasattr(result_scalar, "to_pandas"):  # pragma: no cover
                result_scalar = result_scalar.to_pandas()
            result_dtype = pd.Series(result_scalar).dtype
        else:
            result_dtype = _get_series_reduction_dtype(
                series.dtype,
                func_name,
                axis=axis,
                bool_only=bool_only,
                numeric_only=numeric_only,
                skipna=skipna,
            )
        return self.new_scalar([series], dtype=result_dtype)

    def __call__(self, a):
        """Apply the reduction to ``a``.

        In kernel mode, operators that are not atomic are applied directly
        through the reduction callable; otherwise dispatch depends on
        whether ``a`` is a DataFrame or not.
        """
        if is_kernel_mode() and not getattr(self, "is_atomic", False):
            return self.get_reduction_callable(self)(a)

        if isinstance(a, DATAFRAME_TYPE):
            return self._call_dataframe(a)
        else:
            return self._call_series(a)
285
+
286
+
287
class DataFrameCumReductionMixin(DataFrameOperatorMixin):
    """Call-time logic for cumulative reductions on DataFrames and Series."""

    def _call_dataframe(self, df):
        """Create an output DataFrame with the same shape and index as ``df``.

        Output dtypes come from running the cumulative function on an empty
        frame built from ``df.dtypes``.
        """
        axis = validate_axis(getattr(self, "axis", None) or 0, df)
        self.axis = axis

        placeholder = build_empty_df(df.dtypes)
        cum_func = getattr(placeholder, getattr(self, "_func_name"))
        result_dtypes = cum_func(axis=axis).dtypes
        return self.new_dataframe(
            [df],
            shape=df.shape,
            dtypes=result_dtypes,
            index_value=df.index_value,
            columns_value=df.columns_value,
        )

    def _call_series(self, series):
        """Create an output Series mirroring the metadata of ``series``."""
        axis = getattr(self, "axis", None) or 0
        # only the "index" alias is normalized here
        self.axis = 0 if axis == "index" else axis

        return self.new_series(
            [series],
            shape=series.shape,
            dtype=series.dtype,
            name=series.name,
            index_value=series.index_value,
        )

    def __call__(self, a):
        """Dispatch on whether ``a`` is a DataFrame or not."""
        handler = (
            self._call_dataframe
            if isinstance(a, DATAFRAME_TYPE)
            else self._call_series
        )
        return handler(a)
321
+
322
+
323
class CustomReduction:
    """Base class for user-defined reductions built from three stages.

    Subclasses override :meth:`pre`, :meth:`agg` and :meth:`post`;
    ``__call_agg__`` chains them on a concrete value.
    """

    name: Optional[str]
    output_limit: Optional[int]
    kwds: Dict

    # set to True when pre() already performs aggregation
    pre_with_agg = False

    def __init__(self, name=None, is_gpu=None):
        self.name = name or "<custom>"
        self.output_limit = 1
        self._is_gpu = is_gpu

    @property
    def __name__(self):
        return self.name

    def __call__(self, value):
        """Apply the reduction to a MaxFrame entity or to a plain value."""
        if not isinstance(value, ENTITY_TYPE):
            return self.__call_agg__(value)

        from .custom_reduction import build_custom_reduction_result

        return build_custom_reduction_result(value, self)

    def __call_agg__(self, value):
        """Run pre(), then agg() unless ``pre_with_agg`` is set, then post()."""
        staged = self.pre(value)
        if not isinstance(staged, tuple):
            staged = (staged,)
        # update output limit into actual size
        self.output_limit = len(staged)

        # pre() has already aggregated: hand its result to post() as is
        if self.pre_with_agg:
            return self.post(*staged)

        aggregated = self.agg(*staged)
        if not isinstance(aggregated, tuple):
            aggregated = (aggregated,)
        return self.post(*aggregated)

    def is_gpu(self):
        """Return the configured GPU flag, or False while in build mode."""
        if is_build_mode():
            return False
        return self._is_gpu

    def pre(self, value):  # noqa: R0201  # pylint: disable=no-self-use
        """Default pre-stage: wrap the value into a one-element tuple."""
        return (value,)

    def agg(self, *values):  # noqa: R0201  # pylint: disable=no-self-use
        """Aggregation stage; has no default implementation."""
        raise NotImplementedError

    def post(self, *value):  # noqa: R0201  # pylint: disable=no-self-use
        """Default post-stage: expect exactly one value and return it."""
        assert len(value) == 1
        return value[0]

    def __maxframe_tokenize__(self):
        # the token is the cloudpickle dump of the instance
        import cloudpickle

        return cloudpickle.dumps(self)
380
+
381
+
382
class ReductionPreStep(NamedTuple):
    """Description of a function evaluated before aggregation."""

    # key of the tileable the step starts from
    input_key: str
    # key of the tileable produced by the step
    output_key: str
    # columns attached to the step, or None
    columns: Optional[List[str]]
    # serialized function IDL (bytes)
    func_idl: bytes
387
+
388
+
389
class ReductionAggStep(NamedTuple):
    """Description of a single aggregation."""

    # key of the tileable being aggregated
    input_key: str
    # name of the function the aggregation was compiled from
    raw_func_name: Optional[str]
    # name of the reduction performed by this step
    step_func_name: Optional[str]
    # function names for the map and the aggregate phases
    map_func_name: Optional[str]
    agg_func_name: Optional[str]
    # the CustomReduction instance when one is being compiled, else None
    custom_reduction: Optional[CustomReduction]
    # key of the aggregated result
    output_key: str
    output_limit: int
    # keyword arguments of the reduction call
    kwds: Dict[str, Any]
399
+
400
+
401
class ReductionPostStep(NamedTuple):
    """Description of a function evaluated after aggregation."""

    # keys of the tileables consumed by the step
    input_keys: List[str]
    # key of the final result
    output_key: str
    # name of the function the step belongs to
    func_name: str
    # columns attached to the step, or None
    columns: Optional[List[str]]
    # serialized function IDL (bytes)
    func_idl: bytes
406
+ func_idl: bytes
407
+
408
+
409
class ReductionSteps(NamedTuple):
    """Pre-aggregation, aggregation and post-aggregation steps of a reduction."""

    # functions evaluated before aggregation
    pre_funcs: List[ReductionPreStep]
    # the aggregations themselves
    agg_funcs: List[ReductionAggStep]
    # functions evaluated after aggregation
    post_funcs: List[ReductionPostStep]
413
+
414
+
415
# maps numpy arithmetic / comparison operator names to the pandas method names
_func_name_converts = {
    "greater": "gt",
    "greater_equal": "ge",
    "less": "lt",
    "less_equal": "le",
    "equal": "eq",
    "not_equal": "ne",
    "true_divide": "truediv",
    "floor_divide": "floordiv",
    "power": "pow",
}
# module-wide cache of compiled reduction steps
_func_compile_cache = {}  # type: Dict[str, ReductionSteps]

# value types accepted as literals inside function IDL statements
_idl_primitive_types = (
    type(None),
    int,
    float,
    bool,
    str,
    bytes,
    np.integer,
    np.bool_,
)

# operation names and prefixes used in function IDL statements
IN_VAR_IDL_OP = "in_var"
OUT_VAR_IDL_OP = "out_var"
MASK_VAR_OP = "mask"
WHERE_VAR_OP = "where"
LET_VAR_OP = "let"
UNARY_IDL_OP_PREFIX = "unary:"
BINARY_IDL_OP_PREFIX = "bin:"
447
+
448
+
449
class ReductionCompiler:
    """Compile reduction callables into pre / agg / post step descriptions.

    Callables are registered with :meth:`add_function`; :meth:`compile`
    returns the merged :class:`ReductionSteps` of everything registered.
    """

    def __init__(self, axis=0):
        self._axis = axis

        self._key_to_tileable = dict()
        self._output_tileables = []
        self._lambda_counter = 0
        self._custom_counter = 0
        self._func_cache = dict()

        self._compiled_funcs = []
        self._output_key_to_pre_steps = dict()
        self._output_key_to_pre_cols = dict()
        self._output_key_to_agg_steps = dict()
        self._output_key_to_post_steps = dict()
        self._output_key_to_post_cols = dict()

    @classmethod
    def _check_function_valid(cls, func):
        """Reject functions referencing MaxFrame objects from outer scopes.

        ``functools.partial`` objects are unwrapped and ``CustomReduction``
        instances are accepted without inspection.

        Raises:
            ValueError: when a global or closure variable used by ``func``
                is a MaxFrame entity.
        """
        if isinstance(func, functools.partial):
            return cls._check_function_valid(func.func)
        elif isinstance(func, CustomReduction):
            return

        func_code = func.__code__
        func_vars = {n: func.__globals__.get(n) for n in func_code.co_names}
        if func.__closure__:
            func_vars.update(
                {
                    n: cell.cell_contents
                    for n, cell in zip(func_code.co_freevars, func.__closure__)
                }
            )
        # external MaxFrame objects shall not be referenced
        for var_name, val in func_vars.items():
            if isinstance(val, ENTITY_TYPE):
                raise ValueError(
                    f"Variable {var_name} used by {func.__name__} "
                    "cannot be a MaxFrame object"
                )

    @staticmethod
    def _update_col_dict(col_dict: Dict, key: str, cols: Optional[List]):
        """Merge ``cols`` into the column list stored under ``key``.

        A new key stores a copy of ``cols`` (or None). For an existing key,
        columns not yet present are appended. An entry that is None is never
        narrowed back to a list, and merging ``cols=None`` into an existing
        list widens the entry to None instead of failing while iterating
        None. Presumably None means "no column restriction" - confirm
        against the consumers of the compiled steps.
        """
        if key not in col_dict:
            col_dict[key] = list(cols) if cols is not None else None
            return

        existing_cols = col_dict[key]
        if existing_cols is None:
            return
        if cols is None:
            col_dict[key] = None
            return
        existing_col_set = set(existing_cols)
        existing_cols.extend([c for c in cols if c not in existing_col_set])

    def add_function(self, func, ndim, cols=None, func_name=None):
        """Compile ``func`` and register its steps.

        Args:
            func: callable or ``CustomReduction`` to compile.
            ndim: number of dimensions of the data being reduced.
            cols: columns the function applies to; ignored unless the
                compiler axis is 0.
            func_name: explicit name; defaults to ``func.__name__``.

        Raises:
            ValueError: when ``func`` is a built-in function without an
                entry in ``_agg_functions``.
        """
        from .aggregation import _agg_functions

        cols = cols if cols is not None and self._axis == 0 else None

        func_name = func_name or getattr(func, "__name__", None)
        # anonymous functions get numbered names to keep them distinguishable
        if func_name == "<lambda>" or func_name is None:
            func_name = f"<lambda_{self._lambda_counter}>"
            self._lambda_counter += 1
        if func_name == "<custom>":
            func_name = f"<custom_{self._custom_counter}>"
            self._custom_counter += 1

        if inspect.isbuiltin(func):
            raw_func_name = getattr(func, "__name__", "N/A")
            if raw_func_name in _agg_functions:
                func = _agg_functions[raw_func_name]
            else:
                raise ValueError(f"Unexpected built-in function {raw_func_name}")

        compile_result = self._compile_function(func, func_name, ndim=ndim)
        self._compiled_funcs.append(compile_result)

        for step in compile_result.pre_funcs:
            self._output_key_to_pre_steps[step.output_key] = step
            self._update_col_dict(self._output_key_to_pre_cols, step.output_key, cols)

        for step in compile_result.agg_funcs:
            self._output_key_to_agg_steps[step.output_key] = step

        for step in compile_result.post_funcs:
            self._output_key_to_post_steps[step.output_key] = step
            self._update_col_dict(self._output_key_to_post_cols, step.output_key, cols)

    @staticmethod
    def _build_mock_return_object(func, input_dtype, ndim):
        """Call ``func`` on a mock Series / DataFrame of ``input_dtype``.

        A Series is used when ``ndim`` is 1, otherwise a two-column
        DataFrame with columns "A" and "B".
        """
        from ..initializer import DataFrame as MaxDataFrame
        from ..initializer import Series as MaxSeries

        if ndim == 1:
            mock_series = build_empty_series(np.dtype(input_dtype))
            mock_obj = MaxSeries(mock_series)
        else:
            mock_df = build_empty_df(
                pd.Series([np.dtype(input_dtype)] * 2, index=["A", "B"])
            )
            mock_obj = MaxDataFrame(mock_df)

        # calc target tileable to generate DAG
        with enter_mode(kernel=True, build=False):
            return func(mock_obj)

    @enter_mode(build=True)
    def _compile_function(self, func, func_name=None, ndim=1) -> ReductionSteps:
        """Compile one function into reduction steps.

        Results are memoized in the module-level ``_func_compile_cache``,
        keyed by a token of the function, the axis, the name and ``ndim``.

        Raises:
            ValueError: when the function does not return a MaxFrame object,
                does not reduce the number of dimensions, or uses an
                operator that is not supported around the aggregation.
        """
        from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
        from ...tensor.base import TensorWhere
        from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
        from ..datasource.dataframe import DataFrameDataSource
        from ..datasource.series import SeriesDataSource
        from ..indexing.where import DataFrameWhere

        func_token = tokenize(func, self._axis, func_name, ndim)
        if func_token in _func_compile_cache:
            return _func_compile_cache[func_token]
        custom_reduction = func if isinstance(func, CustomReduction) else None

        self._check_function_valid(func)

        try:
            func_ret = self._build_mock_return_object(func, float, ndim=ndim)
        except (TypeError, AttributeError):
            # we may encounter lambda x: x.str.cat(...), use an object series to test
            func_ret = self._build_mock_return_object(func, object, ndim=1)
        output_limit = getattr(func, "output_limit", None) or 1

        if not isinstance(func_ret, ENTITY_TYPE):
            raise ValueError(
                f"Custom function should return a MaxFrame object, not {type(func_ret)}"
            )
        if func_ret.ndim >= ndim:
            raise ValueError("Function not a reduction")

        # operators accepted after the aggregation; data sources are
        # additionally accepted before it
        post_agg_op_types = (
            DataFrameUnaryOp,
            DataFrameBinOp,
            TensorWhere,
            DataFrameWhere,
            TensorUnaryOp,
            TensorBinOp,
        )
        pre_agg_op_types = post_agg_op_types + (
            DataFrameDataSource,
            SeriesDataSource,
        )

        agg_graph = func_ret.build_graph()
        agg_tileables = {t for t in agg_graph if getattr(t.op, "is_atomic", False)}
        # check operators before aggregation
        for t in agg_graph.dfs(
            list(agg_tileables), visit_predicate="all", reverse=True
        ):
            if t not in agg_tileables and not isinstance(t.op, pre_agg_op_types):
                raise ValueError(f"Cannot support operator {type(t.op)} in aggregation")
        # check operators after aggregation
        for t in agg_graph.dfs(list(agg_tileables), visit_predicate="all"):
            if t not in agg_tileables and not isinstance(t.op, post_agg_op_types):
                raise ValueError(f"Cannot support operator {type(t.op)} in aggregation")

        pre_funcs, agg_funcs, post_funcs = [], [], []
        visited_inputs = set()
        # collect aggregations and their inputs
        for t in agg_tileables:
            agg_input_key = t.inputs[0].key

            # collect agg names
            step_func_name = getattr(t.op, "_func_name")
            if step_func_name in ("count", "size"):
                # partial counts / sizes are combined by summing them
                map_func_name, agg_func_name = step_func_name, "sum"
            else:
                map_func_name, agg_func_name = step_func_name, step_func_name

            # build agg description
            agg_funcs.append(
                ReductionAggStep(
                    agg_input_key,
                    func_name,
                    step_func_name,
                    map_func_name,
                    agg_func_name,
                    custom_reduction,
                    t.key,
                    output_limit,
                    t.op.get_reduction_args(axis=self._axis),
                )
            )
            # collect agg input and build function
            if agg_input_key not in visited_inputs:
                visited_inputs.add(agg_input_key)
                initial_inputs = list(t.inputs[0].build_graph().iter_indep())
                assert len(initial_inputs) == 1
                input_key = initial_inputs[0].key

                func_idl, _ = self._generate_function_idl(t.inputs[0])
                pre_funcs.append(
                    ReductionPreStep(
                        input_key, agg_input_key, None, msgpack.dumps(func_idl)
                    )
                )
        # collect function output after agg
        func_idl, input_keys = self._generate_function_idl(func_ret)
        post_funcs.append(
            ReductionPostStep(
                input_keys, func_ret.key, func_name, None, msgpack.dumps(func_idl)
            )
        )
        # drop the oldest cache entry once the cache holds more than 100
        if len(_func_compile_cache) > 100:  # pragma: no cover
            _func_compile_cache.pop(next(iter(_func_compile_cache.keys())))
        result = _func_compile_cache[func_token] = ReductionSteps(
            pre_funcs, agg_funcs, post_funcs
        )
        return result

    def _generate_function_idl(self, out_tileable: TileableType) -> Tuple[List, List]:
        """
        Generate function IDL from tileable DAG

        IDL Format: [
            ["in_var", "input_var_name"],
            ["op", "op_output_var", ["op_arg1", "op_arg2"], {"op_key1": "op_key2"}],
            ["out_var", "output_var_name"],
        ]

        Returns the IDL statements and the keys of the input tileables.
        """
        from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
        from ...tensor.base import TensorWhere
        from ...tensor.datasource import Scalar
        from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
        from ..datasource.dataframe import DataFrameDataSource
        from ..datasource.series import SeriesDataSource
        from ..indexing.where import DataFrameWhere

        input_key_to_var = OrderedDict()
        local_key_to_var = dict()
        idl_lines = []

        input_op_types = (
            DataFrameDataSource,
            SeriesDataSource,
            DataFrameReductionOperator,
        )

        def _gen_expr_str(t):
            # generate code for t
            if t.key in local_key_to_var:
                return

            if isinstance(t.op, input_op_types):
                # tileable is an input arg, build a function variable
                if t.key not in input_key_to_var:  # pragma: no branch
                    input_key_to_var[t.key] = local_key_to_var[
                        t.key
                    ] = f"invar{len(input_key_to_var)}"
            else:
                # inputs first, so their variables exist before being used
                for inp in t.inputs:
                    _gen_expr_str(inp)

                var_name = local_key_to_var[t.key] = f"var{len(local_key_to_var)}"
                keys_to_vars = {inp.key: local_key_to_var[inp.key] for inp in t.inputs}

                def _interpret_var(v):
                    v = get_item_if_scalar(v)
                    # get representation for variables
                    if hasattr(v, "key"):
                        return keys_to_vars[v.key]
                    elif isinstance(v, _idl_primitive_types):
                        return v
                    else:
                        raise NotImplementedError(
                            f"Type {type(v)} currently not interpretable"
                        )

                func_name = getattr(t.op, "_func_name", None)
                if func_name is None:
                    func_name = getattr(t.op, "_bit_func_name", None)
                # handle function name differences between numpy and pandas arithmetic ops
                if func_name in _func_name_converts:
                    func_name = _func_name_converts[func_name]

                # build given different op types
                if isinstance(t.op, (DataFrameUnaryOp, TensorUnaryOp)):
                    val = _interpret_var(t.inputs[0])
                    statements = [
                        [UNARY_IDL_OP_PREFIX + func_name, var_name, [val], {}]
                    ]
                elif isinstance(t.op, (DataFrameBinOp, TensorBinOp)):
                    lhs, rhs = t.op.lhs, t.op.rhs
                    # an axis is only passed when operand dimensions differ
                    op_axis = (
                        1 - self._axis
                        if hasattr(lhs, "ndim")
                        and hasattr(rhs, "ndim")
                        and lhs.ndim != rhs.ndim
                        else None
                    )
                    lhs = _interpret_var(lhs)
                    rhs = _interpret_var(rhs)
                    axis_arg = {"axis": op_axis} if op_axis is not None else {}
                    # NOTE(review): this statement has five elements (an empty
                    # dict followed by axis_arg) while the documented format
                    # has four; kept as is, confirm with the IDL consumer
                    statements = [
                        [
                            BINARY_IDL_OP_PREFIX + func_name,
                            var_name,
                            [lhs, rhs],
                            {},
                            axis_arg,
                        ]
                    ]
                elif isinstance(t.op, TensorWhere):
                    cond = _interpret_var(t.op.condition)
                    x = _interpret_var(t.op.x)
                    y = _interpret_var(t.op.y)
                    statements = [[WHERE_VAR_OP, var_name, [cond, x, y], {}]]
                elif isinstance(t.op, DataFrameWhere):
                    func_name = MASK_VAR_OP if t.op.replace_true else WHERE_VAR_OP
                    inp = _interpret_var(t.op.input)
                    cond = _interpret_var(t.op.cond)
                    other = _interpret_var(t.op.other)
                    statements = [
                        [
                            func_name,
                            var_name,
                            [cond, inp, other],
                            {"axis": t.op.axis, "level": t.op.level},
                        ]
                    ]
                elif isinstance(t.op, Scalar):
                    # for scalar inputs of other operators
                    data = _interpret_var(t.op.data)
                    statements = [[LET_VAR_OP, var_name, [data]]]
                else:  # pragma: no cover
                    raise NotImplementedError(
                        f"Does not support aggregating on {type(t.op)}"
                    )

                idl_lines.extend(statements)

        _gen_expr_str(out_tileable)

        input_idls = [
            [IN_VAR_IDL_OP, var_name] for var_name in input_key_to_var.values()
        ]
        output_idls = [[OUT_VAR_IDL_OP, local_key_to_var[out_tileable.key]]]
        return input_idls + idl_lines + output_idls, list(input_key_to_var.keys())

    def compile(self) -> ReductionSteps:
        """Assemble the steps of all registered functions.

        Pre steps get their merged column lists attached. A post step whose
        columns equal the set of all columns referred to by pre steps gets
        None as its columns. When exactly one lambda / custom function was
        registered, its numbered name is turned back into "<lambda>" /
        "<custom>".
        """
        pre_funcs, agg_funcs, post_funcs = [], [], []
        referred_cols = set()
        for key, step in self._output_key_to_pre_steps.items():
            cols = self._output_key_to_pre_cols[key]
            if cols:
                referred_cols.update(cols)
            pre_funcs.append(
                ReductionPreStep(step.input_key, step.output_key, cols, step.func_idl)
            )

        for step in self._output_key_to_agg_steps.values():
            agg_funcs.append(step)

        for key, step in self._output_key_to_post_steps.items():
            cols = self._output_key_to_post_cols[key]
            if cols and set(cols) == set(referred_cols):
                post_cols = None
            else:
                post_cols = cols

            func_name = step.func_name
            if self._lambda_counter == 1 and step.func_name == "<lambda_0>":
                func_name = "<lambda>"
            if self._custom_counter == 1 and step.func_name == "<custom_0>":
                func_name = "<custom>"

            post_funcs.append(
                ReductionPostStep(
                    step.input_keys,
                    step.output_key,
                    func_name,
                    post_cols,
                    step.func_idl,
                )
            )

        return ReductionSteps(pre_funcs, agg_funcs, post_funcs)