maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +32 -0
- maxframe/_utils.cpython-311-darwin.so +0 -0
- maxframe/_utils.pxd +33 -0
- maxframe/_utils.pyx +547 -0
- maxframe/codegen.py +528 -0
- maxframe/config/__init__.py +15 -0
- maxframe/config/config.py +443 -0
- maxframe/config/tests/__init__.py +13 -0
- maxframe/config/tests/test_config.py +103 -0
- maxframe/config/tests/test_validators.py +34 -0
- maxframe/config/validators.py +57 -0
- maxframe/conftest.py +139 -0
- maxframe/core/__init__.py +65 -0
- maxframe/core/base.py +156 -0
- maxframe/core/entity/__init__.py +44 -0
- maxframe/core/entity/chunks.py +68 -0
- maxframe/core/entity/core.py +152 -0
- maxframe/core/entity/executable.py +337 -0
- maxframe/core/entity/fuse.py +73 -0
- maxframe/core/entity/objects.py +100 -0
- maxframe/core/entity/output_types.py +90 -0
- maxframe/core/entity/tileables.py +438 -0
- maxframe/core/entity/utils.py +24 -0
- maxframe/core/graph/__init__.py +17 -0
- maxframe/core/graph/builder/__init__.py +16 -0
- maxframe/core/graph/builder/base.py +86 -0
- maxframe/core/graph/builder/chunk.py +430 -0
- maxframe/core/graph/builder/tileable.py +34 -0
- maxframe/core/graph/builder/utils.py +41 -0
- maxframe/core/graph/core.cpython-311-darwin.so +0 -0
- maxframe/core/graph/core.pyx +467 -0
- maxframe/core/graph/entity.py +171 -0
- maxframe/core/graph/tests/__init__.py +13 -0
- maxframe/core/graph/tests/test_graph.py +205 -0
- maxframe/core/mode.py +96 -0
- maxframe/core/operator/__init__.py +34 -0
- maxframe/core/operator/base.py +450 -0
- maxframe/core/operator/core.py +276 -0
- maxframe/core/operator/fetch.py +53 -0
- maxframe/core/operator/fuse.py +29 -0
- maxframe/core/operator/objects.py +72 -0
- maxframe/core/operator/shuffle.py +111 -0
- maxframe/core/operator/tests/__init__.py +13 -0
- maxframe/core/operator/tests/test_core.py +64 -0
- maxframe/core/tests/__init__.py +13 -0
- maxframe/core/tests/test_mode.py +75 -0
- maxframe/dataframe/__init__.py +81 -0
- maxframe/dataframe/arithmetic/__init__.py +359 -0
- maxframe/dataframe/arithmetic/abs.py +33 -0
- maxframe/dataframe/arithmetic/add.py +60 -0
- maxframe/dataframe/arithmetic/arccos.py +28 -0
- maxframe/dataframe/arithmetic/arccosh.py +28 -0
- maxframe/dataframe/arithmetic/arcsin.py +28 -0
- maxframe/dataframe/arithmetic/arcsinh.py +28 -0
- maxframe/dataframe/arithmetic/arctan.py +28 -0
- maxframe/dataframe/arithmetic/arctanh.py +28 -0
- maxframe/dataframe/arithmetic/around.py +152 -0
- maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
- maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
- maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
- maxframe/dataframe/arithmetic/ceil.py +28 -0
- maxframe/dataframe/arithmetic/core.py +342 -0
- maxframe/dataframe/arithmetic/cos.py +28 -0
- maxframe/dataframe/arithmetic/cosh.py +28 -0
- maxframe/dataframe/arithmetic/degrees.py +28 -0
- maxframe/dataframe/arithmetic/docstring.py +442 -0
- maxframe/dataframe/arithmetic/equal.py +56 -0
- maxframe/dataframe/arithmetic/exp.py +28 -0
- maxframe/dataframe/arithmetic/exp2.py +28 -0
- maxframe/dataframe/arithmetic/expm1.py +28 -0
- maxframe/dataframe/arithmetic/floor.py +28 -0
- maxframe/dataframe/arithmetic/floordiv.py +64 -0
- maxframe/dataframe/arithmetic/greater.py +57 -0
- maxframe/dataframe/arithmetic/greater_equal.py +57 -0
- maxframe/dataframe/arithmetic/invert.py +33 -0
- maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
- maxframe/dataframe/arithmetic/less.py +57 -0
- maxframe/dataframe/arithmetic/less_equal.py +57 -0
- maxframe/dataframe/arithmetic/log.py +28 -0
- maxframe/dataframe/arithmetic/log10.py +28 -0
- maxframe/dataframe/arithmetic/log2.py +28 -0
- maxframe/dataframe/arithmetic/mod.py +60 -0
- maxframe/dataframe/arithmetic/multiply.py +60 -0
- maxframe/dataframe/arithmetic/negative.py +33 -0
- maxframe/dataframe/arithmetic/not_equal.py +56 -0
- maxframe/dataframe/arithmetic/power.py +68 -0
- maxframe/dataframe/arithmetic/radians.py +28 -0
- maxframe/dataframe/arithmetic/sin.py +28 -0
- maxframe/dataframe/arithmetic/sinh.py +28 -0
- maxframe/dataframe/arithmetic/sqrt.py +28 -0
- maxframe/dataframe/arithmetic/subtract.py +64 -0
- maxframe/dataframe/arithmetic/tan.py +28 -0
- maxframe/dataframe/arithmetic/tanh.py +28 -0
- maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
- maxframe/dataframe/arithmetic/truediv.py +64 -0
- maxframe/dataframe/arithmetic/trunc.py +28 -0
- maxframe/dataframe/arrays.py +864 -0
- maxframe/dataframe/core.py +2417 -0
- maxframe/dataframe/datasource/__init__.py +15 -0
- maxframe/dataframe/datasource/core.py +81 -0
- maxframe/dataframe/datasource/dataframe.py +59 -0
- maxframe/dataframe/datasource/date_range.py +504 -0
- maxframe/dataframe/datasource/from_index.py +54 -0
- maxframe/dataframe/datasource/from_records.py +107 -0
- maxframe/dataframe/datasource/from_tensor.py +419 -0
- maxframe/dataframe/datasource/index.py +117 -0
- maxframe/dataframe/datasource/read_csv.py +528 -0
- maxframe/dataframe/datasource/read_odps_query.py +299 -0
- maxframe/dataframe/datasource/read_odps_table.py +253 -0
- maxframe/dataframe/datasource/read_parquet.py +421 -0
- maxframe/dataframe/datasource/series.py +55 -0
- maxframe/dataframe/datasource/tests/__init__.py +13 -0
- maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
- maxframe/dataframe/datastore/__init__.py +26 -0
- maxframe/dataframe/datastore/core.py +19 -0
- maxframe/dataframe/datastore/to_csv.py +227 -0
- maxframe/dataframe/datastore/to_odps.py +162 -0
- maxframe/dataframe/extensions/__init__.py +41 -0
- maxframe/dataframe/extensions/accessor.py +50 -0
- maxframe/dataframe/extensions/reshuffle.py +83 -0
- maxframe/dataframe/extensions/tests/__init__.py +13 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
- maxframe/dataframe/fetch/__init__.py +15 -0
- maxframe/dataframe/fetch/core.py +86 -0
- maxframe/dataframe/groupby/__init__.py +82 -0
- maxframe/dataframe/groupby/aggregation.py +350 -0
- maxframe/dataframe/groupby/apply.py +251 -0
- maxframe/dataframe/groupby/core.py +179 -0
- maxframe/dataframe/groupby/cum.py +124 -0
- maxframe/dataframe/groupby/fill.py +141 -0
- maxframe/dataframe/groupby/getitem.py +92 -0
- maxframe/dataframe/groupby/head.py +105 -0
- maxframe/dataframe/groupby/sample.py +214 -0
- maxframe/dataframe/groupby/tests/__init__.py +13 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
- maxframe/dataframe/groupby/transform.py +255 -0
- maxframe/dataframe/indexing/__init__.py +84 -0
- maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
- maxframe/dataframe/indexing/align.py +349 -0
- maxframe/dataframe/indexing/at.py +83 -0
- maxframe/dataframe/indexing/getitem.py +204 -0
- maxframe/dataframe/indexing/iat.py +37 -0
- maxframe/dataframe/indexing/iloc.py +566 -0
- maxframe/dataframe/indexing/insert.py +86 -0
- maxframe/dataframe/indexing/loc.py +411 -0
- maxframe/dataframe/indexing/reindex.py +526 -0
- maxframe/dataframe/indexing/rename.py +462 -0
- maxframe/dataframe/indexing/rename_axis.py +209 -0
- maxframe/dataframe/indexing/reset_index.py +402 -0
- maxframe/dataframe/indexing/sample.py +221 -0
- maxframe/dataframe/indexing/set_axis.py +194 -0
- maxframe/dataframe/indexing/set_index.py +61 -0
- maxframe/dataframe/indexing/setitem.py +130 -0
- maxframe/dataframe/indexing/tests/__init__.py +13 -0
- maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
- maxframe/dataframe/indexing/where.py +308 -0
- maxframe/dataframe/initializer.py +288 -0
- maxframe/dataframe/merge/__init__.py +32 -0
- maxframe/dataframe/merge/append.py +121 -0
- maxframe/dataframe/merge/concat.py +325 -0
- maxframe/dataframe/merge/merge.py +593 -0
- maxframe/dataframe/merge/tests/__init__.py +13 -0
- maxframe/dataframe/merge/tests/test_merge.py +215 -0
- maxframe/dataframe/misc/__init__.py +134 -0
- maxframe/dataframe/misc/_duplicate.py +46 -0
- maxframe/dataframe/misc/accessor.py +276 -0
- maxframe/dataframe/misc/apply.py +692 -0
- maxframe/dataframe/misc/astype.py +236 -0
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/check_monotonic.py +84 -0
- maxframe/dataframe/misc/cut.py +383 -0
- maxframe/dataframe/misc/datetimes.py +79 -0
- maxframe/dataframe/misc/describe.py +108 -0
- maxframe/dataframe/misc/diff.py +210 -0
- maxframe/dataframe/misc/drop.py +440 -0
- maxframe/dataframe/misc/drop_duplicates.py +248 -0
- maxframe/dataframe/misc/duplicated.py +292 -0
- maxframe/dataframe/misc/eval.py +728 -0
- maxframe/dataframe/misc/explode.py +171 -0
- maxframe/dataframe/misc/get_dummies.py +208 -0
- maxframe/dataframe/misc/isin.py +217 -0
- maxframe/dataframe/misc/map.py +236 -0
- maxframe/dataframe/misc/melt.py +162 -0
- maxframe/dataframe/misc/memory_usage.py +248 -0
- maxframe/dataframe/misc/pct_change.py +150 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/qcut.py +104 -0
- maxframe/dataframe/misc/select_dtypes.py +104 -0
- maxframe/dataframe/misc/shift.py +256 -0
- maxframe/dataframe/misc/stack.py +238 -0
- maxframe/dataframe/misc/string_.py +221 -0
- maxframe/dataframe/misc/tests/__init__.py +13 -0
- maxframe/dataframe/misc/tests/test_misc.py +468 -0
- maxframe/dataframe/misc/to_numeric.py +178 -0
- maxframe/dataframe/misc/transform.py +361 -0
- maxframe/dataframe/misc/transpose.py +136 -0
- maxframe/dataframe/misc/value_counts.py +182 -0
- maxframe/dataframe/missing/__init__.py +53 -0
- maxframe/dataframe/missing/checkna.py +223 -0
- maxframe/dataframe/missing/dropna.py +280 -0
- maxframe/dataframe/missing/fillna.py +275 -0
- maxframe/dataframe/missing/replace.py +439 -0
- maxframe/dataframe/missing/tests/__init__.py +13 -0
- maxframe/dataframe/missing/tests/test_missing.py +89 -0
- maxframe/dataframe/operators.py +273 -0
- maxframe/dataframe/plotting/__init__.py +40 -0
- maxframe/dataframe/plotting/core.py +78 -0
- maxframe/dataframe/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
- maxframe/dataframe/reduction/__init__.py +107 -0
- maxframe/dataframe/reduction/aggregation.py +344 -0
- maxframe/dataframe/reduction/all.py +78 -0
- maxframe/dataframe/reduction/any.py +78 -0
- maxframe/dataframe/reduction/core.py +837 -0
- maxframe/dataframe/reduction/count.py +59 -0
- maxframe/dataframe/reduction/cummax.py +30 -0
- maxframe/dataframe/reduction/cummin.py +30 -0
- maxframe/dataframe/reduction/cumprod.py +30 -0
- maxframe/dataframe/reduction/cumsum.py +30 -0
- maxframe/dataframe/reduction/custom_reduction.py +42 -0
- maxframe/dataframe/reduction/kurtosis.py +104 -0
- maxframe/dataframe/reduction/max.py +65 -0
- maxframe/dataframe/reduction/mean.py +61 -0
- maxframe/dataframe/reduction/min.py +65 -0
- maxframe/dataframe/reduction/nunique.py +141 -0
- maxframe/dataframe/reduction/prod.py +76 -0
- maxframe/dataframe/reduction/reduction_size.py +36 -0
- maxframe/dataframe/reduction/sem.py +69 -0
- maxframe/dataframe/reduction/skew.py +89 -0
- maxframe/dataframe/reduction/std.py +53 -0
- maxframe/dataframe/reduction/str_concat.py +48 -0
- maxframe/dataframe/reduction/sum.py +77 -0
- maxframe/dataframe/reduction/tests/__init__.py +13 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
- maxframe/dataframe/reduction/unique.py +90 -0
- maxframe/dataframe/reduction/var.py +72 -0
- maxframe/dataframe/sort/__init__.py +34 -0
- maxframe/dataframe/sort/core.py +36 -0
- maxframe/dataframe/sort/sort_index.py +153 -0
- maxframe/dataframe/sort/sort_values.py +311 -0
- maxframe/dataframe/sort/tests/__init__.py +13 -0
- maxframe/dataframe/sort/tests/test_sort.py +81 -0
- maxframe/dataframe/statistics/__init__.py +33 -0
- maxframe/dataframe/statistics/corr.py +280 -0
- maxframe/dataframe/statistics/quantile.py +341 -0
- maxframe/dataframe/statistics/tests/__init__.py +13 -0
- maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
- maxframe/dataframe/tests/__init__.py +13 -0
- maxframe/dataframe/tests/test_initializer.py +29 -0
- maxframe/dataframe/tseries/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/__init__.py +13 -0
- maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
- maxframe/dataframe/tseries/to_datetime.py +297 -0
- maxframe/dataframe/ufunc/__init__.py +27 -0
- maxframe/dataframe/ufunc/tensor.py +54 -0
- maxframe/dataframe/ufunc/ufunc.py +52 -0
- maxframe/dataframe/utils.py +1267 -0
- maxframe/dataframe/window/__init__.py +29 -0
- maxframe/dataframe/window/aggregation.py +96 -0
- maxframe/dataframe/window/core.py +69 -0
- maxframe/dataframe/window/ewm.py +249 -0
- maxframe/dataframe/window/expanding.py +147 -0
- maxframe/dataframe/window/rolling.py +376 -0
- maxframe/dataframe/window/tests/__init__.py +13 -0
- maxframe/dataframe/window/tests/test_ewm.py +70 -0
- maxframe/dataframe/window/tests/test_expanding.py +66 -0
- maxframe/dataframe/window/tests/test_rolling.py +57 -0
- maxframe/env.py +33 -0
- maxframe/errors.py +21 -0
- maxframe/extension.py +81 -0
- maxframe/learn/__init__.py +17 -0
- maxframe/learn/contrib/__init__.py +17 -0
- maxframe/learn/contrib/pytorch/__init__.py +16 -0
- maxframe/learn/contrib/pytorch/run_function.py +110 -0
- maxframe/learn/contrib/pytorch/run_script.py +102 -0
- maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/__init__.py +15 -0
- maxframe/lib/aio/__init__.py +27 -0
- maxframe/lib/aio/_runners.py +162 -0
- maxframe/lib/aio/_threads.py +35 -0
- maxframe/lib/aio/base.py +82 -0
- maxframe/lib/aio/file.py +85 -0
- maxframe/lib/aio/isolation.py +100 -0
- maxframe/lib/aio/lru.py +242 -0
- maxframe/lib/aio/parallelism.py +37 -0
- maxframe/lib/aio/tests/__init__.py +13 -0
- maxframe/lib/aio/tests/test_aio_file.py +55 -0
- maxframe/lib/compression.py +55 -0
- maxframe/lib/cython/__init__.py +13 -0
- maxframe/lib/cython/libcpp.pxd +30 -0
- maxframe/lib/filesystem/__init__.py +21 -0
- maxframe/lib/filesystem/_glob.py +173 -0
- maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
- maxframe/lib/filesystem/_oss_lib/common.py +198 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
- maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
- maxframe/lib/filesystem/arrow.py +236 -0
- maxframe/lib/filesystem/base.py +263 -0
- maxframe/lib/filesystem/core.py +95 -0
- maxframe/lib/filesystem/fsmap.py +164 -0
- maxframe/lib/filesystem/hdfs.py +31 -0
- maxframe/lib/filesystem/local.py +112 -0
- maxframe/lib/filesystem/oss.py +157 -0
- maxframe/lib/filesystem/tests/__init__.py +13 -0
- maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
- maxframe/lib/filesystem/tests/test_oss.py +182 -0
- maxframe/lib/functools_compat.py +81 -0
- maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
- maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
- maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
- maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
- maxframe/lib/sparse/__init__.py +861 -0
- maxframe/lib/sparse/array.py +1604 -0
- maxframe/lib/sparse/core.py +92 -0
- maxframe/lib/sparse/matrix.py +241 -0
- maxframe/lib/sparse/tests/__init__.py +15 -0
- maxframe/lib/sparse/tests/test_sparse.py +476 -0
- maxframe/lib/sparse/vector.py +150 -0
- maxframe/lib/tblib/LICENSE +20 -0
- maxframe/lib/tblib/__init__.py +327 -0
- maxframe/lib/tblib/cpython.py +83 -0
- maxframe/lib/tblib/decorators.py +44 -0
- maxframe/lib/tblib/pickling_support.py +90 -0
- maxframe/lib/tests/__init__.py +13 -0
- maxframe/lib/tests/test_wrapped_pickle.py +51 -0
- maxframe/lib/version.py +620 -0
- maxframe/lib/wrapped_pickle.py +139 -0
- maxframe/mixin.py +100 -0
- maxframe/odpsio/__init__.py +21 -0
- maxframe/odpsio/arrow.py +91 -0
- maxframe/odpsio/schema.py +364 -0
- maxframe/odpsio/tableio.py +322 -0
- maxframe/odpsio/tests/__init__.py +13 -0
- maxframe/odpsio/tests/test_arrow.py +88 -0
- maxframe/odpsio/tests/test_schema.py +297 -0
- maxframe/odpsio/tests/test_tableio.py +136 -0
- maxframe/odpsio/tests/test_volumeio.py +90 -0
- maxframe/odpsio/volumeio.py +95 -0
- maxframe/opcodes.py +590 -0
- maxframe/protocol.py +415 -0
- maxframe/remote/__init__.py +18 -0
- maxframe/remote/core.py +210 -0
- maxframe/remote/run_script.py +121 -0
- maxframe/serialization/__init__.py +26 -0
- maxframe/serialization/arrow.py +95 -0
- maxframe/serialization/core.cpython-311-darwin.so +0 -0
- maxframe/serialization/core.pxd +44 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/serialization/core.pyx +1094 -0
- maxframe/serialization/exception.py +86 -0
- maxframe/serialization/maxframe_objects.py +39 -0
- maxframe/serialization/numpy.py +91 -0
- maxframe/serialization/pandas.py +202 -0
- maxframe/serialization/scipy.py +71 -0
- maxframe/serialization/serializables/__init__.py +55 -0
- maxframe/serialization/serializables/core.py +262 -0
- maxframe/serialization/serializables/field.py +624 -0
- maxframe/serialization/serializables/field_type.py +589 -0
- maxframe/serialization/serializables/tests/__init__.py +13 -0
- maxframe/serialization/serializables/tests/test_field_type.py +121 -0
- maxframe/serialization/serializables/tests/test_serializable.py +250 -0
- maxframe/serialization/tests/__init__.py +13 -0
- maxframe/serialization/tests/test_serial.py +412 -0
- maxframe/session.py +1310 -0
- maxframe/tensor/__init__.py +183 -0
- maxframe/tensor/arithmetic/__init__.py +315 -0
- maxframe/tensor/arithmetic/abs.py +68 -0
- maxframe/tensor/arithmetic/absolute.py +68 -0
- maxframe/tensor/arithmetic/add.py +82 -0
- maxframe/tensor/arithmetic/angle.py +72 -0
- maxframe/tensor/arithmetic/arccos.py +104 -0
- maxframe/tensor/arithmetic/arccosh.py +91 -0
- maxframe/tensor/arithmetic/arcsin.py +94 -0
- maxframe/tensor/arithmetic/arcsinh.py +86 -0
- maxframe/tensor/arithmetic/arctan.py +106 -0
- maxframe/tensor/arithmetic/arctan2.py +128 -0
- maxframe/tensor/arithmetic/arctanh.py +86 -0
- maxframe/tensor/arithmetic/around.py +114 -0
- maxframe/tensor/arithmetic/bitand.py +95 -0
- maxframe/tensor/arithmetic/bitor.py +102 -0
- maxframe/tensor/arithmetic/bitxor.py +95 -0
- maxframe/tensor/arithmetic/cbrt.py +66 -0
- maxframe/tensor/arithmetic/ceil.py +71 -0
- maxframe/tensor/arithmetic/clip.py +165 -0
- maxframe/tensor/arithmetic/conj.py +74 -0
- maxframe/tensor/arithmetic/copysign.py +78 -0
- maxframe/tensor/arithmetic/core.py +544 -0
- maxframe/tensor/arithmetic/cos.py +85 -0
- maxframe/tensor/arithmetic/cosh.py +72 -0
- maxframe/tensor/arithmetic/deg2rad.py +72 -0
- maxframe/tensor/arithmetic/degrees.py +77 -0
- maxframe/tensor/arithmetic/divide.py +114 -0
- maxframe/tensor/arithmetic/equal.py +76 -0
- maxframe/tensor/arithmetic/exp.py +106 -0
- maxframe/tensor/arithmetic/exp2.py +67 -0
- maxframe/tensor/arithmetic/expm1.py +79 -0
- maxframe/tensor/arithmetic/fabs.py +74 -0
- maxframe/tensor/arithmetic/fix.py +69 -0
- maxframe/tensor/arithmetic/float_power.py +103 -0
- maxframe/tensor/arithmetic/floor.py +77 -0
- maxframe/tensor/arithmetic/floordiv.py +94 -0
- maxframe/tensor/arithmetic/fmax.py +105 -0
- maxframe/tensor/arithmetic/fmin.py +106 -0
- maxframe/tensor/arithmetic/fmod.py +99 -0
- maxframe/tensor/arithmetic/frexp.py +92 -0
- maxframe/tensor/arithmetic/greater.py +77 -0
- maxframe/tensor/arithmetic/greater_equal.py +69 -0
- maxframe/tensor/arithmetic/hypot.py +77 -0
- maxframe/tensor/arithmetic/i0.py +89 -0
- maxframe/tensor/arithmetic/imag.py +67 -0
- maxframe/tensor/arithmetic/invert.py +110 -0
- maxframe/tensor/arithmetic/isclose.py +115 -0
- maxframe/tensor/arithmetic/iscomplex.py +64 -0
- maxframe/tensor/arithmetic/isfinite.py +106 -0
- maxframe/tensor/arithmetic/isinf.py +103 -0
- maxframe/tensor/arithmetic/isnan.py +82 -0
- maxframe/tensor/arithmetic/isreal.py +63 -0
- maxframe/tensor/arithmetic/ldexp.py +99 -0
- maxframe/tensor/arithmetic/less.py +69 -0
- maxframe/tensor/arithmetic/less_equal.py +69 -0
- maxframe/tensor/arithmetic/log.py +92 -0
- maxframe/tensor/arithmetic/log10.py +85 -0
- maxframe/tensor/arithmetic/log1p.py +95 -0
- maxframe/tensor/arithmetic/log2.py +85 -0
- maxframe/tensor/arithmetic/logaddexp.py +80 -0
- maxframe/tensor/arithmetic/logaddexp2.py +78 -0
- maxframe/tensor/arithmetic/logical_and.py +81 -0
- maxframe/tensor/arithmetic/logical_not.py +74 -0
- maxframe/tensor/arithmetic/logical_or.py +82 -0
- maxframe/tensor/arithmetic/logical_xor.py +88 -0
- maxframe/tensor/arithmetic/lshift.py +82 -0
- maxframe/tensor/arithmetic/maximum.py +108 -0
- maxframe/tensor/arithmetic/minimum.py +108 -0
- maxframe/tensor/arithmetic/mod.py +104 -0
- maxframe/tensor/arithmetic/modf.py +83 -0
- maxframe/tensor/arithmetic/multiply.py +81 -0
- maxframe/tensor/arithmetic/nan_to_num.py +99 -0
- maxframe/tensor/arithmetic/negative.py +65 -0
- maxframe/tensor/arithmetic/nextafter.py +68 -0
- maxframe/tensor/arithmetic/not_equal.py +72 -0
- maxframe/tensor/arithmetic/positive.py +47 -0
- maxframe/tensor/arithmetic/power.py +106 -0
- maxframe/tensor/arithmetic/rad2deg.py +71 -0
- maxframe/tensor/arithmetic/radians.py +77 -0
- maxframe/tensor/arithmetic/real.py +70 -0
- maxframe/tensor/arithmetic/reciprocal.py +76 -0
- maxframe/tensor/arithmetic/rint.py +68 -0
- maxframe/tensor/arithmetic/rshift.py +81 -0
- maxframe/tensor/arithmetic/setimag.py +29 -0
- maxframe/tensor/arithmetic/setreal.py +29 -0
- maxframe/tensor/arithmetic/sign.py +81 -0
- maxframe/tensor/arithmetic/signbit.py +65 -0
- maxframe/tensor/arithmetic/sin.py +98 -0
- maxframe/tensor/arithmetic/sinc.py +102 -0
- maxframe/tensor/arithmetic/sinh.py +93 -0
- maxframe/tensor/arithmetic/spacing.py +72 -0
- maxframe/tensor/arithmetic/sqrt.py +81 -0
- maxframe/tensor/arithmetic/square.py +69 -0
- maxframe/tensor/arithmetic/subtract.py +81 -0
- maxframe/tensor/arithmetic/tan.py +88 -0
- maxframe/tensor/arithmetic/tanh.py +92 -0
- maxframe/tensor/arithmetic/tests/__init__.py +15 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
- maxframe/tensor/arithmetic/truediv.py +104 -0
- maxframe/tensor/arithmetic/trunc.py +72 -0
- maxframe/tensor/arithmetic/utils.py +65 -0
- maxframe/tensor/array_utils.py +186 -0
- maxframe/tensor/base/__init__.py +34 -0
- maxframe/tensor/base/astype.py +119 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/broadcast_to.py +89 -0
- maxframe/tensor/base/ravel.py +92 -0
- maxframe/tensor/base/tests/__init__.py +13 -0
- maxframe/tensor/base/tests/test_base.py +114 -0
- maxframe/tensor/base/transpose.py +125 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/base/where.py +127 -0
- maxframe/tensor/core.py +724 -0
- maxframe/tensor/datasource/__init__.py +32 -0
- maxframe/tensor/datasource/arange.py +156 -0
- maxframe/tensor/datasource/array.py +415 -0
- maxframe/tensor/datasource/core.py +109 -0
- maxframe/tensor/datasource/empty.py +169 -0
- maxframe/tensor/datasource/from_dataframe.py +70 -0
- maxframe/tensor/datasource/from_dense.py +54 -0
- maxframe/tensor/datasource/from_sparse.py +47 -0
- maxframe/tensor/datasource/full.py +186 -0
- maxframe/tensor/datasource/ones.py +173 -0
- maxframe/tensor/datasource/scalar.py +40 -0
- maxframe/tensor/datasource/tests/__init__.py +13 -0
- maxframe/tensor/datasource/tests/test_datasource.py +278 -0
- maxframe/tensor/datasource/zeros.py +188 -0
- maxframe/tensor/fetch/__init__.py +15 -0
- maxframe/tensor/fetch/core.py +54 -0
- maxframe/tensor/indexing/__init__.py +47 -0
- maxframe/tensor/indexing/choose.py +196 -0
- maxframe/tensor/indexing/compress.py +124 -0
- maxframe/tensor/indexing/core.py +190 -0
- maxframe/tensor/indexing/extract.py +71 -0
- maxframe/tensor/indexing/fill_diagonal.py +183 -0
- maxframe/tensor/indexing/flatnonzero.py +60 -0
- maxframe/tensor/indexing/getitem.py +175 -0
- maxframe/tensor/indexing/nonzero.py +120 -0
- maxframe/tensor/indexing/setitem.py +132 -0
- maxframe/tensor/indexing/slice.py +29 -0
- maxframe/tensor/indexing/take.py +130 -0
- maxframe/tensor/indexing/tests/__init__.py +15 -0
- maxframe/tensor/indexing/tests/test_indexing.py +234 -0
- maxframe/tensor/indexing/unravel_index.py +103 -0
- maxframe/tensor/merge/__init__.py +15 -0
- maxframe/tensor/merge/stack.py +132 -0
- maxframe/tensor/merge/tests/__init__.py +13 -0
- maxframe/tensor/merge/tests/test_merge.py +52 -0
- maxframe/tensor/operators.py +123 -0
- maxframe/tensor/random/__init__.py +168 -0
- maxframe/tensor/random/beta.py +87 -0
- maxframe/tensor/random/binomial.py +137 -0
- maxframe/tensor/random/bytes.py +39 -0
- maxframe/tensor/random/chisquare.py +110 -0
- maxframe/tensor/random/choice.py +186 -0
- maxframe/tensor/random/core.py +234 -0
- maxframe/tensor/random/dirichlet.py +123 -0
- maxframe/tensor/random/exponential.py +94 -0
- maxframe/tensor/random/f.py +135 -0
- maxframe/tensor/random/gamma.py +128 -0
- maxframe/tensor/random/geometric.py +93 -0
- maxframe/tensor/random/gumbel.py +167 -0
- maxframe/tensor/random/hypergeometric.py +148 -0
- maxframe/tensor/random/laplace.py +133 -0
- maxframe/tensor/random/logistic.py +129 -0
- maxframe/tensor/random/lognormal.py +159 -0
- maxframe/tensor/random/logseries.py +122 -0
- maxframe/tensor/random/multinomial.py +133 -0
- maxframe/tensor/random/multivariate_normal.py +192 -0
- maxframe/tensor/random/negative_binomial.py +125 -0
- maxframe/tensor/random/noncentral_chisquare.py +132 -0
- maxframe/tensor/random/noncentral_f.py +126 -0
- maxframe/tensor/random/normal.py +143 -0
- maxframe/tensor/random/pareto.py +140 -0
- maxframe/tensor/random/permutation.py +104 -0
- maxframe/tensor/random/poisson.py +111 -0
- maxframe/tensor/random/power.py +142 -0
- maxframe/tensor/random/rand.py +82 -0
- maxframe/tensor/random/randint.py +121 -0
- maxframe/tensor/random/randn.py +96 -0
- maxframe/tensor/random/random_integers.py +123 -0
- maxframe/tensor/random/random_sample.py +86 -0
- maxframe/tensor/random/rayleigh.py +110 -0
- maxframe/tensor/random/shuffle.py +61 -0
- maxframe/tensor/random/standard_cauchy.py +105 -0
- maxframe/tensor/random/standard_exponential.py +72 -0
- maxframe/tensor/random/standard_gamma.py +120 -0
- maxframe/tensor/random/standard_normal.py +74 -0
- maxframe/tensor/random/standard_t.py +135 -0
- maxframe/tensor/random/tests/__init__.py +15 -0
- maxframe/tensor/random/tests/test_random.py +167 -0
- maxframe/tensor/random/triangular.py +119 -0
- maxframe/tensor/random/uniform.py +131 -0
- maxframe/tensor/random/vonmises.py +131 -0
- maxframe/tensor/random/wald.py +114 -0
- maxframe/tensor/random/weibull.py +140 -0
- maxframe/tensor/random/zipf.py +122 -0
- maxframe/tensor/rechunk/__init__.py +26 -0
- maxframe/tensor/rechunk/rechunk.py +43 -0
- maxframe/tensor/reduction/__init__.py +66 -0
- maxframe/tensor/reduction/all.py +103 -0
- maxframe/tensor/reduction/allclose.py +88 -0
- maxframe/tensor/reduction/any.py +105 -0
- maxframe/tensor/reduction/argmax.py +103 -0
- maxframe/tensor/reduction/argmin.py +103 -0
- maxframe/tensor/reduction/array_equal.py +64 -0
- maxframe/tensor/reduction/core.py +168 -0
- maxframe/tensor/reduction/count_nonzero.py +81 -0
- maxframe/tensor/reduction/cumprod.py +97 -0
- maxframe/tensor/reduction/cumsum.py +101 -0
- maxframe/tensor/reduction/max.py +120 -0
- maxframe/tensor/reduction/mean.py +123 -0
- maxframe/tensor/reduction/min.py +120 -0
- maxframe/tensor/reduction/nanargmax.py +82 -0
- maxframe/tensor/reduction/nanargmin.py +76 -0
- maxframe/tensor/reduction/nancumprod.py +91 -0
- maxframe/tensor/reduction/nancumsum.py +94 -0
- maxframe/tensor/reduction/nanmax.py +111 -0
- maxframe/tensor/reduction/nanmean.py +106 -0
- maxframe/tensor/reduction/nanmin.py +111 -0
- maxframe/tensor/reduction/nanprod.py +94 -0
- maxframe/tensor/reduction/nanstd.py +126 -0
- maxframe/tensor/reduction/nansum.py +115 -0
- maxframe/tensor/reduction/nanvar.py +149 -0
- maxframe/tensor/reduction/prod.py +130 -0
- maxframe/tensor/reduction/std.py +134 -0
- maxframe/tensor/reduction/sum.py +125 -0
- maxframe/tensor/reduction/tests/__init__.py +13 -0
- maxframe/tensor/reduction/tests/test_reduction.py +181 -0
- maxframe/tensor/reduction/var.py +176 -0
- maxframe/tensor/reshape/__init__.py +17 -0
- maxframe/tensor/reshape/reshape.py +188 -0
- maxframe/tensor/reshape/tests/__init__.py +15 -0
- maxframe/tensor/reshape/tests/test_reshape.py +37 -0
- maxframe/tensor/statistics/__init__.py +13 -0
- maxframe/tensor/statistics/percentile.py +175 -0
- maxframe/tensor/statistics/quantile.py +288 -0
- maxframe/tensor/ufunc/__init__.py +26 -0
- maxframe/tensor/ufunc/ufunc.py +200 -0
- maxframe/tensor/utils.py +718 -0
- maxframe/tests/__init__.py +13 -0
- maxframe/tests/test_codegen.py +69 -0
- maxframe/tests/test_protocol.py +144 -0
- maxframe/tests/test_utils.py +376 -0
- maxframe/tests/utils.py +164 -0
- maxframe/typing_.py +37 -0
- maxframe/udf.py +134 -0
- maxframe/utils.py +1114 -0
- maxframe-0.1.0b5.dist-info/METADATA +104 -0
- maxframe-0.1.0b5.dist-info/RECORD +647 -0
- maxframe-0.1.0b5.dist-info/WHEEL +5 -0
- maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
- maxframe_client/__init__.py +17 -0
- maxframe_client/clients/__init__.py +13 -0
- maxframe_client/clients/framedriver.py +118 -0
- maxframe_client/clients/spe.py +104 -0
- maxframe_client/conftest.py +15 -0
- maxframe_client/fetcher.py +264 -0
- maxframe_client/session/__init__.py +22 -0
- maxframe_client/session/consts.py +36 -0
- maxframe_client/session/graph.py +119 -0
- maxframe_client/session/odps.py +482 -0
- maxframe_client/session/task.py +280 -0
- maxframe_client/session/tests/__init__.py +13 -0
- maxframe_client/session/tests/test_task.py +85 -0
- maxframe_client/tests/__init__.py +13 -0
- maxframe_client/tests/test_fetcher.py +89 -0
- maxframe_client/tests/test_session.py +255 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import dataclasses
|
|
16
|
+
import re
|
|
17
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
import pandas as pd
|
|
21
|
+
from odps import ODPS
|
|
22
|
+
from odps.types import Column, OdpsSchema, validate_data_type
|
|
23
|
+
|
|
24
|
+
from ... import opcodes
|
|
25
|
+
from ...core import OutputType
|
|
26
|
+
from ...core.graph import DAG
|
|
27
|
+
from ...odpsio import odps_schema_to_pandas_dtypes
|
|
28
|
+
from ...serialization.serializables import (
|
|
29
|
+
AnyField,
|
|
30
|
+
BoolField,
|
|
31
|
+
FieldTypes,
|
|
32
|
+
Int64Field,
|
|
33
|
+
ListField,
|
|
34
|
+
SeriesField,
|
|
35
|
+
StringField,
|
|
36
|
+
)
|
|
37
|
+
from ..utils import parse_index
|
|
38
|
+
from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
|
|
39
|
+
|
|
40
|
+
# Patterns used to pick apart the text returned by an ``EXPLAIN`` statement.

# "<name> depends on: <comma separated names>"; group 1 is the dependent,
# group 2 the rest of the line.
_EXPLAIN_DEPENDS_REGEX = re.compile(r"([^\s]+) depends on: ([^\n]+)")
# "<name> is root job"; group 1 is the whitespace-free token before the phrase.
_EXPLAIN_JOB_REGEX = re.compile(r"(\S+) is root job")
# "In Job <name>:"; group 1 is everything up to the colon.
_EXPLAIN_TASKS_HEADER_REGEX = re.compile(r"In Job ([^:]+):")
# "root Tasks: <names>"; group 1 is the rest of the line.
_EXPLAIN_ROOT_TASKS_REGEX = re.compile(r"root Tasks: (.+)")
# "In Task <name>"; group 1 is everything up to the first colon.
_EXPLAIN_TASK_REGEX = re.compile(r"In Task ([^:]+)")
# Whole task section: group 1 is the task name, group 2 the token following
# "FS: output: " (ends at a newline, space or '#'), group 3 the text after
# "schema:" (greedy, so it runs to the end of the section).
_EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
    r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
    re.MULTILINE,
)
# One schema line "<name> (<type>)" with an optional " AS <alias>" suffix;
# group 3 is empty when no alias is present.
_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
class DependencySector:
    """Parsed section made of root names plus directed dependency pairs."""

    # names that are always registered as nodes, even if no edge mentions them
    roots: List[str]
    # (from, to) pairs; each one becomes a directed edge of the graph
    dependencies: List[Tuple[str, str]]

    def build_dag(self) -> DAG:
        """Return a DAG holding every root and every dependency edge."""
        graph = DAG()
        for root in self.roots:
            graph.add_node(root)
        for source, target in self.dependencies:
            # register both endpoints before connecting them
            for node in (source, target):
                graph.add_node(node)
            graph.add_edge(source, target)
        return graph
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclasses.dataclass
class JobsSector(DependencySector):
    """Dependency section describing jobs, with the per-job task sections."""

    # task sections keyed by name; starts empty and is filled in by the parser
    jobs: Dict[str, "TasksSector"] = dataclasses.field(default_factory=dict)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclasses.dataclass
class TasksSector(DependencySector):
    """Dependency section describing the tasks that belong to one job."""

    # name of the job this section was declared for
    job_name: str
    # individual task sections keyed by name; starts empty
    tasks: Dict[str, "TaskSector"] = dataclasses.field(default_factory=dict)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclasses.dataclass
class ColumnSchema:
    """Name, type string and optional alias of one output column."""

    # column name as written in the schema line
    column_name: str
    # textual data type taken from the parentheses after the name
    column_type: str
    # alias following "AS", if any
    column_alias: Optional[str]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclasses.dataclass
class TaskSector:
    """Details parsed from a single task section."""

    # job the task belongs to
    job_name: str
    # name of the task itself
    task_name: str
    # where the task writes its output; None when no output was detected
    output_target: Optional[str]
    # output columns in declaration order; empty when no schema was detected
    schema: List[ColumnSchema]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _split_explain_string(explain_string: str) -> List[str]:
|
|
95
|
+
parts = explain_string.split("\n\n")
|
|
96
|
+
final_parts = []
|
|
97
|
+
grouped = []
|
|
98
|
+
for part in parts:
|
|
99
|
+
part = part.strip("\n")
|
|
100
|
+
if grouped and not part.startswith(" "):
|
|
101
|
+
final_parts.append("\n\n".join(grouped).strip())
|
|
102
|
+
grouped = []
|
|
103
|
+
grouped.append(part)
|
|
104
|
+
if grouped:
|
|
105
|
+
final_parts.append("\n\n".join(grouped).strip())
|
|
106
|
+
return final_parts
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _find_all_deps(sector: str) -> List[Tuple[str, str]]:
    """Collect dependency pairs from every "depends on" line of a section.

    Each returned tuple is ``(dependency, dependent)``: the comma separated
    names after "depends on:" are paired, one by one and whitespace-stripped,
    with the name in front of it.
    """
    return [
        (ancestor.strip(), descendant)
        for descendant, ancestors in _EXPLAIN_DEPENDS_REGEX.findall(sector)
        for ancestor in ancestors.split(",")
    ]
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _resolve_jobs_sector(sector: str) -> JobsSector:
    """Build a JobsSector from the section that names the root job.

    The section must contain an "<name> is root job" phrase; callers check
    that before invoking this helper.
    """
    root_match = _EXPLAIN_JOB_REGEX.search(sector)
    root_names = [name.strip() for name in root_match.group(1).split(",")]
    return JobsSector(root_names, _find_all_deps(sector))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _resolve_tasks_sector(sector: str) -> TasksSector:
    """Build a TasksSector from an "In Job <name>:" section.

    Root task names come from the "root Tasks:" line, the job name from the
    section header, and the edges from the "depends on" lines.
    """
    root_line = _EXPLAIN_ROOT_TASKS_REGEX.search(sector).group(1)
    root_tasks = [name.strip() for name in root_line.split(",")]

    owner_job = _EXPLAIN_TASKS_HEADER_REGEX.search(sector).group(1)

    return TasksSector(root_tasks, _find_all_deps(sector), owner_job)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _resolve_task_sector(job_name: str, sector: str) -> TaskSector:
    """Build a TaskSector from an "In Task <name>" section.

    When the section carries no "FS: output" / "schema:" part, the result has
    ``output_target`` set to None and an empty schema. Otherwise every schema
    line becomes a ColumnSchema; a column without "AS <alias>" gets an empty
    string as its alias.
    """
    task_name = _EXPLAIN_TASK_REGEX.match(sector).group(1)

    schema_match = _EXPLAIN_TASK_SCHEMA_REGEX.match(sector)
    if schema_match is None:
        return TaskSector(job_name, task_name, None, [])

    # group 1 repeats the task name and is not needed again
    _, target, schema_text = schema_match.groups()
    columns = [
        ColumnSchema(*(piece.strip() for piece in found))
        for found in _EXPLAIN_COLUMN_REGEX.findall(schema_text)
    ]
    return TaskSector(job_name, task_name, target, columns)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _parse_explained_schema(explain_string: str) -> OdpsSchema:
    """Derive the output schema of a query from its ``EXPLAIN`` text.

    The text is split into sections, which are sorted into a jobs section,
    per-job tasks sections and per-task sections. The final job and, inside
    it, the final task (the nodes yielded by ``iter_indep(reverse=True)``)
    are located, and the schema of that task is converted into an OdpsSchema.

    Parameters
    ----------
    explain_string: str
        Text produced by an ``EXPLAIN <query>`` statement.

    Returns
    -------
    schema: OdpsSchema
        Columns of the final task; a column alias wins over its name.

    Raises
    ------
    ValueError
        If the sections are missing or out of order, if there is not exactly
        one final job / task, if no schema can be detected or if the final
        task does not write to "Screen".
    """
    jobs_sector = tasks_sector = None

    for sector in _split_explain_string(explain_string):
        if _EXPLAIN_JOB_REGEX.search(sector):
            jobs_sector = _resolve_jobs_sector(sector)
        elif _EXPLAIN_TASKS_HEADER_REGEX.search(sector):
            # explicit check instead of ``assert``, which is removed under -O
            if jobs_sector is None:
                raise ValueError(
                    "Tasks section appears before root job section in explain result"
                )
            tasks_sector = _resolve_tasks_sector(sector)
            jobs_sector.jobs[tasks_sector.job_name] = tasks_sector
        elif _EXPLAIN_TASK_REGEX.search(sector):
            if tasks_sector is None:
                raise ValueError(
                    "Task section appears before tasks section in explain result"
                )
            task_sector = _resolve_task_sector(tasks_sector.job_name, sector)
            tasks_sector.tasks[task_sector.task_name] = task_sector

    if jobs_sector is None:
        raise ValueError("Cannot find root job in explain result")

    job_dag = jobs_sector.build_dag()
    indep_job_names = list(job_dag.iter_indep(reverse=True))
    if not indep_job_names:
        raise ValueError("Cannot find final job in explain result")
    if len(indep_job_names) > 1:  # pragma: no cover
        raise ValueError("Only one final job is allowed in SQL statement")

    tasks_sector = jobs_sector.jobs[indep_job_names[0]]
    task_dag = tasks_sector.build_dag()
    indep_task_names = list(task_dag.iter_indep(reverse=True))
    if not indep_task_names:
        raise ValueError("Cannot find final task in explain result")
    if len(indep_task_names) > 1:  # pragma: no cover
        raise ValueError("Only one final task is allowed in SQL statement")

    task_sector = tasks_sector.tasks[indep_task_names[0]]
    if not task_sector.schema:  # pragma: no cover
        raise ValueError("Cannot detect output schema")
    if task_sector.output_target != "Screen":
        raise ValueError("The SQL statement should be an instant query")
    cols = [
        # an empty or missing alias falls back to the column name
        Column(c.column_alias or c.column_name, validate_data_type(c.column_type))
        for c in task_sector.schema
    ]
    return OdpsSchema(cols)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class DataFrameReadODPSQuery(
    IncrementalIndexDatasource,
    ColumnPruneSupportedDataSourceMixin,
):
    """Data source operator describing a DataFrame read from an ODPS query."""

    _op_type_ = opcodes.READ_ODPS_QUERY

    # SQL statement to read from
    query = StringField("query")
    # dtypes of the data (non-index) columns, indexed by column name
    dtypes = SeriesField("dtypes", default=None)
    # selected columns; replaced by ``set_pruned_columns``
    columns = AnyField("columns", default=None)
    nrows = Int64Field("nrows", default=None)
    use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
    string_as_binary = BoolField("string_as_binary", default=None)
    # names and dtypes of the columns used as index, if any
    index_columns = ListField("index_columns", FieldTypes.string, default=None)
    index_dtypes = SeriesField("index_dtypes", default=None)

    def get_columns(self):
        """Return the currently selected columns (None if never set)."""
        return self.columns

    def set_pruned_columns(self, columns, *, keep_order=None):  # pragma: no cover
        """Record the pruned column selection; ``keep_order`` is not used here."""
        self.columns = columns

    def __call__(self, chunk_bytes=None, chunk_size=None):
        """Create the DataFrame tileable produced by this operator.

        The index metadata is an empty placeholder: a RangeIndex when no
        index columns are configured, a typed Index for a single index
        column and a MultiIndex for several. The row count is unknown before
        the query runs, so the first shape dimension is NaN.
        """
        if not self.index_columns:
            index_value = parse_index(pd.RangeIndex(0))
        elif len(self.index_columns) == 1:
            index_value = parse_index(
                pd.Index([], name=self.index_columns[0]).astype(
                    self.index_dtypes.iloc[0]
                )
            )
        else:
            idx = pd.MultiIndex.from_frame(
                pd.DataFrame([], columns=self.index_columns).astype(self.index_dtypes)
            )
            index_value = parse_index(idx)

        columns_value = parse_index(self.dtypes.index, store_data=True)
        self.output_types = [OutputType.dataframe]
        return self.new_tileable(
            [],
            None,
            # (rows, columns): rows unknown, one column per dtype entry
            shape=(np.nan, len(self.dtypes)),
            dtypes=self.dtypes,
            index_value=index_value,
            columns_value=columns_value,
            chunk_bytes=chunk_bytes,
            chunk_size=chunk_size,
        )
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def read_odps_query(
    query: str,
    odps_entry: ODPS = None,
    index_col: Union[None, str, List[str]] = None,
    string_as_binary: bool = None,
    **kw,
):
    """
    Read the result of a MaxCompute (ODPS) query into DataFrame.

    The statement is first sent as ``EXPLAIN <query>`` and the output schema
    is parsed from the result, so column names and types are known up front.
    Some columns may be used as indexes. If none are given, RangeIndex
    will be generated.

    Parameters
    ----------
    query: str
        MaxCompute SQL statement.
    odps_entry: ODPS
        ODPS entry object. When omitted, ``ODPS.from_global()`` and then
        ``ODPS.from_environments()`` are tried.
    index_col: Union[None, str, List[str]]
        Columns to be specified as indexes.
    string_as_binary: bool
        Passed to the operator unchanged.
    **kw
        ``chunk_bytes`` and ``chunk_size`` are forwarded when creating the
        DataFrame; ``use_arrow_dtype`` (default True) is passed to the operator.

    Returns
    -------
    result: DataFrame
        DataFrame read from the MaxCompute (ODPS) query

    Raises
    ------
    ValueError
        If no ODPS entry object can be obtained.
    """
    odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
    if odps_entry is None:
        raise ValueError("Missing odps_entry parameter")

    explain_inst = odps_entry.execute_sql(f"EXPLAIN {query}")
    # only the first task result is used
    explain_text = list(explain_inst.get_task_results().values())[0]

    dtypes = odps_schema_to_pandas_dtypes(_parse_explained_schema(explain_text))

    index_dtypes = None
    if index_col:
        if isinstance(index_col, str):
            index_col = [index_col]
        index_names = set(index_col)
        # split the schema into index columns and remaining data columns
        value_cols = [name for name in dtypes.index if name not in index_names]
        index_dtypes = pd.Series([dtypes[name] for name in index_col], index=index_col)
        dtypes = pd.Series([dtypes[name] for name in value_cols], index=value_cols)

    chunk_bytes = kw.pop("chunk_bytes", None)
    chunk_size = kw.pop("chunk_size", None)
    op_params = dict(
        query=query,
        dtypes=dtypes,
        use_arrow_dtype=kw.pop("use_arrow_dtype", True),
        string_as_binary=string_as_binary,
        index_columns=index_col,
        index_dtypes=index_dtypes,
    )
    op = DataFrameReadODPSQuery(**op_params)
    return op(chunk_bytes=chunk_bytes, chunk_size=chunk_size)
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from typing import List, Optional, Union
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from odps import ODPS
|
|
21
|
+
from odps.models import Table
|
|
22
|
+
from odps.utils import to_timestamp
|
|
23
|
+
|
|
24
|
+
from ... import opcodes
|
|
25
|
+
from ...core import OutputType
|
|
26
|
+
from ...odpsio import odps_schema_to_pandas_dtypes
|
|
27
|
+
from ...serialization.serializables import (
|
|
28
|
+
AnyField,
|
|
29
|
+
BoolField,
|
|
30
|
+
FieldTypes,
|
|
31
|
+
Int64Field,
|
|
32
|
+
ListField,
|
|
33
|
+
SeriesField,
|
|
34
|
+
StringField,
|
|
35
|
+
)
|
|
36
|
+
from ..core import DataFrame # noqa: F401
|
|
37
|
+
from ..utils import parse_index
|
|
38
|
+
from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
|
|
39
|
+
|
|
40
|
+
# module-level logger named after this module
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class DataFrameReadODPSTable(
    IncrementalIndexDatasource,
    ColumnPruneSupportedDataSourceMixin,
):
    """Data source operator describing data read from an ODPS table."""

    _op_type_ = opcodes.READ_ODPS_TABLE

    # full name of the table to read
    table_name = StringField("table_name")
    # partition specs to read; None when not restricted
    partitions = ListField("partitions", field_type=FieldTypes.string, default=None)
    # dtypes of the data (non-index) columns, indexed by column name
    dtypes = SeriesField("dtypes", default=None)
    # selected columns; replaced by ``set_pruned_columns``
    columns = AnyField("columns", default=None)
    nrows = Int64Field("nrows", default=None)
    use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
    string_as_binary = BoolField("string_as_binary", default=None)
    append_partitions = BoolField("append_partitions", default=None)
    last_modified_time = Int64Field("last_modified_time", default=None)
    # names and dtypes of the columns used as index, if any
    index_columns = ListField("index_columns", FieldTypes.string, default=None)
    index_dtypes = SeriesField("index_dtypes", default=None)

    def __init__(self, memory_scale=None, **kw):
        """Initialize the operator; output type defaults to dataframe."""
        output_type = kw.get("output_type", OutputType.dataframe)
        super(DataFrameReadODPSTable, self).__init__(
            memory_scale=memory_scale, _output_types=[output_type], **kw
        )

    @property
    def partition(self):
        """Return the ``partition_spec`` attribute, or None if it is absent."""
        return getattr(self, "partition_spec", None)

    def get_columns(self):
        """Return the selected columns, defaulting to all dtype entries."""
        return self.columns or list(self.dtypes.index)

    def set_pruned_columns(self, columns, *, keep_order=None):  # pragma: no cover
        """Record the pruned column selection; ``keep_order`` is not used here."""
        self.columns = columns

    def __call__(self, shape, chunk_bytes=None, chunk_size=None):
        """Create the tileable produced by this operator.

        Parameters
        ----------
        shape: tuple
            Shape of the data; ``shape[0]`` may be NaN when the row count is
            unknown. One-dimensional outputs only use the first dimension.
        chunk_bytes, chunk_size
            Forwarded to ``new_tileable`` unchanged.

        Returns
        -------
        A DataFrame, Series or Index tileable depending on the configured
        output type; None for any other output type.
        """
        if not self.index_columns:
            if np.isnan(shape[0]):
                # row count unknown: use an empty placeholder index
                index_value = parse_index(pd.RangeIndex(0))
            else:
                index_value = parse_index(pd.RangeIndex(shape[0]))
        elif len(self.index_columns) == 1:
            index_value = parse_index(
                pd.Index([], name=self.index_columns[0]).astype(
                    self.index_dtypes.iloc[0]
                )
            )
        else:
            idx = pd.MultiIndex.from_frame(
                pd.DataFrame([], columns=self.index_columns).astype(self.index_dtypes)
            )
            index_value = parse_index(idx)

        if self.output_types[0] == OutputType.dataframe:
            columns_value = parse_index(self.dtypes.index, store_data=True)
            return self.new_tileable(
                [],
                None,
                shape=shape,
                dtypes=self.dtypes,
                index_value=index_value,
                columns_value=columns_value,
                chunk_bytes=chunk_bytes,
                chunk_size=chunk_size,
            )
        elif self.output_types[0] == OutputType.series:
            return self.new_tileable(
                [],
                None,
                shape=shape[:1],
                name=self.dtypes.index[0],
                dtype=self.dtypes.iloc[0],
                index_value=index_value,
                chunk_bytes=chunk_bytes,
                chunk_size=chunk_size,
            )
        elif self.output_types[0] == OutputType.index:
            return self.new_tileable(
                [],
                None,
                # an index is one-dimensional, same as the series branch;
                # slicing leaves an already 1-D shape untouched
                shape=shape[:1],
                name=getattr(index_value, "name", None),
                names=getattr(index_value, "names", None),
                index_value=index_value,
                chunk_bytes=chunk_bytes,
                chunk_size=chunk_size,
            )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def read_odps_table(
    table_name: Union[str, Table],
    partitions: Union[None, str, List[str]] = None,
    columns: Optional[List[str]] = None,
    index_col: Union[None, str, List[str]] = None,
    odps_entry: ODPS = None,
    string_as_binary: bool = None,
    append_partitions: bool = False,
    **kw,
):
    """
    Read data from a MaxCompute (ODPS) table into DataFrame.

    Supports specifying some columns as indexes. If not specified, RangeIndex
    will be generated. Column names are matched case-insensitively and are
    handled in lower case.

    Parameters
    ----------
    table_name: Union[str, Table]
        Name of the table to read from, or the table object itself.
    partitions: Union[None, str, List[str]]
        Table partition or list of partitions to read from.
    columns: Optional[List[str]]
        Table columns to read from. You may also specify partition columns here.
        If not specified, all table columns (or include partition columns if
        `append_partitions` is True) will be included.
    index_col: Union[None, str, List[str]]
        Columns to be specified as indexes.
    odps_entry: ODPS
        ODPS entry object. When omitted, ``ODPS.from_global()`` and then
        ``ODPS.from_environments()`` are tried.
    string_as_binary: bool
        Passed to the operator unchanged.
    append_partitions: bool
        If True, will add all partition columns as selected columns when
        `columns` is not specified.
    **kw
        ``shape``, ``chunk_bytes``, ``chunk_size`` and ``use_arrow_dtype``
        (default True) are consumed here, ``partition`` is used as a
        fallback for ``partitions``, and what remains is passed to the
        operator.

    Returns
    -------
    result: DataFrame
        DataFrame read from MaxCompute (ODPS) table

    Raises
    ------
    ValueError
        If no ODPS entry object can be obtained, if partitions are requested
        on a non-partitioned table, if a requested column or index column
        does not exist, or if index columns and columns overlap.
    """
    odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
    if odps_entry is None:
        raise ValueError("Missing odps_entry parameter")
    if isinstance(table_name, Table):
        table = table_name
    else:
        table = odps_entry.get_table(table_name)

    if not table.table_schema.partitions and (
        partitions is not None or append_partitions
    ):
        raise ValueError("Cannot handle partitions on non-partition tables")

    # partition columns are only candidates when explicitly requested
    cols = (
        table.table_schema.columns
        if append_partitions or columns is not None
        else table.table_schema.simple_columns
    )
    table_columns = [c.name.lower() for c in cols]
    table_dtypes = odps_schema_to_pandas_dtypes(
        table.table_schema, with_partitions=True
    )
    df_types = [table_dtypes[c] for c in table_columns]

    if isinstance(index_col, str):
        index_col = [index_col]
    if index_col:
        # normalize case once so validation and the lookups below agree
        index_col = [c.lower() for c in index_col]
        index_col_set = set(index_col)
        col_diff = sorted(index_col_set - set(table_columns))
        if col_diff:
            raise ValueError(
                f"Cannot found column {', '.join(col_diff)} specified "
                f"in index_columns argument in source table"
            )
        if columns is None:
            # every non-index column becomes a data column
            columns = [c for c in table_columns if c not in index_col_set]

    if not index_col:
        index_dtypes = None
    else:
        table_index_types = [df_types[table_columns.index(col)] for col in index_col]
        index_dtypes = pd.Series(table_index_types, index=index_col)

    if columns is not None:
        table_col_set = set([c.lower() for c in columns])
        col_diff = sorted(table_col_set - set(table_columns))
        if col_diff:
            raise ValueError(
                f"Cannot found column {', '.join(col_diff)} specified "
                f"in columns argument in source table"
            )
        if any(col in table_col_set for col in index_col or ()):
            raise ValueError("Index columns and columns shall not overlap.")

        # reorder columns
        new_columns = [c for c in table_columns if c in table_col_set]
        df_types = [df_types[table_columns.index(col)] for col in new_columns]
        table_columns = new_columns
        columns = new_columns

    shape = kw.pop("shape", None) or (np.nan, len(df_types))
    dtypes = pd.Series(df_types, index=table_columns)
    chunk_bytes = kw.pop("chunk_bytes", None)
    chunk_size = kw.pop("chunk_size", None)
    use_arrow_dtype = kw.pop("use_arrow_dtype", True)

    # NOTE(review): "partition" is read with ``get`` and therefore also stays
    # in ``kw`` passed to the operator below -- confirm this is intended.
    partitions = partitions or kw.get("partition")
    if isinstance(partitions, str):
        partitions = [partitions]

    # ``columns`` holds lower-case names, so compare partition names likewise
    append_partitions = append_partitions or any(
        pt.name.lower() in (columns or ())
        for pt in (table.table_schema.partitions or ())
    )
    op = DataFrameReadODPSTable(
        table_name=table.full_table_name,
        partitions=partitions,
        dtypes=dtypes,
        columns=columns,
        use_arrow_dtype=use_arrow_dtype,
        string_as_binary=string_as_binary,
        append_partitions=append_partitions,
        last_modified_time=to_timestamp(table.last_data_modified_time),
        index_columns=index_col,
        index_dtypes=index_dtypes,
        **kw,
    )
    return op(shape, chunk_bytes=chunk_bytes, chunk_size=chunk_size)
|