PyPI - maxframe - Versions diffs - 1.3.1__cp39-cp39-macosx_10_9_universal2.whl → 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl - Mend

maxframe 1.3.1__cp39-cp39-macosx_10_9_universal2.whl → 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (640)

maxframe/_utils.cpython-39-darwin.so +0 -0
maxframe/_utils.pyi +21 -0
maxframe/_utils.pyx +4 -3
maxframe/codegen/__init__.py +27 -0
maxframe/{codegen.py → codegen/core.py} +49 -43
maxframe/codegen/spe/__init__.py +16 -0
maxframe/codegen/spe/core.py +307 -0
maxframe/codegen/spe/dataframe/__init__.py +37 -0
maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
maxframe/codegen/spe/dataframe/datasource.py +181 -0
maxframe/codegen/spe/dataframe/datastore.py +204 -0
maxframe/codegen/spe/dataframe/extensions.py +63 -0
maxframe/codegen/spe/dataframe/fetch.py +26 -0
maxframe/codegen/spe/dataframe/groupby.py +224 -0
maxframe/codegen/spe/dataframe/indexing.py +238 -0
maxframe/codegen/spe/dataframe/merge.py +73 -0
maxframe/codegen/spe/dataframe/misc.py +286 -0
maxframe/codegen/spe/dataframe/missing.py +64 -0
maxframe/codegen/spe/dataframe/reduction.py +160 -0
maxframe/codegen/spe/dataframe/sort.py +83 -0
maxframe/codegen/spe/dataframe/statistics.py +46 -0
maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
maxframe/codegen/spe/dataframe/tseries.py +46 -0
maxframe/codegen/spe/dataframe/udf.py +62 -0
maxframe/codegen/spe/dataframe/value_counts.py +31 -0
maxframe/codegen/spe/dataframe/window.py +65 -0
maxframe/codegen/spe/learn/__init__.py +15 -0
maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
maxframe/codegen/spe/learn/contrib/models.py +41 -0
maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
maxframe/codegen/spe/learn/utils/__init__.py +15 -0
maxframe/codegen/spe/learn/utils/checks.py +55 -0
maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
maxframe/codegen/spe/learn/utils/validation.py +35 -0
maxframe/codegen/spe/objects.py +26 -0
maxframe/codegen/spe/remote.py +29 -0
maxframe/codegen/spe/tensor/__init__.py +28 -0
maxframe/codegen/spe/tensor/arithmetic.py +95 -0
maxframe/codegen/spe/tensor/core.py +41 -0
maxframe/codegen/spe/tensor/datasource.py +165 -0
maxframe/codegen/spe/tensor/extensions.py +35 -0
maxframe/codegen/spe/tensor/fetch.py +26 -0
maxframe/codegen/spe/tensor/indexing.py +63 -0
maxframe/codegen/spe/tensor/linalg.py +63 -0
maxframe/codegen/spe/tensor/merge.py +31 -0
maxframe/codegen/spe/tensor/misc.py +121 -0
maxframe/codegen/spe/tensor/random.py +29 -0
maxframe/codegen/spe/tensor/reduction.py +39 -0
maxframe/codegen/spe/tensor/reshape.py +26 -0
maxframe/codegen/spe/tensor/sort.py +42 -0
maxframe/codegen/spe/tensor/special.py +35 -0
maxframe/codegen/spe/tensor/statistics.py +24 -0
maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
maxframe/codegen/spe/tests/__init__.py +13 -0
maxframe/codegen/spe/tests/test_remote.py +29 -0
maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
maxframe/codegen/spe/utils.py +54 -0
maxframe/codegen/tests/__init__.py +13 -0
maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
maxframe/config/__init__.py +1 -1
maxframe/config/config.py +50 -23
maxframe/config/tests/test_config.py +4 -12
maxframe/config/validators.py +5 -0
maxframe/conftest.py +38 -10
maxframe/core/__init__.py +1 -0
maxframe/core/context.py +110 -0
maxframe/core/entity/__init__.py +1 -0
maxframe/core/entity/core.py +0 -7
maxframe/core/entity/objects.py +19 -5
maxframe/core/entity/output_types.py +11 -0
maxframe/core/entity/tests/test_objects.py +11 -12
maxframe/core/entity/tileables.py +3 -1
maxframe/core/entity/utils.py +15 -0
maxframe/core/graph/__init__.py +6 -1
maxframe/core/graph/builder/base.py +5 -1
maxframe/core/graph/core.cpython-39-darwin.so +0 -0
maxframe/core/graph/core.pyx +17 -6
maxframe/core/graph/entity.py +18 -6
maxframe/core/operator/__init__.py +8 -3
maxframe/core/operator/base.py +35 -12
maxframe/core/operator/core.py +37 -14
maxframe/core/operator/fetch.py +5 -18
maxframe/core/operator/objects.py +0 -20
maxframe/core/operator/shuffle.py +6 -72
maxframe/dataframe/__init__.py +1 -0
maxframe/dataframe/accessors/datetime_/core.py +7 -4
maxframe/dataframe/accessors/string_/core.py +9 -6
maxframe/dataframe/arithmetic/core.py +31 -20
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/dataframe/core.py +98 -91
maxframe/dataframe/datasource/core.py +8 -1
maxframe/dataframe/datasource/date_range.py +8 -0
maxframe/dataframe/datasource/from_index.py +9 -5
maxframe/dataframe/datasource/from_records.py +9 -2
maxframe/dataframe/datasource/from_tensor.py +32 -21
maxframe/dataframe/datasource/read_csv.py +8 -2
maxframe/dataframe/datasource/read_odps_query.py +109 -19
maxframe/dataframe/datasource/read_odps_table.py +20 -5
maxframe/dataframe/datasource/read_parquet.py +8 -3
maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
maxframe/dataframe/datastore/to_csv.py +7 -3
maxframe/dataframe/datastore/to_odps.py +42 -6
maxframe/dataframe/extensions/__init__.py +6 -1
maxframe/dataframe/extensions/apply_chunk.py +96 -136
maxframe/dataframe/extensions/flatjson.py +3 -2
maxframe/dataframe/extensions/flatmap.py +15 -7
maxframe/dataframe/fetch/core.py +12 -1
maxframe/dataframe/groupby/__init__.py +7 -0
maxframe/dataframe/groupby/aggregation.py +9 -8
maxframe/dataframe/groupby/apply.py +50 -74
maxframe/dataframe/groupby/apply_chunk.py +393 -0
maxframe/dataframe/groupby/core.py +80 -17
maxframe/dataframe/groupby/extensions.py +26 -0
maxframe/dataframe/groupby/fill.py +9 -4
maxframe/dataframe/groupby/sample.py +7 -7
maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
maxframe/dataframe/groupby/transform.py +57 -54
maxframe/dataframe/indexing/align.py +7 -6
maxframe/dataframe/indexing/getitem.py +9 -8
maxframe/dataframe/indexing/iloc.py +28 -23
maxframe/dataframe/indexing/insert.py +7 -3
maxframe/dataframe/indexing/loc.py +9 -8
maxframe/dataframe/indexing/reindex.py +36 -30
maxframe/dataframe/indexing/rename_axis.py +18 -10
maxframe/dataframe/indexing/reset_index.py +0 -2
maxframe/dataframe/indexing/sample.py +13 -9
maxframe/dataframe/indexing/set_axis.py +9 -6
maxframe/dataframe/indexing/setitem.py +8 -5
maxframe/dataframe/indexing/where.py +12 -9
maxframe/dataframe/merge/__init__.py +0 -1
maxframe/dataframe/merge/concat.py +10 -31
maxframe/dataframe/merge/merge.py +2 -24
maxframe/dataframe/misc/__init__.py +6 -0
maxframe/dataframe/misc/_duplicate.py +7 -3
maxframe/dataframe/misc/apply.py +106 -139
maxframe/dataframe/misc/astype.py +3 -2
maxframe/dataframe/misc/case_when.py +11 -7
maxframe/dataframe/misc/cut.py +11 -10
maxframe/dataframe/misc/describe.py +7 -3
maxframe/dataframe/misc/drop.py +13 -11
maxframe/dataframe/misc/eval.py +0 -2
maxframe/dataframe/misc/get_dummies.py +78 -49
maxframe/dataframe/misc/isin.py +13 -10
maxframe/dataframe/misc/map.py +21 -6
maxframe/dataframe/misc/melt.py +8 -1
maxframe/dataframe/misc/pivot.py +232 -0
maxframe/dataframe/misc/pivot_table.py +52 -40
maxframe/dataframe/misc/rechunk.py +59 -0
maxframe/dataframe/misc/shift.py +7 -4
maxframe/dataframe/misc/stack.py +5 -3
maxframe/dataframe/misc/tests/test_misc.py +167 -1
maxframe/dataframe/misc/transform.py +63 -65
maxframe/dataframe/misc/value_counts.py +7 -4
maxframe/dataframe/missing/dropna.py +16 -7
maxframe/dataframe/missing/fillna.py +18 -10
maxframe/dataframe/missing/replace.py +10 -6
maxframe/dataframe/missing/tests/test_missing.py +2 -2
maxframe/dataframe/operators.py +1 -27
maxframe/dataframe/reduction/aggregation.py +65 -3
maxframe/dataframe/reduction/core.py +3 -1
maxframe/dataframe/reduction/median.py +1 -1
maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
maxframe/dataframe/reduction/unique.py +53 -7
maxframe/dataframe/statistics/corr.py +9 -6
maxframe/dataframe/statistics/quantile.py +9 -6
maxframe/dataframe/tseries/to_datetime.py +6 -4
maxframe/dataframe/utils.py +219 -31
maxframe/dataframe/window/rolling.py +7 -4
maxframe/env.py +1 -0
maxframe/errors.py +9 -0
maxframe/extension.py +13 -2
maxframe/io/objects/core.py +67 -51
maxframe/io/objects/tensor.py +73 -17
maxframe/io/objects/tests/test_object_io.py +10 -55
maxframe/io/odpsio/arrow.py +15 -2
maxframe/io/odpsio/schema.py +43 -13
maxframe/io/odpsio/tableio.py +63 -11
maxframe/io/odpsio/tests/test_arrow.py +1 -2
maxframe/io/odpsio/tests/test_schema.py +114 -1
maxframe/io/odpsio/tests/test_tableio.py +42 -0
maxframe/io/odpsio/tests/test_volumeio.py +21 -58
maxframe/io/odpsio/volumeio.py +23 -8
maxframe/learn/__init__.py +2 -2
maxframe/learn/contrib/__init__.py +2 -2
maxframe/learn/contrib/graph/connected_components.py +2 -1
maxframe/learn/contrib/lightgbm/__init__.py +33 -0
maxframe/learn/contrib/lightgbm/_predict.py +138 -0
maxframe/learn/contrib/lightgbm/_train.py +163 -0
maxframe/learn/contrib/lightgbm/callback.py +114 -0
maxframe/learn/contrib/lightgbm/classifier.py +199 -0
maxframe/learn/contrib/lightgbm/core.py +372 -0
maxframe/learn/contrib/lightgbm/dataset.py +153 -0
maxframe/learn/contrib/lightgbm/regressor.py +29 -0
maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
maxframe/learn/contrib/models.py +38 -9
maxframe/learn/contrib/utils.py +55 -0
maxframe/learn/contrib/xgboost/callback.py +86 -0
maxframe/learn/contrib/xgboost/classifier.py +26 -30
maxframe/learn/contrib/xgboost/core.py +54 -42
maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
maxframe/learn/contrib/xgboost/predict.py +16 -9
maxframe/learn/contrib/xgboost/regressor.py +28 -27
maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
maxframe/learn/contrib/xgboost/train.py +59 -16
maxframe/learn/core.py +252 -0
maxframe/learn/datasets/__init__.py +20 -0
maxframe/learn/datasets/samples_generator.py +628 -0
maxframe/learn/linear_model/__init__.py +15 -0
maxframe/learn/linear_model/_base.py +163 -0
maxframe/learn/linear_model/_lin_reg.py +175 -0
maxframe/learn/metrics/__init__.py +25 -0
maxframe/learn/metrics/_check_targets.py +95 -0
maxframe/learn/metrics/_classification.py +1121 -0
maxframe/learn/metrics/_regression.py +256 -0
maxframe/learn/model_selection/__init__.py +15 -0
maxframe/learn/model_selection/_split.py +451 -0
maxframe/learn/model_selection/tests/__init__.py +13 -0
maxframe/learn/model_selection/tests/test_split.py +156 -0
maxframe/learn/preprocessing/__init__.py +16 -0
maxframe/learn/preprocessing/_data/__init__.py +17 -0
maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
maxframe/learn/preprocessing/_data/normalize.py +127 -0
maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
maxframe/learn/preprocessing/_data/utils.py +79 -0
maxframe/learn/preprocessing/_label/__init__.py +16 -0
maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
maxframe/learn/utils/__init__.py +4 -0
maxframe/learn/utils/_encode.py +314 -0
maxframe/learn/utils/checks.py +161 -0
maxframe/learn/utils/core.py +33 -0
maxframe/learn/utils/extmath.py +176 -0
maxframe/learn/utils/multiclass.py +292 -0
maxframe/learn/utils/shuffle.py +114 -0
maxframe/learn/utils/sparsefuncs.py +87 -0
maxframe/learn/utils/validation.py +775 -0
maxframe/lib/__init__.py +0 -2
maxframe/lib/compat.py +145 -0
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
maxframe/lib/sparse/__init__.py +10 -15
maxframe/lib/sparse/array.py +45 -33
maxframe/lib/sparse/core.py +0 -2
maxframe/lib/sparse/linalg.py +31 -0
maxframe/lib/sparse/matrix.py +5 -2
maxframe/lib/sparse/tests/__init__.py +0 -2
maxframe/lib/sparse/tests/test_sparse.py +53 -53
maxframe/lib/sparse/vector.py +0 -2
maxframe/mixin.py +59 -2
maxframe/opcodes.py +13 -5
maxframe/protocol.py +67 -14
maxframe/remote/core.py +16 -14
maxframe/remote/run_script.py +6 -3
maxframe/serialization/__init__.py +2 -0
maxframe/serialization/core.cpython-39-darwin.so +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +3 -1
maxframe/serialization/core.pyx +82 -4
maxframe/serialization/pandas.py +5 -1
maxframe/serialization/serializables/core.py +6 -5
maxframe/serialization/serializables/field.py +2 -2
maxframe/serialization/serializables/tests/test_field_type.py +3 -5
maxframe/serialization/tests/test_serial.py +27 -0
maxframe/session.py +4 -71
maxframe/sperunner.py +165 -0
maxframe/tensor/__init__.py +35 -2
maxframe/tensor/arithmetic/__init__.py +2 -4
maxframe/tensor/arithmetic/abs.py +0 -2
maxframe/tensor/arithmetic/absolute.py +0 -2
maxframe/tensor/arithmetic/add.py +34 -4
maxframe/tensor/arithmetic/angle.py +0 -2
maxframe/tensor/arithmetic/arccos.py +1 -4
maxframe/tensor/arithmetic/arccosh.py +1 -3
maxframe/tensor/arithmetic/arcsin.py +0 -2
maxframe/tensor/arithmetic/arcsinh.py +0 -2
maxframe/tensor/arithmetic/arctan.py +0 -2
maxframe/tensor/arithmetic/arctan2.py +0 -2
maxframe/tensor/arithmetic/arctanh.py +0 -2
maxframe/tensor/arithmetic/around.py +0 -2
maxframe/tensor/arithmetic/bitand.py +0 -2
maxframe/tensor/arithmetic/bitor.py +1 -3
maxframe/tensor/arithmetic/bitxor.py +1 -3
maxframe/tensor/arithmetic/cbrt.py +0 -2
maxframe/tensor/arithmetic/ceil.py +0 -2
maxframe/tensor/arithmetic/clip.py +13 -13
maxframe/tensor/arithmetic/conj.py +0 -2
maxframe/tensor/arithmetic/copysign.py +0 -2
maxframe/tensor/arithmetic/core.py +47 -39
maxframe/tensor/arithmetic/cos.py +1 -3
maxframe/tensor/arithmetic/cosh.py +0 -2
maxframe/tensor/arithmetic/deg2rad.py +0 -2
maxframe/tensor/arithmetic/degrees.py +0 -2
maxframe/tensor/arithmetic/divide.py +0 -2
maxframe/tensor/arithmetic/equal.py +0 -2
maxframe/tensor/arithmetic/exp.py +1 -3
maxframe/tensor/arithmetic/exp2.py +0 -2
maxframe/tensor/arithmetic/expm1.py +0 -2
maxframe/tensor/arithmetic/fabs.py +0 -2
maxframe/tensor/arithmetic/fix.py +0 -2
maxframe/tensor/arithmetic/float_power.py +0 -2
maxframe/tensor/arithmetic/floor.py +0 -2
maxframe/tensor/arithmetic/floordiv.py +0 -2
maxframe/tensor/arithmetic/fmax.py +0 -2
maxframe/tensor/arithmetic/fmin.py +0 -2
maxframe/tensor/arithmetic/fmod.py +0 -2
maxframe/tensor/arithmetic/frexp.py +6 -2
maxframe/tensor/arithmetic/greater.py +0 -2
maxframe/tensor/arithmetic/greater_equal.py +0 -2
maxframe/tensor/arithmetic/hypot.py +0 -2
maxframe/tensor/arithmetic/i0.py +1 -3
maxframe/tensor/arithmetic/imag.py +0 -2
maxframe/tensor/arithmetic/invert.py +1 -3
maxframe/tensor/arithmetic/isclose.py +0 -2
maxframe/tensor/arithmetic/iscomplex.py +0 -2
maxframe/tensor/arithmetic/isfinite.py +1 -3
maxframe/tensor/arithmetic/isinf.py +0 -2
maxframe/tensor/arithmetic/isnan.py +0 -2
maxframe/tensor/arithmetic/isreal.py +0 -2
maxframe/tensor/arithmetic/ldexp.py +0 -2
maxframe/tensor/arithmetic/less.py +0 -2
maxframe/tensor/arithmetic/less_equal.py +0 -2
maxframe/tensor/arithmetic/log.py +1 -3
maxframe/tensor/arithmetic/log10.py +1 -3
maxframe/tensor/arithmetic/log1p.py +1 -3
maxframe/tensor/arithmetic/log2.py +1 -3
maxframe/tensor/arithmetic/logaddexp.py +0 -2
maxframe/tensor/arithmetic/logaddexp2.py +0 -2
maxframe/tensor/arithmetic/logical_and.py +0 -2
maxframe/tensor/arithmetic/logical_not.py +1 -3
maxframe/tensor/arithmetic/logical_or.py +0 -2
maxframe/tensor/arithmetic/logical_xor.py +0 -2
maxframe/tensor/arithmetic/lshift.py +0 -2
maxframe/tensor/arithmetic/maximum.py +0 -2
maxframe/tensor/arithmetic/minimum.py +0 -2
maxframe/tensor/arithmetic/mod.py +0 -2
maxframe/tensor/arithmetic/modf.py +6 -2
maxframe/tensor/arithmetic/multiply.py +37 -4
maxframe/tensor/arithmetic/nan_to_num.py +0 -2
maxframe/tensor/arithmetic/negative.py +0 -2
maxframe/tensor/arithmetic/nextafter.py +0 -2
maxframe/tensor/arithmetic/not_equal.py +0 -2
maxframe/tensor/arithmetic/positive.py +0 -2
maxframe/tensor/arithmetic/power.py +0 -2
maxframe/tensor/arithmetic/rad2deg.py +0 -2
maxframe/tensor/arithmetic/radians.py +0 -2
maxframe/tensor/arithmetic/real.py +0 -2
maxframe/tensor/arithmetic/reciprocal.py +5 -3
maxframe/tensor/arithmetic/rint.py +1 -3
maxframe/tensor/arithmetic/rshift.py +0 -2
maxframe/tensor/arithmetic/setimag.py +0 -2
maxframe/tensor/arithmetic/setreal.py +0 -2
maxframe/tensor/arithmetic/sign.py +0 -2
maxframe/tensor/arithmetic/signbit.py +0 -2
maxframe/tensor/arithmetic/sin.py +0 -2
maxframe/tensor/arithmetic/sinc.py +1 -3
maxframe/tensor/arithmetic/sinh.py +0 -2
maxframe/tensor/arithmetic/spacing.py +0 -2
maxframe/tensor/arithmetic/sqrt.py +0 -2
maxframe/tensor/arithmetic/square.py +0 -2
maxframe/tensor/arithmetic/subtract.py +4 -2
maxframe/tensor/arithmetic/tan.py +0 -2
maxframe/tensor/arithmetic/tanh.py +0 -2
maxframe/tensor/arithmetic/tests/__init__.py +0 -2
maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
maxframe/tensor/arithmetic/truediv.py +0 -2
maxframe/tensor/arithmetic/trunc.py +0 -2
maxframe/tensor/arithmetic/utils.py +32 -6
maxframe/tensor/array_utils.py +3 -25
maxframe/tensor/core.py +6 -6
maxframe/tensor/datasource/__init__.py +10 -2
maxframe/tensor/datasource/arange.py +0 -2
maxframe/tensor/datasource/array.py +3 -22
maxframe/tensor/datasource/core.py +15 -10
maxframe/tensor/datasource/diag.py +140 -0
maxframe/tensor/datasource/diagflat.py +69 -0
maxframe/tensor/datasource/empty.py +0 -2
maxframe/tensor/datasource/eye.py +95 -0
maxframe/tensor/datasource/from_dataframe.py +0 -2
maxframe/tensor/datasource/from_dense.py +0 -17
maxframe/tensor/datasource/from_sparse.py +0 -2
maxframe/tensor/datasource/full.py +0 -2
maxframe/tensor/datasource/identity.py +54 -0
maxframe/tensor/datasource/indices.py +115 -0
maxframe/tensor/datasource/linspace.py +140 -0
maxframe/tensor/datasource/meshgrid.py +135 -0
maxframe/tensor/datasource/ones.py +8 -3
maxframe/tensor/datasource/tests/test_datasource.py +32 -1
maxframe/tensor/datasource/tri_array.py +107 -0
maxframe/tensor/datasource/zeros.py +7 -3
maxframe/tensor/extensions/__init__.py +31 -0
maxframe/tensor/extensions/accessor.py +25 -0
maxframe/tensor/extensions/apply_chunk.py +137 -0
maxframe/tensor/indexing/__init__.py +1 -1
maxframe/tensor/indexing/choose.py +8 -6
maxframe/tensor/indexing/compress.py +0 -2
maxframe/tensor/indexing/extract.py +0 -2
maxframe/tensor/indexing/fill_diagonal.py +9 -6
maxframe/tensor/indexing/flatnonzero.py +1 -3
maxframe/tensor/indexing/getitem.py +10 -43
maxframe/tensor/indexing/nonzero.py +2 -4
maxframe/tensor/indexing/setitem.py +19 -9
maxframe/tensor/indexing/slice.py +6 -3
maxframe/tensor/indexing/take.py +0 -2
maxframe/tensor/indexing/tests/__init__.py +0 -2
maxframe/tensor/indexing/tests/test_indexing.py +0 -2
maxframe/tensor/indexing/unravel_index.py +6 -6
maxframe/tensor/lib/__init__.py +16 -0
maxframe/tensor/lib/index_tricks.py +404 -0
maxframe/tensor/linalg/__init__.py +36 -0
maxframe/tensor/linalg/dot.py +145 -0
maxframe/tensor/linalg/inner.py +36 -0
maxframe/tensor/linalg/inv.py +83 -0
maxframe/tensor/linalg/lu.py +115 -0
maxframe/tensor/linalg/matmul.py +225 -0
maxframe/tensor/linalg/qr.py +124 -0
maxframe/tensor/linalg/solve_triangular.py +103 -0
maxframe/tensor/linalg/svd.py +167 -0
maxframe/tensor/linalg/tensordot.py +213 -0
maxframe/tensor/linalg/vdot.py +73 -0
maxframe/tensor/merge/__init__.py +4 -0
maxframe/tensor/merge/append.py +74 -0
maxframe/tensor/merge/column_stack.py +63 -0
maxframe/tensor/merge/concatenate.py +3 -2
maxframe/tensor/merge/dstack.py +71 -0
maxframe/tensor/merge/hstack.py +70 -0
maxframe/tensor/merge/stack.py +0 -2
maxframe/tensor/merge/tests/test_merge.py +0 -2
maxframe/tensor/misc/__init__.py +18 -5
maxframe/tensor/misc/astype.py +10 -8
maxframe/tensor/misc/broadcast_to.py +1 -1
maxframe/tensor/misc/copy.py +64 -0
maxframe/tensor/misc/diff.py +115 -0
maxframe/tensor/misc/flatten.py +63 -0
maxframe/tensor/misc/in1d.py +94 -0
maxframe/tensor/misc/isin.py +130 -0
maxframe/tensor/misc/ndim.py +53 -0
maxframe/tensor/misc/ravel.py +0 -2
maxframe/tensor/misc/repeat.py +129 -0
maxframe/tensor/misc/searchsorted.py +147 -0
maxframe/tensor/misc/setdiff1d.py +58 -0
maxframe/tensor/misc/squeeze.py +117 -0
maxframe/tensor/misc/swapaxes.py +113 -0
maxframe/tensor/misc/tests/test_misc.py +0 -2
maxframe/tensor/misc/transpose.py +8 -4
maxframe/tensor/misc/trapezoid.py +123 -0
maxframe/tensor/misc/unique.py +0 -1
maxframe/tensor/misc/where.py +10 -8
maxframe/tensor/operators.py +0 -34
maxframe/tensor/random/__init__.py +3 -5
maxframe/tensor/random/binomial.py +0 -2
maxframe/tensor/random/bytes.py +0 -2
maxframe/tensor/random/chisquare.py +0 -2
maxframe/tensor/random/choice.py +9 -8
maxframe/tensor/random/core.py +20 -5
maxframe/tensor/random/dirichlet.py +0 -2
maxframe/tensor/random/exponential.py +0 -2
maxframe/tensor/random/f.py +2 -4
maxframe/tensor/random/gamma.py +0 -2
maxframe/tensor/random/geometric.py +0 -2
maxframe/tensor/random/gumbel.py +0 -2
maxframe/tensor/random/hypergeometric.py +0 -2
maxframe/tensor/random/laplace.py +2 -4
maxframe/tensor/random/logistic.py +0 -2
maxframe/tensor/random/lognormal.py +0 -2
maxframe/tensor/random/logseries.py +0 -2
maxframe/tensor/random/multinomial.py +0 -2
maxframe/tensor/random/multivariate_normal.py +0 -2
maxframe/tensor/random/negative_binomial.py +0 -2
maxframe/tensor/random/noncentral_chisquare.py +0 -2
maxframe/tensor/random/noncentral_f.py +1 -3
maxframe/tensor/random/normal.py +0 -2
maxframe/tensor/random/pareto.py +0 -2
maxframe/tensor/random/permutation.py +6 -3
maxframe/tensor/random/poisson.py +0 -2
maxframe/tensor/random/power.py +0 -2
maxframe/tensor/random/rand.py +0 -2
maxframe/tensor/random/randint.py +0 -2
maxframe/tensor/random/randn.py +0 -2
maxframe/tensor/random/random_integers.py +0 -2
maxframe/tensor/random/random_sample.py +0 -2
maxframe/tensor/random/rayleigh.py +0 -2
maxframe/tensor/random/standard_cauchy.py +0 -2
maxframe/tensor/random/standard_exponential.py +0 -2
maxframe/tensor/random/standard_gamma.py +0 -2
maxframe/tensor/random/standard_normal.py +0 -2
maxframe/tensor/random/standard_t.py +0 -2
maxframe/tensor/random/tests/__init__.py +0 -2
maxframe/tensor/random/tests/test_random.py +0 -2
maxframe/tensor/random/triangular.py +0 -2
maxframe/tensor/random/uniform.py +0 -2
maxframe/tensor/random/vonmises.py +0 -2
maxframe/tensor/random/wald.py +0 -2
maxframe/tensor/random/weibull.py +0 -2
maxframe/tensor/random/zipf.py +0 -2
maxframe/tensor/reduction/__init__.py +0 -2
maxframe/tensor/reduction/all.py +0 -2
maxframe/tensor/reduction/allclose.py +0 -2
maxframe/tensor/reduction/any.py +0 -2
maxframe/tensor/reduction/argmax.py +1 -3
maxframe/tensor/reduction/argmin.py +1 -3
maxframe/tensor/reduction/array_equal.py +0 -2
maxframe/tensor/reduction/core.py +0 -2
maxframe/tensor/reduction/count_nonzero.py +0 -2
maxframe/tensor/reduction/cumprod.py +0 -2
maxframe/tensor/reduction/cumsum.py +0 -2
maxframe/tensor/reduction/max.py +0 -2
maxframe/tensor/reduction/mean.py +0 -2
maxframe/tensor/reduction/min.py +0 -2
maxframe/tensor/reduction/nanargmax.py +0 -2
maxframe/tensor/reduction/nanargmin.py +0 -2
maxframe/tensor/reduction/nancumprod.py +0 -2
maxframe/tensor/reduction/nancumsum.py +0 -2
maxframe/tensor/reduction/nanmax.py +0 -2
maxframe/tensor/reduction/nanmean.py +0 -2
maxframe/tensor/reduction/nanmin.py +0 -2
maxframe/tensor/reduction/nanprod.py +0 -2
maxframe/tensor/reduction/nanstd.py +0 -2
maxframe/tensor/reduction/nansum.py +0 -2
maxframe/tensor/reduction/nanvar.py +0 -2
maxframe/tensor/reduction/prod.py +0 -2
maxframe/tensor/reduction/std.py +0 -2
maxframe/tensor/reduction/sum.py +0 -2
maxframe/tensor/reduction/tests/test_reduction.py +1 -4
maxframe/tensor/reduction/var.py +0 -2
maxframe/tensor/reshape/__init__.py +0 -2
maxframe/tensor/reshape/reshape.py +6 -5
maxframe/tensor/reshape/tests/__init__.py +0 -2
maxframe/tensor/reshape/tests/test_reshape.py +0 -2
maxframe/tensor/sort/__init__.py +16 -0
maxframe/tensor/sort/argsort.py +150 -0
maxframe/tensor/sort/sort.py +295 -0
maxframe/tensor/special/__init__.py +37 -0
maxframe/tensor/special/core.py +38 -0
maxframe/tensor/special/misc.py +142 -0
maxframe/tensor/special/statistical.py +56 -0
maxframe/tensor/statistics/__init__.py +5 -0
maxframe/tensor/statistics/average.py +143 -0
maxframe/tensor/statistics/bincount.py +133 -0
maxframe/tensor/statistics/quantile.py +10 -8
maxframe/tensor/ufunc/__init__.py +0 -2
maxframe/tensor/ufunc/ufunc.py +0 -2
maxframe/tensor/utils.py +21 -3
maxframe/tests/test_protocol.py +3 -3
maxframe/tests/test_utils.py +210 -1
maxframe/tests/utils.py +59 -1
maxframe/udf.py +76 -6
maxframe/utils.py +418 -17
{maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/METADATA +4 -1
maxframe-2.0.0b2.dist-info/RECORD +939 -0
{maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/WHEEL +1 -1
maxframe_client/clients/framedriver.py +19 -3
maxframe_client/fetcher.py +113 -6
maxframe_client/session/odps.py +173 -38
maxframe_client/session/task.py +3 -1
maxframe_client/tests/test_session.py +41 -5
maxframe-1.3.1.dist-info/RECORD +0 -705
{maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/top_level.txt +0 -0

maxframe/dataframe/datasource/from_records.py CHANGED Viewed

@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright 1999-2025 Alibaba Group Holding Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import MutableMapping, Union
 import numpy as np
 import pandas as pd
@@ -58,6 +58,13 @@ class DataFrameFromRecords(DataFrameOperator, DataFrameOperatorMixin):
             columns_value=columns_value,
         )
+    @classmethod
+    def estimate_size(
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromRecords"
+    ):  # pragma: no cover
+        # todo implement this to facilitate local computation
+        ctx[op.outputs[0].key] = float("inf")
 def from_records(
     data,

maxframe/dataframe/datasource/from_tensor.py CHANGED Viewed

@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright 1999-2025 Alibaba Group Holding Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,13 +13,13 @@
 # limitations under the License.
 from collections import OrderedDict
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, MutableMapping, Union
 import numpy as np
 import pandas as pd
 from ... import opcodes
-from ...core import ENTITY_TYPE, OutputType
+from ...core import ENTITY_TYPE, EntityData, OutputType
 from ...serialization.serializables import AnyField, KeyField
 from ...tensor.core import Tensor
 from ...tensor.datasource import tensor as astensor
@@ -46,24 +44,25 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
         kwargs["_output_types"] = [OutputType.dataframe]
         super().__init__(*args, **kwargs)
-    def _set_inputs(self, inputs: List[EntityType]):
-        super()._set_inputs(inputs)
-        inputs_iter = iter(self._inputs)
-        if self.input is not None:
-            if not isinstance(self.input, dict):
-                self.input = next(inputs_iter)
+    @classmethod
+    def _set_inputs(cls, op: "DataFrameFromTensor", inputs: List[EntityData]):
+        super()._set_inputs(op, inputs)
+        inputs_iter = iter(op._inputs)
+        if op.input is not None:
+            if not isinstance(op.input, dict):
+                op.input = next(inputs_iter)
             else:
                 # check each value for input
                 new_input = OrderedDict()
-                for k, v in self.input.items():
+                for k, v in op.input.items():
                     if isinstance(v, ENTITY_TYPE):
                         new_input[k] = next(inputs_iter)
                     else:
                         new_input[k] = v
-                self.input = new_input
+                op.input = new_input
-        if isinstance(self.index, ENTITY_TYPE):
-            self.index = next(inputs_iter)
+        if isinstance(op.index, ENTITY_TYPE):
+            op.index = next(inputs_iter)
     def __call__(
         self,
@@ -138,7 +137,11 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
                 )
             index_value = self._process_index(index, tileables)
         else:
-            self.index = index = pd.RangeIndex(0, tileables[0].shape[0])
+            if np.isnan(tileables[0].shape[0]):
+                index = pd.RangeIndex(0)
+            else:
+                index = pd.RangeIndex(0, tileables[0].shape[0])
+            self.index = index
             index_value = parse_index(index)
         if columns is not None:
@@ -260,6 +263,13 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
             columns_value=columns_value,
         )
+    @classmethod
+    def estimate_size(
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromTensor"
+    ):  # pragma: no cover
+        # todo implement this to facilitate local computation
+        ctx[op.outputs[0].key] = float("inf")
 def dataframe_from_tensor(
     tensor: Tensor,
@@ -340,12 +350,13 @@ class SeriesFromTensor(DataFrameOperator, DataFrameOperatorMixin):
     input = KeyField("input")
     index = AnyField("index")
-    def _set_inputs(self, inputs: List[EntityType]):
-        super()._set_inputs(inputs)
-        if self.input is not None:
-            self.input = self.inputs[0]
-        if self.index is not None and hasattr(self.index, "key"):
-            self.index = self.inputs[-1]
+    @classmethod
+    def _set_inputs(cls, op: "SeriesFromTensor", inputs: List[EntityData]):
+        super()._set_inputs(op, inputs)
+        if op.input is not None:
+            op.input = op.inputs[0]
+        if op.index is not None and hasattr(op.index, "key"):
+            op.index = op.inputs[-1]
     def __call__(
         self,

maxframe/dataframe/datasource/read_csv.py CHANGED Viewed

@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright 1999-2025 Alibaba Group Holding Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,6 +13,7 @@
 # limitations under the License.
 from io import BytesIO
+from typing import MutableMapping, Union
 from urllib.parse import urlparse
 import numpy as np
@@ -129,6 +128,13 @@ class DataFrameReadCSV(
             chunk_bytes=chunk_bytes,
         )
+    @classmethod
+    def estimate_size(
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadCSV"
+    ):  # pragma: no cover
+        # todo implement this to facilitate local computation
+        ctx[op.outputs[0].key] = float("inf")
 def read_csv(
     path,

maxframe/dataframe/datasource/read_odps_query.py CHANGED Viewed

@@ -13,13 +13,18 @@
 # limitations under the License.
 import dataclasses
+import functools
+import io
 import logging
 import re
-from typing import Dict, List, Optional, Tuple, Union
+import tokenize
+from typing import Dict, List, MutableMapping, Optional, Tuple, Union
 import numpy as np
 import pandas as pd
 from odps import ODPS
+from odps.errors import ODPSError
+from odps.models import TableSchema
 from odps.types import Column, OdpsSchema, validate_data_type
 from odps.utils import split_sql_by_semicolon
@@ -110,7 +115,15 @@ def _split_explain_string(explain_string: str) -> List[str]:
     grouped = []
     for part in parts:
         part = part.strip("\n")
-        if grouped and not part.startswith(" "):
+        part_line1 = part.split("\n", 1)[0]
+        # initial line of part should not start with spaces (Statistics row)
+        #  or with quote marks
+        if (
+            grouped
+            and not part.startswith(" ")
+            and "'" not in part_line1
+            and '"' not in part_line1
+        ):
             final_parts.append("\n\n".join(grouped).strip())
             grouped = []
         grouped.append(part)
@@ -235,13 +248,18 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
         return _parse_full_explain(explain_string)
-def _build_explain_sql(sql_stmt: str, no_split: bool = False) -> str:
+def _build_explain_sql(
+    sql_stmt: str, no_split: bool = False, use_output: bool = False
+) -> str:
+    clause = "EXPLAIN "
+    if use_output:
+        clause += "OUTPUT "
     if no_split:
-        return "EXPLAIN " + sql_stmt
+        return clause + sql_stmt
     sql_parts = split_sql_by_semicolon(sql_stmt)
     if not sql_parts:
         raise ValueError(f"Cannot explain SQL statement {sql_stmt}")
-    sql_parts[-1] = "EXPLAIN " + sql_parts[-1]
+    sql_parts[-1] = clause + sql_parts[-1]
     return "\n".join(sql_parts)
@@ -267,6 +285,15 @@ class DataFrameReadODPSQuery(
     def set_pruned_columns(self, columns, *, keep_order=None):  # pragma: no cover
         self.columns = columns
+    @classmethod
+    def estimate_size(
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSQuery"
+    ):  # pragma: no cover
+        # use infinity to show that the size cannot be inferred
+        # todo when local catalyst is implemented,
+        #  a more precise estimation here can be useful then.
+        ctx[op.outputs[0].key] = float("inf")
     def __call__(self, chunk_bytes=None, chunk_size=None):
         if is_empty(self.index_columns):
             index_value = parse_index(pd.RangeIndex(0))
@@ -302,6 +329,73 @@ class DataFrameReadODPSQuery(
         )
+def _check_token_in_sql(token: str, sql: str) -> bool:
+    try:
+        names = set()
+        for tk_info in tokenize.tokenize(io.BytesIO(sql.encode()).readline):
+            if tk_info.type == tokenize.NAME:
+                names.add(tk_info.string)
+        return token in names
+    except:  # pragma: no cover
+        return False
+def _resolve_schema_by_explain(
+    odps_entry: ODPS,
+    query: str,
+    no_split_sql: bool = False,
+    hints: Dict[str, str] = None,
+    use_explain_output: bool = True,
+) -> OdpsSchema:
+    hints = (hints or dict()).copy()
+    hints["odps.sql.select.output.format"] = "json"
+    explain_stmt = _build_explain_sql(
+        query, no_split=no_split_sql, use_output=use_explain_output
+    )
+    inst = odps_entry.execute_sql(explain_stmt, hints=hints)
+    logger.debug("Explain output instance ID: %s", inst.id)
+    explain_str = list(inst.get_task_results().values())[0]
+    if use_explain_output:
+        if not explain_str or "nothing to explain" in explain_str:
+            raise ValueError("The SQL statement should be an instant query")
+        return TableSchema.parse(None, explain_str)
+    else:
+        return _parse_explained_schema(explain_str)
+def _resolve_query_schema(
+    odps_entry: ODPS,
+    query: str,
+    no_split_sql: bool = False,
+    hints: Dict[str, str] = None,
+    use_explain_output: Optional[bool] = None,
+) -> OdpsSchema:
+    methods = []
+    if use_explain_output is not False:
+        # None or True
+        methods.append(_resolve_schema_by_explain)
+    if use_explain_output is not True:
+        # None or False
+        methods.append(
+            functools.partial(_resolve_schema_by_explain, use_explain_output=False)
+        )
+    for idx, resolve_method in enumerate(methods):
+        try:
+            return resolve_method(
+                odps_entry, query, no_split_sql=no_split_sql, hints=hints
+            )
+        except ODPSError as ex:
+            msg = (
+                f"Failed to obtain schema from SQL explain: {ex!r}\n"
+                f"Explain instance ID: {ex.instance_id}"
+            )
+            if idx + 1 == len(methods) or "ODPS-0130161" not in str(ex):
+                exc = ValueError(msg)
+                raise exc.with_traceback(ex.__traceback__) from None
+    # will this happen?
+    raise ValueError("Failed to obtain schema from SQL explain")  # pragma: no cover
 def read_odps_query(
     query: str,
     odps_entry: ODPS = None,
@@ -341,6 +435,8 @@ def read_odps_query(
         DataFrame read from MaxCompute (ODPS) table
     """
     no_split_sql = kw.pop("no_split_sql", False)
+    # if use_explain_output is None, will try two methods.
+    use_explain_output = kw.pop("use_explain_output", None)
     hints = options.sql.settings.copy() or {}
     if sql_hints:
@@ -365,24 +461,18 @@ def read_odps_query(
     col_renames = {}
     if not skip_schema:
-        explain_stmt = _build_explain_sql(query, no_split=no_split_sql)
-        inst = odps_entry.execute_sql(explain_stmt, hints=hints)
-        logger.debug("Explain instance ID: %s", inst.id)
-        explain_str = list(inst.get_task_results().values())[0]
-        try:
-            odps_schema = _parse_explained_schema(explain_str)
-        except BaseException as ex:
-            exc = ValueError(
-                f"Failed to obtain schema from SQL explain: {ex!r}"
-                f"\nExplain instance ID: {inst.id}"
-            )
-            raise exc.with_traceback(ex.__traceback__) from None
+        odps_schema = _resolve_query_schema(
+            odps_entry,
+            query,
+            no_split_sql=no_split_sql,
+            hints=hints,
+            use_explain_output=use_explain_output,
+        )
         new_columns = []
         for col in odps_schema.columns:
             anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
-            if anon_match and col.name not in query:
+            if anon_match and not _check_token_in_sql(col.name, query):
                 new_name = anonymous_col_prefix + anon_match.group(1)
                 col_renames[col.name] = new_name
                 new_columns.append(Column(new_name, col.type))

maxframe/dataframe/datasource/read_odps_table.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 import logging
-from typing import List, Optional, Union
+from typing import List, MutableMapping, Optional, Union
 import numpy as np
 import pandas as pd
@@ -34,7 +34,7 @@ from ...serialization.serializables import (
     SeriesField,
     StringField,
 )
-from ...utils import is_empty
+from ...utils import estimate_table_size, is_empty
 from ..core import DataFrame  # noqa: F401
 from ..utils import parse_index
 from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
@@ -46,6 +46,7 @@ class DataFrameReadODPSTable(
     IncrementalIndexDatasource,
     ColumnPruneSupportedDataSourceMixin,
 ):
+    __slots__ = ("_odps_entry",)
     _op_type_ = opcodes.READ_ODPS_TABLE
     table_name = StringField("table_name")
@@ -61,7 +62,8 @@ class DataFrameReadODPSTable(
     index_dtypes = SeriesField("index_dtypes", default=None)
     def __init__(self, memory_scale=None, **kw):
-        output_type = kw.get("output_type", OutputType.dataframe)
+        output_type = kw.pop("output_type", OutputType.dataframe)
+        self._odps_entry = kw.pop("odps_entry", None)
         super(DataFrameReadODPSTable, self).__init__(
             memory_scale=memory_scale, _output_types=[output_type], **kw
         )
@@ -130,6 +132,18 @@ class DataFrameReadODPSTable(
                 chunk_size=chunk_size,
             )
+    @classmethod
+    def estimate_size(
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSTable"
+    ) -> None:
+        odps_entry = op._odps_entry or ODPS.from_global() or ODPS.from_environments()
+        if not odps_entry:  # pragma: no cover
+            ctx[op.outputs[0].key] = float("inf")
+            return
+        ctx[op.outputs[0].key] = estimate_table_size(
+            odps_entry, op.table_name, op.partitions
+        )
 def read_odps_table(
     table_name: Union[str, Table],
@@ -212,7 +226,8 @@ def read_odps_table(
         index_dtypes = pd.Series(table_index_types, index=index_col)
     if columns is not None:
-        table_col_set = set([c.lower() for c in columns])
+        new_columns = [c.lower() for c in columns]
+        table_col_set = set(new_columns)
         col_diff = sorted(table_col_set - set(table_columns))
         if col_diff:
             raise ValueError(
@@ -223,7 +238,6 @@ def read_odps_table(
             raise ValueError("Index columns and columns shall not overlap.")
         # reorder columns
-        new_columns = [c for c in table_columns if c in table_col_set]
         df_types = [df_types[table_columns.index(col)] for col in new_columns]
         table_columns = new_columns
         columns = new_columns
@@ -253,6 +267,7 @@ def read_odps_table(
         last_modified_time=to_timestamp(table.last_data_modified_time),
         index_columns=index_col,
         index_dtypes=index_dtypes,
+        odps_entry=odps_entry,
         **kw,
     )
     return op(shape, chunk_bytes=chunk_bytes, chunk_size=chunk_size)

maxframe/dataframe/datasource/read_parquet.py CHANGED Viewed

@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
 # Copyright 1999-2025 Alibaba Group Holding Ltd.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,7 +13,7 @@
 # limitations under the License.
 import os
-from typing import Dict
+from typing import Dict, MutableMapping, Union
 from urllib.parse import urlparse
 import numpy as np
@@ -308,6 +306,13 @@ class DataFrameReadParquet(
             columns_value=columns_value,
         )
+    @classmethod
+    def estimate_size(
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadParquet"
+    ):  # pragma: no cover
+        # todo implement this to facilitate local computation
+        ctx[op.outputs[0].key] = float("inf")
 def read_parquet(
     path,

maxframe/dataframe/datasource/tests/test_datasource.py CHANGED Viewed

@@ -15,15 +15,19 @@
 import os
 import uuid
 from collections import OrderedDict
+from math import isinf
+import mock
 import numpy as np
 import pandas as pd
 import pytest
 from odps import ODPS
 from odps import types as odps_types
+from odps.errors import ODPSError
 from .... import tensor as mt
 from ....core import OutputType
+from ....core.operator import estimate_size
 from ....tests.utils import tn
 from ....utils import lazy_import
 from ... import read_odps_query, read_odps_table
@@ -48,6 +52,7 @@ from ..read_odps_query import (
     ColumnSchema,
     _parse_full_explain,
     _parse_simple_explain,
+    _resolve_query_schema,
     _resolve_task_sector,
 )
 from ..series import from_pandas as from_pandas_series
@@ -71,6 +76,10 @@ def test_from_pandas_dataframe():
     assert df.index_value.max_val == 9
     np.testing.assert_equal(df.columns_value._index_value._data, data.columns.values)
+    result_ctx = dict()
+    estimate_size(result_ctx, df.op)
+    assert result_ctx[df.key] > 0 and not isinf(result_ctx[df.key])
     data2 = data[::2]
     df2 = from_pandas_df(data2, chunk_size=4)
@@ -258,6 +267,10 @@ def test_from_odps_table():
         ),
     )
+    result_ctx = dict()
+    estimate_size(result_ctx, df.op)
+    assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
     with pytest.raises(ValueError):
         read_odps_table(test_table, columns=["col3", "col4"])
     with pytest.raises(ValueError):
@@ -300,6 +313,7 @@ def test_from_odps_table():
         ),
     )
+    test_parted_table.create_partition("pt=20240103")
     df = read_odps_table(
         test_parted_table, columns=["col1", "col2", "pt"], partitions="pt=20240103"
     )
@@ -314,6 +328,10 @@ def test_from_odps_table():
         ),
     )
+    result_ctx = dict()
+    estimate_size(result_ctx, df.op)
+    assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
     out_idx = read_odps_table(
         test_table,
         columns=[],
@@ -345,7 +363,7 @@ def test_from_odps_query():
     with pytest.raises(ValueError) as err_info:
         read_odps_query(
-            f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
+            f"CREATE TABLE dummy_table_{uuid.uuid4().hex} LIFECYCLE 1 "
             f"AS SELECT * FROM {table1_name}"
         )
     assert "instant query" in err_info.value.args[0]
@@ -545,3 +563,64 @@ def test_resolve_multi_join():
     for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
         assert col.name == exp_nm
         assert col.type == odps_types.validate_data_type(exp_tp)
+def test_resolve_break_lines():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-with-break-line.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+    expected_col_types = {
+        "key": "string",
+        "value": "string",
+    }
+    schema = _parse_full_explain(sector)
+    for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
+        assert col.name == exp_nm
+        assert col.type == odps_types.validate_data_type(exp_tp)
+@pytest.mark.parametrize("use_explain_output", [None, False, True])
+def test_explain_use_explain_output(use_explain_output):
+    class MockInstance:
+        @property
+        def id(self):
+            return "mock_id"
+        def get_task_results(self):
+            return {"pot": """{"columns":[{"name":"a_bigint","type":"BIGINT"}]}"""}
+    old_execute_sql = ODPS.execute_sql
+    exec_count = 0
+    def new_execute_sql(self, sql, *args, **kw):
+        nonlocal exec_count
+        exec_count += 1
+        if use_explain_output and sql.lower().startswith("explain output select"):
+            return MockInstance()
+        elif use_explain_output is None and sql.lower().startswith("explain output"):
+            raise ODPSError("ODPS-0130161: mock error")
+        return old_execute_sql(self, sql, *args, **kw)
+    odps_entry = ODPS.from_environments()
+    with mock.patch("odps.core.ODPS.execute_sql", new=new_execute_sql):
+        with pytest.raises(ValueError):
+            _resolve_query_schema(
+                odps_entry, "not_a_sql", use_explain_output=use_explain_output
+            )
+        assert exec_count == (2 if use_explain_output is None else 1)
+        exec_count = 0
+        schema = _resolve_query_schema(
+            odps_entry,
+            "select cast(1 as bigint) as a_bigint",
+            use_explain_output=use_explain_output,
+        )
+        assert schema.columns[0].name == "a_bigint"
+        assert schema.columns[0].type == odps_types.bigint
+        assert exec_count == (2 if use_explain_output is None else 1)

maxframe/dataframe/datastore/tests/test_to_odps.py CHANGED Viewed

@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import pandas as pd
 import pytest
-from ... import DataFrame
+from ... import DataFrame, Index
 from ..to_odps import to_odps_table
@@ -23,6 +24,25 @@ def df():
     return DataFrame({"A": [1, 2], "B": [3, 4]})
+@pytest.fixture
+def df_with_named_index():
+    return DataFrame({"A": [1, 2], "B": [3, 4]}, index=Index([1, 2], name="A"))
+@pytest.fixture
+def df_with_named_multi_indexes():
+    arrays = [
+        ["c1", "c2"],
+        ["d1", "d2"],
+        ["e1", "e2"],
+    ]
+    multi_index = pd.MultiIndex.from_arrays(arrays, names=("C", "D", "E"))
+    return DataFrame(
+        {"A": [1, 2], "B": [3, 4]},
+        index=multi_index,
+    )
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -46,3 +66,34 @@ def test_to_odps_table_validation(df, kwargs):
 )
 def test_to_odps_table_vaild(df, kwargs):
     to_odps_table(df, "test_table", **kwargs)
+def test_to_odps_table_column_conflicts(
+    df, df_with_named_index, df_with_named_multi_indexes
+):
+    to_odps_table(df.reset_index(), "test_table", index=False)
+    to_odps_table(df.reset_index(), "test_table", index_label="C")
+    with pytest.raises(ValueError):
+        to_odps_table(df.reset_index(), "test_table")
+    to_odps_table(df_with_named_index, "test_table", index=False)
+    to_odps_table(df_with_named_index, "test_table", index_label="C")
+    with pytest.raises(ValueError):
+        to_odps_table(df_with_named_index, "test_table")
+    to_odps_table(df, "test_table", partition="C='1'")
+    with pytest.raises(ValueError):
+        to_odps_table(df, "test_table", partition="A='1'")
+    with pytest.raises(ValueError):
+        to_odps_table(df, "test_table", partition="A='1'")
+    to_odps_table(df_with_named_multi_indexes, "test_table")
+    to_odps_table(
+        df_with_named_multi_indexes, "test_table", partition="C='1'", index=False
+    )
+    with pytest.raises(ValueError):
+        to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")
+    df_with_named_multi_indexes.index.names = ["C1", "D1", "E1"]
+    to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")