maxframe 1.3.1__cp39-cp39-macosx_10_9_universal2.whl → 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +109 -19
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +10 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +21 -58
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +54 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +59 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/METADATA +4 -1
- maxframe-2.0.0b2.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/top_level.txt +0 -0
|
@@ -12,8 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import MutableMapping, Union
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
|
-
import pandas as pd
|
|
17
18
|
|
|
18
19
|
from ... import opcodes
|
|
19
20
|
from ...core import OutputType
|
|
@@ -26,12 +27,13 @@ from ...serialization.serializables import (
|
|
|
26
27
|
StringField,
|
|
27
28
|
TupleField,
|
|
28
29
|
)
|
|
29
|
-
from ...
|
|
30
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
31
|
+
from ...utils import copy_if_possible, get_func_token, make_dtype, make_dtypes, tokenize
|
|
30
32
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
31
33
|
from ..utils import (
|
|
34
|
+
InferredDataFrameMeta,
|
|
32
35
|
copy_func_scheduling_hints,
|
|
33
|
-
|
|
34
|
-
make_dtypes,
|
|
36
|
+
infer_dataframe_return_value,
|
|
35
37
|
parse_index,
|
|
36
38
|
validate_output_types,
|
|
37
39
|
)
|
|
@@ -56,6 +58,7 @@ class GroupByApply(
|
|
|
56
58
|
args = TupleField("args", default_factory=tuple)
|
|
57
59
|
kwds = DictField("kwds", default_factory=dict)
|
|
58
60
|
maybe_agg = BoolField("maybe_agg", default=None)
|
|
61
|
+
|
|
59
62
|
logic_key = StringField("logic_key", default=None)
|
|
60
63
|
func_key = AnyField("func_key", default=None)
|
|
61
64
|
need_clean_up_func = BoolField("need_clean_up_func", default=False)
|
|
@@ -65,6 +68,9 @@ class GroupByApply(
|
|
|
65
68
|
if hasattr(self, "func"):
|
|
66
69
|
copy_func_scheduling_hints(self.func, self)
|
|
67
70
|
|
|
71
|
+
def has_custom_code(self) -> bool:
    """Tell whether ``self.func`` is something other than a ``BuiltinFunction``."""
    wraps_builtin = isinstance(self.func, BuiltinFunction)
    return not wraps_builtin
|
|
73
|
+
|
|
68
74
|
def _update_key(self):
|
|
69
75
|
values = [v for v in self._values_ if v is not self.func] + [
|
|
70
76
|
get_func_token(self.func)
|
|
@@ -73,96 +79,66 @@ class GroupByApply(
|
|
|
73
79
|
return self
|
|
74
80
|
|
|
75
81
|
def _infer_df_func_returns(
    self, in_groupby, dtypes=None, dtype=None, name=None, index=None
) -> InferredDataFrameMeta:
    """Infer the metadata of ``groupby.apply(self.func, ...)``.

    The inference itself is delegated to ``infer_dataframe_return_value``,
    which receives the grouped input, a callback performing the actual
    ``apply`` call, and every user-supplied hint.

    Side effects: ``self.output_types`` is filled from the inferred output
    type when it was empty, and ``self.maybe_agg`` is overwritten with the
    inferred flag.

    Returns the ``InferredDataFrameMeta`` produced by the helper.
    """

    def _apply_on(groupby_obj):
        # Arguments go through copy_if_possible before every trial call.
        call_args = copy_if_possible(self.args)
        call_kwds = copy_if_possible(self.kwds)
        return groupby_obj.apply(self.func, *call_args, **call_kwds)

    # Only the first declared output type (if any) is used as a hint.
    declared_type = self.output_types[0] if self.output_types else None
    meta = infer_dataframe_return_value(
        in_groupby,
        _apply_on,
        dtypes=dtypes,
        dtype=dtype,
        name=name,
        index=index,
        output_type=declared_type,
    )

    # Keep explicitly declared output types; otherwise adopt the inferred one.
    if not self.output_types and meta.output_type:
        resolved_types = [meta.output_type]
    else:
        resolved_types = self.output_types
    self.output_types = resolved_types

    self.maybe_agg = meta.maybe_agg
    return meta
|
|
124
106
|
|
|
125
107
|
def __call__(self, groupby, dtypes=None, dtype=None, name=None, index=None):
    """Build the output DataFrame or Series for applying ``self.func``.

    When the operator was declared with ``OutputType.df_or_series`` no
    inference is attempted and an undetermined result is returned.
    Otherwise metadata is inferred (honouring ``dtypes``, ``dtype``,
    ``name`` and ``index`` hints) and ``check_absence`` is asked to verify
    that output type and dtype information is available.
    """
    declared = self.output_types
    if declared and declared[0] == OutputType.df_or_series:
        return self.new_df_or_series([groupby])

    meta = self._infer_df_func_returns(
        groupby, dtypes=dtypes, dtype=dtype, name=name, index=index
    )
    meta.check_absence("output_type", "dtypes", "dtype")

    if self.output_types[0] == OutputType.dataframe:
        # Row count is unknown up front; column count follows the dtypes.
        out_dtypes = meta.dtypes
        return self.new_dataframe(
            [groupby],
            shape=(np.nan, len(out_dtypes)),
            dtypes=out_dtypes,
            index_value=meta.index_value,
            columns_value=parse_index(out_dtypes.index, store_data=True),
        )

    # Any other output type is materialised as a Series of unknown length.
    return self.new_series(
        [groupby],
        name=meta.name,
        shape=(np.nan,),
        dtype=meta.dtype,
        index_value=meta.index_value,
    )
|
|
165
133
|
|
|
134
|
+
@classmethod
def estimate_size(
    cls, ctx: MutableMapping[str, Union[int, float]], op: "GroupByApply"
) -> None:
    """Record a size estimate for ``op`` in ``ctx``.

    If ``op.func`` is a ``MarkedFunction``, the first output's key is mapped
    to infinity; the parent implementation is then invoked.
    """
    func_is_marked = isinstance(op.func, MarkedFunction)
    if func_is_marked:
        first_output_key = op.outputs[0].key
        ctx[first_output_key] = float("inf")
    # NOTE(review): nesting reconstructed from a flattened listing; the
    # parent call is assumed to be unconditional -- confirm against upstream.
    super().estimate_size(ctx, op)
|
|
141
|
+
|
|
166
142
|
|
|
167
143
|
def groupby_apply(
|
|
168
144
|
groupby,
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Callable, Dict, List, MutableMapping, Tuple, Union
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import OutputType
|
|
22
|
+
from ...lib.version import parse as parse_version
|
|
23
|
+
from ...serialization.serializables import (
|
|
24
|
+
DictField,
|
|
25
|
+
FunctionField,
|
|
26
|
+
Int32Field,
|
|
27
|
+
TupleField,
|
|
28
|
+
)
|
|
29
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
30
|
+
from ...utils import copy_if_possible
|
|
31
|
+
from ..core import (
|
|
32
|
+
DATAFRAME_GROUPBY_TYPE,
|
|
33
|
+
GROUPBY_TYPE,
|
|
34
|
+
DataFrameGroupBy,
|
|
35
|
+
IndexValue,
|
|
36
|
+
SeriesGroupBy,
|
|
37
|
+
)
|
|
38
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
39
|
+
from ..utils import (
|
|
40
|
+
InferredDataFrameMeta,
|
|
41
|
+
build_empty_df,
|
|
42
|
+
copy_func_scheduling_hints,
|
|
43
|
+
infer_dataframe_return_value,
|
|
44
|
+
make_column_list,
|
|
45
|
+
make_dtype,
|
|
46
|
+
make_dtypes,
|
|
47
|
+
parse_index,
|
|
48
|
+
validate_output_types,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
# True when the installed pandas is older than 1.5.0. In that case the
# inference callback of GroupByApplyChunk prepends the group-key columns or
# levels to the result index itself whenever ``group_keys`` is requested.
_need_enforce_group_keys = parse_version(pd.__version__) < parse_version("1.5.0")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class GroupByApplyChunk(DataFrameOperatorMixin, DataFrameOperator):
    """Operator applying a user function to row batches of grouped data.

    The operator keeps the user function, its extra arguments and the
    parameters of the originating ``groupby`` call. Calling the operator on
    a grouped object infers the output metadata and builds the resulting
    DataFrame or Series.
    """

    _op_type_ = opcodes.APPLY_CHUNK
    _op_module_ = "dataframe.groupby"

    # user function applied to the grouped data
    func = FunctionField("func")
    # expected number of rows per batch, or None when unspecified
    batch_rows = Int32Field("batch_rows", default=None)
    # extra positional / keyword arguments forwarded to ``func``
    args = TupleField("args", default=None)
    kwargs = DictField("kwargs", default=None)

    # parameters of the groupby call this operator was created from
    groupby_params = DictField("groupby_params", default=None)

    def __init__(self, output_type=None, **kw):
        # A single output type is handed on as a one-element
        # ``_output_types`` list.
        if output_type:
            kw["_output_types"] = [output_type]
        super().__init__(**kw)
        if hasattr(self, "func"):
            copy_func_scheduling_hints(self.func, self)

    def has_custom_code(self) -> bool:
        """Tell whether ``func`` is something other than a ``BuiltinFunction``."""
        return not isinstance(self.func, BuiltinFunction)

    def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
        """Build the output for a DataFrame groupby whose source is ``df``.

        A DataFrame output keeps ``df.shape`` when ``element_wise`` is
        truthy, otherwise its row count is unknown. Any other output type
        yields a Series of unknown length.
        """
        # return dataframe
        if self.output_types[0] == OutputType.dataframe:
            dtypes = make_dtypes(dtypes)
            # apply_chunk will generate a new range index for results
            return self.new_dataframe(
                [df],
                shape=df.shape if element_wise else (np.nan, len(dtypes)),
                index_value=index_value,
                columns_value=parse_index(dtypes.index, store_data=True),
                dtypes=dtypes,
            )

        # return series
        return self.new_series(
            [df], shape=(np.nan,), name=name, dtype=dtype, index_value=index_value
        )

    def _call_series(self, series, dtypes, dtype, name, index_value, element_wise):
        """Build the output for a Series groupby whose source is ``series``.

        A Series output keeps ``series.shape`` when ``element_wise`` is
        truthy, otherwise its length is unknown. Any other output type
        yields a DataFrame with an unknown row count.
        """
        if self.output_types[0] == OutputType.series:
            shape = series.shape if element_wise else (np.nan,)
            return self.new_series(
                [series],
                dtype=dtype,
                shape=shape,
                index_value=index_value,
                name=name,
            )

        dtypes = make_dtypes(dtypes)
        return self.new_dataframe(
            [series],
            shape=(np.nan, len(dtypes)),
            index_value=index_value,
            columns_value=parse_index(dtypes.index, store_data=True),
            dtypes=dtypes,
        )

    def __call__(
        self,
        groupby: Union[DataFrameGroupBy, SeriesGroupBy],
        dtypes: Union[Tuple[str, Any], Dict[str, Any]] = None,
        dtype: Any = None,
        name: Any = None,
        output_type=None,
        index=None,
    ):
        """Create the result of applying ``func`` on ``groupby``.

        The operator input is the data underlying ``groupby`` (its first
        input, unwrapped once more when that is itself a grouped object),
        not the grouped object.

        ``dtypes``, ``dtype``, ``name``, ``output_type`` and ``index`` are
        optional hints that are passed on to metadata inference.
        """
        input_df = groupby.inputs[0]
        if isinstance(input_df, GROUPBY_TYPE):
            input_df = input_df.inputs[0]

        # if skip_infer, directly build a frame
        if self.output_types and self.output_types[0] == OutputType.df_or_series:
            return self.new_df_or_series([input_df])

        # infer return index and dtypes
        inferred_meta = self._infer_batch_func_returns(
            groupby,
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index=index,
        )

        if inferred_meta.index_value is None:
            # No index could be determined: fall back to an index value
            # built from the keys of the input and the function.
            inferred_meta.index_value = parse_index(
                None, (groupby.key, groupby.index_value.key, self.func)
            )
        inferred_meta.check_absence("output_type", "dtypes", "dtype")

        if isinstance(groupby, DATAFRAME_GROUPBY_TYPE):
            return self._call_dataframe(
                input_df,
                dtypes=inferred_meta.dtypes,
                dtype=inferred_meta.dtype,
                name=inferred_meta.name,
                index_value=inferred_meta.index_value,
                element_wise=inferred_meta.elementwise,
            )

        return self._call_series(
            input_df,
            dtypes=inferred_meta.dtypes,
            dtype=inferred_meta.dtype,
            name=inferred_meta.name,
            index_value=inferred_meta.index_value,
            element_wise=inferred_meta.elementwise,
        )

    def _infer_batch_func_returns(
        self,
        input_groupby: Union[DataFrameGroupBy, SeriesGroupBy],
        output_type: OutputType,
        dtypes: Union[pd.Series, List[Any], Dict[str, Any]] = None,
        dtype: Any = None,
        name: Any = None,
        index: Union[pd.Index, IndexValue] = None,
        elementwise: bool = None,
    ) -> InferredDataFrameMeta:
        """Infer output metadata and merge it with user-specified values.

        Inference is delegated to ``infer_dataframe_return_value`` with a
        callback that performs the actual ``apply`` call. Afterwards
        explicitly given ``dtypes``, ``index`` and ``elementwise`` take
        precedence over inferred values, and ``self.output_types`` is filled
        from the inferred output type when it was empty.
        """
        groupby_params = input_groupby.op.groupby_params

        def infer_func(groupby_obj):
            args = copy_if_possible(self.args or ())
            kwargs = copy_if_possible(self.kwargs or {})

            # Unwrap nested grouped objects down to the underlying data.
            in_obj = input_groupby
            while isinstance(in_obj, GROUPBY_TYPE):
                in_obj = in_obj.inputs[0]

            by_cols = make_column_list(groupby_params.get("by"), in_obj.dtypes) or []
            if not groupby_params.get("selection"):
                # Without an explicit selection, group columns are dropped
                # from what the function receives.
                selection = [
                    c for c in input_groupby.inputs[0].dtypes.index if c not in by_cols
                ]
                groupby_obj = groupby_obj[selection]
            res = groupby_obj.apply(self.func, *args, **kwargs)
            if _need_enforce_group_keys and groupby_params.get("group_keys"):
                # Prepend the group keys to the result index by hand.
                by_levels = (
                    make_column_list(groupby_params.get("level"), in_obj.index.names)
                    or []
                )

                idx_df = res.index.to_frame()
                if by_cols:
                    idx_names = by_cols + list(res.index.names)
                    mock_idx_df = build_empty_df(
                        in_obj.dtypes[by_cols], index=idx_df.index
                    )
                else:
                    idx_names = by_levels + list(res.index.names)
                    if len(in_obj.index.names) > 1:
                        idx_dtypes = in_obj.index_value.value.dtypes
                    else:
                        idx_dtypes = pd.Series(
                            [in_obj.index.dtype], index=[in_obj.index.name]
                        )
                    mock_idx_df = build_empty_df(
                        idx_dtypes[by_levels], index=idx_df.index
                    )
                idx_df = pd.concat([mock_idx_df, idx_df], axis=1)
                res.index = pd.MultiIndex.from_frame(idx_df, names=idx_names)
            return res

        inferred_meta = infer_dataframe_return_value(
            input_groupby,
            infer_func,
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index=index,
            elementwise=elementwise,
        )

        # merge specified and inferred index, dtypes, output_type
        # elementwise used to decide shape
        self.output_types = (
            [inferred_meta.output_type]
            if not self.output_types and inferred_meta.output_type
            else self.output_types
        )
        if self.output_types:
            inferred_meta.output_type = self.output_types[0]
        inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
        if index is not None:
            # Reuse the input's index value when that exact object was
            # passed; anything else is parsed into a new index value.
            inferred_meta.index_value = (
                parse_index(index)
                if index is not input_groupby.index_value
                else input_groupby.index_value
            )
        inferred_meta.elementwise = elementwise or inferred_meta.elementwise
        return inferred_meta

    @classmethod
    def estimate_size(
        cls,
        ctx: MutableMapping[str, Union[int, float]],
        op: "GroupByApplyChunk",
    ) -> None:
        """Record a size estimate for ``op`` in ``ctx``.

        If ``op.func`` is a ``MarkedFunction``, the first output's key is
        mapped to infinity; the parent implementation is then invoked.
        """
        if isinstance(op.func, MarkedFunction):
            ctx[op.outputs[0].key] = float("inf")
        # NOTE(review): nesting reconstructed from a flattened listing; the
        # parent call is assumed to be unconditional -- confirm upstream.
        super().estimate_size(ctx, op)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def df_groupby_apply_chunk(
    dataframe_groupby,
    func: Union[str, Callable],
    batch_rows=None,
    dtypes=None,
    dtype=None,
    name=None,
    output_type=None,
    index=None,
    skip_infer=False,
    args=(),
    **kwargs,
):
    """
    Apply function `func` group-wise and combine the results together.
    The pandas DataFrame given to the function is a chunk of the input
    dataframe, considered as a batch of rows.

    The function passed to `apply` must take a dataframe as its first
    argument and return a DataFrame, Series or scalar. `apply` will
    then take care of combining the results back together into a single
    dataframe or series. `apply` is therefore a highly flexible
    grouping method.

    Don't expect to receive all rows of the DataFrame in the function,
    as it depends on the implementation of MaxFrame and the internal
    running state of MaxCompute.

    Parameters
    ----------
    dataframe_groupby : DataFrameGroupBy or SeriesGroupBy
        Grouped object to apply `func` on.

    func : callable
        A callable that takes a dataframe as its first argument, and
        returns a dataframe, a series or a scalar. In addition the
        callable may take positional and keyword arguments.

    batch_rows : int
        Specify expected number of rows in a batch, as well as the len of
        function input dataframe. When the remaining data is insufficient,
        it may be less than this number.

    output_type : {'dataframe', 'series'}, default None
        Specify type of returned object. See `Notes` for more details.

    dtypes : Series, default None
        Specify dtypes of returned DataFrames. See `Notes` for more details.

    dtype : numpy.dtype, default None
        Specify dtype of returned Series. See `Notes` for more details.

    name : str, default None
        Specify name of returned Series. See `Notes` for more details.

    index : Index, default None
        Specify index of returned object. See `Notes` for more details.

    skip_infer: bool, default False
        Whether to skip inferring dtypes when dtypes or output_type is not
        specified.

    args, kwargs : tuple and dict
        Optional positional and keyword arguments to pass to `func`.

    Returns
    -------
    applied : Series or DataFrame

    Raises
    ------
    TypeError
        If `func` is not callable, or `batch_rows` is not an integer.
    ValueError
        If `batch_rows` is not greater than 0.

    See Also
    --------
    Series.apply : Apply a function to a Series.
    DataFrame.apply : Apply a function to each row or column of a DataFrame.
    DataFrame.mf.apply_chunk : Apply a function to row batches of a DataFrame.

    Notes
    -----
    When deciding output dtypes and shape of the return value, MaxFrame will
    try applying ``func`` onto a mock grouped object, and the apply call
    may fail. When this happens, you need to specify the type of apply
    call (DataFrame or Series) in output_type.

    * For DataFrame output, you need to specify a list or a pandas Series
      as ``dtypes`` of output DataFrame. ``index`` of output can also be
      specified.
    * For Series output, you need to specify ``dtype`` and ``name`` of
      output Series.

    MaxFrame adopts expected behavior of pandas>=3.0 by ignoring group columns
    in user function input. If you still need a group column for your function
    input, try selecting it right after `groupby` results, for instance,
    ``df.groupby("A")[["A", "B", "C"]].mf.apply_chunk(func)`` will pass data of
    column A into ``func``.
    """
    # Validate cheap arguments first so bad input fails before any
    # operator is built.
    if not callable(func):
        raise TypeError("function must be a callable object")

    if batch_rows is not None:
        if not isinstance(batch_rows, int):
            raise TypeError("batch_rows must be an integer")
        elif batch_rows <= 0:
            raise ValueError("batch_rows must be greater than 0")

    if dtype is not None:
        dtype = make_dtype(dtype)

    # ``output_types`` / ``object_type`` are alternative spellings accepted
    # through kwargs; they must not be forwarded to ``func``.
    output_types = kwargs.pop("output_types", None)
    object_type = kwargs.pop("object_type", None)
    output_types = validate_output_types(
        output_type=output_type, output_types=output_types, object_type=object_type
    )
    output_type = output_types[0] if output_types else None
    if skip_infer and output_type is None:
        # Nothing declared and inference disabled: leave the result type open.
        output_type = OutputType.df_or_series

    # bind args and kwargs
    op = GroupByApplyChunk(
        func=func,
        batch_rows=batch_rows,
        output_type=output_type,
        args=args,
        kwargs=kwargs,
        groupby_params=dataframe_groupby.op.groupby_params,
    )

    return op(
        dataframe_groupby,
        dtypes=dtypes,
        dtype=dtype,
        name=name,
        index=index,
        output_type=output_type,
    )
|