maxframe 1.3.1__cp38-cp38-win32.whl → 2.0.0b1__cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +0 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import functools
|
|
16
|
-
from typing import Any, Callable, Dict, List, Tuple, Union
|
|
16
|
+
from typing import Any, Callable, Dict, List, MutableMapping, Tuple, Union
|
|
17
17
|
|
|
18
18
|
import numpy as np
|
|
19
19
|
import pandas as pd
|
|
@@ -26,22 +26,24 @@ from ...serialization.serializables import (
|
|
|
26
26
|
Int32Field,
|
|
27
27
|
TupleField,
|
|
28
28
|
)
|
|
29
|
-
from ...
|
|
29
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
30
|
+
from ...utils import copy_if_possible, make_dtype, make_dtypes
|
|
30
31
|
from ..core import DATAFRAME_TYPE, DataFrame, IndexValue, Series
|
|
31
32
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
32
33
|
from ..utils import (
|
|
34
|
+
InferredDataFrameMeta,
|
|
33
35
|
build_df,
|
|
34
|
-
build_series,
|
|
35
36
|
copy_func_scheduling_hints,
|
|
36
|
-
|
|
37
|
+
infer_dataframe_return_value,
|
|
37
38
|
pack_func_args,
|
|
38
39
|
parse_index,
|
|
39
40
|
validate_output_types,
|
|
40
41
|
)
|
|
41
42
|
|
|
42
43
|
|
|
43
|
-
class
|
|
44
|
+
class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
44
45
|
_op_type_ = opcodes.APPLY_CHUNK
|
|
46
|
+
_legacy_name = "DataFrameApplyChunkOperator"
|
|
45
47
|
|
|
46
48
|
func = FunctionField("func")
|
|
47
49
|
batch_rows = Int32Field("batch_rows", default=None)
|
|
@@ -55,7 +57,10 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
55
57
|
if hasattr(self, "func"):
|
|
56
58
|
copy_func_scheduling_hints(self.func, self)
|
|
57
59
|
|
|
58
|
-
def
|
|
60
|
+
def has_custom_code(self) -> bool:
|
|
61
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
62
|
+
|
|
63
|
+
def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
|
|
59
64
|
# return dataframe
|
|
60
65
|
if self.output_types[0] == OutputType.dataframe:
|
|
61
66
|
dtypes = make_dtypes(dtypes)
|
|
@@ -69,26 +74,13 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
69
74
|
)
|
|
70
75
|
|
|
71
76
|
# return series
|
|
72
|
-
if not isinstance(dtypes, tuple):
|
|
73
|
-
raise TypeError(
|
|
74
|
-
"Cannot determine dtype, " "please specify `dtype` as argument"
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
name, dtype = dtypes
|
|
78
77
|
return self.new_series(
|
|
79
78
|
[df], shape=(np.nan,), name=name, dtype=dtype, index_value=index_value
|
|
80
79
|
)
|
|
81
80
|
|
|
82
|
-
def _call_series(self, series, dtypes, index_value, element_wise):
|
|
81
|
+
def _call_series(self, series, dtypes, dtype, name, index_value, element_wise):
|
|
83
82
|
if self.output_types[0] == OutputType.series:
|
|
84
|
-
if not isinstance(dtypes, tuple):
|
|
85
|
-
raise TypeError(
|
|
86
|
-
"Cannot determine dtype, " "please specify `dtype` as argument"
|
|
87
|
-
)
|
|
88
|
-
|
|
89
|
-
name, dtype = dtypes
|
|
90
83
|
shape = series.shape if element_wise else (np.nan,)
|
|
91
|
-
|
|
92
84
|
return self.new_series(
|
|
93
85
|
[series],
|
|
94
86
|
dtype=dtype,
|
|
@@ -110,6 +102,8 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
110
102
|
self,
|
|
111
103
|
df_or_series: Union[DataFrame, Series],
|
|
112
104
|
dtypes: Union[Tuple[str, Any], Dict[str, Any]] = None,
|
|
105
|
+
dtype: Any = None,
|
|
106
|
+
name: Any = None,
|
|
113
107
|
output_type=None,
|
|
114
108
|
index=None,
|
|
115
109
|
):
|
|
@@ -123,145 +117,104 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
123
117
|
return self.new_df_or_series([df_or_series])
|
|
124
118
|
|
|
125
119
|
# infer return index and dtypes
|
|
126
|
-
|
|
120
|
+
inferred_meta = self._infer_batch_func_returns(
|
|
127
121
|
df_or_series,
|
|
128
|
-
origin_func=self.func,
|
|
129
122
|
packed_func=packed_func,
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
123
|
+
output_type=output_type,
|
|
124
|
+
dtypes=dtypes,
|
|
125
|
+
dtype=dtype,
|
|
126
|
+
name=name,
|
|
127
|
+
index=index,
|
|
133
128
|
)
|
|
134
129
|
|
|
135
|
-
if index_value is None:
|
|
136
|
-
index_value = parse_index(
|
|
130
|
+
if inferred_meta.index_value is None:
|
|
131
|
+
inferred_meta.index_value = parse_index(
|
|
137
132
|
None, (df_or_series.key, df_or_series.index_value.key, self.func)
|
|
138
133
|
)
|
|
139
|
-
|
|
140
|
-
if arg is None:
|
|
141
|
-
raise TypeError(
|
|
142
|
-
f"Cannot determine {desc} by calculating with enumerate data, "
|
|
143
|
-
"please specify it as arguments"
|
|
144
|
-
)
|
|
145
|
-
|
|
146
|
-
if dtypes is None or len(dtypes) == 0:
|
|
147
|
-
raise TypeError(
|
|
148
|
-
"Cannot determine {dtypes} or {dtype} by calculating with enumerate data, "
|
|
149
|
-
"please specify it as arguments"
|
|
150
|
-
)
|
|
134
|
+
inferred_meta.check_absence("output_type", "dtypes", "dtype")
|
|
151
135
|
|
|
152
136
|
if isinstance(df_or_series, DATAFRAME_TYPE):
|
|
153
137
|
return self._call_dataframe(
|
|
154
138
|
df_or_series,
|
|
155
|
-
dtypes=dtypes,
|
|
156
|
-
|
|
157
|
-
|
|
139
|
+
dtypes=inferred_meta.dtypes,
|
|
140
|
+
dtype=inferred_meta.dtype,
|
|
141
|
+
name=inferred_meta.name,
|
|
142
|
+
index_value=inferred_meta.index_value,
|
|
143
|
+
element_wise=inferred_meta.elementwise,
|
|
158
144
|
)
|
|
159
145
|
|
|
160
146
|
return self._call_series(
|
|
161
147
|
df_or_series,
|
|
162
|
-
dtypes=dtypes,
|
|
163
|
-
|
|
164
|
-
|
|
148
|
+
dtypes=inferred_meta.dtypes,
|
|
149
|
+
dtype=inferred_meta.dtype,
|
|
150
|
+
name=inferred_meta.name,
|
|
151
|
+
index_value=inferred_meta.index_value,
|
|
152
|
+
element_wise=inferred_meta.elementwise,
|
|
165
153
|
)
|
|
166
154
|
|
|
167
155
|
def _infer_batch_func_returns(
|
|
168
156
|
self,
|
|
169
157
|
input_df_or_series: Union[DataFrame, Series],
|
|
170
|
-
origin_func: Union[str, Callable, np.ufunc],
|
|
171
158
|
packed_func: Union[Callable, functools.partial],
|
|
172
|
-
|
|
173
|
-
given_dtypes: Union[Tuple[str, Any], pd.Series, List[Any], Dict[str, Any]],
|
|
174
|
-
given_index: Union[pd.Index, IndexValue],
|
|
175
|
-
given_elementwise: bool = False,
|
|
159
|
+
output_type: OutputType,
|
|
176
160
|
*args,
|
|
161
|
+
dtypes: Union[pd.Series, List[Any], Dict[str, Any]] = None,
|
|
162
|
+
dtype: Any = None,
|
|
163
|
+
name: Any = None,
|
|
164
|
+
index: Union[pd.Index, IndexValue] = None,
|
|
165
|
+
elementwise: bool = None,
|
|
177
166
|
**kwargs,
|
|
178
|
-
):
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
inferred_dtypes = given_dtypes
|
|
190
|
-
|
|
191
|
-
# build same schema frame toto execute
|
|
192
|
-
if isinstance(input_df_or_series, DATAFRAME_TYPE):
|
|
193
|
-
empty_data = build_df(input_df_or_series, fill_value=1, size=1)
|
|
194
|
-
else:
|
|
195
|
-
empty_data = build_series(
|
|
196
|
-
input_df_or_series, size=1, name=input_df_or_series.name
|
|
197
|
-
)
|
|
198
|
-
|
|
199
|
-
try:
|
|
200
|
-
# execute
|
|
201
|
-
with np.errstate(all="ignore"), quiet_stdio():
|
|
202
|
-
infer_result = packed_func(empty_data, *args, **kwargs)
|
|
203
|
-
|
|
204
|
-
# if executed successfully, get index and dtypes from returned object
|
|
205
|
-
if inferred_index_value is None:
|
|
206
|
-
if (
|
|
207
|
-
infer_result is None
|
|
208
|
-
or not hasattr(infer_result, "index")
|
|
209
|
-
or infer_result.index is None
|
|
210
|
-
):
|
|
211
|
-
inferred_index_value = parse_index(pd.RangeIndex(-1))
|
|
212
|
-
elif infer_result.index is empty_data.index:
|
|
213
|
-
inferred_index_value = input_df_or_series.index_value
|
|
214
|
-
else:
|
|
215
|
-
inferred_index_value = parse_index(infer_result.index, packed_func)
|
|
216
|
-
|
|
217
|
-
if isinstance(infer_result, pd.DataFrame):
|
|
218
|
-
if (
|
|
219
|
-
given_output_type is not None
|
|
220
|
-
and given_output_type != OutputType.dataframe
|
|
221
|
-
):
|
|
222
|
-
raise TypeError(
|
|
223
|
-
f'Cannot infer output_type as "series", '
|
|
224
|
-
f'please specify `output_type` as "dataframe"'
|
|
225
|
-
)
|
|
226
|
-
inferred_output_type = given_output_type or OutputType.dataframe
|
|
227
|
-
inferred_dtypes = (
|
|
228
|
-
given_dtypes if given_dtypes is not None else infer_result.dtypes
|
|
229
|
-
)
|
|
230
|
-
else:
|
|
231
|
-
if (
|
|
232
|
-
given_output_type is not None
|
|
233
|
-
and given_output_type == OutputType.dataframe
|
|
234
|
-
):
|
|
235
|
-
raise TypeError(
|
|
236
|
-
f'Cannot infer output_type as "dataframe", '
|
|
237
|
-
f'please specify `output_type` as "series"'
|
|
238
|
-
)
|
|
239
|
-
inferred_output_type = given_output_type or OutputType.series
|
|
240
|
-
inferred_dtypes = (infer_result.name, infer_result.dtype)
|
|
241
|
-
except: # noqa: E722
|
|
242
|
-
pass
|
|
167
|
+
) -> InferredDataFrameMeta:
|
|
168
|
+
inferred_meta = infer_dataframe_return_value(
|
|
169
|
+
input_df_or_series,
|
|
170
|
+
functools.partial(packed_func, *args, **kwargs),
|
|
171
|
+
output_type=output_type,
|
|
172
|
+
dtypes=dtypes,
|
|
173
|
+
dtype=dtype,
|
|
174
|
+
name=name,
|
|
175
|
+
index=index,
|
|
176
|
+
elementwise=elementwise,
|
|
177
|
+
)
|
|
243
178
|
|
|
244
179
|
# merge specified and inferred index, dtypes, output_type
|
|
245
180
|
# elementwise used to decide shape
|
|
246
181
|
self.output_types = (
|
|
247
|
-
[
|
|
248
|
-
if not self.output_types and
|
|
182
|
+
[inferred_meta.output_type]
|
|
183
|
+
if not self.output_types and inferred_meta.output_type
|
|
249
184
|
else self.output_types
|
|
250
185
|
)
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
186
|
+
if self.output_types:
|
|
187
|
+
inferred_meta.output_type = self.output_types[0]
|
|
188
|
+
inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
|
|
189
|
+
if index is not None:
|
|
190
|
+
inferred_meta.index_value = (
|
|
191
|
+
parse_index(index)
|
|
192
|
+
if index is not input_df_or_series.index_value
|
|
256
193
|
else input_df_or_series.index_value
|
|
257
194
|
)
|
|
258
|
-
|
|
259
|
-
return
|
|
195
|
+
inferred_meta.elementwise = elementwise or inferred_meta.elementwise
|
|
196
|
+
return inferred_meta
|
|
197
|
+
|
|
198
|
+
@classmethod
|
|
199
|
+
def estimate_size(
|
|
200
|
+
cls,
|
|
201
|
+
ctx: MutableMapping[str, Union[int, float]],
|
|
202
|
+
op: "DataFrameApplyChunk",
|
|
203
|
+
) -> None:
|
|
204
|
+
if isinstance(op.func, MarkedFunction):
|
|
205
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
206
|
+
super().estimate_size(ctx, op)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Keep for import compatibility
|
|
210
|
+
DataFrameApplyChunkOperator = DataFrameApplyChunk
|
|
260
211
|
|
|
261
212
|
|
|
262
213
|
def get_packed_func(df, func, *args, **kwargs) -> Any:
|
|
263
214
|
stub_df = build_df(df, fill_value=1, size=1)
|
|
264
|
-
|
|
215
|
+
n_args = copy_if_possible(args)
|
|
216
|
+
n_kwargs = copy_if_possible(kwargs)
|
|
217
|
+
return pack_func_args(stub_df, func, *n_args, **n_kwargs)
|
|
265
218
|
|
|
266
219
|
|
|
267
220
|
def df_apply_chunk(
|
|
@@ -477,7 +430,8 @@ def df_apply_chunk(
|
|
|
477
430
|
elif batch_rows <= 0:
|
|
478
431
|
raise ValueError("batch_rows must be greater than 0")
|
|
479
432
|
|
|
480
|
-
|
|
433
|
+
if dtype is not None:
|
|
434
|
+
dtype = make_dtype(dtype)
|
|
481
435
|
|
|
482
436
|
output_types = kwargs.pop("output_types", None)
|
|
483
437
|
object_type = kwargs.pop("object_type", None)
|
|
@@ -489,7 +443,7 @@ def df_apply_chunk(
|
|
|
489
443
|
output_type = OutputType.df_or_series
|
|
490
444
|
|
|
491
445
|
# bind args and kwargs
|
|
492
|
-
op =
|
|
446
|
+
op = DataFrameApplyChunk(
|
|
493
447
|
func=func,
|
|
494
448
|
batch_rows=batch_rows,
|
|
495
449
|
output_type=output_type,
|
|
@@ -500,14 +454,17 @@ def df_apply_chunk(
|
|
|
500
454
|
return op(
|
|
501
455
|
dataframe,
|
|
502
456
|
dtypes=dtypes,
|
|
457
|
+
dtype=dtype,
|
|
458
|
+
name=name,
|
|
503
459
|
index=index,
|
|
460
|
+
output_type=output_type,
|
|
504
461
|
)
|
|
505
462
|
|
|
506
463
|
|
|
507
464
|
def series_apply_chunk(
|
|
508
465
|
dataframe_or_series,
|
|
509
466
|
func: Union[str, Callable],
|
|
510
|
-
batch_rows,
|
|
467
|
+
batch_rows=None,
|
|
511
468
|
dtypes=None,
|
|
512
469
|
dtype=None,
|
|
513
470
|
name=None,
|
|
@@ -714,11 +671,11 @@ def series_apply_chunk(
|
|
|
714
671
|
if not isinstance(func, Callable):
|
|
715
672
|
raise TypeError("function must be a callable object")
|
|
716
673
|
|
|
717
|
-
if not
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
674
|
+
if batch_rows is not None:
|
|
675
|
+
if not isinstance(batch_rows, int):
|
|
676
|
+
raise TypeError("batch_rows must be an integer")
|
|
677
|
+
if batch_rows <= 0:
|
|
678
|
+
raise ValueError("batch_rows must be greater than 0")
|
|
722
679
|
|
|
723
680
|
# bind args and kwargs
|
|
724
681
|
output_types = kwargs.pop("output_types", None)
|
|
@@ -730,7 +687,7 @@ def series_apply_chunk(
|
|
|
730
687
|
if skip_infer and output_type is None:
|
|
731
688
|
output_type = OutputType.df_or_series
|
|
732
689
|
|
|
733
|
-
op =
|
|
690
|
+
op = DataFrameApplyChunk(
|
|
734
691
|
func=func,
|
|
735
692
|
batch_rows=batch_rows,
|
|
736
693
|
output_type=output_type,
|
|
@@ -738,10 +695,13 @@ def series_apply_chunk(
|
|
|
738
695
|
kwargs=kwargs,
|
|
739
696
|
)
|
|
740
697
|
|
|
741
|
-
|
|
698
|
+
if dtype is not None:
|
|
699
|
+
dtype = make_dtype(dtype)
|
|
742
700
|
return op(
|
|
743
701
|
dataframe_or_series,
|
|
744
|
-
dtypes=dtypes,
|
|
702
|
+
dtypes=make_dtypes(dtypes),
|
|
703
|
+
dtype=dtype,
|
|
704
|
+
name=name,
|
|
745
705
|
output_type=output_type,
|
|
746
706
|
index=index,
|
|
747
707
|
)
|
|
@@ -18,9 +18,10 @@ from ... import opcodes
|
|
|
18
18
|
from ...core import OutputType
|
|
19
19
|
from ...serialization.serializables import ListField
|
|
20
20
|
from ...serialization.serializables.field_type import FieldTypes
|
|
21
|
+
from ...utils import make_dtype, make_dtypes
|
|
21
22
|
from ..core import DataFrame
|
|
22
23
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
23
|
-
from ..utils import
|
|
24
|
+
from ..utils import parse_index
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -36,7 +37,7 @@ class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
36
37
|
shape=series.shape,
|
|
37
38
|
index_value=series.index_value,
|
|
38
39
|
name=name,
|
|
39
|
-
dtype=dtype,
|
|
40
|
+
dtype=make_dtype(dtype),
|
|
40
41
|
)
|
|
41
42
|
return self.new_dataframe(
|
|
42
43
|
[series],
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Callable
|
|
15
|
+
from typing import Callable, MutableMapping, Union
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
import pandas as pd
|
|
@@ -25,14 +25,11 @@ from ...serialization.serializables import (
|
|
|
25
25
|
FunctionField,
|
|
26
26
|
TupleField,
|
|
27
27
|
)
|
|
28
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
29
|
+
from ...utils import make_dtypes
|
|
28
30
|
from ..core import DataFrame
|
|
29
31
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
30
|
-
from ..utils import
|
|
31
|
-
copy_func_scheduling_hints,
|
|
32
|
-
gen_unknown_index_value,
|
|
33
|
-
make_dtypes,
|
|
34
|
-
parse_index,
|
|
35
|
-
)
|
|
32
|
+
from ..utils import copy_func_scheduling_hints, gen_unknown_index_value, parse_index
|
|
36
33
|
|
|
37
34
|
|
|
38
35
|
class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -48,6 +45,9 @@ class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
48
45
|
if hasattr(self, "func"):
|
|
49
46
|
copy_func_scheduling_hints(self.func, self)
|
|
50
47
|
|
|
48
|
+
def has_custom_code(self) -> bool:
|
|
49
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
50
|
+
|
|
51
51
|
def _call_dataframe(self, df: DataFrame, dtypes: pd.Series):
|
|
52
52
|
dtypes = make_dtypes(dtypes)
|
|
53
53
|
index_value = gen_unknown_index_value(
|
|
@@ -101,6 +101,14 @@ class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
101
101
|
else:
|
|
102
102
|
return self._call_series_or_index(df_or_series, dtypes=dtypes)
|
|
103
103
|
|
|
104
|
+
@classmethod
|
|
105
|
+
def estimate_size(
|
|
106
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFlatMapOperator"
|
|
107
|
+
) -> None:
|
|
108
|
+
if isinstance(op.func, MarkedFunction):
|
|
109
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
110
|
+
super().estimate_size(ctx, op)
|
|
111
|
+
|
|
104
112
|
|
|
105
113
|
def df_flatmap(dataframe, func: Callable, dtypes=None, raw=False, args=(), **kwargs):
|
|
106
114
|
"""
|
maxframe/dataframe/fetch/core.py
CHANGED
|
@@ -12,8 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import MutableMapping, Union
|
|
16
|
+
|
|
15
17
|
from ...core import OutputType, register_fetch_class
|
|
16
|
-
from ...core.operator import Fetch, FetchMixin, FetchShuffle
|
|
18
|
+
from ...core.operator import Fetch, FetchMixin, FetchShuffle, Operator
|
|
17
19
|
from ...serialization.serializables import FieldTypes, TupleField
|
|
18
20
|
from ...utils import on_deserialize_shape, on_serialize_shape
|
|
19
21
|
from ..operators import DataFrameOperatorMixin
|
|
@@ -61,6 +63,15 @@ class DataFrameFetch(Fetch, DataFrameFetchMixin):
|
|
|
61
63
|
new_kws = self._extract_dataframe_or_series_kws(kws, **kw)
|
|
62
64
|
return super()._new_tileables(inputs, kws=new_kws, **kw)
|
|
63
65
|
|
|
66
|
+
@classmethod
|
|
67
|
+
def estimate_size(
|
|
68
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "Operator"
|
|
69
|
+
) -> None:
|
|
70
|
+
# use infinity to show that the size cannot be inferred
|
|
71
|
+
# todo when local catalyst is implemented, and it should get the estimated size
|
|
72
|
+
# from the source.
|
|
73
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
74
|
+
|
|
64
75
|
|
|
65
76
|
class DataFrameFetchShuffle(FetchShuffle, DataFrameFetchMixin):
|
|
66
77
|
# required fields
|
|
@@ -18,11 +18,14 @@ from .core import NamedAgg
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def _install():
|
|
21
|
+
from ...core import CachedAccessor
|
|
21
22
|
from ..core import DATAFRAME_GROUPBY_TYPE, DATAFRAME_TYPE, GROUPBY_TYPE, SERIES_TYPE
|
|
22
23
|
from .aggregation import agg
|
|
23
24
|
from .apply import groupby_apply
|
|
25
|
+
from .apply_chunk import df_groupby_apply_chunk
|
|
24
26
|
from .core import groupby
|
|
25
27
|
from .cum import cumcount, cummax, cummin, cumprod, cumsum
|
|
28
|
+
from .extensions import DataFrameGroupByMaxFrameAccessor
|
|
26
29
|
from .fill import bfill, ffill, fillna
|
|
27
30
|
from .getitem import df_groupby_getitem
|
|
28
31
|
from .head import head
|
|
@@ -75,8 +78,12 @@ def _install():
|
|
|
75
78
|
setattr(cls, "backfill", bfill)
|
|
76
79
|
setattr(cls, "fillna", fillna)
|
|
77
80
|
|
|
81
|
+
DataFrameGroupByMaxFrameAccessor._register("apply_chunk", df_groupby_apply_chunk)
|
|
82
|
+
|
|
78
83
|
for cls in DATAFRAME_GROUPBY_TYPE:
|
|
79
84
|
setattr(cls, "__getitem__", df_groupby_getitem)
|
|
85
|
+
if DataFrameGroupByMaxFrameAccessor._api_count:
|
|
86
|
+
cls.mf = CachedAccessor("mf", DataFrameGroupByMaxFrameAccessor)
|
|
80
87
|
|
|
81
88
|
|
|
82
89
|
_install()
|
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import functools
|
|
16
16
|
import logging
|
|
17
|
-
from typing import Callable, Dict
|
|
17
|
+
from typing import Callable, Dict, List
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import pandas as pd
|
|
21
21
|
|
|
22
22
|
from ... import opcodes
|
|
23
|
-
from ...core import ENTITY_TYPE, OutputType
|
|
23
|
+
from ...core import ENTITY_TYPE, EntityData, OutputType
|
|
24
24
|
from ...serialization.serializables import (
|
|
25
25
|
AnyField,
|
|
26
26
|
DictField,
|
|
@@ -155,17 +155,18 @@ class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
155
155
|
index_levels = Int32Field("index_levels")
|
|
156
156
|
size_recorder_name = StringField("size_recorder_name")
|
|
157
157
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
158
|
+
@classmethod
|
|
159
|
+
def _set_inputs(cls, op: "DataFrameGroupByAgg", inputs: List[EntityData]):
|
|
160
|
+
super()._set_inputs(op, inputs)
|
|
161
|
+
inputs_iter = iter(op._inputs[1:])
|
|
162
|
+
if len(op._inputs) > 1:
|
|
162
163
|
by = []
|
|
163
|
-
for v in
|
|
164
|
+
for v in op.groupby_params["by"]:
|
|
164
165
|
if isinstance(v, ENTITY_TYPE):
|
|
165
166
|
by.append(next(inputs_iter))
|
|
166
167
|
else:
|
|
167
168
|
by.append(v)
|
|
168
|
-
|
|
169
|
+
op.groupby_params["by"] = by
|
|
169
170
|
|
|
170
171
|
def _get_inputs(self, inputs):
|
|
171
172
|
if isinstance(self.groupby_params["by"], list):
|