maxframe 1.3.1__cp311-cp311-macosx_10_9_universal2.whl → 2.0.0b1__cp311-cp311-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-311-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-311-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-311-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
from ... import opcodes
|
|
18
|
+
from ...core import EntityData
|
|
16
19
|
from ...serialization.serializables import AnyField, Int32Field
|
|
17
20
|
from ...utils import no_default
|
|
18
21
|
from ..operators import SERIES_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -28,14 +31,15 @@ class DataFrameReplace(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
28
31
|
regex = AnyField("regex", default=None)
|
|
29
32
|
method = AnyField("method", default=no_default)
|
|
30
33
|
|
|
31
|
-
|
|
32
|
-
|
|
34
|
+
@classmethod
|
|
35
|
+
def _set_inputs(cls, op: "DataFrameReplace", inputs: List[EntityData]):
|
|
36
|
+
super()._set_inputs(op, inputs)
|
|
33
37
|
input_iter = iter(inputs)
|
|
34
38
|
next(input_iter)
|
|
35
|
-
if isinstance(
|
|
36
|
-
|
|
37
|
-
if isinstance(
|
|
38
|
-
|
|
39
|
+
if isinstance(op.to_replace, SERIES_TYPE):
|
|
40
|
+
op.to_replace = next(input_iter)
|
|
41
|
+
if isinstance(op.value, SERIES_TYPE):
|
|
42
|
+
op.value = next(input_iter)
|
|
39
43
|
|
|
40
44
|
def __call__(self, df_or_series):
|
|
41
45
|
inputs = [df_or_series]
|
|
@@ -64,7 +64,7 @@ def test_drop_na():
|
|
|
64
64
|
df_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list("ABCDEFGHIJ"))
|
|
65
65
|
for _ in range(30):
|
|
66
66
|
df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)
|
|
67
|
-
for
|
|
67
|
+
for _ in range(random.randint(1, 5)):
|
|
68
68
|
row = random.randint(0, 19)
|
|
69
69
|
for idx in range(0, 10):
|
|
70
70
|
df_raw.iloc[row, idx] = random.randint(0, 99)
|
|
@@ -79,7 +79,7 @@ def test_replace():
|
|
|
79
79
|
df_raw = pd.DataFrame(-1, index=range(0, 20), columns=list("ABCDEFGHIJ"))
|
|
80
80
|
for _ in range(30):
|
|
81
81
|
df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)
|
|
82
|
-
for
|
|
82
|
+
for _ in range(random.randint(1, 5)):
|
|
83
83
|
row = random.randint(0, 19)
|
|
84
84
|
for idx in range(0, 10):
|
|
85
85
|
df_raw.iloc[row, idx] = random.randint(0, 99)
|
maxframe/dataframe/operators.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
import pandas as pd
|
|
17
17
|
|
|
18
|
-
from ..core import
|
|
18
|
+
from ..core import OutputType
|
|
19
19
|
from ..core.operator import Operator, ShuffleProxy, TileableOperatorMixin
|
|
20
20
|
from ..tensor.core import TENSOR_TYPE
|
|
21
21
|
from ..tensor.datasource import tensor as astensor
|
|
@@ -208,32 +208,6 @@ class DataFrameOperatorMixin(TileableOperatorMixin):
|
|
|
208
208
|
inputs, shape=shape, dtype=dtype, categories_value=categories_value, **kw
|
|
209
209
|
)[0]
|
|
210
210
|
|
|
211
|
-
@classmethod
|
|
212
|
-
def _process_groupby_params(cls, groupby_params):
|
|
213
|
-
new_groupby_params = groupby_params.copy()
|
|
214
|
-
if isinstance(groupby_params["by"], list):
|
|
215
|
-
by = []
|
|
216
|
-
for v in groupby_params["by"]:
|
|
217
|
-
if isinstance(v, ENTITY_TYPE):
|
|
218
|
-
by.append(cls.concat_tileable_chunks(v).chunks[0])
|
|
219
|
-
else:
|
|
220
|
-
by.append(v)
|
|
221
|
-
new_groupby_params["by"] = by
|
|
222
|
-
return new_groupby_params
|
|
223
|
-
|
|
224
|
-
@classmethod
|
|
225
|
-
def _get_groupby_inputs(cls, groupby, groupby_params):
|
|
226
|
-
inputs = [groupby]
|
|
227
|
-
chunk_inputs = list(groupby.chunks)
|
|
228
|
-
if isinstance(groupby_params["by"], list):
|
|
229
|
-
for chunk_v, v in zip(
|
|
230
|
-
groupby_params["by"], groupby.op.groupby_params["by"]
|
|
231
|
-
):
|
|
232
|
-
if isinstance(v, ENTITY_TYPE):
|
|
233
|
-
inputs.append(v)
|
|
234
|
-
chunk_inputs.append(chunk_v)
|
|
235
|
-
return inputs, chunk_inputs
|
|
236
|
-
|
|
237
211
|
@staticmethod
|
|
238
212
|
def _process_input(x):
|
|
239
213
|
from .initializer import DataFrame, Series
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import copy
|
|
16
16
|
import functools
|
|
17
|
+
import inspect
|
|
17
18
|
import itertools
|
|
18
19
|
from collections import OrderedDict
|
|
19
20
|
from collections.abc import Iterable
|
|
@@ -21,10 +22,13 @@ from typing import List
|
|
|
21
22
|
|
|
22
23
|
import numpy as np
|
|
23
24
|
import pandas as pd
|
|
25
|
+
import pyarrow as pa
|
|
24
26
|
|
|
25
27
|
from ... import opcodes
|
|
26
28
|
from ... import tensor as maxframe_tensor
|
|
27
29
|
from ...core import ENTITY_TYPE, OutputType, enter_mode
|
|
30
|
+
from ...io.odpsio.schema import pandas_dtype_to_arrow_type
|
|
31
|
+
from ...lib.dtypes_extension import ArrowDtype
|
|
28
32
|
from ...serialization.serializables import AnyField, BoolField, DictField, ListField
|
|
29
33
|
from ...typing_ import TileableType
|
|
30
34
|
from ...utils import lazy_import, pd_release_version
|
|
@@ -37,6 +41,7 @@ from .core import (
|
|
|
37
41
|
ReductionPostStep,
|
|
38
42
|
ReductionPreStep,
|
|
39
43
|
)
|
|
44
|
+
from .unique import _unique
|
|
40
45
|
|
|
41
46
|
cp = lazy_import("cupy", rename="cp")
|
|
42
47
|
cudf = lazy_import("cudf")
|
|
@@ -71,6 +76,7 @@ _agg_functions = {
|
|
|
71
76
|
"kurt": lambda x, skipna=True, bias=False: x.kurt(skipna=skipna, bias=bias),
|
|
72
77
|
"kurtosis": lambda x, skipna=True, bias=False: x.kurtosis(skipna=skipna, bias=bias),
|
|
73
78
|
"nunique": lambda x: x.nunique(),
|
|
79
|
+
"unique": lambda x: _unique(x, output_list_scalar=True),
|
|
74
80
|
"median": lambda x, skipna=True: x.median(skipna=skipna),
|
|
75
81
|
}
|
|
76
82
|
|
|
@@ -99,6 +105,46 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
99
105
|
[np.number, np.bool_] if op.numeric_only else [np.bool_]
|
|
100
106
|
).dtypes
|
|
101
107
|
|
|
108
|
+
def _fill_df_dtypes(self, in_df, dtypes):
|
|
109
|
+
if all(dt != np.dtype("O") for dt in dtypes):
|
|
110
|
+
return dtypes
|
|
111
|
+
|
|
112
|
+
if isinstance(self.func, dict):
|
|
113
|
+
col_func_it = self.func.items()
|
|
114
|
+
else:
|
|
115
|
+
assert in_df.ndim == 2
|
|
116
|
+
col_func_it = itertools.product(in_df.dtypes.index, self.func)
|
|
117
|
+
|
|
118
|
+
col_to_dt = dict(in_df.dtypes.items())
|
|
119
|
+
|
|
120
|
+
new_dt = OrderedDict()
|
|
121
|
+
for (col_name, func), (out_col_name, dt) in zip(col_func_it, dtypes.items()):
|
|
122
|
+
if dt != np.dtype("O"):
|
|
123
|
+
new_dt[out_col_name] = dt
|
|
124
|
+
elif func == "unique":
|
|
125
|
+
in_dt = col_to_dt[col_name]
|
|
126
|
+
if in_dt == np.dtype("O"):
|
|
127
|
+
in_dt = pd.StringDtype()
|
|
128
|
+
arrow_dt = pandas_dtype_to_arrow_type(in_dt)
|
|
129
|
+
new_dt[out_col_name] = ArrowDtype(pa.list_(arrow_dt))
|
|
130
|
+
else:
|
|
131
|
+
# do nothing as the result might be string
|
|
132
|
+
new_dt[out_col_name] = dt
|
|
133
|
+
return pd.Series(list(new_dt.values()), index=new_dt.keys())
|
|
134
|
+
|
|
135
|
+
def _fill_series_dtype(self, in_data, dtype):
|
|
136
|
+
if len(self.func) != 1 or dtype != np.dtype("O") or in_data.ndim > 1:
|
|
137
|
+
return dtype
|
|
138
|
+
|
|
139
|
+
if self.func[0] == "unique":
|
|
140
|
+
in_dt = in_data.dtype
|
|
141
|
+
if in_dt == np.dtype("O"):
|
|
142
|
+
in_dt = pd.StringDtype()
|
|
143
|
+
arrow_dt = pandas_dtype_to_arrow_type(in_dt)
|
|
144
|
+
return ArrowDtype(pa.list_(arrow_dt))
|
|
145
|
+
else:
|
|
146
|
+
return dtype
|
|
147
|
+
|
|
102
148
|
def _calc_result_shape(self, df):
|
|
103
149
|
if df.ndim == 2:
|
|
104
150
|
if self.numeric_only:
|
|
@@ -114,16 +160,23 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
114
160
|
)
|
|
115
161
|
|
|
116
162
|
result_df = test_obj.agg(self.raw_func, axis=self.axis, **self.raw_func_kw)
|
|
163
|
+
if isinstance(result_df, pd.DataFrame):
|
|
164
|
+
out_dtypes = self._fill_df_dtypes(df, result_df.dtypes)
|
|
165
|
+
elif isinstance(result_df, pd.Series):
|
|
166
|
+
dtype = self._fill_series_dtype(df, result_df.dtype)
|
|
167
|
+
out_dtypes = pd.Series([dtype], index=[result_df.name])
|
|
168
|
+
else:
|
|
169
|
+
out_dtypes = pd.Series([np.array(result_df).dtype], index=[None])
|
|
117
170
|
|
|
118
171
|
if isinstance(result_df, pd.DataFrame):
|
|
119
172
|
self.output_types = [OutputType.dataframe]
|
|
120
|
-
return
|
|
173
|
+
return out_dtypes, result_df.index
|
|
121
174
|
elif isinstance(result_df, pd.Series):
|
|
122
175
|
self.output_types = [OutputType.series]
|
|
123
|
-
return
|
|
176
|
+
return out_dtypes, result_df.index
|
|
124
177
|
else:
|
|
125
178
|
self.output_types = [OutputType.scalar]
|
|
126
|
-
return
|
|
179
|
+
return out_dtypes.iloc[0], None
|
|
127
180
|
|
|
128
181
|
def __call__(self, df, output_type=None, dtypes=None, index=None):
|
|
129
182
|
self._output_types = df.op.output_types
|
|
@@ -379,6 +432,15 @@ def aggregate(df, func=None, axis=0, **kw):
|
|
|
379
432
|
min 1
|
|
380
433
|
"""
|
|
381
434
|
axis = validate_axis(axis, df)
|
|
435
|
+
if func == "unique":
|
|
436
|
+
# workaround for direct call of unique function which
|
|
437
|
+
# returns a tensor directly
|
|
438
|
+
func = getattr(df, func)
|
|
439
|
+
if "axis" in inspect.getfullargspec(func).args:
|
|
440
|
+
kw = kw.copy()
|
|
441
|
+
kw["axis"] = axis
|
|
442
|
+
return func(**kw)
|
|
443
|
+
|
|
382
444
|
if (
|
|
383
445
|
df.ndim == 2
|
|
384
446
|
and isinstance(func, dict)
|
|
@@ -226,7 +226,9 @@ class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
|
226
226
|
# handle pandas Dtypes in the future more carefully.
|
|
227
227
|
reduced_dtype = np.dtype("O")
|
|
228
228
|
else:
|
|
229
|
-
|
|
229
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
230
|
+
|
|
231
|
+
reduced_dtype = find_common_type(dtypes)
|
|
230
232
|
|
|
231
233
|
if level is not None:
|
|
232
234
|
return self._call_groupby_level(df[reduced_cols], level)
|
|
@@ -31,7 +31,7 @@ def median_series(df, axis=None, skipna=True, level=None, method=None):
|
|
|
31
31
|
axis=axis,
|
|
32
32
|
skipna=skipna,
|
|
33
33
|
level=level,
|
|
34
|
-
output_types=[OutputType.
|
|
34
|
+
output_types=[OutputType.series if level is not None else OutputType.scalar],
|
|
35
35
|
method=method,
|
|
36
36
|
)
|
|
37
37
|
return op(df)
|
|
@@ -19,9 +19,11 @@ from typing import NamedTuple
|
|
|
19
19
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
import pandas as pd
|
|
22
|
+
import pyarrow as pa
|
|
22
23
|
import pytest
|
|
23
24
|
|
|
24
25
|
from .... import dataframe as md
|
|
26
|
+
from ....lib.dtypes_extension import ArrowDtype
|
|
25
27
|
from ....tensor import Tensor
|
|
26
28
|
from ....tests.utils import assert_mf_index_dtype
|
|
27
29
|
from ...core import DataFrame, IndexValue, OutputType, Series
|
|
@@ -29,6 +31,7 @@ from ...datasource.dataframe import from_pandas as from_pandas_df
|
|
|
29
31
|
from ...datasource.series import from_pandas as from_pandas_series
|
|
30
32
|
from .. import (
|
|
31
33
|
CustomReduction,
|
|
34
|
+
DataFrameAggregate,
|
|
32
35
|
DataFrameAll,
|
|
33
36
|
DataFrameAny,
|
|
34
37
|
DataFrameCount,
|
|
@@ -196,6 +199,36 @@ def test_nunique():
|
|
|
196
199
|
assert isinstance(result2.op, DataFrameNunique)
|
|
197
200
|
|
|
198
201
|
|
|
202
|
+
def test_unique():
|
|
203
|
+
pd_df = pd.DataFrame(
|
|
204
|
+
{
|
|
205
|
+
"col1": pd.Series(np.random.choice(["a", "b", "c", "d"], 100)),
|
|
206
|
+
"col2": pd.Series(np.random.choice([0, 1, 2, 3], 100)),
|
|
207
|
+
}
|
|
208
|
+
)
|
|
209
|
+
df = from_pandas_df(pd_df, chunk_size=3)
|
|
210
|
+
result = df.agg(["unique"])
|
|
211
|
+
|
|
212
|
+
assert result.shape == (1, 2)
|
|
213
|
+
assert result.op.output_types[0] == OutputType.dataframe
|
|
214
|
+
assert isinstance(result.op, DataFrameAggregate)
|
|
215
|
+
pd.testing.assert_series_equal(
|
|
216
|
+
result.dtypes,
|
|
217
|
+
pd.Series(
|
|
218
|
+
[ArrowDtype(pa.list_(pa.string())), ArrowDtype(pa.list_(pa.int64()))],
|
|
219
|
+
index=pd_df.columns,
|
|
220
|
+
),
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
pd_s = pd.Series(np.random.choice(["a", "b", "c", "d"], 100))
|
|
224
|
+
ms = from_pandas_series(pd_s, chunk_size=3)
|
|
225
|
+
result = ms.agg(["unique"])
|
|
226
|
+
assert result.shape == (1,)
|
|
227
|
+
assert result.op.output_types[0] == OutputType.series
|
|
228
|
+
assert isinstance(result.op, DataFrameAggregate)
|
|
229
|
+
assert result.dtype == ArrowDtype(pa.list_(pa.string()))
|
|
230
|
+
|
|
231
|
+
|
|
199
232
|
def test_dataframe_aggregate():
|
|
200
233
|
data = pd.DataFrame(np.random.rand(20, 19))
|
|
201
234
|
agg_funcs = [
|
|
@@ -12,14 +12,21 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
import numpy as np
|
|
17
16
|
import pandas as pd
|
|
17
|
+
import pyarrow as pa
|
|
18
18
|
|
|
19
19
|
from ... import opcodes
|
|
20
20
|
from ...core import ENTITY_TYPE, OutputType
|
|
21
|
+
from ...io.odpsio.schema import (
|
|
22
|
+
pandas_dtype_to_arrow_type,
|
|
23
|
+
pandas_dtypes_to_arrow_schema,
|
|
24
|
+
)
|
|
25
|
+
from ...lib.dtypes_extension import ArrowDtype
|
|
26
|
+
from ...serialization.serializables import BoolField
|
|
21
27
|
from ...tensor.core import TensorOrder
|
|
22
28
|
from ...utils import lazy_import
|
|
29
|
+
from ..core import DATAFRAME_TYPE
|
|
23
30
|
from ..initializer import Series as asseries
|
|
24
31
|
from .core import CustomReduction, DataFrameReductionMixin, DataFrameReductionOperator
|
|
25
32
|
|
|
@@ -40,6 +47,12 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
40
47
|
_op_type_ = opcodes.UNIQUE
|
|
41
48
|
_func_name = "unique"
|
|
42
49
|
|
|
50
|
+
output_list_scalar = BoolField("output_list_scalar", default=False)
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def is_atomic(self):
|
|
54
|
+
return True
|
|
55
|
+
|
|
43
56
|
@classmethod
|
|
44
57
|
def get_reduction_callable(cls, op):
|
|
45
58
|
return UniqueReduction(name=cls._func_name, is_gpu=op.is_gpu())
|
|
@@ -47,10 +60,43 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
47
60
|
def __call__(self, a):
|
|
48
61
|
if not isinstance(a, ENTITY_TYPE):
|
|
49
62
|
a = asseries(a)
|
|
50
|
-
self.
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
63
|
+
self.axis = 0
|
|
64
|
+
if isinstance(a, DATAFRAME_TYPE):
|
|
65
|
+
assert self.output_list_scalar and self.axis == 0
|
|
66
|
+
pa_schema = pandas_dtypes_to_arrow_schema(a.dtypes, unknown_as_string=True)
|
|
67
|
+
if len(set(pa_schema.types)) == 1:
|
|
68
|
+
out_dtype = ArrowDtype(pa.list_(pa_schema.types[0]))
|
|
69
|
+
else:
|
|
70
|
+
out_dtype = np.dtype("O")
|
|
71
|
+
kw = {
|
|
72
|
+
"dtype": out_dtype,
|
|
73
|
+
"index_value": a.columns_value,
|
|
74
|
+
"shape": (a.shape[1],),
|
|
75
|
+
}
|
|
76
|
+
self.output_types = [OutputType.series]
|
|
77
|
+
return self.new_tileables([a], **kw)[0]
|
|
78
|
+
else:
|
|
79
|
+
if self.output_list_scalar:
|
|
80
|
+
arrow_type = pa.list_(
|
|
81
|
+
pandas_dtype_to_arrow_type(a.dtype, unknown_as_string=True)
|
|
82
|
+
)
|
|
83
|
+
kw = {
|
|
84
|
+
"dtype": ArrowDtype(arrow_type),
|
|
85
|
+
"shape": (),
|
|
86
|
+
}
|
|
87
|
+
self.output_types = [OutputType.scalar]
|
|
88
|
+
else:
|
|
89
|
+
kw = {
|
|
90
|
+
"dtype": a.dtype,
|
|
91
|
+
"shape": (np.nan,),
|
|
92
|
+
}
|
|
93
|
+
self.output_types = [OutputType.tensor]
|
|
94
|
+
return self.new_tileables([a], order=TensorOrder.C_ORDER, **kw)[0]
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _unique(values, method="tree", **kwargs):
|
|
98
|
+
op = DataFrameUnique(method=method, **kwargs)
|
|
99
|
+
return op(values)
|
|
54
100
|
|
|
55
101
|
|
|
56
102
|
def unique(values, method="tree"):
|
|
@@ -62,6 +108,7 @@ def unique(values, method="tree"):
|
|
|
62
108
|
values : 1d array-like
|
|
63
109
|
method : 'shuffle' or 'tree', 'tree' method provide a better performance, 'shuffle'
|
|
64
110
|
is recommended if the number of unique values is very large.
|
|
111
|
+
|
|
65
112
|
See Also
|
|
66
113
|
--------
|
|
67
114
|
Index.unique
|
|
@@ -86,5 +133,4 @@ def unique(values, method="tree"):
|
|
|
86
133
|
array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')],
|
|
87
134
|
dtype=object)
|
|
88
135
|
"""
|
|
89
|
-
|
|
90
|
-
return op(values)
|
|
136
|
+
return _unique(values, method=method)
|
|
@@ -12,11 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
|
|
18
20
|
from ... import opcodes
|
|
19
|
-
from ...core import ENTITY_TYPE
|
|
21
|
+
from ...core import ENTITY_TYPE, EntityData
|
|
20
22
|
from ...serialization.serializables import AnyField, BoolField, Int32Field, KeyField
|
|
21
23
|
from ...tensor.utils import filter_inputs
|
|
22
24
|
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
@@ -33,12 +35,13 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
33
35
|
axis = Int32Field("axis", default=None)
|
|
34
36
|
drop = BoolField("drop", default=None)
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
@classmethod
|
|
39
|
+
def _set_inputs(cls, op: "DataFrameCorr", inputs: List[EntityData]):
|
|
40
|
+
super()._set_inputs(op, inputs)
|
|
41
|
+
inputs_iter = iter(op._inputs)
|
|
39
42
|
next(inputs_iter)
|
|
40
|
-
if isinstance(
|
|
41
|
-
|
|
43
|
+
if isinstance(op.other, ENTITY_TYPE):
|
|
44
|
+
op.other = next(inputs_iter)
|
|
42
45
|
|
|
43
46
|
def __call__(self, df_or_series):
|
|
44
47
|
if isinstance(df_or_series, SERIES_TYPE):
|
|
@@ -12,12 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
from pandas.core.dtypes.cast import find_common_type
|
|
18
20
|
|
|
19
21
|
from ... import opcodes
|
|
20
|
-
from ...core import ENTITY_TYPE
|
|
22
|
+
from ...core import ENTITY_TYPE, EntityData
|
|
21
23
|
from ...serialization.serializables import (
|
|
22
24
|
AnyField,
|
|
23
25
|
BoolField,
|
|
@@ -50,11 +52,12 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
50
52
|
def __init__(self, output_types=None, **kw):
|
|
51
53
|
super().__init__(_output_types=output_types, **kw)
|
|
52
54
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
@classmethod
|
|
56
|
+
def _set_inputs(cls, op: "DataFrameQuantile", inputs: List[EntityData]):
|
|
57
|
+
super()._set_inputs(op, inputs)
|
|
58
|
+
op.input = op._inputs[0]
|
|
59
|
+
if isinstance(op.q, TENSOR_TYPE):
|
|
60
|
+
op.q = op._inputs[-1]
|
|
58
61
|
|
|
59
62
|
def _calc_dtype_on_axis_1(self, a, dtypes):
|
|
60
63
|
quantile_dtypes = []
|
|
@@ -12,13 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Any
|
|
15
|
+
from typing import Any, List
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
import pandas as pd
|
|
19
19
|
from pandas.api.types import is_dict_like, is_scalar
|
|
20
20
|
|
|
21
21
|
from ... import opcodes
|
|
22
|
+
from ...core import EntityData
|
|
22
23
|
from ...serialization.serializables import AnyField, BoolField, KeyField, StringField
|
|
23
24
|
from ...tensor import tensor as astensor
|
|
24
25
|
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
@@ -54,9 +55,10 @@ class DataFrameToDatetime(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
54
55
|
if k not in self._no_copy_attrs_ and k != "arg" and hasattr(self, k)
|
|
55
56
|
)
|
|
56
57
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
58
|
+
@classmethod
|
|
59
|
+
def _set_inputs(cls, op: "DataFrameToDatetime", inputs: List[EntityData]):
|
|
60
|
+
super()._set_inputs(op, inputs)
|
|
61
|
+
op.arg = op._inputs[0]
|
|
60
62
|
|
|
61
63
|
def __call__(self, arg):
|
|
62
64
|
if is_scalar(arg):
|