maxframe 1.3.1__cp310-cp310-win32.whl → 2.0.0b1__cp310-cp310-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp310-win32.pyd +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cp310-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cp310-win32.pyd +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cp310-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ...serialization.serializables import AnyField
|
|
18
|
+
from ...tensor.rechunk.rechunk import chunk_size_type
|
|
19
|
+
from ...typing_ import TileableType
|
|
20
|
+
from ..core import DATAFRAME_TYPE
|
|
21
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DataFrameRechunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
25
|
+
_op_type_ = opcodes.RECHUNK
|
|
26
|
+
|
|
27
|
+
chunk_size = AnyField("chunk_size")
|
|
28
|
+
|
|
29
|
+
def __call__(self, x):
|
|
30
|
+
if isinstance(x, DATAFRAME_TYPE):
|
|
31
|
+
return self.new_dataframe(
|
|
32
|
+
[x],
|
|
33
|
+
shape=x.shape,
|
|
34
|
+
dtypes=x.dtypes,
|
|
35
|
+
columns_value=x.columns_value,
|
|
36
|
+
index_value=x.index_value,
|
|
37
|
+
)
|
|
38
|
+
else:
|
|
39
|
+
self.output_types = x.op.output_types
|
|
40
|
+
f = (
|
|
41
|
+
self.new_series
|
|
42
|
+
if self.output_types[0] == OutputType.series
|
|
43
|
+
else self.new_index
|
|
44
|
+
)
|
|
45
|
+
return f(
|
|
46
|
+
[x],
|
|
47
|
+
shape=x.shape,
|
|
48
|
+
dtype=x.dtype,
|
|
49
|
+
index_value=x.index_value,
|
|
50
|
+
name=x.name,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def rechunk(a: TileableType, chunk_size: chunk_size_type, reassign_worker=False):
|
|
55
|
+
op = DataFrameRechunk(
|
|
56
|
+
chunk_size=chunk_size,
|
|
57
|
+
reassign_worker=reassign_worker,
|
|
58
|
+
)
|
|
59
|
+
return op(a)
|
maxframe/dataframe/misc/shift.py
CHANGED
|
@@ -12,10 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import pandas as pd
|
|
16
18
|
|
|
17
19
|
from ... import opcodes
|
|
18
|
-
from ...core import OutputType
|
|
20
|
+
from ...core import EntityData, OutputType
|
|
19
21
|
from ...serialization.serializables import AnyField, Int8Field, Int64Field, KeyField
|
|
20
22
|
from ...utils import no_default, pd_release_version
|
|
21
23
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -39,9 +41,10 @@ class DataFrameShift(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
39
41
|
def input(self):
|
|
40
42
|
return self._input
|
|
41
43
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
@classmethod
|
|
45
|
+
def _set_inputs(cls, op: "DataFrameShift", inputs: List[EntityData]):
|
|
46
|
+
super()._set_inputs(op, inputs)
|
|
47
|
+
op._input = op._inputs[0]
|
|
45
48
|
|
|
46
49
|
def _call_dataframe(self, df):
|
|
47
50
|
test_df = build_df(df)
|
maxframe/dataframe/misc/stack.py
CHANGED
|
@@ -18,6 +18,7 @@ import numpy as np
|
|
|
18
18
|
import pandas as pd
|
|
19
19
|
|
|
20
20
|
from ... import opcodes
|
|
21
|
+
from ...core import EntityData
|
|
21
22
|
from ...serialization.serializables import AnyField, BoolField, KeyField
|
|
22
23
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
23
24
|
from ..utils import build_df, parse_index
|
|
@@ -30,9 +31,10 @@ class DataFrameStack(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
30
31
|
level = AnyField("level", default=None)
|
|
31
32
|
dropna = BoolField("dropna", default=None)
|
|
32
33
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
@classmethod
|
|
35
|
+
def _set_inputs(cls, op: "DataFrameStack", inputs: List[EntityData]):
|
|
36
|
+
super()._set_inputs(op, inputs)
|
|
37
|
+
op._input_df = op._inputs[0]
|
|
36
38
|
|
|
37
39
|
@classmethod
|
|
38
40
|
def _calc_size(cls, size: int, level: Union[List, int], dtypes: pd.Series):
|
|
@@ -31,6 +31,166 @@ from ...datasource.series import from_pandas as from_pandas_series
|
|
|
31
31
|
from .. import astype, cut
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
def test_dataframe_apply():
|
|
35
|
+
cols = [chr(ord("A") + i) for i in range(10)]
|
|
36
|
+
df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols))
|
|
37
|
+
|
|
38
|
+
df = from_pandas_df(df_raw, chunk_size=5)
|
|
39
|
+
|
|
40
|
+
def df_func_with_err(v):
|
|
41
|
+
assert len(v) > 2
|
|
42
|
+
return v.sort_values()
|
|
43
|
+
|
|
44
|
+
def df_series_func_with_err(v):
|
|
45
|
+
assert len(v) > 2
|
|
46
|
+
return 0
|
|
47
|
+
|
|
48
|
+
with pytest.raises(TypeError):
|
|
49
|
+
df.apply(df_func_with_err)
|
|
50
|
+
|
|
51
|
+
r = df.apply(df_func_with_err, output_type="dataframe", dtypes=df_raw.dtypes)
|
|
52
|
+
assert r.shape == (np.nan, df.shape[-1])
|
|
53
|
+
assert r.op._op_type_ == opcodes.APPLY
|
|
54
|
+
assert r.op.output_types[0] == OutputType.dataframe
|
|
55
|
+
assert r.op.elementwise is False
|
|
56
|
+
|
|
57
|
+
r = df.apply(
|
|
58
|
+
df_series_func_with_err, output_type="series", dtype=object, name="output"
|
|
59
|
+
)
|
|
60
|
+
assert r.dtype == np.dtype("O")
|
|
61
|
+
assert r.shape == (df.shape[-1],)
|
|
62
|
+
assert r.op._op_type_ == opcodes.APPLY
|
|
63
|
+
assert r.op.output_types[0] == OutputType.series
|
|
64
|
+
assert r.op.elementwise is False
|
|
65
|
+
|
|
66
|
+
r = df.apply("ffill")
|
|
67
|
+
assert r.op._op_type_ == opcodes.FILL_NA
|
|
68
|
+
|
|
69
|
+
r = df.apply(np.sqrt)
|
|
70
|
+
assert all(v == np.dtype("float64") for v in r.dtypes) is True
|
|
71
|
+
assert r.shape == df.shape
|
|
72
|
+
assert r.op._op_type_ == opcodes.APPLY
|
|
73
|
+
assert r.op.output_types[0] == OutputType.dataframe
|
|
74
|
+
assert r.op.elementwise is True
|
|
75
|
+
|
|
76
|
+
r = df.apply(lambda x: pd.Series([1, 2]))
|
|
77
|
+
assert all(v == np.dtype("int64") for v in r.dtypes) is True
|
|
78
|
+
assert r.shape == (np.nan, df.shape[1])
|
|
79
|
+
assert r.op.output_types[0] == OutputType.dataframe
|
|
80
|
+
assert r.op.elementwise is False
|
|
81
|
+
|
|
82
|
+
r = df.apply(np.sum, axis="index")
|
|
83
|
+
assert np.dtype("int64") == r.dtype
|
|
84
|
+
assert r.shape == (df.shape[1],)
|
|
85
|
+
assert r.op.output_types[0] == OutputType.series
|
|
86
|
+
assert r.op.elementwise is False
|
|
87
|
+
|
|
88
|
+
r = df.apply(np.sum, axis="columns")
|
|
89
|
+
assert np.dtype("int64") == r.dtype
|
|
90
|
+
assert r.shape == (df.shape[0],)
|
|
91
|
+
assert r.op.output_types[0] == OutputType.series
|
|
92
|
+
assert r.op.elementwise is False
|
|
93
|
+
|
|
94
|
+
r = df.apply(lambda x: pd.Series([1, 2], index=["foo", "bar"]), axis=1)
|
|
95
|
+
assert all(v == np.dtype("int64") for v in r.dtypes) is True
|
|
96
|
+
assert r.shape == (df.shape[0], 2)
|
|
97
|
+
assert r.op.output_types[0] == OutputType.dataframe
|
|
98
|
+
assert r.op.elementwise is False
|
|
99
|
+
|
|
100
|
+
r = df.apply(lambda x: [1, 2], axis=1, result_type="expand")
|
|
101
|
+
assert all(v == np.dtype("int64") for v in r.dtypes) is True
|
|
102
|
+
assert r.shape == (df.shape[0], 2)
|
|
103
|
+
assert r.op.output_types[0] == OutputType.dataframe
|
|
104
|
+
assert r.op.elementwise is False
|
|
105
|
+
|
|
106
|
+
r = df.apply(lambda x: list(range(10)), axis=1, result_type="reduce")
|
|
107
|
+
assert np.dtype("object") == r.dtype
|
|
108
|
+
assert r.shape == (df.shape[0],)
|
|
109
|
+
assert r.op.output_types[0] == OutputType.series
|
|
110
|
+
assert r.op.elementwise is False
|
|
111
|
+
|
|
112
|
+
r = df.apply(lambda x: list(range(10)), axis=1, result_type="broadcast")
|
|
113
|
+
assert all(v == np.dtype("int64") for v in r.dtypes) is True
|
|
114
|
+
assert r.shape == (df.shape[0], 10)
|
|
115
|
+
assert r.op.output_types[0] == OutputType.dataframe
|
|
116
|
+
assert r.op.elementwise is False
|
|
117
|
+
|
|
118
|
+
raw = pd.DataFrame({"a": [np.array([1, 2, 3]), np.array([4, 5, 6])]})
|
|
119
|
+
df = from_pandas_df(raw)
|
|
120
|
+
df2 = df.apply(
|
|
121
|
+
lambda x: x["a"].astype(pd.Series),
|
|
122
|
+
axis=1,
|
|
123
|
+
output_type="dataframe",
|
|
124
|
+
dtypes=pd.Series([np.dtype(float)] * 3),
|
|
125
|
+
)
|
|
126
|
+
assert df2.ndim == 2
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_series_apply():
|
|
130
|
+
idxes = [chr(ord("A") + i) for i in range(20)]
|
|
131
|
+
s_raw = pd.Series([i**2 for i in range(20)], index=idxes)
|
|
132
|
+
|
|
133
|
+
series = from_pandas_series(s_raw, chunk_size=5)
|
|
134
|
+
|
|
135
|
+
r = series.apply("add", args=(1,))
|
|
136
|
+
assert r.op._op_type_ == opcodes.ADD
|
|
137
|
+
|
|
138
|
+
r = series.apply(np.sqrt)
|
|
139
|
+
assert np.dtype("float64") == r.dtype
|
|
140
|
+
assert r.shape == series.shape
|
|
141
|
+
assert r.index_value is series.index_value
|
|
142
|
+
assert r.op._op_type_ == opcodes.APPLY
|
|
143
|
+
assert r.op.output_types[0] == OutputType.series
|
|
144
|
+
|
|
145
|
+
r = series.apply("sqrt")
|
|
146
|
+
assert np.dtype("float64") == r.dtype
|
|
147
|
+
assert r.shape == series.shape
|
|
148
|
+
assert r.op._op_type_ == opcodes.APPLY
|
|
149
|
+
assert r.op.output_types[0] == OutputType.series
|
|
150
|
+
|
|
151
|
+
r = series.apply(lambda x: [x, x + 1], convert_dtype=False)
|
|
152
|
+
assert np.dtype("object") == r.dtype
|
|
153
|
+
assert r.shape == series.shape
|
|
154
|
+
assert r.op._op_type_ == opcodes.APPLY
|
|
155
|
+
assert r.op.output_types[0] == OutputType.series
|
|
156
|
+
|
|
157
|
+
s_raw2 = pd.Series([np.array([1, 2, 3]), np.array([4, 5, 6])])
|
|
158
|
+
series = from_pandas_series(s_raw2)
|
|
159
|
+
|
|
160
|
+
r = series.apply(np.sum)
|
|
161
|
+
assert r.dtype == np.dtype(object)
|
|
162
|
+
|
|
163
|
+
r = series.apply(lambda x: pd.Series([1]), output_type="dataframe")
|
|
164
|
+
expected = s_raw2.apply(lambda x: pd.Series([1]))
|
|
165
|
+
pd.testing.assert_series_equal(r.dtypes, expected.dtypes)
|
|
166
|
+
|
|
167
|
+
dtypes = pd.Series([np.dtype(float)] * 3)
|
|
168
|
+
r = series.apply(pd.Series, output_type="dataframe", dtypes=dtypes)
|
|
169
|
+
assert r.ndim == 2
|
|
170
|
+
pd.testing.assert_series_equal(r.dtypes, dtypes)
|
|
171
|
+
assert r.shape == (2, 3)
|
|
172
|
+
|
|
173
|
+
def apply_with_error(_):
|
|
174
|
+
raise ValueError
|
|
175
|
+
|
|
176
|
+
r = series.apply(apply_with_error, output_type="dataframe", dtypes=dtypes)
|
|
177
|
+
assert r.ndim == 2
|
|
178
|
+
|
|
179
|
+
r = series.apply(
|
|
180
|
+
pd.Series, output_type="dataframe", dtypes=dtypes, index=pd.RangeIndex(2)
|
|
181
|
+
)
|
|
182
|
+
assert r.ndim == 2
|
|
183
|
+
pd.testing.assert_series_equal(r.dtypes, dtypes)
|
|
184
|
+
assert r.shape == (2, 3)
|
|
185
|
+
|
|
186
|
+
with pytest.raises(AttributeError, match="abc"):
|
|
187
|
+
series.apply("abc")
|
|
188
|
+
|
|
189
|
+
with pytest.raises(TypeError):
|
|
190
|
+
# dtypes not provided
|
|
191
|
+
series.apply(lambda x: x.tolist(), output_type="dataframe")
|
|
192
|
+
|
|
193
|
+
|
|
34
194
|
def test_transform():
|
|
35
195
|
cols = [chr(ord("A") + i) for i in range(10)]
|
|
36
196
|
df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols))
|
|
@@ -339,6 +499,11 @@ def test_get_dummies():
|
|
|
339
499
|
r = get_dummies(df)
|
|
340
500
|
assert isinstance(r, DATAFRAME_TYPE)
|
|
341
501
|
|
|
502
|
+
raw = pd.Series(["a", "a", "b", "c"])
|
|
503
|
+
ms = from_pandas_series(raw, chunk_size=2)
|
|
504
|
+
r = get_dummies(ms)
|
|
505
|
+
assert isinstance(r, DATAFRAME_TYPE)
|
|
506
|
+
|
|
342
507
|
|
|
343
508
|
def test_to_numeric():
|
|
344
509
|
raw = pd.DataFrame({"a": [1.0, 2, 3, -3]})
|
|
@@ -381,7 +546,7 @@ def test_apply():
|
|
|
381
546
|
|
|
382
547
|
keys = [1, 2]
|
|
383
548
|
|
|
384
|
-
@with_running_options(engine="spe")
|
|
549
|
+
@with_running_options(engine="spe", memory="40GB")
|
|
385
550
|
def f(x, keys):
|
|
386
551
|
if x["a"] in keys:
|
|
387
552
|
return [1, 0]
|
|
@@ -398,6 +563,7 @@ def test_apply():
|
|
|
398
563
|
)
|
|
399
564
|
assert apply_df.shape == (3, 2)
|
|
400
565
|
assert apply_df.op.expect_engine == "SPE"
|
|
566
|
+
assert apply_df.op.expect_resources == {"cpu": 1, "memory": "40GB", "gpu": 0}
|
|
401
567
|
|
|
402
568
|
|
|
403
569
|
def test_pivot_table():
|
|
@@ -12,23 +12,24 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Any, Union
|
|
15
|
+
from typing import Any, MutableMapping, Union
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
|
-
import pandas as pd
|
|
19
18
|
from pandas import DataFrame, Series
|
|
20
19
|
|
|
21
20
|
from ... import opcodes
|
|
22
21
|
from ...core import OutputType
|
|
23
22
|
from ...serialization.serializables import AnyField, BoolField, DictField, TupleField
|
|
24
|
-
from ...
|
|
23
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
24
|
+
from ...utils import copy_if_possible, pd_release_version
|
|
25
25
|
from ..core import DATAFRAME_TYPE
|
|
26
26
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
27
27
|
from ..utils import (
|
|
28
|
+
InferredDataFrameMeta,
|
|
28
29
|
build_df,
|
|
29
30
|
build_series,
|
|
30
31
|
copy_func_scheduling_hints,
|
|
31
|
-
|
|
32
|
+
infer_dataframe_return_value,
|
|
32
33
|
pack_func_args,
|
|
33
34
|
parse_index,
|
|
34
35
|
validate_axis,
|
|
@@ -53,63 +54,55 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
53
54
|
if hasattr(self, "func"):
|
|
54
55
|
copy_func_scheduling_hints(self.func, self)
|
|
55
56
|
|
|
56
|
-
def
|
|
57
|
-
|
|
58
|
-
test_df = _build_stub_pandas_obj(df, self.output_types[0])
|
|
59
|
-
if self.output_types[0] == OutputType.dataframe:
|
|
60
|
-
try:
|
|
61
|
-
with np.errstate(all="ignore"), quiet_stdio():
|
|
62
|
-
if self.call_agg:
|
|
63
|
-
infer_df = test_df.agg(packed_funcs, axis=self.axis)
|
|
64
|
-
else:
|
|
65
|
-
infer_df = test_df.transform(packed_funcs, axis=self.axis)
|
|
66
|
-
except: # noqa: E722
|
|
67
|
-
infer_df = None
|
|
68
|
-
else:
|
|
69
|
-
try:
|
|
70
|
-
with np.errstate(all="ignore"), quiet_stdio():
|
|
71
|
-
if self.call_agg:
|
|
72
|
-
infer_df = test_df.agg(packed_funcs)
|
|
73
|
-
else:
|
|
74
|
-
if not _with_convert_dtype:
|
|
75
|
-
infer_df = test_df.transform(packed_funcs)
|
|
76
|
-
else: # pragma: no cover
|
|
77
|
-
infer_df = test_df.transform(
|
|
78
|
-
packed_funcs, convert_dtype=self.convert_dtype
|
|
79
|
-
)
|
|
80
|
-
except: # noqa: E722
|
|
81
|
-
infer_df = None
|
|
82
|
-
|
|
83
|
-
if infer_df is None and dtypes is None:
|
|
84
|
-
raise TypeError(
|
|
85
|
-
"Failed to infer dtype, please specify dtypes as arguments."
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
if infer_df is None:
|
|
89
|
-
is_df = self.output_types[0] == OutputType.dataframe
|
|
90
|
-
else:
|
|
91
|
-
is_df = isinstance(infer_df, pd.DataFrame)
|
|
92
|
-
|
|
93
|
-
if is_df:
|
|
94
|
-
new_dtypes = make_dtypes(dtypes) if dtypes is not None else infer_df.dtypes
|
|
95
|
-
self.output_types = [OutputType.dataframe]
|
|
96
|
-
else:
|
|
97
|
-
new_dtypes = (
|
|
98
|
-
dtypes if dtypes is not None else (infer_df.name, infer_df.dtype)
|
|
99
|
-
)
|
|
100
|
-
self.output_types = [OutputType.series]
|
|
101
|
-
|
|
102
|
-
return new_dtypes
|
|
57
|
+
def has_custom_code(self) -> bool:
|
|
58
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
103
59
|
|
|
104
|
-
def
|
|
60
|
+
def _infer_df_func_returns(
|
|
61
|
+
self, df, dtypes=None, dtype=None, name=None, index=None
|
|
62
|
+
) -> InferredDataFrameMeta:
|
|
63
|
+
def infer_func(df_obj):
|
|
64
|
+
if self.call_agg:
|
|
65
|
+
return df_obj.agg(self.func, self.axis)
|
|
66
|
+
else:
|
|
67
|
+
return df_obj.transform(self.func, self.axis)
|
|
68
|
+
|
|
69
|
+
res = infer_dataframe_return_value(
|
|
70
|
+
df,
|
|
71
|
+
infer_func,
|
|
72
|
+
self.output_types[0] if self.output_types else None,
|
|
73
|
+
dtypes=dtypes,
|
|
74
|
+
dtype=dtype,
|
|
75
|
+
name=name,
|
|
76
|
+
index=index,
|
|
77
|
+
inherit_index=True,
|
|
78
|
+
)
|
|
79
|
+
res.check_absence("dtypes", "dtype")
|
|
80
|
+
return res
|
|
81
|
+
|
|
82
|
+
def __call__(
|
|
83
|
+
self, df, dtypes=None, dtype=None, name=None, index=None, skip_infer=None
|
|
84
|
+
):
|
|
105
85
|
axis = getattr(self, "axis", None) or 0
|
|
106
86
|
self.axis = validate_axis(axis, df)
|
|
107
87
|
if not skip_infer:
|
|
108
|
-
|
|
88
|
+
inferred_meta = self._infer_df_func_returns(
|
|
89
|
+
df, dtypes=dtypes, dtype=dtype, name=name, index=index
|
|
90
|
+
)
|
|
91
|
+
else:
|
|
92
|
+
index_value = parse_index(index) if index else df.index_value
|
|
93
|
+
inferred_meta = InferredDataFrameMeta(
|
|
94
|
+
self.output_types[0],
|
|
95
|
+
dtypes=dtypes,
|
|
96
|
+
dtype=dtype,
|
|
97
|
+
name=name,
|
|
98
|
+
index_value=index_value,
|
|
99
|
+
)
|
|
109
100
|
|
|
101
|
+
self._output_types = [inferred_meta.output_type]
|
|
110
102
|
if self.output_types[0] == OutputType.dataframe:
|
|
111
103
|
new_shape = list(df.shape)
|
|
112
|
-
new_index_value =
|
|
104
|
+
new_index_value = inferred_meta.index_value
|
|
105
|
+
dtypes = inferred_meta.dtypes
|
|
113
106
|
if len(new_shape) == 1:
|
|
114
107
|
new_shape.append(len(dtypes) if dtypes is not None else np.nan)
|
|
115
108
|
else:
|
|
@@ -118,6 +111,7 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
118
111
|
if self.call_agg:
|
|
119
112
|
new_shape[self.axis] = np.nan
|
|
120
113
|
new_index_value = parse_index(None, (df.key, df.index_value.key))
|
|
114
|
+
|
|
121
115
|
if dtypes is None:
|
|
122
116
|
columns_value = None
|
|
123
117
|
else:
|
|
@@ -130,11 +124,6 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
130
124
|
columns_value=columns_value,
|
|
131
125
|
)
|
|
132
126
|
else:
|
|
133
|
-
if dtypes is not None:
|
|
134
|
-
name, dtype = dtypes
|
|
135
|
-
else:
|
|
136
|
-
name, dtype = None, None
|
|
137
|
-
|
|
138
127
|
if isinstance(df, DATAFRAME_TYPE):
|
|
139
128
|
new_shape = (df.shape[1 - axis],)
|
|
140
129
|
new_index_value = [df.columns_value, df.index_value][axis]
|
|
@@ -145,15 +134,25 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
145
134
|
return self.new_series(
|
|
146
135
|
[df],
|
|
147
136
|
shape=new_shape,
|
|
148
|
-
name=name,
|
|
149
|
-
dtype=dtype,
|
|
137
|
+
name=inferred_meta.name,
|
|
138
|
+
dtype=inferred_meta.dtype,
|
|
150
139
|
index_value=new_index_value,
|
|
151
140
|
)
|
|
152
141
|
|
|
142
|
+
@classmethod
|
|
143
|
+
def estimate_size(
|
|
144
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "TransformOperator"
|
|
145
|
+
) -> None:
|
|
146
|
+
if isinstance(op.func, MarkedFunction):
|
|
147
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
148
|
+
super().estimate_size(ctx, op)
|
|
149
|
+
|
|
153
150
|
|
|
154
151
|
def get_packed_funcs(df, output_type, func, *args, **kwds) -> Any:
|
|
155
152
|
stub_df = _build_stub_pandas_obj(df, output_type)
|
|
156
|
-
|
|
153
|
+
n_args = copy_if_possible(args)
|
|
154
|
+
n_kwds = copy_if_possible(kwds)
|
|
155
|
+
return pack_func_args(stub_df, func, *n_args, **n_kwds)
|
|
157
156
|
|
|
158
157
|
|
|
159
158
|
def _build_stub_pandas_obj(df, output_type) -> Union[DataFrame, Series]:
|
|
@@ -241,7 +240,7 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
|
|
|
241
240
|
axis=axis,
|
|
242
241
|
args=args,
|
|
243
242
|
kwds=kwargs,
|
|
244
|
-
output_types=[OutputType.dataframe],
|
|
243
|
+
output_types=[OutputType.dataframe] if not call_agg else None,
|
|
245
244
|
call_agg=call_agg,
|
|
246
245
|
)
|
|
247
246
|
return op(df, dtypes=dtypes, skip_infer=skip_infer)
|
|
@@ -337,5 +336,4 @@ def series_transform(
|
|
|
337
336
|
output_types=[OutputType.series],
|
|
338
337
|
call_agg=call_agg,
|
|
339
338
|
)
|
|
340
|
-
|
|
341
|
-
return op(series, dtypes=dtypes, skip_infer=skip_infer)
|
|
339
|
+
return op(series, dtype=dtype, name=series.name, skip_infer=skip_infer)
|
|
@@ -12,11 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
|
|
18
20
|
from ... import opcodes
|
|
19
|
-
from ...core import OutputType
|
|
21
|
+
from ...core import EntityData, OutputType
|
|
20
22
|
from ...serialization.serializables import BoolField, Int64Field, KeyField, StringField
|
|
21
23
|
from ...utils import pd_release_version
|
|
22
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -42,9 +44,10 @@ class DataFrameValueCounts(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
42
44
|
super().__init__(**kw)
|
|
43
45
|
self.output_types = [OutputType.series]
|
|
44
46
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
47
|
+
@classmethod
|
|
48
|
+
def _set_inputs(cls, op: "DataFrameValueCounts", inputs: List[EntityData]):
|
|
49
|
+
super()._set_inputs(op, inputs)
|
|
50
|
+
op.input = op._inputs[0]
|
|
48
51
|
|
|
49
52
|
def __call__(self, inp):
|
|
50
53
|
test_series = build_series(inp).value_counts(normalize=self.normalize)
|
|
@@ -37,6 +37,8 @@ class DataFrameDropNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
37
37
|
drop_directly = BoolField("drop_directly", default=None)
|
|
38
38
|
# size of subset, used when how == 'any'
|
|
39
39
|
subset_size = Int32Field("subset_size", default=None)
|
|
40
|
+
# if True, drop index
|
|
41
|
+
ignore_index = BoolField("ignore_index", default=False)
|
|
40
42
|
|
|
41
43
|
def __init__(self, sparse=None, output_types=None, **kw):
|
|
42
44
|
super().__init__(_output_types=output_types, sparse=sparse, **kw)
|
|
@@ -52,7 +54,13 @@ class DataFrameDropNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
52
54
|
|
|
53
55
|
|
|
54
56
|
def df_dropna(
|
|
55
|
-
df,
|
|
57
|
+
df,
|
|
58
|
+
axis=0,
|
|
59
|
+
how=no_default,
|
|
60
|
+
thresh=no_default,
|
|
61
|
+
subset=None,
|
|
62
|
+
inplace=False,
|
|
63
|
+
ignore_index=False,
|
|
56
64
|
):
|
|
57
65
|
"""
|
|
58
66
|
Remove missing values.
|
|
@@ -69,11 +77,6 @@ def df_dropna(
|
|
|
69
77
|
* 0, or 'index' : Drop rows which contain missing values.
|
|
70
78
|
* 1, or 'columns' : Drop columns which contain missing value.
|
|
71
79
|
|
|
72
|
-
.. versionchanged:: 1.0.0
|
|
73
|
-
|
|
74
|
-
Pass tuple or list to drop on multiple axes.
|
|
75
|
-
Only a single axis is allowed.
|
|
76
|
-
|
|
77
80
|
how : {'any', 'all'}, default 'any'
|
|
78
81
|
Determine if row or column is removed from DataFrame, when we have
|
|
79
82
|
at least one NA or all NA.
|
|
@@ -88,6 +91,8 @@ def df_dropna(
|
|
|
88
91
|
these would be a list of columns to include.
|
|
89
92
|
inplace : bool, default False
|
|
90
93
|
If True, do operation inplace and return None.
|
|
94
|
+
ignore_index : bool, default False
|
|
95
|
+
If True, the resulting axis will be labeled 0, 1, …, n - 1.
|
|
91
96
|
|
|
92
97
|
Returns
|
|
93
98
|
-------
|
|
@@ -168,6 +173,7 @@ def df_dropna(
|
|
|
168
173
|
how=how,
|
|
169
174
|
thresh=thresh,
|
|
170
175
|
subset=subset,
|
|
176
|
+
ignore_index=ignore_index,
|
|
171
177
|
output_types=[OutputType.dataframe],
|
|
172
178
|
)
|
|
173
179
|
out_df = op(df)
|
|
@@ -177,7 +183,7 @@ def df_dropna(
|
|
|
177
183
|
return out_df
|
|
178
184
|
|
|
179
185
|
|
|
180
|
-
def series_dropna(series, axis=0, inplace=False, how=None):
|
|
186
|
+
def series_dropna(series, axis=0, inplace=False, how=None, ignore_index=False):
|
|
181
187
|
"""
|
|
182
188
|
Return a new Series with missing values removed.
|
|
183
189
|
|
|
@@ -192,6 +198,8 @@ def series_dropna(series, axis=0, inplace=False, how=None):
|
|
|
192
198
|
If True, do operation inplace and return None.
|
|
193
199
|
how : str, optional
|
|
194
200
|
Not in use. Kept for compatibility.
|
|
201
|
+
ignore_index : bool, default False
|
|
202
|
+
If True, the resulting axis will be labeled 0, 1, …, n - 1.
|
|
195
203
|
|
|
196
204
|
Returns
|
|
197
205
|
-------
|
|
@@ -253,6 +261,7 @@ def series_dropna(series, axis=0, inplace=False, how=None):
|
|
|
253
261
|
op = DataFrameDropNA(
|
|
254
262
|
axis=axis,
|
|
255
263
|
how=how,
|
|
264
|
+
ignore_index=ignore_index,
|
|
256
265
|
output_types=[OutputType.series],
|
|
257
266
|
)
|
|
258
267
|
out_series = op(series)
|
|
@@ -12,18 +12,21 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import pandas as pd
|
|
16
18
|
|
|
17
19
|
from ... import opcodes
|
|
18
|
-
from ...core import ENTITY_TYPE, Entity, get_output_types
|
|
20
|
+
from ...core import ENTITY_TYPE, Entity, EntityData, get_output_types
|
|
19
21
|
from ...serialization.serializables import AnyField, Int64Field, StringField
|
|
20
22
|
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
21
23
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
24
|
from ..utils import validate_axis
|
|
23
25
|
|
|
24
26
|
|
|
25
|
-
class
|
|
27
|
+
class DataFrameFillNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
26
28
|
_op_type_ = opcodes.FILL_NA
|
|
29
|
+
_legacy_name = "FillNA"
|
|
27
30
|
|
|
28
31
|
value = AnyField(
|
|
29
32
|
"value", on_serialize=lambda x: x.data if isinstance(x, Entity) else x
|
|
@@ -33,13 +36,14 @@ class FillNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
33
36
|
limit = Int64Field("limit", default=None)
|
|
34
37
|
downcast = AnyField("downcast", default=None)
|
|
35
38
|
|
|
36
|
-
def __init__(self, output_limit=1, **kw):
|
|
37
|
-
super().__init__(output_limit=output_limit, **kw)
|
|
39
|
+
def __init__(self, output_limit=1, output_types=None, **kw):
|
|
40
|
+
super().__init__(output_limit=output_limit, _output_types=output_types, **kw)
|
|
38
41
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
42
|
+
@classmethod
|
|
43
|
+
def _set_inputs(cls, op: "DataFrameFillNA", inputs: List[EntityData]):
|
|
44
|
+
super()._set_inputs(op, inputs)
|
|
45
|
+
if op.method is None and len(inputs) > 1:
|
|
46
|
+
op.value = op._inputs[1]
|
|
43
47
|
|
|
44
48
|
def __call__(self, a, value_df=None):
|
|
45
49
|
method = getattr(self, "method", None)
|
|
@@ -81,6 +85,10 @@ class FillNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
81
85
|
)
|
|
82
86
|
|
|
83
87
|
|
|
88
|
+
# keep for import compatibility
|
|
89
|
+
FillNA = DataFrameFillNA
|
|
90
|
+
|
|
91
|
+
|
|
84
92
|
def fillna(
|
|
85
93
|
df, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None
|
|
86
94
|
):
|
|
@@ -198,7 +206,7 @@ def fillna(
|
|
|
198
206
|
else:
|
|
199
207
|
value_df = None
|
|
200
208
|
|
|
201
|
-
op =
|
|
209
|
+
op = DataFrameFillNA(
|
|
202
210
|
value=value,
|
|
203
211
|
method=method,
|
|
204
212
|
axis=axis,
|
|
@@ -267,7 +275,7 @@ def index_fillna(index, value=None, downcast=None):
|
|
|
267
275
|
if isinstance(value, (list, pd.Series, SERIES_TYPE)):
|
|
268
276
|
raise ValueError("'value' must be a scalar, passed: %s" % type(value))
|
|
269
277
|
|
|
270
|
-
op =
|
|
278
|
+
op = DataFrameFillNA(
|
|
271
279
|
value=value,
|
|
272
280
|
downcast=downcast,
|
|
273
281
|
output_types=get_output_types(index),
|