maxframe 1.3.0__cp310-cp310-macosx_10_9_universal2.whl → 2.0.0__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +109 -19
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +62 -9
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +128 -3
- maxframe/dataframe/reduction/core.py +20 -6
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +10 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +21 -58
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/llm/models/dashscope.py +34 -0
- maxframe/learn/contrib/llm/models/managed.py +15 -0
- maxframe/learn/contrib/llm/multi_modal.py +92 -0
- maxframe/learn/contrib/llm/text.py +21 -5
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +54 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +13 -8
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +59 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
- maxframe-2.0.0.dist-info/RECORD +939 -0
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.0.dist-info/RECORD +0 -705
- {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -12,11 +12,19 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
|
|
18
|
-
from ...core import OutputType
|
|
19
|
-
from ...serialization.serializables import
|
|
20
|
+
from ...core import EntityData, OutputType
|
|
21
|
+
from ...serialization.serializables import (
|
|
22
|
+
AnyField,
|
|
23
|
+
BoolField,
|
|
24
|
+
KeyField,
|
|
25
|
+
ListField,
|
|
26
|
+
StringField,
|
|
27
|
+
)
|
|
20
28
|
from ..datasource.dataframe import from_pandas as from_pandas_df
|
|
21
29
|
from ..datasource.series import from_pandas as from_pandas_series
|
|
22
30
|
from ..initializer import Series as asseries
|
|
@@ -34,55 +42,26 @@ class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
34
42
|
drop_first = BoolField("drop_first", default=None)
|
|
35
43
|
dtype = AnyField("dtype", default=None)
|
|
36
44
|
|
|
37
|
-
|
|
38
|
-
|
|
45
|
+
agg_results = KeyField("agg_results", default=None)
|
|
46
|
+
|
|
47
|
+
def __init__(self, **kw):
|
|
48
|
+
super().__init__(**kw)
|
|
39
49
|
self.output_types = [OutputType.dataframe]
|
|
40
50
|
|
|
51
|
+
@classmethod
|
|
52
|
+
def _set_inputs(cls, op: "DataFrameGetDummies", inputs: List[EntityData]):
|
|
53
|
+
super()._set_inputs(op, inputs)
|
|
54
|
+
if op.agg_results is not None: # pragma: no branch
|
|
55
|
+
op.agg_results = inputs[-1]
|
|
56
|
+
|
|
41
57
|
def __call__(self, data):
|
|
42
|
-
if
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if self.prefix is not None:
|
|
50
|
-
if isinstance(self.prefix, list):
|
|
51
|
-
if self.columns is not None:
|
|
52
|
-
encoding_col_num = len(self.columns)
|
|
53
|
-
else:
|
|
54
|
-
encoding_col_num = 0
|
|
55
|
-
for dtype in data.dtypes.values:
|
|
56
|
-
if dtype.kind in _encoding_dtype_kind:
|
|
57
|
-
encoding_col_num += 1
|
|
58
|
-
prefix_num = len(self.prefix)
|
|
59
|
-
if prefix_num != encoding_col_num:
|
|
60
|
-
raise ValueError(
|
|
61
|
-
f"Length of 'prefix' ({prefix_num}) did not match "
|
|
62
|
-
+ f"the length of the columns being encoded ({encoding_col_num})"
|
|
63
|
-
)
|
|
64
|
-
elif isinstance(self.prefix, dict):
|
|
65
|
-
if self.columns is not None:
|
|
66
|
-
encoding_col_num = len(self.columns)
|
|
67
|
-
prefix_num = len(self.prefix)
|
|
68
|
-
if prefix_num != encoding_col_num:
|
|
69
|
-
raise ValueError(
|
|
70
|
-
f"Length of 'prefix' ({prefix_num}) did not match "
|
|
71
|
-
+ f"the length of the columns being encoded ({encoding_col_num})"
|
|
72
|
-
)
|
|
73
|
-
columns = self.prefix.keys()
|
|
74
|
-
for columns_columnname, prefix_columnname in zip(
|
|
75
|
-
columns, list(self.columns)
|
|
76
|
-
):
|
|
77
|
-
if columns_columnname != prefix_columnname:
|
|
78
|
-
raise KeyError(f"{columns_columnname}")
|
|
79
|
-
else:
|
|
80
|
-
self.columns = list(self.prefix.keys())
|
|
81
|
-
# Convert prefix from dict to list, to simplify tile work
|
|
82
|
-
self.prefix = list(self.prefix.values())
|
|
83
|
-
|
|
84
|
-
return self.new_dataframe(
|
|
85
|
-
[data],
|
|
58
|
+
if not self.columns:
|
|
59
|
+
self.agg_results = data.agg(["unique"])
|
|
60
|
+
else:
|
|
61
|
+
self.agg_results = data[self.columns].agg(["unique"])
|
|
62
|
+
|
|
63
|
+
return self.new_tileable(
|
|
64
|
+
[data, self.agg_results],
|
|
86
65
|
shape=(np.nan, np.nan),
|
|
87
66
|
dtypes=None,
|
|
88
67
|
index_value=data.index_value,
|
|
@@ -127,7 +106,7 @@ def get_dummies(
|
|
|
127
106
|
drop_first : bool, default False
|
|
128
107
|
Whether to get k-1 dummies out of k categorical levels by removing the
|
|
129
108
|
first level.
|
|
130
|
-
dtype : dtype, default
|
|
109
|
+
dtype : dtype, default bool
|
|
131
110
|
Data type for new columns. Only a single dtype is allowed.
|
|
132
111
|
|
|
133
112
|
Returns
|
|
@@ -195,6 +174,56 @@ def get_dummies(
|
|
|
195
174
|
if columns is not None and not isinstance(columns, list):
|
|
196
175
|
raise TypeError("Input must be a list-like for parameter `columns`")
|
|
197
176
|
|
|
177
|
+
if isinstance(data, (list, tuple)):
|
|
178
|
+
data = asseries(data)
|
|
179
|
+
elif isinstance(data, pd.Series):
|
|
180
|
+
data = from_pandas_series(data)
|
|
181
|
+
elif isinstance(data, pd.DataFrame):
|
|
182
|
+
data = from_pandas_df(data)
|
|
183
|
+
|
|
184
|
+
dtype = dtype if dtype is not None else np.dtype(bool)
|
|
185
|
+
|
|
186
|
+
if prefix is not None:
|
|
187
|
+
if isinstance(prefix, list):
|
|
188
|
+
if columns is not None:
|
|
189
|
+
encoding_col_num = len(columns)
|
|
190
|
+
else:
|
|
191
|
+
encoding_col_num = 0
|
|
192
|
+
for dt in data.dtypes.values:
|
|
193
|
+
if dt.kind in _encoding_dtype_kind:
|
|
194
|
+
encoding_col_num += 1
|
|
195
|
+
prefix_num = len(prefix)
|
|
196
|
+
if prefix_num != encoding_col_num:
|
|
197
|
+
raise ValueError(
|
|
198
|
+
f"Length of 'prefix' ({prefix_num}) did not match "
|
|
199
|
+
+ f"the length of the columns being encoded ({encoding_col_num})"
|
|
200
|
+
)
|
|
201
|
+
elif isinstance(prefix, dict):
|
|
202
|
+
if columns is not None:
|
|
203
|
+
encoding_col_num = len(columns)
|
|
204
|
+
prefix_num = len(prefix)
|
|
205
|
+
if prefix_num != encoding_col_num:
|
|
206
|
+
raise ValueError(
|
|
207
|
+
f"Length of 'prefix' ({prefix_num}) did not match "
|
|
208
|
+
+ f"the length of the columns being encoded ({encoding_col_num})"
|
|
209
|
+
)
|
|
210
|
+
prefix_cols = prefix.keys()
|
|
211
|
+
for columns_columnname, prefix_columnname in zip(
|
|
212
|
+
prefix_cols, list(columns)
|
|
213
|
+
):
|
|
214
|
+
if columns_columnname != prefix_columnname:
|
|
215
|
+
raise KeyError(f"{columns_columnname}")
|
|
216
|
+
else:
|
|
217
|
+
columns = list(prefix.keys())
|
|
218
|
+
# Convert prefix from dict to list, to simplify tile work
|
|
219
|
+
prefix = list(prefix.values())
|
|
220
|
+
|
|
221
|
+
if not columns and data.ndim == 2:
|
|
222
|
+
columns = []
|
|
223
|
+
for col_name, dt in data.dtypes.items():
|
|
224
|
+
if dt.kind in _encoding_dtype_kind:
|
|
225
|
+
columns.append(col_name)
|
|
226
|
+
|
|
198
227
|
op = DataFrameGetDummies(
|
|
199
228
|
prefix=prefix,
|
|
200
229
|
prefix_sep=prefix_sep,
|
maxframe/dataframe/misc/isin.py
CHANGED
|
@@ -12,12 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
from pandas.api.types import is_list_like
|
|
18
20
|
|
|
19
21
|
from ... import opcodes
|
|
20
|
-
from ...core import ENTITY_TYPE
|
|
22
|
+
from ...core import ENTITY_TYPE, EntityData
|
|
21
23
|
from ...serialization.serializables import AnyField, KeyField
|
|
22
24
|
from ...tensor.core import TENSOR_TYPE
|
|
23
25
|
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
@@ -30,21 +32,22 @@ class DataFrameIsin(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
30
32
|
input = KeyField("input")
|
|
31
33
|
values = AnyField("values", default=None)
|
|
32
34
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
@classmethod
|
|
36
|
+
def _set_inputs(cls, op: "DataFrameIsin", inputs: List[EntityData]):
|
|
37
|
+
super()._set_inputs(op, inputs)
|
|
38
|
+
inputs_iter = iter(op._inputs)
|
|
39
|
+
op.input = next(inputs_iter)
|
|
40
|
+
if len(op._inputs) > 1:
|
|
41
|
+
if isinstance(op.values, dict):
|
|
39
42
|
new_values = dict()
|
|
40
|
-
for k, v in
|
|
43
|
+
for k, v in op.values.items():
|
|
41
44
|
if isinstance(v, ENTITY_TYPE):
|
|
42
45
|
new_values[k] = next(inputs_iter)
|
|
43
46
|
else:
|
|
44
47
|
new_values[k] = v
|
|
45
|
-
|
|
48
|
+
op.values = new_values
|
|
46
49
|
else:
|
|
47
|
-
|
|
50
|
+
op.values = op._inputs[1]
|
|
48
51
|
|
|
49
52
|
def __call__(self, elements):
|
|
50
53
|
inputs = [elements]
|
maxframe/dataframe/misc/map.py
CHANGED
|
@@ -13,14 +13,15 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import inspect
|
|
16
|
-
from
|
|
16
|
+
from typing import List, MutableMapping, Union
|
|
17
17
|
|
|
18
18
|
import numpy as np
|
|
19
19
|
import pandas as pd
|
|
20
20
|
|
|
21
21
|
from ... import opcodes
|
|
22
|
-
from ...core import OutputType
|
|
22
|
+
from ...core import EntityData, OutputType
|
|
23
23
|
from ...serialization.serializables import AnyField, KeyField, StringField
|
|
24
|
+
from ...udf import BuiltinFunction, MarkedFunction
|
|
24
25
|
from ...utils import quiet_stdio
|
|
25
26
|
from ..core import SERIES_TYPE
|
|
26
27
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -41,11 +42,17 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
41
42
|
if hasattr(self, "arg"):
|
|
42
43
|
copy_func_scheduling_hints(self.arg, self)
|
|
43
44
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
@classmethod
|
|
46
|
+
def _set_inputs(cls, op: "DataFrameMap", inputs: List[EntityData]):
|
|
47
|
+
super()._set_inputs(op, inputs)
|
|
48
|
+
op.input = op._inputs[0]
|
|
47
49
|
if len(inputs) == 2:
|
|
48
|
-
|
|
50
|
+
op.arg = op._inputs[1]
|
|
51
|
+
|
|
52
|
+
def has_custom_code(self) -> bool:
|
|
53
|
+
return not isinstance(
|
|
54
|
+
self.arg, (dict, SERIES_TYPE, pd.Series)
|
|
55
|
+
) and not isinstance(self.arg, BuiltinFunction)
|
|
49
56
|
|
|
50
57
|
def __call__(self, series, dtype, skip_infer=False):
|
|
51
58
|
if dtype is None and not skip_infer:
|
|
@@ -112,6 +119,14 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
112
119
|
name=series.name,
|
|
113
120
|
)
|
|
114
121
|
|
|
122
|
+
@classmethod
|
|
123
|
+
def estimate_size(
|
|
124
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameMap"
|
|
125
|
+
) -> None:
|
|
126
|
+
if isinstance(op.arg, MarkedFunction):
|
|
127
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
128
|
+
super().estimate_size(ctx, op)
|
|
129
|
+
|
|
115
130
|
|
|
116
131
|
def series_map(
|
|
117
132
|
series, arg, na_action=None, dtype=None, memory_scale=None, skip_infer=False
|
maxframe/dataframe/misc/melt.py
CHANGED
|
@@ -16,7 +16,7 @@ import numpy as np
|
|
|
16
16
|
import pandas as pd
|
|
17
17
|
|
|
18
18
|
from ... import opcodes
|
|
19
|
-
from ...serialization.serializables import AnyField, StringField
|
|
19
|
+
from ...serialization.serializables import AnyField, BoolField, StringField
|
|
20
20
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin, OutputType
|
|
21
21
|
from ..utils import build_empty_df, parse_index
|
|
22
22
|
|
|
@@ -29,6 +29,7 @@ class DataFrameMelt(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
29
29
|
var_name = StringField("var_name", default=None)
|
|
30
30
|
value_name = StringField("value_name", default=None)
|
|
31
31
|
col_level = AnyField("col_level", default=None)
|
|
32
|
+
ignore_index = BoolField("ignore_index", default=False)
|
|
32
33
|
|
|
33
34
|
def __call__(self, df):
|
|
34
35
|
empty_result = build_empty_df(df.dtypes).melt(
|
|
@@ -37,6 +38,7 @@ class DataFrameMelt(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
37
38
|
var_name=self.var_name,
|
|
38
39
|
value_name=self.value_name,
|
|
39
40
|
col_level=self.col_level,
|
|
41
|
+
ignore_index=self.ignore_index,
|
|
40
42
|
)
|
|
41
43
|
self._output_types = [OutputType.dataframe]
|
|
42
44
|
return self.new_tileable(
|
|
@@ -55,6 +57,7 @@ def melt(
|
|
|
55
57
|
var_name=None,
|
|
56
58
|
value_name="value",
|
|
57
59
|
col_level=None,
|
|
60
|
+
ignore_index=False,
|
|
58
61
|
):
|
|
59
62
|
"""
|
|
60
63
|
Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.
|
|
@@ -79,6 +82,9 @@ def melt(
|
|
|
79
82
|
Name to use for the 'value' column.
|
|
80
83
|
col_level : int or str, optional
|
|
81
84
|
If columns are a MultiIndex then use this level to melt.
|
|
85
|
+
ignore_index : bool, default False
|
|
86
|
+
If True, original index is ignored. If False, the original index
|
|
87
|
+
is retained. Index labels will be repeated as necessary.
|
|
82
88
|
|
|
83
89
|
Returns
|
|
84
90
|
-------
|
|
@@ -158,5 +164,6 @@ def melt(
|
|
|
158
164
|
var_name=var_name,
|
|
159
165
|
value_name=value_name,
|
|
160
166
|
col_level=col_level,
|
|
167
|
+
ignore_index=ignore_index,
|
|
161
168
|
)
|
|
162
169
|
return op(frame)
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from typing import List
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from pandas.api.types import is_list_like
|
|
21
|
+
|
|
22
|
+
from ... import opcodes
|
|
23
|
+
from ...core import EntityData, OutputType
|
|
24
|
+
from ...serialization.serializables import AnyField, KeyField
|
|
25
|
+
from ...utils import no_default
|
|
26
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
27
|
+
from ..utils import build_df, make_column_list, parse_index
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DataFramePivot(DataFrameOperator, DataFrameOperatorMixin):
|
|
31
|
+
_op_type_ = opcodes.PIVOT
|
|
32
|
+
|
|
33
|
+
values = AnyField("values", default=None)
|
|
34
|
+
index = AnyField("index", default=None)
|
|
35
|
+
columns = AnyField("columns", default=None)
|
|
36
|
+
|
|
37
|
+
agg_results = KeyField("agg_results", default=None)
|
|
38
|
+
|
|
39
|
+
def __init__(self, aggfunc=None, **kw):
|
|
40
|
+
if aggfunc is None:
|
|
41
|
+
aggfunc = "mean"
|
|
42
|
+
super().__init__(aggfunc=aggfunc, **kw)
|
|
43
|
+
self._output_types = [OutputType.dataframe]
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def _set_inputs(cls, op: "DataFramePivot", inputs: List[EntityData]):
|
|
47
|
+
super()._set_inputs(op, inputs)
|
|
48
|
+
if op.agg_results is not None: # pragma: no branch
|
|
49
|
+
op.agg_results = inputs[-1]
|
|
50
|
+
|
|
51
|
+
def __call__(self, df):
|
|
52
|
+
index_list = make_column_list(self.index, df.dtypes)
|
|
53
|
+
columns_list = make_column_list(self.columns, df.dtypes)
|
|
54
|
+
|
|
55
|
+
if not index_list:
|
|
56
|
+
index_data = pd.Index([])
|
|
57
|
+
elif len(index_list) == 1:
|
|
58
|
+
index_data = pd.Index(
|
|
59
|
+
[], dtype=df.dtypes[index_list[0]], name=index_list[0]
|
|
60
|
+
)
|
|
61
|
+
else:
|
|
62
|
+
index_data = pd.MultiIndex.from_frame(build_df(df[index_list]))
|
|
63
|
+
index_value = parse_index(index_data, df)
|
|
64
|
+
|
|
65
|
+
self.agg_results = df[columns_list].drop_duplicates()
|
|
66
|
+
columns_value = dtypes = None
|
|
67
|
+
|
|
68
|
+
inputs = [df]
|
|
69
|
+
if self.agg_results is not None:
|
|
70
|
+
inputs.append(self.agg_results)
|
|
71
|
+
return self.new_dataframe(
|
|
72
|
+
inputs,
|
|
73
|
+
shape=(np.nan, np.nan),
|
|
74
|
+
dtypes=dtypes,
|
|
75
|
+
columns_value=columns_value,
|
|
76
|
+
index_value=index_value,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def pivot(data, columns, index=None, values=None):
|
|
81
|
+
"""
|
|
82
|
+
Return reshaped DataFrame organized by given index / column values.
|
|
83
|
+
|
|
84
|
+
Reshape data (produce a "pivot" table) based on column values. Uses
|
|
85
|
+
unique values from specified `index` / `columns` to form axes of the
|
|
86
|
+
resulting DataFrame. This function does not support data
|
|
87
|
+
aggregation; multiple values will result in a MultiIndex in the
|
|
88
|
+
columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
|
|
89
|
+
|
|
90
|
+
Parameters
|
|
91
|
+
----------
|
|
92
|
+
index : str or object or a list of str, optional
|
|
93
|
+
Column to use to make new frame's index. If None, uses
|
|
94
|
+
existing index.
|
|
95
|
+
|
|
96
|
+
columns : str or object or a list of str
|
|
97
|
+
Column to use to make new frame's columns.
|
|
98
|
+
|
|
99
|
+
values : str, object or a list of the previous, optional
|
|
100
|
+
Column(s) to use for populating new frame's values. If not
|
|
101
|
+
specified, all remaining columns will be used and the result will
|
|
102
|
+
have hierarchically indexed columns.
|
|
103
|
+
|
|
104
|
+
Returns
|
|
105
|
+
-------
|
|
106
|
+
DataFrame
|
|
107
|
+
Returns reshaped DataFrame.
|
|
108
|
+
|
|
109
|
+
Raises
|
|
110
|
+
------
|
|
111
|
+
ValueError:
|
|
112
|
+
When there are any `index`, `columns` combinations with multiple
|
|
113
|
+
values. Use `DataFrame.pivot_table` when you need to aggregate.
|
|
114
|
+
|
|
115
|
+
See Also
|
|
116
|
+
--------
|
|
117
|
+
DataFrame.pivot_table : Generalization of pivot that can handle
|
|
118
|
+
duplicate values for one index/column pair.
|
|
119
|
+
DataFrame.unstack : Pivot based on the index values instead of a
|
|
120
|
+
column.
|
|
121
|
+
wide_to_long : Wide panel to long format. Less flexible but more
|
|
122
|
+
user-friendly than melt.
|
|
123
|
+
|
|
124
|
+
Notes
|
|
125
|
+
-----
|
|
126
|
+
For finer-tuned control, see hierarchical indexing documentation along
|
|
127
|
+
with the related stack/unstack methods.
|
|
128
|
+
|
|
129
|
+
Examples
|
|
130
|
+
--------
|
|
131
|
+
>>> import maxframe.dataframe as md
|
|
132
|
+
>>> df = md.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
|
|
133
|
+
... 'two'],
|
|
134
|
+
... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
|
|
135
|
+
... 'baz': [1, 2, 3, 4, 5, 6],
|
|
136
|
+
... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
|
|
137
|
+
>>> df.execute()
|
|
138
|
+
foo bar baz zoo
|
|
139
|
+
0 one A 1 x
|
|
140
|
+
1 one B 2 y
|
|
141
|
+
2 one C 3 z
|
|
142
|
+
3 two A 4 q
|
|
143
|
+
4 two B 5 w
|
|
144
|
+
5 two C 6 t
|
|
145
|
+
|
|
146
|
+
>>> df.pivot(index='foo', columns='bar', values='baz').execute()
|
|
147
|
+
bar A B C
|
|
148
|
+
foo
|
|
149
|
+
one 1 2 3
|
|
150
|
+
two 4 5 6
|
|
151
|
+
|
|
152
|
+
>>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']).execute()
|
|
153
|
+
baz zoo
|
|
154
|
+
bar A B C A B C
|
|
155
|
+
foo
|
|
156
|
+
one 1 2 3 x y z
|
|
157
|
+
two 4 5 6 q w t
|
|
158
|
+
|
|
159
|
+
You could also assign a list of column names or a list of index names.
|
|
160
|
+
|
|
161
|
+
>>> df = md.DataFrame({
|
|
162
|
+
... "lev1": [1, 1, 1, 2, 2, 2],
|
|
163
|
+
... "lev2": [1, 1, 2, 1, 1, 2],
|
|
164
|
+
... "lev3": [1, 2, 1, 2, 1, 2],
|
|
165
|
+
... "lev4": [1, 2, 3, 4, 5, 6],
|
|
166
|
+
... "values": [0, 1, 2, 3, 4, 5]})
|
|
167
|
+
>>> df.execute()
|
|
168
|
+
lev1 lev2 lev3 lev4 values
|
|
169
|
+
0 1 1 1 1 0
|
|
170
|
+
1 1 1 2 2 1
|
|
171
|
+
2 1 2 1 3 2
|
|
172
|
+
3 2 1 2 4 3
|
|
173
|
+
4 2 1 1 5 4
|
|
174
|
+
5 2 2 2 6 5
|
|
175
|
+
|
|
176
|
+
>>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values").execute()
|
|
177
|
+
lev2 1 2
|
|
178
|
+
lev3 1 2 1 2
|
|
179
|
+
lev1
|
|
180
|
+
1 0.0 1.0 2.0 NaN
|
|
181
|
+
2 4.0 3.0 NaN 5.0
|
|
182
|
+
|
|
183
|
+
>>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values").execute()
|
|
184
|
+
lev3 1 2
|
|
185
|
+
lev1 lev2
|
|
186
|
+
1 1 0.0 1.0
|
|
187
|
+
2 2.0 NaN
|
|
188
|
+
2 1 4.0 3.0
|
|
189
|
+
2 NaN 5.0
|
|
190
|
+
|
|
191
|
+
A ValueError is raised if there are any duplicates.
|
|
192
|
+
|
|
193
|
+
>>> df = md.DataFrame({"foo": ['one', 'one', 'two', 'two'],
|
|
194
|
+
... "bar": ['A', 'A', 'B', 'C'],
|
|
195
|
+
... "baz": [1, 2, 3, 4]})
|
|
196
|
+
>>> df.execute()
|
|
197
|
+
foo bar baz
|
|
198
|
+
0 one A 1
|
|
199
|
+
1 one A 2
|
|
200
|
+
2 two B 3
|
|
201
|
+
3 two C 4
|
|
202
|
+
|
|
203
|
+
Notice that the first two rows are the same for our `index`
|
|
204
|
+
and `columns` arguments.
|
|
205
|
+
|
|
206
|
+
>>> df.pivot(index='foo', columns='bar', values='baz').execute()
|
|
207
|
+
Traceback (most recent call last):
|
|
208
|
+
...
|
|
209
|
+
ValueError: Index contains duplicate entries, cannot reshape
|
|
210
|
+
"""
|
|
211
|
+
values_list = make_column_list(values, data.dtypes)
|
|
212
|
+
index_list = make_column_list(index, data.dtypes)
|
|
213
|
+
columns_list = make_column_list(columns, data.dtypes)
|
|
214
|
+
|
|
215
|
+
name_to_attr = {"values": values_list, "index": index_list, "columns": columns_list}
|
|
216
|
+
for key, val in name_to_attr.items():
|
|
217
|
+
if val is None:
|
|
218
|
+
continue
|
|
219
|
+
if not is_list_like(val):
|
|
220
|
+
raise ValueError(f"Need to specify {key} as a list-like object.")
|
|
221
|
+
non_exist_key = next((c for c in val if c not in data.dtypes.index), no_default)
|
|
222
|
+
if non_exist_key is not no_default:
|
|
223
|
+
raise ValueError(
|
|
224
|
+
f"Column {non_exist_key} specified in {key} is not a valid column."
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
op = DataFramePivot(
|
|
228
|
+
values=values,
|
|
229
|
+
index=index,
|
|
230
|
+
columns=columns,
|
|
231
|
+
)
|
|
232
|
+
return op(data)
|
|
@@ -12,16 +12,18 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
from pandas.api.types import is_list_like
|
|
18
20
|
|
|
19
21
|
from ... import opcodes
|
|
20
|
-
from ...core import OutputType
|
|
21
|
-
from ...serialization.serializables import AnyField, BoolField, StringField
|
|
22
|
+
from ...core import EntityData, OutputType
|
|
23
|
+
from ...serialization.serializables import AnyField, BoolField, KeyField, StringField
|
|
22
24
|
from ...utils import no_default
|
|
23
25
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
-
from ..utils import build_df, parse_index
|
|
26
|
+
from ..utils import build_df, make_column_list, parse_index
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -37,35 +39,53 @@ class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
37
39
|
margins_name = StringField("margins_name", default=None)
|
|
38
40
|
sort = BoolField("sort", default=False)
|
|
39
41
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
42
|
+
agg_results = KeyField("agg_results", default=None)
|
|
43
|
+
|
|
44
|
+
def __init__(self, aggfunc=None, **kw):
|
|
45
|
+
if aggfunc is None:
|
|
46
|
+
aggfunc = "mean"
|
|
47
|
+
super().__init__(aggfunc=aggfunc, **kw)
|
|
48
|
+
self._output_types = [OutputType.dataframe]
|
|
49
|
+
|
|
50
|
+
@classmethod
|
|
51
|
+
def _set_inputs(cls, op: "DataFramePivotTable", inputs: List[EntityData]):
|
|
52
|
+
super()._set_inputs(op, inputs)
|
|
53
|
+
if op.agg_results is not None: # pragma: no branch
|
|
54
|
+
op.agg_results = inputs[-1]
|
|
43
55
|
|
|
44
56
|
def __call__(self, df):
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
57
|
+
index_list = make_column_list(self.index, df.dtypes)
|
|
58
|
+
columns_list = make_column_list(self.columns, df.dtypes)
|
|
59
|
+
values_list = make_column_list(self.values, df.dtypes)
|
|
60
|
+
|
|
61
|
+
if not index_list:
|
|
62
|
+
index_data = pd.Index([])
|
|
63
|
+
elif len(index_list) == 1:
|
|
64
|
+
index_data = pd.Index(
|
|
65
|
+
[], dtype=df.dtypes[index_list[0]], name=index_list[0]
|
|
66
|
+
)
|
|
67
|
+
else:
|
|
68
|
+
index_data = pd.MultiIndex.from_frame(build_df(df[index_list]))
|
|
69
|
+
index_value = parse_index(index_data, df)
|
|
70
|
+
|
|
71
|
+
if columns_list is None: # output columns can be determined
|
|
72
|
+
sel_df = df
|
|
73
|
+
groupby_obj = sel_df.groupby(index_list)
|
|
74
|
+
if values_list:
|
|
75
|
+
groupby_obj = groupby_obj[values_list]
|
|
76
|
+
aggregated_df = groupby_obj.agg(self.aggfunc)
|
|
77
|
+
index_value = aggregated_df.index_value
|
|
78
|
+
columns_value = aggregated_df.columns_value
|
|
79
|
+
dtypes = aggregated_df.dtypes
|
|
80
|
+
else:
|
|
81
|
+
self.agg_results = df[columns_list].drop_duplicates()
|
|
82
|
+
columns_value = dtypes = None
|
|
83
|
+
|
|
84
|
+
inputs = [df]
|
|
85
|
+
if self.agg_results is not None:
|
|
86
|
+
inputs.append(self.agg_results)
|
|
67
87
|
return self.new_dataframe(
|
|
68
|
-
|
|
88
|
+
inputs,
|
|
69
89
|
shape=(np.nan, np.nan),
|
|
70
90
|
dtypes=dtypes,
|
|
71
91
|
columns_value=columns_value,
|
|
@@ -219,17 +239,9 @@ def pivot_table(
|
|
|
219
239
|
"No group keys passed, need to specify at least one of index or columns"
|
|
220
240
|
)
|
|
221
241
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
return [col]
|
|
226
|
-
except TypeError:
|
|
227
|
-
return col
|
|
228
|
-
return col
|
|
229
|
-
|
|
230
|
-
values_list = make_col_list(values)
|
|
231
|
-
index_list = make_col_list(index)
|
|
232
|
-
columns_list = make_col_list(columns)
|
|
242
|
+
values_list = make_column_list(values, data.dtypes)
|
|
243
|
+
index_list = make_column_list(index, data.dtypes)
|
|
244
|
+
columns_list = make_column_list(columns, data.dtypes)
|
|
233
245
|
|
|
234
246
|
name_to_attr = {"values": values_list, "index": index_list, "columns": columns_list}
|
|
235
247
|
for key, val in name_to_attr.items():
|