maxframe 2.0.0b1__cp38-cp38-macosx_10_9_universal2.whl → 2.2.0__cp38-cp38-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/read_odps_query.py +76 -16
- maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +4 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/io/odpsio/tests/test_volumeio.py +4 -15
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +87 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +3 -13
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...serialization.serializables import AnyField, DictField
|
|
21
|
+
from ..window.expanding import Expanding
|
|
22
|
+
from .core import BaseGroupByWindowOp
|
|
23
|
+
|
|
24
|
+
_supported_funcs = {
|
|
25
|
+
"sum",
|
|
26
|
+
"prod",
|
|
27
|
+
"mean",
|
|
28
|
+
"std",
|
|
29
|
+
"var",
|
|
30
|
+
"median",
|
|
31
|
+
"min",
|
|
32
|
+
"max",
|
|
33
|
+
"count",
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GroupByExpandingAgg(BaseGroupByWindowOp):
    """Operator describing an expanding-window aggregation applied per group."""

    _op_type_ = opcodes.EXPANDING_AGG

    # aggregation to apply; ExpandingGroupby.aggregate only passes values
    # found in _supported_funcs
    func = AnyField("func", default=None)
    # extra keyword arguments given to ``aggregate``; the keys read in this
    # class are "cumcount" and "ascending"
    kwargs = DictField("kwargs", default_factory=dict)

    def _calc_mock_result_df(self, mock_groupby):
        """Compute the aggregation on ``mock_groupby`` and return the result.

        ``mock_groupby`` is used like a pandas groupby object: ``cumcount``,
        ``cum<func>`` or ``apply`` is called on it depending on the operator
        configuration.
        """
        # start from the window arguments and strip the MF-specific ones
        pandas_args = self.window_params.copy()
        for mf_key in Expanding._mf_specific_fields:
            pandas_args.pop(mf_key, None)

        def apply_func(frame, **_):
            # NOTE(review): ``**_`` presumably absorbs ``include_groups`` on
            # pandas versions whose groupby.apply forwards unknown keywords
            # to the applied function -- confirm.
            # NOTE(review): the "prod" branch looks unreachable because
            # "prod" is answered by the cumulative shortcut below -- confirm.
            if self.func == "prod":
                agg = lambda x: x.prod()
            else:
                agg = self.func
            return frame.expanding(**pandas_args).agg(agg)

        # cumcount is special: it returns a series
        if self.kwargs.get("cumcount", False):
            return mock_groupby.cumcount(ascending=self.kwargs.get("ascending", True))
        if self.func in ["sum", "max", "min", "prod"]:
            cum_method = getattr(mock_groupby, f"cum{self.func}")
            return cum_method()
        return mock_groupby.apply(apply_func, include_groups=False)

    def get_sort_cols_to_asc(self) -> Dict[Any, bool]:
        """Return the sort column -> ascending mapping from the base class,
        with every direction inverted for a ``count`` carrying ``ascending``.
        """
        sort_dirs = super().get_sort_cols_to_asc()
        # NOTE(review): the inversion happens whenever an "ascending" value
        # is present, whether it is True or False; ``cumcount`` always passes
        # one -- confirm this is intended.
        flip = self.func == "count" and self.kwargs.get("ascending") is not None
        if not flip:
            return sort_dirs
        return {col: not asc for col, asc in sort_dirs.items()}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ExpandingGroupby(Expanding):
    """Expanding window whose input is a groupby object."""

    def aggregate(self, func, **kwargs):
        """Apply an expanding aggregation to every group.

        Parameters
        ----------
        func
            Aggregation to apply; must be one of ``_supported_funcs``.
        **kwargs
            Stored on the operator as its ``kwargs`` field.

        Returns
        -------
        The result of calling a ``GroupByExpandingAgg`` operator on the
        underlying groupby object.

        Raises
        ------
        NotImplementedError
            If ``func`` is not a supported aggregation. This includes
            unhashable specifications such as a list or dict of functions.
        """
        try:
            supported = func in _supported_funcs
        except TypeError:
            # unhashable values (list / dict of funcs) cannot be looked up in
            # the set; report them like any other unsupported func instead of
            # leaking "unhashable type"
            supported = False
        if not supported:
            raise NotImplementedError(f"func {func} is not supported")
        op = GroupByExpandingAgg(
            func=func,
            groupby_params=self.input.op.groupby_params,
            window_params=self.params,
            kwargs=kwargs,
        )
        return op(self.input)

    agg = aggregate
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def expanding(
    groupby,
    min_periods=1,
    *,
    shift=0,
    reverse_range=False,
    order_cols=None,
    ascending=True,
):
    """
    Return an expanding grouper, providing expanding
    functionality per group.

    All parameters after ``min_periods`` are keyword-only.

    Parameters
    ----------
    groupby
        Groupby object the expanding window is built on.

    min_periods : int, default 1
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

    shift : int, default 0
        If specified, the window will be shifted by `shift` rows (or data will be
        shifted by `-shift` rows) before computing window function.

    reverse_range : bool, default False
        If True, the window for current row is expanded from the last row to
        the current instead of the first row.

    order_cols : default None
        Passed through unchanged to ``ExpandingGroupby``. Presumably the
        column(s) defining row order inside each group -- TODO confirm.

    ascending : default True
        Passed through unchanged to ``ExpandingGroupby``. Presumably the sort
        direction applied to ``order_cols`` -- TODO confirm.

    Returns
    -------
    maxframe.dataframe.groupby.ExpandingGroupby

    See Also
    --------
    Series.groupby : Apply a function groupby to a Series.
    DataFrame.groupby : Apply a function groupby
        to each row or column of a DataFrame.
    """
    return ExpandingGroupby(
        input=groupby,
        min_periods=min_periods,
        shift=shift,
        reverse_range=reverse_range,
        order_cols=order_cols,
        ascending=ascending,
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
_cum_doc_template = """
|
|
133
|
+
Cumulative %(func_name)s for each group.
|
|
134
|
+
|
|
135
|
+
Returns
|
|
136
|
+
-------
|
|
137
|
+
Series or DataFrame
|
|
138
|
+
|
|
139
|
+
See Also
|
|
140
|
+
--------
|
|
141
|
+
Series.groupby : Apply a function groupby to a Series.
|
|
142
|
+
DataFrame.groupby : Apply a function groupby
|
|
143
|
+
to each row or column of a DataFrame.
|
|
144
|
+
|
|
145
|
+
Examples
|
|
146
|
+
--------
|
|
147
|
+
For SeriesGroupBy:
|
|
148
|
+
|
|
149
|
+
>>> import maxframe.dataframe as md
|
|
150
|
+
>>> lst = ['a', 'a', 'b']
|
|
151
|
+
>>> ser = md.Series([6, 2, 0], index=lst)
|
|
152
|
+
>>> ser.execute()
|
|
153
|
+
a 6
|
|
154
|
+
a 2
|
|
155
|
+
b 0
|
|
156
|
+
dtype: int64
|
|
157
|
+
>>> ser.groupby(level=0).cum%(func_name)s().execute()
|
|
158
|
+
%(series_result)s
|
|
159
|
+
|
|
160
|
+
For DataFrameGroupBy:
|
|
161
|
+
|
|
162
|
+
>>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]]
|
|
163
|
+
>>> df = md.DataFrame(data, columns=["a", "b", "c"],
|
|
164
|
+
... index=["fox", "gorilla", "lion"])
|
|
165
|
+
>>> df.execute()
|
|
166
|
+
a b c
|
|
167
|
+
fox 1 8 2
|
|
168
|
+
gorilla 1 2 5
|
|
169
|
+
lion 2 6 9
|
|
170
|
+
>>> df.groupby("a").groups.execute()
|
|
171
|
+
{1: ['fox', 'gorilla'], 2: ['lion']}
|
|
172
|
+
>>> df.groupby("a").cum%(func_name)s().execute()
|
|
173
|
+
%(df_result)s
|
|
174
|
+
"""
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _build_cum_docs(func_name):
    """Fill ``_cum_doc_template`` for the ``cum<func_name>`` groupby method.

    The example outputs embedded in the text are produced by running the
    pandas method of the same name on small sample data.
    """
    method = f"cum{func_name}"

    sample_ser = pd.Series([6, 2, 0], index=list("aab"))
    ser_out = getattr(sample_ser.groupby(level=0), method)()

    sample_df = pd.DataFrame(
        [[1, 8, 2], [1, 2, 5], [2, 6, 9]],
        columns=["a", "b", "c"],
        index=["fox", "gorilla", "lion"],
    )
    df_out = getattr(sample_df.groupby("a"), method)()

    return _cum_doc_template % {
        "func_name": func_name,
        "series_result": ser_out.to_string(),
        "df_result": df_out.to_string(),
    }
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def cumcount(groupby, ascending: bool = True):
    """
    Number each item in each group from 0 to the length of that group - 1.

    Essentially this is equivalent to

    .. code-block:: python

        self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))

    Parameters
    ----------
    ascending : bool, default True
        If False, number in reverse, from length of group - 1 to 0.

    Returns
    -------
    Series
        Sequence number of each element within each group.

    See Also
    --------
    .ngroup : Number the groups themselves.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']],
    ...                   columns=['A'])
    >>> df.execute()
       A
    0  a
    1  a
    2  a
    3  b
    4  b
    5  a
    >>> df.groupby('A').cumcount().execute()
    0    0
    1    1
    2    2
    3    0
    4    1
    5    3
    dtype: int64
    >>> df.groupby('A').cumcount(ascending=False).execute()
    0    3
    1    2
    2    1
    3    1
    4    0
    5    0
    dtype: int64
    """
    # implemented as an expanding "count" flagged as cumcount
    window = groupby.expanding()
    return window.agg("count", ascending=ascending, cumcount=True)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def cummin(groupby):
    # Group-wise cumulative minimum, expressed as an expanding ``min``.
    # The docstring is attached at the end of the module.
    window = groupby.expanding()
    return window.min()
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def cummax(groupby):
    # Group-wise cumulative maximum, expressed as an expanding ``max``.
    # The docstring is attached at the end of the module.
    window = groupby.expanding()
    return window.max()
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def cumprod(groupby):
    # Group-wise cumulative product, expressed as an expanding ``prod``.
    # The docstring is attached at the end of the module.
    window = groupby.expanding()
    return window.prod()
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def cumsum(groupby):
    # Group-wise cumulative sum, expressed as an expanding ``sum``.
    # The docstring is attached at the end of the module.
    window = groupby.expanding()
    return window.sum()
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# Attach generated docstrings: dropping the leading "cum" from each function
# name gives the func_name expected by _build_cum_docs.
for _func in (cummin, cummax, cumprod, cumsum):
    _func.__doc__ = _build_cum_docs(_func.__name__[len("cum"):])
|
|
@@ -23,7 +23,7 @@ from ..utils import parse_index
|
|
|
23
23
|
|
|
24
24
|
class GroupByFill(DataFrameOperator, DataFrameOperatorMixin):
|
|
25
25
|
_op_module_ = "dataframe.groupby"
|
|
26
|
-
_legacy_name = "GroupByFillOperator"
|
|
26
|
+
_legacy_name = "GroupByFillOperator" # since v2.0.0
|
|
27
27
|
|
|
28
28
|
value = AnyField("value", default=None)
|
|
29
29
|
method = StringField("method", default=None)
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from collections.abc import Iterable
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
|
-
from ...core import OutputType
|
|
18
|
+
from ...core import ENTITY_TYPE, OutputType
|
|
19
19
|
from ...serialization.serializables import AnyField
|
|
20
20
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
21
21
|
from ..utils import parse_index
|
|
@@ -38,7 +38,8 @@ class GroupByIndex(DataFrameOperatorMixin, DataFrameOperator):
|
|
|
38
38
|
|
|
39
39
|
def build_mock_groupby(self, **kwargs):
    """Build the parent operator's mock groupby and index it by a selection.

    Parameters
    ----------
    **kwargs
        Forwarded to the input operator's ``build_mock_groupby``. An optional
        ``selection`` entry is removed first and, when it is not None,
        replaces ``self.selection`` for this call.

    Returns
    -------
    The mock groupby object indexed with the effective selection.
    """
    groupby_op = self.inputs[0].op
    # compare against None explicitly: a truthiness test would discard valid
    # falsy selections such as the column label 0 or "", and fails on
    # array-like selections whose truth value is ambiguous
    selection = kwargs.pop("selection", None)
    if selection is None:
        selection = self.selection
    return groupby_op.build_mock_groupby(**kwargs)[selection]
|
|
42
43
|
|
|
43
44
|
def __call__(self, groupby):
|
|
44
45
|
indexed = groupby.op.build_mock_groupby()[self.selection]
|
|
@@ -55,8 +56,10 @@ class GroupByIndex(DataFrameOperatorMixin, DataFrameOperator):
|
|
|
55
56
|
else:
|
|
56
57
|
self.output_types = [OutputType.dataframe_groupby]
|
|
57
58
|
|
|
58
|
-
if
|
|
59
|
-
self.selection,
|
|
59
|
+
if (
|
|
60
|
+
isinstance(self.selection, Iterable)
|
|
61
|
+
and not isinstance(self.selection, str)
|
|
62
|
+
and not isinstance(self.selection, ENTITY_TYPE)
|
|
60
63
|
):
|
|
61
64
|
item_list = list(self.selection)
|
|
62
65
|
else:
|
|
@@ -80,7 +83,11 @@ def df_groupby_getitem(df_groupby, item):
|
|
|
80
83
|
|
|
81
84
|
if hashable and item in df_groupby.dtypes:
|
|
82
85
|
output_types = [OutputType.series_groupby]
|
|
83
|
-
elif
|
|
86
|
+
elif (
|
|
87
|
+
isinstance(item, Iterable)
|
|
88
|
+
and not isinstance(item, ENTITY_TYPE)
|
|
89
|
+
and all(it in df_groupby.dtypes for it in item)
|
|
90
|
+
):
|
|
84
91
|
output_types = [OutputType.dataframe_groupby]
|
|
85
92
|
else:
|
|
86
93
|
raise NameError(f"Cannot slice groupby with {item!r}")
|
|
@@ -17,8 +17,10 @@ import pandas as pd
|
|
|
17
17
|
|
|
18
18
|
from ... import opcodes
|
|
19
19
|
from ...core import OutputType
|
|
20
|
+
from ...serialization import PickleContainer
|
|
20
21
|
from ...serialization.serializables import BoolField, DictField, Int64Field
|
|
21
|
-
from ...
|
|
22
|
+
from ...udf import BuiltinFunction
|
|
23
|
+
from ...utils import find_objects, pd_release_version
|
|
22
24
|
from ..core import IndexValue
|
|
23
25
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
26
|
from ..utils import parse_index
|
|
@@ -34,6 +36,14 @@ class GroupByHead(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
34
36
|
groupby_params = DictField("groupby_params", default=dict())
|
|
35
37
|
enable_negative = BoolField("enable_negative", default=_pandas_enable_negative)
|
|
36
38
|
|
|
39
|
+
def has_custom_code(self) -> bool:
    """Tell whether the group keys contain user-supplied callables.

    ``find_objects`` is run over ``groupby_params["by"]`` with
    ``types=PickleContainer`` and ``checker=callable``. The result is True
    when at least one object it returns is not a ``BuiltinFunction``
    instance, and False when nothing is returned.
    """
    by_callables = find_objects(
        self.groupby_params.get("by"), types=PickleContainer, checker=callable
    )
    if by_callables:
        return any(not isinstance(fn, BuiltinFunction) for fn in by_callables)
    return False
|
|
46
|
+
|
|
37
47
|
def __call__(self, groupby):
|
|
38
48
|
df = groupby
|
|
39
49
|
while df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...serialization.serializables import BoolField, FieldTypes, ListField, StringField
|
|
19
|
+
from ..utils import make_column_list
|
|
20
|
+
from .core import BaseGroupByWindowOp
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GroupByRank(BaseGroupByWindowOp):
    """
    Operator describing ``rank`` applied on a groupby object.

    The mock result is obtained by calling ``rank`` on the mock groupby with
    the stored ``method``, ``ascending``, ``na_option`` and ``pct`` values.
    """

    _op_type_ = opcodes.RANK

    method = StringField("method", default=None)
    # list of flags; the module-level ``rank`` helper wraps a scalar into a
    # one-item list before building the operator
    ascending = ListField("ascending", FieldTypes.bool, default=True)
    na_option = StringField("na_option", default=None)
    pct = BoolField("pct", default=False)

    def _calc_mock_result_df(self, mock_groupby):
        """Run ``rank`` on ``mock_groupby`` with the operator's arguments."""
        # NOTE(review): pandas documents ``ascending`` as a bool while a list
        # is forwarded here unchanged -- confirm this is intended for the
        # mock computation.
        return mock_groupby.rank(
            method=self.method,
            ascending=self.ascending,
            na_option=self.na_option,
            pct=self.pct,
        )

    def _ascending_flags(self) -> list:
        """Return ``ascending`` as a list, wrapping a bare scalar if needed."""
        flags = self.ascending
        if isinstance(flags, (list, tuple)):
            return list(flags)
        # the field default is the bare bool ``True`` rather than a list
        return [flags]

    def get_sort_cols_to_asc(self) -> Dict[Any, bool]:
        """
        Map every ranked column to its ascending flag.

        For a 1-d input the mapping has a single entry keyed by the input's
        name. Otherwise the columns come from the ``selection`` groupby
        parameter or, when that is empty, from all input columns that are not
        listed in ``by``. When fewer flags than columns are available, the
        first flag is applied to every column.

        Returns
        -------
        Dict[Any, bool]
            Column name -> ascending flag.
        """
        flags = self._ascending_flags()

        if self.inputs[0].ndim == 1:
            # a single column takes a single flag, not the whole list
            return {self.inputs[0].name: flags[0]}

        selections = make_column_list(
            self.groupby_params.get("selection", []), self.inputs[0].dtypes
        )
        if not selections:
            by_cols = set(
                make_column_list(
                    self.groupby_params.get("by") or [], self.inputs[0].dtypes
                )
            )
            selections = [c for c in self.inputs[0].dtypes.index if c not in by_cols]

        if len(flags) < len(selections):
            # broadcast the first flag over all selected columns
            flags = [flags[0]] * len(selections)
        return dict(zip(selections, flags))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def rank(groupby, method="average", ascending=True, na_option="keep", pct=False):
    """
    Provide the rank of values within each group.

    Parameters
    ----------
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        * average: average rank of group.
        * min: lowest rank in group.
        * max: highest rank in group.
        * first: ranks assigned in order they appear in the array.
        * dense: like 'min', but rank always increases by 1 between groups.
    ascending : bool, default True
        False for ranks by high (1) to low (N). A list or tuple of booleans
        is also accepted and is passed to the operator as a list.
    na_option : {'keep', 'top', 'bottom'}, default 'keep'
        * keep: leave NA values where they are.
        * top: smallest rank if ascending.
        * bottom: smallest rank if descending.
    pct : bool, default False
        Compute percentage rank of data within each group.

    Returns
    -------
    DataFrame with ranking of values within each group

    See Also
    --------
    Series.groupby : Apply a function groupby to a Series.
    DataFrame.groupby : Apply a function groupby
        to each row or column of a DataFrame.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame(
    ...     {
    ...         "group": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"],
    ...         "value": [2, 4, 2, 3, 5, 1, 2, 4, 1, 5],
    ...     }
    ... )
    >>> df.execute()
      group  value
    0     a      2
    1     a      4
    2     a      2
    3     a      3
    4     a      5
    5     b      1
    6     b      2
    7     b      4
    8     b      1
    9     b      5
    >>> for method in ['average', 'min', 'max', 'dense', 'first']:
    ...     df[f'{method}_rank'] = df.groupby('group')['value'].rank(method)
    >>> df.execute()
      group  value  average_rank  min_rank  max_rank  dense_rank  first_rank
    0     a      2           1.5       1.0       2.0         1.0         1.0
    1     a      4           4.0       4.0       4.0         3.0         4.0
    2     a      2           1.5       1.0       2.0         1.0         2.0
    3     a      3           3.0       3.0       3.0         2.0         3.0
    4     a      5           5.0       5.0       5.0         4.0         5.0
    5     b      1           1.5       1.0       2.0         1.0         1.0
    6     b      2           3.0       3.0       3.0         2.0         3.0
    7     b      4           4.0       4.0       4.0         3.0         4.0
    8     b      1           1.5       1.0       2.0         1.0         2.0
    9     b      5           5.0       5.0       5.0         4.0         5.0
    """
    if isinstance(ascending, tuple):
        # a tuple of flags must not be wrapped as a single list element
        ascending = list(ascending)
    elif not isinstance(ascending, list):
        ascending = [ascending]
    op = GroupByRank(
        method=method,
        ascending=ascending,
        na_option=na_option,
        pct=pct,
        groupby_params=groupby.op.groupby_params,
    )
    return op(groupby)
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField, DictField
|
|
17
|
+
from ..window.rolling import Rolling
|
|
18
|
+
from .core import BaseGroupByWindowOp
|
|
19
|
+
|
|
20
|
+
# Aggregation names accepted by RollingGroupby.aggregate; any other value is
# rejected there with NotImplementedError.
_supported_funcs = {"sum", "mean", "std", "var", "median", "min", "max", "count"}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GroupByRollingAgg(BaseGroupByWindowOp):
    """
    Operator describing a rolling aggregation applied on a groupby object.

    ``func`` holds the name of the rolling method to invoke and ``kwargs``
    the extra keyword arguments supplied by the caller.
    """

    _op_type_ = opcodes.ROLLING_AGG

    func = AnyField("func", default=None)
    kwargs = DictField("kwargs", default_factory=dict)

    def _calc_mock_result_df(self, mock_groupby):
        """
        Compute the mock result by applying the rolling method per group.

        Window parameters listed in ``Rolling._mf_specific_fields`` are left
        out before being handed to ``rolling``.
        """
        mf_only = Rolling._mf_specific_fields
        pandas_rolling_kw = {
            name: value
            for name, value in self.window_params.items()
            if name not in mf_only
        }
        agg_name = self.func

        def apply_func(frame, **_):
            # extra keyword arguments are accepted and ignored; presumably
            # this covers ``include_groups`` being forwarded to the function
            # -- TODO confirm
            roller = frame.rolling(**pandas_rolling_kw)
            return getattr(roller, agg_name)()

        return mock_groupby.apply(apply_func, include_groups=False)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RollingGroupby(Rolling):
    """Rolling object whose aggregations build a ``GroupByRollingAgg`` operator."""

    def aggregate(self, func, **kwargs):
        """
        Apply a named rolling aggregation on the wrapped groupby object.

        Parameters
        ----------
        func : str
            Name of the aggregation; must be one of ``_supported_funcs``.
        **kwargs
            Extra keyword arguments stored on the operator.

        Returns
        -------
        Result of calling the ``GroupByRollingAgg`` operator on ``self.input``.

        Raises
        ------
        NotImplementedError
            If ``func`` is not a supported aggregation name.
        """
        # check the type first: an unhashable ``func`` (e.g. a list or dict)
        # would make the set lookup raise TypeError instead
        if not isinstance(func, str) or func not in _supported_funcs:
            raise NotImplementedError(f"func {func} is not supported")
        op = GroupByRollingAgg(
            func=func,
            groupby_params=self.input.op.groupby_params,
            window_params=self.params,
            kwargs=kwargs,
        )
        return op(self.input)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def rolling(
    groupby,
    window,
    min_periods=None,
    *,
    center=False,
    win_type=None,
    on=None,
    axis=0,
    closed=None,
    shift=0,
    order_cols=None,
    ascending=True,
) -> RollingGroupby:
    """
    Return a rolling grouper, providing rolling functionality per group.

    Parameters
    ----------
    window : int, timedelta, str, offset, or BaseIndexer subclass
        Size of the moving window.

        If an integer, the fixed number of observations used for
        each window.

        If a timedelta, str, or offset, the time period of each window. Each
        window will be a variable sized based on the observations included in
        the time-period. This is only valid for datetimelike indexes.
        To learn more about the offsets & frequency strings, please see `this link
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

        If a BaseIndexer subclass, the window boundaries
        based on the defined ``get_window_bounds`` method. Additional rolling
        keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
        ``step`` will be passed to ``get_window_bounds``.

    min_periods : int, default None
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

        For a window that is specified by an offset,
        ``min_periods`` will default to 1.

        For a window that is specified by an integer, ``min_periods`` will default
        to the size of the window.

    center : bool, default False
        If False, set the window labels as the right edge of the window index.

        If True, set the window labels as the center of the window index.

    win_type : str, default None
        If ``None``, all points are evenly weighted.

        If a string, it must be a valid `scipy.signal window function
        <https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows>`__.

        Certain Scipy window types require additional parameters to be passed
        in the aggregation function. The additional parameters must match
        the keywords specified in the Scipy window type method signature.

    on : str, optional
        For a DataFrame, a column label or Index level on which
        to calculate the rolling window, rather than the DataFrame's index.

        Provided integer column is ignored and excluded from result since
        an integer index is not used to calculate the rolling window.

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

        For `Series` this parameter is unused and defaults to 0.

    closed : str, default None
        If ``'right'``, the first point in the window is excluded from calculations.

        If ``'left'``, the last point in the window is excluded from calculations.

        If ``'both'``, no points in the window are excluded from calculations.

        If ``'neither'``, the first and last points in the window are excluded
        from calculations.

        Default ``None`` (``'right'``).

    shift : int, default 0
        If specified, the window will be shifted by `shift` rows (or data will be
        shifted by `-shift` rows) before computing window function.

    order_cols : optional, default None
        Forwarded unchanged to ``RollingGroupby``. Presumably the columns
        used to order rows inside each group -- TODO confirm.

    ascending : default True
        Forwarded unchanged to ``RollingGroupby``. Presumably the sort
        direction used together with ``order_cols`` -- TODO confirm.

    Returns
    -------
    maxframe.dataframe.groupby.RollingGroupby
        Return a new grouper with our rolling appended.

    See Also
    --------
    Series.rolling : Calling object with Series data.
    DataFrame.rolling : Calling object with DataFrames.
    Series.groupby : Apply a function groupby to a Series.
    DataFrame.groupby : Apply a function groupby.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [1, 1, 2, 2],
    ...                    'B': [1, 2, 3, 4],
    ...                    'C': [0.362, 0.227, 1.267, -0.562]})
    >>> df.execute()
       A  B      C
    0  1  1  0.362
    1  1  2  0.227
    2  2  3  1.267
    3  2  4 -0.562

    >>> df.groupby('A').rolling(2).sum().execute()
           B      C
    A
    1 0  NaN    NaN
      1  3.0  0.589
    2 2  NaN    NaN
      3  7.0  0.705

    >>> df.groupby('A').rolling(2, min_periods=1).sum().execute()
           B      C
    A
    1 0  1.0  0.362
      1  3.0  0.589
    2 2  3.0  1.267
      3  7.0  0.705

    >>> df.groupby('A').rolling(2, on='B').sum().execute()
         B      C
    A
    1 0  1    NaN
      1  2  0.589
    2 2  3    NaN
      3  4  0.705
    """
    # gather every option once, then hand the whole set to the grouper
    grouper_options = dict(
        input=groupby,
        window=window,
        min_periods=min_periods,
        center=center,
        win_type=win_type,
        on=on,
        axis=axis,
        closed=closed,
        shift=shift,
        order_cols=order_cols,
        ascending=ascending,
    )
    return RollingGroupby(**grouper_options)
|