PyPI - maxframe - Versions diffs - 2.0.0b2__cp37-cp37m-win_amd64.whl → 2.2.0__cp37-cp37m-win_amd64.whl - Mend

maxframe 2.0.0b2__cp37-cp37m-win_amd64.whl → 2.2.0__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (391) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cp37-win_amd64.pyd +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +86 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/extensions/map_reduce.py ADDED Viewed

@@ -0,0 +1,263 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import inspect
+from typing import Any, Callable, List, Optional, Union
+import pandas as pd
+def _has_end_arg(func) -> bool:
+    f_args = inspect.getfullargspec(func)
+    return "end" in f_args.args or "end" in f_args.kwonlyargs
+def _gen_combined_mapper(
+    mapper: Callable,
+    combiner: Callable,
+    group_cols: List[Any],
+    order_cols: List[Any],
+    ascending: Union[bool, List[bool]] = True,
+):
+    class CombinedMapper:
+        def __init__(self):
+            if isinstance(mapper, type):
+                self.f = mapper()
+            else:
+                self.f = mapper
+            if isinstance(combiner, type):
+                self.combiner = combiner()
+            else:
+                self.combiner = combiner
+        def _combine_mapper_result(self, mapper_result, end=False):
+            if mapper_result is None:
+                return None
+            res = mapper_result
+            if order_cols:
+                res = mapper_result.sort_values(order_cols, ascending=ascending)
+            kw = {"end": end} if _has_end_arg(self.combiner) else {}
+            gcols = group_cols or list(res.columns)
+            return res.groupby(gcols, group_keys=False)[list(res.columns)].apply(
+                self.combiner, **kw
+            )
+        def __call__(self, batch, end=False):
+            kw = {"end": end} if _has_end_arg(self.f) else {}
+            f_ret = self.f(batch, **kw)
+            return self._combine_mapper_result(f_ret, end=end)
+        def close(self) -> None:
+            if hasattr(self.f, "close"):
+                self.f.close()
+            if hasattr(self.combiner, "close"):
+                self.combiner.close()
+    return CombinedMapper
+def map_reduce(
+    df,
+    mapper: Optional[Callable] = None,
+    reducer: Optional[Callable] = None,
+    group_cols: Optional[List[Any]] = None,
+    *,
+    order_cols: List[Any] = None,
+    ascending: Union[bool, List[bool]] = True,
+    combiner: Callable = None,
+    batch_rows: Optional[int] = 1024,
+    mapper_dtypes: pd.Series = None,
+    mapper_index: pd.Index = None,
+    mapper_batch_rows: Optional[int] = None,
+    reducer_dtypes: pd.Series = None,
+    reducer_index: pd.Index = None,
+    reducer_batch_rows: Optional[int] = None,
+    ignore_index: bool = False,
+):
+    """
+    Map-reduce API over certain DataFrames. This function is roughly
+    a shortcut for
+    .. code-block:: python
+        df.mf.apply_chunk(mapper).groupby(group_keys).mf.apply_chunk(reducer)
+    Parameters
+    ----------
+    mapper : function or type
+        Mapper function or class.
+    reducer : function or type
+        Reducer function or class.
+    group_cols : str or list[str]
+        The keys to group after mapper. If absent, all columns in the mapped
+        DataFrame will be used.
+    order_cols : str or list[str]
+        The columns to sort after groupby.
+    ascending : bool or list[bool] or None
+        Whether columns should be in ascending order or not, only effective when
+        `order_cols` are specified. If a list of booleans are passed, orders of
+        every column in `order_cols` are specified.
+    combiner : function or class
+        Combiner function or class. Should accept and returns the same schema
+        of mapper outputs.
+    batch_rows : int or None
+        Rows in batches for mappers and reducers. Ignored if `mapper_batch_rows`
+        specified for mappers or `reducer_batch_rows` specified for reducers.
+        1024 by default.
+    mapper_dtypes : pd.Series or dict or None
+        Output dtypes of mapper stage.
+    mapper_index : pd.Index or None
+        Index of DataFrame returned by mappers.
+    mapper_batch_rows : int or None
+        Rows in batches for mappers. If specified, `batch_rows` will be ignored
+        for mappers.
+    reducer_dtypes : pd.Series or dict or None
+        Output dtypes of reducer stage.
+    reducer_index : pd.Index or None
+        Index of DataFrame returned by reducers.
+    reducer_batch_rows : int or None
+        Rows in batches for mappers. If specified, `batch_rows` will be ignored
+        for reducers.
+    ignore_index : bool
+        If true, indexes generated at mapper or reducer functions will be ignored.
+    Returns
+    -------
+    output: DataFrame
+        Result DataFrame after map and reduce.
+    Examples
+    --------
+    We first define a DataFrame with a column of several words.
+    >>> from collections import defaultdict
+    >>> import maxframe.dataframe as md
+    >>> from maxframe.udf import with_running_options
+    >>> df = pd.DataFrame(
+    >>>     {
+    >>>         "name": ["name key", "name", "key", "name", "key name"],
+    >>>         "id": [4, 2, 4, 3, 3],
+    >>>         "fid": [5.3, 3.5, 4.2, 2.2, 4.1],
+    >>>     }
+    >>> )
+    Then we write a mapper function which accepts batches in the DataFrame
+    and returns counts of words in every row.
+    >>> def mapper(batch):
+    >>>     word_to_count = defaultdict(lambda: 0)
+    >>>     for words in batch["name"]:
+    >>>         for w in words.split():
+    >>>             word_to_count[w] += 1
+    >>>     return pd.DataFrame(
+    >>>         [list(tp) for tp in word_to_count.items()], columns=["word", "count"]
+    >>>     )
+    After that we write a reducer function which aggregates records with
+    the same word. Running options such as CPU specifications can be supplied
+    as well.
+    >>> @with_running_options(cpu=2)
+    >>> class TestReducer:
+    >>>     def __init__(self):
+    >>>         self._word_to_count = defaultdict(lambda: 0)
+    >>>
+    >>>     def __call__(self, batch, end=False):
+    >>>         word = None
+    >>>         for _, row in batch.iterrows():
+    >>>             word = row.iloc[0]
+    >>>             self._word_to_count[row.iloc[0]] += row.iloc[1]
+    >>>         if end:
+    >>>             return pd.DataFrame(
+    >>>                 [[word, self._word_to_count[word]]], columns=["word", "count"]
+    >>>             )
+    >>>
+    >>>     def close(self):
+    >>>         # you can do several cleanups here
+    >>>         print("close")
+    Finally we can call `map_reduce` with mappers and reducers specified above.
+    >>> res = df.mf.map_reduce(
+    >>>     mapper,
+    >>>     TestReducer,
+    >>>     group_cols=["word"],
+    >>>     mapper_dtypes={"word": "str", "count": "int"},
+    >>>     mapper_index=pd.Index([0]),
+    >>>     reducer_dtypes={"word": "str", "count": "int"},
+    >>>     reducer_index=pd.Index([0]),
+    >>>     ignore_index=True,
+    >>> )
+    >>> res.execute().fetch()
+       word  count
+    0   key      3
+    1  name      4
+    See Also
+    --------
+    DataFrame.mf.apply_chunk, DataFrame.groupby.mf.apply_chunk
+    """
+    mapper_batch_rows = mapper_batch_rows or batch_rows
+    reducer_batch_rows = reducer_batch_rows or batch_rows
+    def check_arg(arg_type, locals_):
+        if locals_.get(arg_type) is not None:
+            return
+        for suffix in ("dtypes", "index"):
+            arg_name = f"{arg_type}_{suffix}"
+            if locals_.get(arg_name) is not None:
+                raise ValueError(f"Cannot specify {arg_name} when {arg_type} is None")
+    if mapper is None:
+        check_arg("mapper", locals())
+        mapped = df
+        group_cols = group_cols or df.dtypes.index
+        if combiner is not None:
+            raise ValueError("Combiner cannot be set when mapper is None")
+    else:
+        if combiner is not None:
+            mapper = _gen_combined_mapper(
+                mapper, combiner, group_cols, order_cols, ascending=ascending
+            )
+        mapped = df.mf.apply_chunk(
+            mapper,
+            batch_rows=mapper_batch_rows,
+            dtypes=mapper_dtypes,
+            output_type="dataframe",
+            index=mapper_index,
+        )
+        group_cols = group_cols or list(df.dtypes.index)
+    if reducer is None:
+        check_arg("reducer", locals())
+        res = mapped
+    else:
+        res = mapped.groupby(group_cols, group_keys=False)[
+            list(mapped.dtypes.index)
+        ].mf.apply_chunk(
+            reducer,
+            batch_rows=reducer_batch_rows,
+            dtypes=reducer_dtypes,
+            output_type="dataframe",
+            index=reducer_index,
+            order_cols=order_cols,
+            ascending=ascending,
+        )
+    if ignore_index:
+        return res.reset_index(drop=True)
+    return res

maxframe/dataframe/extensions/rebalance.py ADDED Viewed

@@ -0,0 +1,62 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ... import opcodes
+from ...serialization.serializables import Float64Field, Int64Field
+from ...tensor.extensions.rebalance import RebalanceMixin
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+from ..utils import validate_axis
+class DataFrameRebalance(RebalanceMixin, DataFrameOperatorMixin, DataFrameOperator):
+    """Operator built by ``rebalance`` to describe a rebalance request.
+    Only the operator type and its serializable fields are declared here;
+    the behaviour when the operator is called comes from the base classes,
+    which are defined elsewhere.
+    """
+    _op_type_ = opcodes.REBALANCE
+    # axis to rebalance along (``rebalance`` passes the ``validate_axis`` result)
+    axis = Int64Field("axis")
+    # chunk-count multiplier as described in the ``rebalance`` docstring;
+    # defaults to None
+    factor = Float64Field("factor", default=None)
+    # upper bound on the number of chunks as described in the ``rebalance``
+    # docstring
+    num_partitions = Int64Field("num_partitions")
+    def __init__(self, output_types=None, **kw):
+        # ``output_types`` is forwarded under the ``_output_types`` keyword
+        super().__init__(_output_types=output_types, **kw)
+def rebalance(df_or_series, axis=0, factor=None, num_partitions=None):
+    """
+    Make data more balanced across entire cluster.
+    Parameters
+    ----------
+    axis : int
+        The axis to rebalance.
+    factor : float
+        Specified so that number of chunks after balance is
+        total number of input chunks * factor.
+    num_partitions : int
+        Specified so the number of chunks are at most
+        num_partitions.
+    Returns
+    -------
+    Series or DataFrame
+        Result of DataFrame or Series after rebalanced.
+    """
+    axis = validate_axis(axis, df_or_series)
+    if num_partitions is None and factor is None:
+        raise ValueError("Need to specify num_partitions or factor")
+    if num_partitions is not None and factor is not None:
+        raise ValueError(
+            "num_partitions and factor cannot be specified at the same time"
+        )
+    op = DataFrameRebalance(axis=axis, factor=factor, num_partitions=num_partitions)
+    return op(df_or_series)

maxframe/dataframe/extensions/tests/test_apply_chunk.py CHANGED Viewed

@@ -91,15 +91,22 @@ def test_apply_chunk_infer_dtypes_and_index(df1, df2, df3):
     assert result.index_value is df2.index_value
     assert result.dtypes.equals(df2.dtypes)
+    def process(data, param, k) -> pd.DataFrame[df2.dtypes]:
+        return data * param * k
+    result = df2.mf.apply_chunk(process, batch_rows=3, args=(4,), k=1)
+    assert result.index_value is df2.index_value
+    assert result.dtypes.equals(df2.dtypes)
     # mark functions
     from ....udf import with_python_requirements, with_resources
     @with_resources("empty.txt")
     @with_python_requirements("numpy")
-    def process(data, k):
+    def process(data, k) -> pd.DataFrame[df1.dtypes]:
         return data
-    result = df1.mf.apply_chunk(process, batch_rows=3, output_type="dataframe", k=1)
+    result = df1.mf.apply_chunk(process, batch_rows=3, k=1)
     assert result.index_value is df1.index_value
     assert result.dtypes.equals(df1.dtypes)
     assert isinstance(result.op.func, MarkedFunction)

maxframe/dataframe/extensions/tests/test_extensions.py CHANGED Viewed

@@ -60,6 +60,40 @@ def df3():
     )
+@pytest.fixture
+def df4():
+    return DataFrame(
+        {
+            "name1": ["a", "b", "c", "d"],
+            "name2": ["a", "b", "c", "d"],
+            "num": [1, 2, 3, 4],
+            "kv": [
+                "k1=1.1,k2=3.1,k3=1.0",
+                "k1=7.1,k4=8.2",
+                "k5=1.2,k7=1.5",
+                "k3=1.1,k9=1",
+            ],
+            "vk": ["v1=1.1,v2=1.2", "v3=1.1,v4=1.2", "v5=1.1,v6=1.2", "v7=1.1,v8=1.2"],
+        }
+    )
+@pytest.fixture
+def df5():
+    return DataFrame(
+        {
+            "name1": ["name1", "name2", "name3", "name4", "name5"],
+            "name2": ["name1", "name2", "name3", "name4", "name5"],
+            "k1": [1.0, None, 7.1, None, None],
+            "k2": [3.0, 3.0, None, 1.2, 1.0],
+            "k3": [None, 5.1, None, 1.5, None],
+            "k5": [10.0, None, None, None, None],
+            "k7": [None, None, 8.2, None, None],
+            "k9": [None, None, None, None, 1.1],
+        }
+    )
 def test_flatmap(df1, df2, df3):
     def f(x, keys):
         if x["a"] in keys:
@@ -142,3 +176,23 @@ def test_flatjson():
         )
     with pytest.raises(ValueError):
         s1.mf.flatjson(["$.a"])
+def test_extract_kv(df4):
+    extract_kv_df = df4.mf.extract_kv(
+        columns=["kv", "vk"], kv_delim=",", item_delim="="
+    )
+    assert extract_kv_df.shape == (4, np.nan)
+    assert extract_kv_df.index_value.key == df4.index_value.key
+    with pytest.raises(ValueError):
+        df4.mf.extract_kv(columns=["name"])
+    with pytest.raises(ValueError):
+        df4.mf.extract_kv(columns=["num"])
+def test_collect_kv(df5):
+    collect_kv_df = df5.mf.collect_kv(columns=["k1", "k2", "k3", "k5", "k7", "k9"])
+    assert collect_kv_df.shape == (5, 3)
+    assert collect_kv_df.index_value.key == df5.index_value.key
+    with pytest.raises(ValueError):
+        df5.mf.collect_kv(columns=["num"])

maxframe/dataframe/extensions/tests/test_map_reduce.py ADDED Viewed

@@ -0,0 +1,135 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import functools
+from collections import defaultdict
+import pandas as pd
+import pytest
+from .... import dataframe as md
+from ...groupby.apply_chunk import GroupByApplyChunk
+from .. import DataFrameApplyChunk
+@pytest.fixture
+def df1():
+    return md.DataFrame(
+        {
+            "name": ["name key", "name", "key", "name", "key name"],
+            "id": [4, 2, 4, 3, 3],
+            "fid": [5.3, 3.5, 4.2, 2.2, 4.1],
+        }
+    )
+def test_map_reduce_with_map_only(df1):
+    func = functools.partial(lambda x: x)
+    with pytest.raises(ValueError):
+        df1.mf.map_reduce(func, reducer_dtypes={"col": "string"})
+    mapped = df1.mf.map_reduce(func)
+    assert isinstance(mapped.op, DataFrameApplyChunk)
+    assert mapped.op.func is func
+    map_combined = df1.mf.map_reduce(
+        func, combiner=func, mapper_dtypes=df1.dtypes, mapper_index=df1.index
+    )
+    assert isinstance(map_combined.op, DataFrameApplyChunk)
+    assert map_combined.op.func.__name__ == "CombinedMapper"
+def test_mapper_with_combiner(df1):
+    class BaseFunc:
+        def __init__(self):
+            self._word_to_count = defaultdict(lambda: 0)
+        def _collect_df(self):
+            word_to_count = self._word_to_count.copy()
+            self._word_to_count.clear()
+            return pd.DataFrame(
+                [list(tp) for tp in word_to_count.items()],
+                columns=["word", "count"],
+            )
+        def close(self):
+            print(f"Close {type(self)}")
+    class MapperCls(BaseFunc):
+        def __call__(self, batch, end=False):
+            for words in batch["name"]:
+                for w in words.split():
+                    self._word_to_count[w] += 1
+            if end:
+                return self._collect_df()
+    class CombinerCls(BaseFunc):
+        def __call__(self, batch, end=False):
+            for _, row in batch.iterrows():
+                self._word_to_count[row["word"]] = row["count"]
+            if end:
+                return self._collect_df()
+    map_combined = df1.mf.map_reduce(
+        MapperCls,
+        combiner=CombinerCls,
+        group_cols="word",
+        mapper_dtypes={"word": "str", "count": "int"},
+        mapper_index=df1.index,
+    )
+    raw = df1.op.data
+    combiner = map_combined.op.func()
+    ret1 = combiner(raw.iloc[:3], end=True)
+    ret2 = combiner(raw.iloc[3:], end=True)
+    close_ret = combiner.close()
+    expected1 = pd.DataFrame([["key", 2], ["name", 2]], columns=["word", "count"])
+    expected2 = pd.DataFrame([["key", 1], ["name", 2]], columns=["word", "count"])
+    assert close_ret is None
+    pd.testing.assert_frame_equal(ret1.reset_index(drop=True), expected1)
+    pd.testing.assert_frame_equal(ret2.reset_index(drop=True), expected2)
+def test_map_reduce_with_reduce_only(df1):
+    func = functools.partial(lambda x: x)
+    with pytest.raises(ValueError):
+        df1.mf.map_reduce(reducer=func, mapper_dtypes={"col": "string"})
+    reduced = df1.mf.map_reduce(reducer=func, group_cols="name")
+    assert isinstance(reduced.op, GroupByApplyChunk)
+    assert reduced.op.func is func
+    assert reduced.op.groupby_params["by"] == ["name"]
+def test_map_reduce_with_both_funcs(df1):
+    map_func = functools.partial(lambda x: x)
+    class ReducerCls:
+        def __call__(self, batch):
+            return batch
+    reduced = df1.mf.map_reduce(
+        mapper=map_func,
+        reducer=ReducerCls,
+        group_cols="name",
+        reducer_dtypes=df1.dtypes,
+        reducer_index=df1.index,
+    )
+    assert isinstance(reduced.op, GroupByApplyChunk)
+    assert reduced.op.func is ReducerCls
+    assert reduced.op.groupby_params["by"] == ["name"]
+    assert isinstance(reduced.inputs[0].op, DataFrameApplyChunk)
+    assert reduced.inputs[0].op.func is map_func

maxframe/dataframe/groupby/__init__.py CHANGED Viewed

@@ -15,6 +15,8 @@
 # noinspection PyUnresolvedReferences
 from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
 from .core import NamedAgg
+from .expanding import ExpandingGroupby
+from .rolling import RollingGroupby
 def _install():
@@ -24,12 +26,15 @@ def _install():
     from .apply import groupby_apply
     from .apply_chunk import df_groupby_apply_chunk
     from .core import groupby
-    from .cum import cumcount, cummax, cummin, cumprod, cumsum
+    from .expanding import cumcount, cummax, cummin, cumprod, cumsum, expanding
     from .extensions import DataFrameGroupByMaxFrameAccessor
     from .fill import bfill, ffill, fillna
     from .getitem import df_groupby_getitem
     from .head import head
+    from .rank import rank
+    from .rolling import rolling
     from .sample import groupby_sample
+    from .shift import shift
     from .transform import groupby_transform
     for cls in DATAFRAME_TYPE:
@@ -69,6 +74,12 @@ def _install():
         setattr(cls, "cumprod", cumprod)
         setattr(cls, "cumsum", cumsum)
+        setattr(cls, "expanding", expanding)
+        setattr(cls, "rolling", rolling)
+        setattr(cls, "shift", shift)
+        setattr(cls, "rank", rank)
         setattr(cls, "head", head)
         setattr(cls, "sample", groupby_sample)