PyPI - maxframe - Versions diffs - 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl - Mend

maxframe 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (391) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cpython-39-darwin.so +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cpython-39-darwin.so +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +86 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cpython-39-darwin.so +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/sort/nsmallest.py ADDED Viewed

@@ -0,0 +1,228 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ...core import OutputType
+from .sort_values import DataFrameSortValues
+def _nsmallest(df, n, columns=None, keep="first"):
+    """
+    Shared implementation behind ``df_nsmallest`` and ``series_nsmallest``.
+    Creates a ``DataFrameSortValues`` operator configured for an ascending
+    sort along axis 0 with ``ignore_index=False``, forwarding `n` as
+    ``nrows``, `columns` as ``by`` and `keep` as ``keep_kind``, then applies
+    the operator to `df` and returns whatever the operator call produces.
+    """
+    # `columns` stays None when the caller does not supply it (as the Series
+    # entry point does); it is handed to the operator unchanged.
+    sort_options = dict(
+        output_types=[OutputType.dataframe],
+        axis=0,
+        by=columns,
+        ignore_index=False,
+        ascending=True,
+        nrows=n,
+        keep_kind=keep,
+    )
+    sort_op = DataFrameSortValues(**sort_options)
+    return sort_op(df)
+def df_nsmallest(df, n, columns, keep="first"):
+    """
+    Return the first `n` rows ordered by `columns` in ascending order.
+    The result holds the `n` rows with the smallest values in `columns`,
+    sorted in ascending order. Columns that are not listed in `columns`
+    are returned as well, but they do not take part in the ordering.
+    This method is equivalent to
+    ``df.sort_values(columns, ascending=True).head(n)``, but more
+    performant.
+    Parameters
+    ----------
+    n : int
+        Number of items to retrieve.
+    columns : list or str
+        Column name or names to order by.
+    keep : {'first', 'last', 'all'}, default 'first'
+        Where there are duplicate values:
+        - ``first`` : take the first occurrence.
+        - ``last`` : take the last occurrence.
+        - ``all`` : do not drop any duplicates, even if it means
+          selecting more than `n` items.
+    Returns
+    -------
+    DataFrame
+    See Also
+    --------
+    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
+        descending order.
+    DataFrame.sort_values : Sort DataFrame by the values.
+    DataFrame.head : Return the first `n` rows without re-ordering.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
+    ...                                   434000, 434000, 337000, 337000,
+    ...                                   11300, 11300],
+    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
+    ...                            17036, 182, 38, 311],
+    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
+    ...                                "IS", "NR", "TV", "AI"]},
+    ...                   index=["Italy", "France", "Malta",
+    ...                          "Maldives", "Brunei", "Iceland",
+    ...                          "Nauru", "Tuvalu", "Anguilla"])
+    >>> df.execute()
+              population      GDP alpha-2
+    Italy       59000000  1937894      IT
+    France      65000000  2583560      FR
+    Malta         434000    12011      MT
+    Maldives      434000     4520      MV
+    Brunei        434000    12128      BN
+    Iceland       337000    17036      IS
+    Nauru         337000      182      NR
+    Tuvalu         11300       38      TV
+    Anguilla       11300      311      AI
+    In the following example, we will use ``nsmallest`` to select the
+    three rows having the smallest values in column "population".
+    >>> df.nsmallest(3, 'population').execute()
+              population    GDP alpha-2
+    Tuvalu         11300     38      TV
+    Anguilla       11300    311      AI
+    Iceland       337000  17036      IS
+    When using ``keep='last'``, ties are resolved in reverse order:
+    >>> df.nsmallest(3, 'population', keep='last').execute()
+              population  GDP alpha-2
+    Anguilla       11300  311      AI
+    Tuvalu         11300   38      TV
+    Nauru         337000  182      NR
+    When using ``keep='all'``, all duplicate items are maintained:
+    >>> df.nsmallest(3, 'population', keep='all').execute()
+              population    GDP alpha-2
+    Tuvalu         11300     38      TV
+    Anguilla       11300    311      AI
+    Iceland       337000  17036      IS
+    Nauru         337000    182      NR
+    To order by the smallest values in column "population" and then "GDP", we can
+    specify multiple columns like in the next example.
+    >>> df.nsmallest(3, ['population', 'GDP']).execute()
+              population  GDP alpha-2
+    Tuvalu         11300   38      TV
+    Anguilla       11300  311      AI
+    Nauru         337000  182      NR
+    """
+    # Delegate to the shared helper, naming every optional argument explicitly.
+    return _nsmallest(df, n, columns=columns, keep=keep)
+def series_nsmallest(df, n=5, keep="first"):
+    """
+    Return the smallest `n` elements.
+    Parameters
+    ----------
+    n : int, default 5
+        Return this many ascending sorted values.
+    keep : {'first', 'last', 'all'}, default 'first'
+        When there are duplicate values that cannot all fit in a
+        Series of `n` elements:
+        - ``first`` : return the first `n` occurrences in order
+            of appearance.
+        - ``last`` : return the last `n` occurrences in reverse
+            order of appearance.
+        - ``all`` : keep all occurrences. This can result in a Series of
+            size larger than `n`.
+    Returns
+    -------
+    Series
+        The `n` smallest values in the Series, sorted in increasing order.
+    See Also
+    --------
+    Series.nlargest: Get the `n` largest elements.
+    Series.sort_values: Sort Series by values.
+    Series.head: Return the first `n` rows.
+    Notes
+    -----
+    Faster than ``.sort_values().head(n)`` for small `n` relative to
+    the size of the ``Series`` object.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> countries_population = {"Italy": 59000000, "France": 65000000,
+    ...                         "Brunei": 434000, "Malta": 434000,
+    ...                         "Maldives": 434000, "Iceland": 337000,
+    ...                         "Nauru": 11300, "Tuvalu": 11300,
+    ...                         "Anguilla": 11300, "Montserrat": 5200}
+    >>> s = md.Series(countries_population)
+    >>> s.execute()
+    Italy       59000000
+    France      65000000
+    Brunei        434000
+    Malta         434000
+    Maldives      434000
+    Iceland       337000
+    Nauru          11300
+    Tuvalu         11300
+    Anguilla       11300
+    Montserrat      5200
+    dtype: int64
+    The `n` smallest elements where ``n=5`` by default.
+    >>> s.nsmallest().execute()
+    Montserrat    5200
+    Nauru        11300
+    Tuvalu       11300
+    Anguilla     11300
+    Iceland     337000
+    dtype: int64
+    The `n` smallest elements where ``n=3``. Default `keep` value is
+    'first' so Nauru and Tuvalu will be kept.
+    >>> s.nsmallest(3).execute()
+    Montserrat   5200
+    Nauru       11300
+    Tuvalu      11300
+    dtype: int64
+    The `n` smallest elements where ``n=3`` and keeping the last
+    duplicates. Anguilla and Tuvalu will be kept since they are the last
+    with value 11300 based on the index order.
+    >>> s.nsmallest(3, keep='last').execute()
+    Montserrat   5200
+    Anguilla    11300
+    Tuvalu      11300
+    dtype: int64
+    The `n` smallest elements where ``n=3`` with all duplicates kept. Note
+    that the returned Series has four elements due to the three duplicates.
+    >>> s.nsmallest(3, keep='all').execute()
+    Montserrat   5200
+    Nauru       11300
+    Tuvalu      11300
+    Anguilla    11300
+    dtype: int64
+    """
+    # `n` defaults to 5 so that the documented no-argument call
+    # ``s.nsmallest()`` works; without a default it raised TypeError.
+    # `columns` is left at the helper's default of None for a Series.
+    return _nsmallest(df, n, keep=keep)

maxframe/dataframe/statistics/__init__.py CHANGED Viewed

@@ -16,15 +16,15 @@
 def _install():
     from ..core import DATAFRAME_TYPE, SERIES_TYPE
     from .corr import df_corr, df_corrwith, series_autocorr, series_corr
-    from .quantile import quantile_dataframe, quantile_series
+    from .quantile import dataframe_quantile, series_quantile
     for t in SERIES_TYPE:
-        t.quantile = quantile_series
+        t.quantile = series_quantile
         t.corr = series_corr
         t.autocorr = series_autocorr
     for t in DATAFRAME_TYPE:
-        t.quantile = quantile_dataframe
+        t.quantile = dataframe_quantile
         t.corr = df_corr
         t.corrwith = df_corrwith

maxframe/dataframe/statistics/corr.py CHANGED Viewed

@@ -34,6 +34,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
     min_periods = Int32Field("min_periods", default=None)
     axis = Int32Field("axis", default=None)
     drop = BoolField("drop", default=None)
+    ddof = Int32Field("ddof", default=0)
     @classmethod
     def _set_inputs(cls, op: "DataFrameCorr", inputs: List[EntityData]):

maxframe/dataframe/statistics/quantile.py CHANGED Viewed

@@ -213,7 +213,7 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
             return self._call_series(a, inputs)
-def quantile_series(series, q=0.5, interpolation="linear"):
+def series_quantile(series, q=0.5, interpolation="linear"):
     """
     Return value at the given quantile.
@@ -268,7 +268,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):
     return op(series, q_input=q_input)
-def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
+def dataframe_quantile(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
     # FIXME: Timedelta not support. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
     """
     Return values at the given quantile over requested axis.

maxframe/dataframe/tests/test_typing.py ADDED Viewed

@@ -0,0 +1,104 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pandas as pd
+from ...core import OutputType
+from ..typing_ import get_function_output_meta
+def test_dataframe_type_annotation():
+    """Check the metadata derived from ``pd.DataFrame[...]`` return annotations."""
+    # Case 1: a single dtype subscript is expected to yield one int column.
+    def func() -> pd.DataFrame[int]:
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type.name == "dataframe"
+    assert len(out_meta.dtypes) == 1
+    assert out_meta.dtypes[0] == np.dtype(int)
+    # Case 2: a name-to-dtype mapping is expected to yield one column per entry.
+    def func() -> pd.DataFrame[{"col1": int, "col2": float}]:  # noqa: F821
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type.name == "dataframe"
+    assert len(out_meta.dtypes) == 2
+    assert out_meta.dtypes[0] == np.dtype(int)
+    assert out_meta.dtypes[1] == np.dtype(float)
+    # Case 3: a leading type ahead of the column mapping; the index dtype is
+    # expected to be object and the column names/dtypes to follow the mapping.
+    def func() -> pd.DataFrame[str, {"col1": int, "col2": float}]:  # noqa: F821
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type.name == "dataframe"
+    assert len(out_meta.dtypes) == 2
+    assert out_meta.index_value.value.dtype == np.dtype("O")
+    assert list(out_meta.dtypes.index) == ["col1", "col2"]
+    assert list(out_meta.dtypes) == [np.dtype(int), np.dtype(float)]
+def test_series_type_annotation():
+    """Check the metadata derived from ``pd.Series[...]`` return annotations."""
+    # Case 1: a bare dtype subscript is expected to set only the value dtype.
+    def func() -> pd.Series[np.str_]:
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type == OutputType.series
+    assert out_meta.dtype == np.dtype(np.str_)
+    # Case 2: two (name, dtype) pairs; the first is expected to describe the
+    # index and the second the series itself.
+    def func() -> pd.Series[("idx_name", str), ("series_name", np.int64)]:  # noqa: F821
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type == OutputType.series
+    assert out_meta.name == "series_name"
+    assert out_meta.dtype == np.dtype(np.int64)
+    assert out_meta.index_value.value._name == "idx_name"
+    assert out_meta.index_value.value.dtype == np.dtype("O")
+def test_index_type_annotation():
+    """Check the metadata derived from ``pd.Index[...]`` return annotations."""
+    # Case 1: a bare dtype subscript is expected to produce an index output.
+    def func() -> pd.Index[np.int64]:
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type == OutputType.index
+    # Case 2: a list of (name, dtype) pairs is expected to produce one index
+    # level per pair, with matching names and dtypes.
+    def func() -> pd.Index[[("ix1", str), ("ix2", np.int64)]]:  # noqa: F821
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta is not None
+    assert out_meta.output_type == OutputType.index
+    assert out_meta.index_value.value.names == ["ix1", "ix2"]
+    assert list(out_meta.index_value.value.dtypes) == [np.dtype("O"), np.dtype("int64")]
+def test_function_output_meta_corner_cases():
+    """Check ``get_function_output_meta`` on inputs without a pandas annotation."""
+    # A function with no return annotation is expected to give no metadata.
+    def func():
+        pass
+    assert get_function_output_meta(func) is None
+    # A non-function object is expected to be rejected the same way.
+    assert get_function_output_meta("non-func-obj") is None
+    # A plain scalar annotation is expected to map to a dtype.
+    def func() -> int:
+        pass
+    out_meta = get_function_output_meta(func)
+    assert out_meta.dtype == np.dtype("int64")

maxframe/dataframe/tests/test_utils.py CHANGED Viewed

@@ -17,9 +17,16 @@ import pandas as pd
 import pyarrow as pa
 import pytest
-from ...udf import MarkedFunction, with_python_requirements, with_resources
+from ...config import option_context
+from ...core.operator import Operator
+from ...udf import (
+    MarkedFunction,
+    with_python_requirements,
+    with_resources,
+    with_running_options,
+)
 from ...utils import ARROW_DTYPE_NOT_SUPPORTED
-from ..utils import _generate_value, pack_func_args
+from ..utils import _generate_value, copy_func_scheduling_hints, pack_func_args
 try:
     from pandas import ArrowDtype
@@ -84,6 +91,20 @@ def test_pack_function(df1):
             [(np.int32(1), "1")],
         ),
         (pa.map_(pa.int32(), pa.string()), 1, [(np.int32(1), "1")]),
+        (
+            ArrowDtype(
+                pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())])
+            )
+            if ArrowDtype
+            else None,
+            1,
+            {"a": np.int32(1), "b": "1"},
+        ),
+        (
+            pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())]),
+            1,
+            {"a": np.int32(1), "b": "1"},
+        ),
         (pa.int32(), 1, np.int32(1)),
         (np.datetime64, "2023-01-01", pd.Timestamp("2023-01-01")),
         (np.timedelta64, "1D", pd.Timedelta("1D")),
@@ -99,3 +120,46 @@ def test_pack_function(df1):
 def test_generate_value(dtype, fill_value, expected):
     result = _generate_value(dtype, fill_value)
     assert result == expected
+def test_copy_func_scheduling_hints():
+    # Test with a regular function (no scheduling hints)
+    with option_context() as options:
+        options.function.default_running_options = {}  # No default options
+        def regular_func(x):
+            return x + 1
+        op1 = Operator()
+        copy_func_scheduling_hints(regular_func, op1)
+        # Should not set any attributes since regular function has no hints
+        assert not hasattr(op1, "expect_engine") or op1.expect_engine is None
+        assert not hasattr(op1, "expect_resources") or op1.expect_resources is None
+        assert not hasattr(op1, "gpu") or op1.gpu is None
+    # Test with MarkedFunction with scheduling hints
+    @with_running_options(engine="DPE", cpu=4, memory="8GiB")
+    def marked_func(x):
+        return x + 1
+    op2 = Operator()
+    copy_func_scheduling_hints(marked_func, op2)
+    assert op2.expect_engine == "DPE"
+    # The expect_resources will include default values for gpu and gu_quota
+    expected_resources = {"cpu": 4, "memory": "8GiB", "gpu": 0, "gu_quota": None}
+    assert op2.expect_resources == expected_resources
+    # Test with MarkedFunction with GPU
+    @with_running_options(gu=2)
+    def gpu_func(x):
+        return x + 1
+    op3 = Operator()
+    copy_func_scheduling_hints(gpu_func, op3)
+    assert op3.gpu is True
+    # The expect_resources will include the gu value and default values
+    # System has default options: {'cpu': 1, 'memory': '4GiB', 'gpu': 0}
+    # The with_running_options decorator will override the gpu value with the gu value
+    expected_resources = {"gpu": 2, "gu_quota": None, "cpu": 1, "memory": "4GiB"}
+    assert op3.expect_resources == expected_resources

maxframe/dataframe/typing_.py ADDED Viewed

@@ -0,0 +1,185 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import dataclasses
+import functools
+import inspect
+from typing import Any, Callable, Generic, List, Optional, TypeVar
+import pandas as pd
+from ..core import OutputType
+from ..typing_ import PandasDType
+from ..utils import make_dtype
+from .utils import InferredDataFrameMeta, parse_index
+# TypeVars
+T = TypeVar("T")
+@dataclasses.dataclass
+class _FieldDef:
+    """Name and dtype of a single field parsed from a type annotation item."""
+    # label of the field; None when the annotation item supplied only a dtype
+    name: Any
+    # dtype of the field, as returned by make_dtype
+    dtype: PandasDType
+def _item_to_field_def(item_):
+    if isinstance(item_, tuple):
+        tp = make_dtype(item_[1])
+        return _FieldDef(name=item_[0], dtype=tp)
+    else:
+        tp = make_dtype(item_)
+        return _FieldDef(name=None, dtype=tp)
+class IndexType:
+    def __init__(self, index_fields: List[_FieldDef]):
+        self.index_fields = index_fields
+    def __repr__(self):
+        return f"IndexType({[f.dtype for f in self.index_fields]})"
+    @classmethod
+    def from_getitem_args(cls, item) -> "IndexType":
+        if isinstance(item, (dict, pd.Series)):
+            item = list(item.items())
+        if isinstance(item, list):
+            return IndexType([_item_to_field_def(tp) for tp in item])
+        else:
+            return IndexType([_item_to_field_def(item)])
+class SeriesType(Generic[T]):
+    def __init__(
+        self, index_fields: Optional[List[_FieldDef]], name_and_dtype: _FieldDef
+    ):
+        self.index_fields = index_fields
+        self.name_and_dtype = name_and_dtype
+    def __repr__(self) -> str:
+        return "SeriesType[{}]".format(self.name_and_dtype.dtype)
+    @classmethod
+    def from_getitem_args(cls, item) -> "SeriesType":
+        if not isinstance(item, tuple):
+            item = (item,)
+        if len(item) == 1:
+            tp = _item_to_field_def(item[0])
+            return SeriesType(None, tp)
+        else:
+            tp = _item_to_field_def(item[1])
+            idx_fields = IndexType.from_getitem_args(item[0]).index_fields
+            return SeriesType(idx_fields, tp)
+class DataFrameType:
+    def __init__(
+        self,
+        index_fields: Optional[List[_FieldDef]],
+        data_fields: List[_FieldDef],
+    ):
+        self.index_fields = index_fields
+        self.data_fields = data_fields
+    def __repr__(self) -> str:
+        types = [field.dtype for field in self.data_fields]
+        return f"DataFrameType[{types}]"
+    @classmethod
+    def from_getitem_args(cls, item) -> "DataFrameType":
+        if not isinstance(item, tuple):
+            item = (item,)
+        fields = IndexType.from_getitem_args(item[-1]).index_fields
+        if len(item) == 1:
+            return DataFrameType(None, fields)
+        else:
+            idx_fields = IndexType.from_getitem_args(item[0]).index_fields
+            return DataFrameType(idx_fields, fields)
+def get_function_output_meta(
+    func: Callable, df_obj=None
+) -> Optional[InferredDataFrameMeta]:
+    try:
+        func_argspec = inspect.getfullargspec(func)
+        ret_type = (func_argspec.annotations or {}).get("return")
+        if ret_type is None:
+            return None
+    except:
+        return None
+    dtypes = dtype = name = None
+    index_fields = None
+    if isinstance(ret_type, DataFrameType):
+        output_type = OutputType.dataframe
+        dtypes = pd.Series(
+            [fd.dtype for fd in ret_type.data_fields],
+            index=[fd.name for fd in ret_type.data_fields],
+        )
+        index_fields = ret_type.index_fields
+    elif isinstance(ret_type, SeriesType):
+        output_type = OutputType.series
+        dtype = ret_type.name_and_dtype.dtype
+        name = ret_type.name_and_dtype.name
+        index_fields = ret_type.index_fields
+    elif isinstance(ret_type, IndexType):
+        output_type = OutputType.index
+        index_fields = ret_type.index_fields
+    else:
+        output_type = OutputType.scalar
+        try:
+            dtype = make_dtype(ret_type)
+        except:
+            return None
+    if index_fields is not None:
+        if len(index_fields) == 1:
+            mock_idx = pd.Index(
+                [], dtype=index_fields[0].dtype, name=index_fields[0].name
+            )
+        else:
+            col_names = [index_field.name for index_field in index_fields]
+            col_dtypes = pd.Series(
+                [index_field.dtype for index_field in index_fields], index=col_names
+            )
+            mock_df = pd.DataFrame([], columns=col_names).astype(col_dtypes)
+            mock_idx = pd.MultiIndex.from_frame(mock_df)
+        index_value = parse_index(mock_idx, df_obj, store_data=False)
+    else:
+        index_value = None
+    return InferredDataFrameMeta(
+        output_type=output_type,
+        index_value=index_value,
+        dtypes=dtypes,
+        dtype=dtype,
+        name=name,
+    )
+def register_pandas_typing_funcs():
+    def _cls_getitem_func(cls, item, type_cls):
+        return type_cls.from_getitem_args(item)
+    for pd_cls, type_cls in [
+        (pd.DataFrame, DataFrameType),
+        (pd.Series, SeriesType),
+        (pd.Index, IndexType),
+    ]:
+        if hasattr(pd_cls, "__class_getitem__"):  # pragma: no cover
+            continue
+        pd_cls.__class_getitem__ = classmethod(
+            functools.partial(_cls_getitem_func, type_cls=type_cls)
+        )