PyPI - maxframe - Versions diffs - 2.0.0b1__cp38-cp38-macosx_10_9_universal2.whl → 2.2.0__cp38-cp38-macosx_10_9_universal2.whl - Mend

maxframe 2.0.0b1__cp38-cp38-macosx_10_9_universal2.whl → 2.2.0__cp38-cp38-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (395) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cpython-38-darwin.so +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cpython-38-darwin.so +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/read_odps_query.py +76 -16
maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/objects/tests/test_object_io.py +4 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/io/odpsio/tests/test_volumeio.py +4 -15
maxframe/io/odpsio/volumeio.py +23 -8
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +87 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cpython-38-darwin.so +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +3 -13
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/reduction/unique.py CHANGED Viewed

@@ -28,12 +28,19 @@ from ...tensor.core import TensorOrder
 from ...utils import lazy_import
 from ..core import DATAFRAME_TYPE
 from ..initializer import Series as asseries
-from .core import CustomReduction, DataFrameReductionMixin, DataFrameReductionOperator
+from .core import (
+    CustomReduction,
+    DataFrameReduction,
+    DataFrameReductionMixin,
+    ReductionCallable,
+)
 cudf = lazy_import("cudf")
 class UniqueReduction(CustomReduction):
+    _func_name = "unique"
     def agg(self, data):  # noqa: W0221  # pylint: disable=arguments-differ
         xdf = cudf if self.is_gpu() else pd
         # convert to series data
@@ -43,7 +50,12 @@ class UniqueReduction(CustomReduction):
         return data.unique()
-class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
+class UniqueReductionCallable(ReductionCallable):
+    """Callable that applies a ``UniqueReduction`` to the value it receives."""
+
+    def __call__(self, value):
+        # the GPU flag travels in ``kwargs`` and is handed to the reduction
+        reducer = UniqueReduction(name="unique", is_gpu=self.kwargs["is_gpu"])
+        return reducer(value)
+class DataFrameUnique(DataFrameReduction, DataFrameReductionMixin):
     _op_type_ = opcodes.UNIQUE
     _func_name = "unique"
@@ -53,9 +65,14 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
     def is_atomic(self):
         return True
+    def get_reduction_args(self, axis=None):
+        return {}
     @classmethod
     def get_reduction_callable(cls, op):
-        return UniqueReduction(name=cls._func_name, is_gpu=op.is_gpu())
+        return UniqueReductionCallable(
+            func_name=cls._func_name, kwargs=dict(is_gpu=op.is_gpu())
+        )
     def __call__(self, a):
         if not isinstance(a, ENTITY_TYPE):

maxframe/dataframe/reduction/var.py CHANGED Viewed

@@ -15,10 +15,21 @@
 from ... import opcodes
 from ...core import OutputType
 from ...serialization.serializables import Int32Field
-from .core import DataFrameReductionMixin, DataFrameReductionOperator
+from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
-class DataFrameVar(DataFrameReductionOperator, DataFrameReductionMixin):
+class VarReductionCallable(ReductionCallable):
+    """Callable computing variance from counts, sums and sums of squares.
+
+    ``skipna`` and ``ddof`` are read from ``self.kwargs``.
+    """
+
+    def __call__(self, value):
+        skipna = self.kwargs["skipna"]
+        ddof = self.kwargs["ddof"]
+        cnt = value.count()
+        if ddof != 0:
+            # (sum(x^2) - sum(x)^2 / n) / (n - ddof)
+            sum_of_squares = (value**2).sum(skipna=skipna)
+            squared_sum = value.sum(skipna=skipna) ** 2
+            return (sum_of_squares - squared_sum / cnt) / (cnt - ddof)
+        # with ddof == 0 this reduces to mean(x^2) - mean(x)^2
+        return (value**2).mean(skipna=skipna) - (value.mean(skipna=skipna)) ** 2
+class DataFrameVar(DataFrameReduction, DataFrameReductionMixin):
     _op_type_ = opcodes.VAR
     _func_name = "var"
@@ -27,16 +38,9 @@ class DataFrameVar(DataFrameReductionOperator, DataFrameReductionMixin):
     @classmethod
     def get_reduction_callable(cls, op: "DataFrameVar"):
         skipna, ddof = op.skipna, op.ddof
-        def var(x):
-            cnt = x.count()
-            if ddof == 0:
-                return (x**2).mean(skipna=skipna) - (x.mean(skipna=skipna)) ** 2
-            return ((x**2).sum(skipna=skipna) - x.sum(skipna=skipna) ** 2 / cnt) / (
-                cnt - ddof
-            )
-        return var
+        return VarReductionCallable(
+            func_name="var", kwargs={"skipna": skipna, "ddof": ddof}
+        )
 def var_series(series, axis=None, skipna=True, level=None, ddof=1, method=None):

maxframe/dataframe/reshape/__init__.py ADDED Viewed

@@ -0,0 +1,38 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .melt import melt
+from .pivot import pivot
+from .pivot_table import pivot_table
+from .stack import stack
+from .unstack import unstack
+def _install():
+    """Attach the reshape functions as methods of DataFrame and Series types."""
+    from ..core import DATAFRAME_TYPE, SERIES_TYPE
+
+    frame_methods = {
+        "melt": melt,
+        "pivot": pivot,
+        "pivot_table": pivot_table,
+        "stack": stack,
+        "unstack": unstack,
+    }
+    for frame_cls in DATAFRAME_TYPE:
+        for method_name, func in frame_methods.items():
+            setattr(frame_cls, method_name, func)
+    # Series types only gain ``unstack`` here
+    for series_cls in SERIES_TYPE:
+        series_cls.unstack = unstack
+
+
+# run the registration once at import time, then drop the helper name
+_install()
+del _install

maxframe/dataframe/{misc → reshape}/pivot.py RENAMED Viewed

@@ -35,6 +35,7 @@ class DataFramePivot(DataFrameOperator, DataFrameOperatorMixin):
     columns = AnyField("columns", default=None)
     agg_results = KeyField("agg_results", default=None)
+    fill_value = AnyField("fill_value", default=None)
     def __init__(self, aggfunc=None, **kw):
         if aggfunc is None:

maxframe/dataframe/{misc → reshape}/pivot_table.py RENAMED Viewed

@@ -39,6 +39,7 @@ class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
     margins_name = StringField("margins_name", default=None)
     sort = BoolField("sort", default=False)
+    # generate intermediate agg result to infer value of columns
     agg_results = KeyField("agg_results", default=None)
     def __init__(self, aggfunc=None, **kw):

maxframe/dataframe/reshape/unstack.py ADDED Viewed

@@ -0,0 +1,114 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ...udf import builtin_function
+from ..core import SERIES_TYPE
+@builtin_function
+def _adjust_names(pivoted, idx_names=None, col_names=None, remove_col_level=False):
+    """Tidy the axis names of a pivoted frame.
+
+    Optionally drops the outermost column level, then renames the index axis
+    and the column axis when non-empty name lists are given.
+    """
+    result = pivoted.droplevel(0, axis=1) if remove_col_level else pivoted
+    # axis 0 carries the index names, axis 1 the column names
+    for names, axis in ((idx_names, 0), (col_names, 1)):
+        if names:
+            result = result.rename_axis(names, axis=axis)
+    return result
+def unstack(df_or_series, level=-1, fill_value=None):
+    """
+    Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.
+
+    Parameters
+    ----------
+    level : int, str, or list of these, default last level
+        Level(s) to unstack, can pass level name.
+    fill_value : scalar value, default None
+        Value to use when replacing NaN values.
+
+    Returns
+    -------
+    DataFrame
+        Unstacked Series. When a DataFrame with a single-level index is
+        passed, the result of stacking it and moving the stacked levels in
+        front of the original index level is returned instead.
+
+    Raises
+    ------
+    ValueError
+        If a Series whose index has only one level is passed.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series([1, 2, 3, 4],
+    ...               index=md.MultiIndex.from_product([['one', 'two'],
+    ...                                                 ['a', 'b']]))
+    >>> s.execute()
+    one  a    1
+         b    2
+    two  a    3
+         b    4
+    dtype: int64
+
+    >>> s.unstack(level=-1).execute()
+         a  b
+    one  1  2
+    two  3  4
+
+    >>> s.unstack(level=0).execute()
+         one  two
+    a    1    3
+    b    2    4
+    """
+    if df_or_series.index.nlevels == 1:
+        if isinstance(df_or_series, SERIES_TYPE):
+            raise ValueError("index must be a MultiIndex to unstack")
+        # A frame with a flat index is stacked instead; afterwards the levels
+        # appended by stack() are moved in front of the original index level.
+        raw_idx_nlevels = df_or_series.index.nlevels
+        stacked = df_or_series.stack(level=level, fill_value=fill_value)
+        # nlevels is an int: build the list of level positions with range()
+        # (calling list() on the int itself raises TypeError)
+        orig_order = list(range(stacked.index.nlevels))
+        new_order = orig_order[raw_idx_nlevels:] + orig_order[:raw_idx_nlevels]
+        return stacked.reorder_levels(new_order)
+
+    nlevels = df_or_series.index.nlevels
+    idx_names = list(df_or_series.index.names)
+    if df_or_series.ndim == 1:
+        col_names = []
+    else:
+        col_names = list(df_or_series.columns.names)
+
+    # Resolve every requested level to a non-negative position: names go
+    # through name_to_idx, ints pass through, and the modulo folds negative
+    # positions such as -1 into range.
+    name_to_idx = {name: i for i, name in enumerate(idx_names)}
+    level = level if isinstance(level, list) else [level]
+    level_idxes = [(nlevels + name_to_idx.get(lv, lv)) % nlevels for lv in level]
+    level_idx_set = set(level_idxes)
+
+    # temporary column names that hold the index levels after reset_index()
+    level_cols = [f"__level_{idx}" for idx in range(nlevels)]
+    if df_or_series.ndim == 1:
+        data = df_or_series.to_frame(name="__data")
+    else:
+        data = df_or_series
+
+    # Requested levels become pivot columns, the remaining levels stay as the
+    # pivot index, and every original data column is a pivot value.
+    pivot_val_col = list(data.dtypes.index)
+    pivot_col_col = [level_cols[i] for i in level_idxes]
+    pivot_idx_col = [level_cols[i] for i in range(nlevels) if i not in level_idx_set]
+    new_idx_names = [idx_names[i] for i in range(nlevels) if i not in level_idx_set]
+    new_col_names = col_names + [idx_names[i] for i in level_idxes]
+
+    data = data.reset_index(names=level_cols)
+    pivoted = data.pivot(
+        index=pivot_idx_col, columns=pivot_col_col, values=pivot_val_col
+    )
+    # pivot() takes no fill value here, so it is set on the operator directly
+    pivoted.op.fill_value = fill_value
+    # restore the original axis names; for Series input the artificial
+    # "__data" column level is dropped as well
+    return pivoted.mf.apply_chunk(
+        _adjust_names,
+        col_names=new_col_names,
+        idx_names=new_idx_names,
+        remove_col_level=df_or_series.ndim == 1,
+        skip_infer=True,
+        output_type="dataframe",
+    )

maxframe/dataframe/sort/__init__.py CHANGED Viewed

@@ -18,14 +18,22 @@ from .sort_values import DataFrameSortValues
 def _install():
     from ..core import DATAFRAME_TYPE, SERIES_TYPE
+    from .argsort import series_argsort
+    from .nlargest import df_nlargest, series_nlargest
+    from .nsmallest import df_nsmallest, series_nsmallest
     from .sort_index import sort_index
     from .sort_values import dataframe_sort_values, series_sort_values
     for cls in DATAFRAME_TYPE:
+        setattr(cls, "nlargest", df_nlargest)
+        setattr(cls, "nsmallest", df_nsmallest)
         setattr(cls, "sort_values", dataframe_sort_values)
         setattr(cls, "sort_index", sort_index)
     for cls in SERIES_TYPE:
+        setattr(cls, "argsort", series_argsort)
+        setattr(cls, "nlargest", series_nlargest)
+        setattr(cls, "nsmallest", series_nsmallest)
         setattr(cls, "sort_values", series_sort_values)
         setattr(cls, "sort_index", sort_index)

maxframe/dataframe/sort/argsort.py ADDED Viewed

@@ -0,0 +1,62 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+def series_argsort(series, axis=0, kind="quicksort", order=None, stable=None):
+    """
+    Return the integer positions that would sort the values of a Series.
+
+    The values are converted to a tensor, argsorted along axis 0 and wrapped
+    back into a Series that reuses the index of the input.
+
+    Parameters
+    ----------
+    axis : {0 or 'index'}
+        Unused; sorting always runs along axis 0. Kept for compatibility
+        with DataFrame.
+    kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
+        Choice of sorting algorithm, forwarded to the tensor ``argsort``.
+        See :func:`numpy.sort` for more information.
+    order : None
+        Has no effect but is accepted for compatibility with numpy.
+    stable : None
+        Has no effect but is accepted for compatibility with numpy.
+
+    Returns
+    -------
+    Series
+        Positions of the values within the sort order.
+        NOTE(review): pandas reports -1 for missing values; whether the
+        tensor ``argsort`` used here does the same is not visible in this
+        function -- confirm.
+
+    See Also
+    --------
+    numpy.ndarray.argsort : Returns the indices that would sort this array.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series([3, 2, 1])
+    >>> s.argsort().execute()
+    0    2
+    1    1
+    2    0
+    dtype: int64
+    """
+    from ... import tensor as mt
+    from ..datasource.from_tensor import series_from_tensor
+
+    # these arguments exist for signature compatibility only
+    _ = axis, order, stable
+    sorted_positions = mt.argsort(series.to_tensor(), axis=0, kind=kind)
+    return series_from_tensor(sorted_positions, index=series.index)

maxframe/dataframe/sort/core.py CHANGED Viewed

@@ -34,3 +34,4 @@ class DataFrameSortOperator(DataFrameOperator):
     parallel_kind = StringField("parallel_kind")
     psrs_kinds = ListField("psrs_kinds", FieldTypes.string)
     nrows = Int64Field("nrows", default=None)
+    keep_kind = StringField("keep_kind", default="head")

maxframe/dataframe/sort/nlargest.py ADDED Viewed

@@ -0,0 +1,238 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ...core import OutputType
+from .sort_values import DataFrameSortValues
+def _nlargest(df, n, columns=None, keep="first"):
+    op = DataFrameSortValues(
+        output_types=[OutputType.dataframe],
+        axis=0,
+        by=columns,
+        ignore_index=False,
+        ascending=False,
+        nrows=n,
+        keep_kind=keep,
+    )
+    return op(df)
+def df_nlargest(df, n, columns, keep="first"):
+    """
+    Return the first `n` rows ordered by `columns` in descending order.
+    Return the first `n` rows with the largest values in `columns`, in
+    descending order. The columns that are not specified are returned as
+    well, but not used for ordering.
+    This method is equivalent to
+    ``df.sort_values(columns, ascending=False).head(n)``, but more
+    performant.
+    Parameters
+    ----------
+    n : int
+        Number of rows to return.
+    columns : label or list of labels
+        Column label(s) to order by.
+    keep : {'first', 'last', 'all'}, default 'first'
+        Where there are duplicate values:
+        - `first` : prioritize the first occurrence(s)
+        - `last` : prioritize the last occurrence(s)
+        - ``all`` : do not drop any duplicates, even it means
+                    selecting more than `n` items.
+    Returns
+    -------
+    DataFrame
+        The first `n` rows ordered by the given columns in descending
+        order.
+    See Also
+    --------
+    DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
+        ascending order.
+    DataFrame.sort_values : Sort DataFrame by the values.
+    DataFrame.head : Return the first `n` rows without re-ordering.
+    Notes
+    -----
+    This function cannot be used with all column types. For example, when
+    specifying columns with `object` or `category` dtypes, ``TypeError`` is
+    raised.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
+    ...                                   434000, 434000, 337000, 11300,
+    ...                                   11300, 11300],
+    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
+    ...                            17036, 182, 38, 311],
+    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
+    ...                                "IS", "NR", "TV", "AI"]},
+    ...                   index=["Italy", "France", "Malta",
+    ...                          "Maldives", "Brunei", "Iceland",
+    ...                          "Nauru", "Tuvalu", "Anguilla"])
+    >>> df.execute()
+              population      GDP alpha-2
+    Italy       59000000  1937894      IT
+    France      65000000  2583560      FR
+    Malta         434000    12011      MT
+    Maldives      434000     4520      MV
+    Brunei        434000    12128      BN
+    Iceland       337000    17036      IS
+    Nauru          11300      182      NR
+    Tuvalu         11300       38      TV
+    Anguilla       11300      311      AI
+    In the following example, we will use ``nlargest`` to select the three
+    rows having the largest values in column "population".
+    >>> df.nlargest(3, 'population').execute()
+            population      GDP alpha-2
+    France    65000000  2583560      FR
+    Italy     59000000  1937894      IT
+    Malta       434000    12011      MT
+    When using ``keep='last'``, ties are resolved in reverse order:
+    >>> df.nlargest(3, 'population', keep='last').execute()
+            population      GDP alpha-2
+    France    65000000  2583560      FR
+    Italy     59000000  1937894      IT
+    Brunei      434000    12128      BN
+    When using ``keep='all'``, all duplicate items are maintained:
+    >>> df.nlargest(3, 'population', keep='all').execute()
+              population      GDP alpha-2
+    France      65000000  2583560      FR
+    Italy       59000000  1937894      IT
+    Malta         434000    12011      MT
+    Maldives      434000     4520      MV
+    Brunei        434000    12128      BN
+    To order by the largest values in column "population" and then "GDP",
+    we can specify multiple columns like in the next example.
+    >>> df.nlargest(3, ['population', 'GDP']).execute()
+            population      GDP alpha-2
+    France    65000000  2583560      FR
+    Italy     59000000  1937894      IT
+    Brunei      434000    12128      BN
+    """
+    return _nlargest(df, n, columns, keep=keep)
+def series_nlargest(df, n, keep="first"):
+    """
+    Return the largest `n` elements.
+    Parameters
+    ----------
+    n : int, default 5
+        Return this many descending sorted values.
+    keep : {'first', 'last', 'all'}, default 'first'
+        When there are duplicate values that cannot all fit in a
+        Series of `n` elements:
+        - ``first`` : return the first `n` occurrences in order
+            of appearance.
+        - ``last`` : return the last `n` occurrences in reverse
+            order of appearance.
+        - ``all`` : keep all occurrences. This can result in a Series of
+            size larger than `n`.
+    Returns
+    -------
+    Series
+        The `n` largest values in the Series, sorted in decreasing order.
+    See Also
+    --------
+    Series.nsmallest: Get the `n` smallest elements.
+    Series.sort_values: Sort Series by values.
+    Series.head: Return the first `n` rows.
+    Notes
+    -----
+    Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
+    relative to the size of the ``Series`` object.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> countries_population = {"Italy": 59000000, "France": 65000000,
+    ...                         "Malta": 434000, "Maldives": 434000,
+    ...                         "Brunei": 434000, "Iceland": 337000,
+    ...                         "Nauru": 11300, "Tuvalu": 11300,
+    ...                         "Anguilla": 11300, "Montserrat": 5200}
+    >>> s = md.Series(countries_population)
+    >>> s.execute()
+    Italy       59000000
+    France      65000000
+    Malta         434000
+    Maldives      434000
+    Brunei        434000
+    Iceland       337000
+    Nauru          11300
+    Tuvalu         11300
+    Anguilla       11300
+    Montserrat      5200
+    dtype: int64
+    The `n` largest elements where ``n=5`` by default.
+    >>> s.nlargest().execute()
+    France      65000000
+    Italy       59000000
+    Malta         434000
+    Maldives      434000
+    Brunei        434000
+    dtype: int64
+    The `n` largest elements where ``n=3``. Default `keep` value is 'first'
+    so Malta will be kept.
+    >>> s.nlargest(3).execute()
+    France    65000000
+    Italy     59000000
+    Malta       434000
+    dtype: int64
+    The `n` largest elements where ``n=3`` and keeping the last duplicates.
+    Brunei will be kept since it is the last with value 434000 based on
+    the index order.
+    >>> s.nlargest(3, keep='last').execute()
+    France      65000000
+    Italy       59000000
+    Brunei        434000
+    dtype: int64
+    The `n` largest elements where ``n=3`` with all duplicates kept. Note
+    that the returned Series has five elements due to the three duplicates.
+    >>> s.nlargest(3, keep='all').execute()
+    France      65000000
+    Italy       59000000
+    Malta         434000
+    Maldives      434000
+    Brunei        434000
+    dtype: int64
+    """
+    return _nlargest(df, n, keep=keep)