PyPI - maxframe - Versions diffs - 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl - Mend

maxframe 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (391) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cpython-39-darwin.so +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cpython-39-darwin.so +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +86 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cpython-39-darwin.so +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/misc/describe.py CHANGED Viewed

@@ -22,7 +22,7 @@ from ...core import EntityData
 from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
 from ..core import SERIES_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
-from ..utils import build_empty_df, parse_index
+from ..utils import build_df, parse_index
 class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
@@ -43,8 +43,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
     def __call__(self, df_or_series):
         if isinstance(df_or_series, SERIES_TYPE):
-            if not np.issubdtype(df_or_series.dtype, np.number):
-                raise NotImplementedError("non-numeric type is not supported for now")
             test_series = pd.Series([], dtype=df_or_series.dtype).describe(
                 percentiles=self.percentiles,
                 include=self.include,
@@ -57,7 +55,7 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
                 index_value=parse_index(test_series.index, store_data=True),
             )
         else:
-            test_inp_df = build_empty_df(df_or_series.dtypes)
+            test_inp_df = build_df(df_or_series)
             test_df = test_inp_df.describe(
                 percentiles=self.percentiles,
                 include=self.include,
@@ -69,11 +67,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
                 # MaxFrame DataFrame allows user to specify percentiles=False
                 # to skip computation about percentiles
                 test_df.drop(["50%"], axis=0, inplace=True)
-            for dtype in test_df.dtypes:
-                if not np.issubdtype(dtype, np.number):
-                    raise NotImplementedError(
-                        "non-numeric type is not supported for now"
-                    )
             return self.new_dataframe(
                 [df_or_series],
                 shape=test_df.shape,
@@ -84,6 +77,179 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
 def describe(df_or_series, percentiles=None, include=None, exclude=None):
+    """
+    Generate descriptive statistics.
+    Descriptive statistics include those that summarize the central
+    tendency, dispersion and shape of a
+    dataset's distribution, excluding ``NaN`` values.
+    Analyzes both numeric and object series, as well
+    as ``DataFrame`` column sets of mixed data types. The output
+    will vary depending on what is provided. Refer to the notes
+    below for more detail.
+    Parameters
+    ----------
+    percentiles : list-like of numbers, optional
+        The percentiles to include in the output. All should
+        fall between 0 and 1. The default is
+        ``[.25, .5, .75]``, which returns the 25th, 50th, and
+        75th percentiles.
+    include : 'all', list-like of dtypes or None (default), optional
+        A white list of data types to include in the result. Ignored
+        for ``Series``. Here are the options:
+        - 'all' : All columns of the input will be included in the output.
+        - A list-like of dtypes : Limits the results to the
+          provided data types.
+          To limit the result to numeric types submit
+          ``numpy.number``. To limit it instead to object columns submit
+          the ``numpy.object`` data type. Strings
+          can also be used in the style of
+          ``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
+        - None (default) : The result will include all numeric columns.
+    exclude : list-like of dtypes or None (default), optional
+        A black list of data types to omit from the result. Ignored
+        for ``Series``. Here are the options:
+        - A list-like of dtypes : Excludes the provided data types
+          from the result. To exclude numeric types submit
+          ``numpy.number``. To exclude object columns submit the data
+          type ``numpy.object``. Strings can also be used in the style of
+          ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
+        - None (default) : The result will exclude nothing.
+    Returns
+    -------
+    Series or DataFrame
+        Summary statistics of the Series or DataFrame provided.
+    See Also
+    --------
+    DataFrame.count: Count number of non-NA/null observations.
+    DataFrame.max: Maximum of the values in the object.
+    DataFrame.min: Minimum of the values in the object.
+    DataFrame.mean: Mean of the values.
+    DataFrame.std: Standard deviation of the observations.
+    DataFrame.select_dtypes: Subset of a DataFrame including/excluding
+        columns based on their dtype.
+    Notes
+    -----
+    For numeric data, the result's index will include ``count``,
+    ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
+    upper percentiles. By default the lower percentile is ``25`` and the
+    upper percentile is ``75``. The ``50`` percentile is the
+    same as the median.
+    For object data (e.g. strings or timestamps), the result's index
+    will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
+    is the most common value. The ``freq`` is the most common value's
+    frequency. Timestamps also include the ``first`` and ``last`` items.
+    If multiple object values have the highest count, then the
+    ``count`` and ``top`` results will be arbitrarily chosen from
+    among those with the highest count.
+    For mixed data types provided via a ``DataFrame``, the default is to
+    return only an analysis of numeric columns. If the dataframe consists
+    only of object data without any numeric columns, the default is to
+    return an analysis of object columns. If ``include='all'`` is provided
+    as an option, the result will include a union of attributes of each type.
+    The `include` and `exclude` parameters can be used to limit
+    which columns in a ``DataFrame`` are analyzed for the output.
+    The parameters are ignored when analyzing a ``Series``.
+    Examples
+    --------
+    Describing a numeric ``Series``.
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series([1, 2, 3])
+    >>> s.describe().execute()
+    count    3.0
+    mean     2.0
+    std      1.0
+    min      1.0
+    25%      1.5
+    50%      2.0
+    75%      2.5
+    max      3.0
+    dtype: float64
+    Describing a ``DataFrame``. By default only numeric fields
+    are returned.
+    >>> df = md.DataFrame({'numeric': [1, 2, 3],
+    ...                    'object': ['a', 'b', 'c']
+    ...                    })
+    >>> df.describe().execute()
+           numeric
+    count      3.0
+    mean       2.0
+    std        1.0
+    min        1.0
+    25%        1.5
+    50%        2.0
+    75%        2.5
+    max        3.0
+    Describing all columns of a ``DataFrame`` regardless of data type.
+    >>> df.describe(include='all').execute()  # doctest: +SKIP
+           numeric object
+    count      3.0      3
+    unique     NaN      3
+    top        NaN      a
+    freq       NaN      1
+    mean       2.0    NaN
+    std        1.0    NaN
+    min        1.0    NaN
+    25%        1.5    NaN
+    50%        2.0    NaN
+    75%        2.5    NaN
+    max        3.0    NaN
+    Describing a column from a ``DataFrame`` by accessing it as
+    an attribute.
+    >>> df.numeric.describe().execute()
+    count    3.0
+    mean     2.0
+    std      1.0
+    min      1.0
+    25%      1.5
+    50%      2.0
+    75%      2.5
+    max      3.0
+    Name: numeric, dtype: float64
+    Including only numeric columns in a ``DataFrame`` description.
+    >>> df.describe(include=[mt.number]).execute()
+           numeric
+    count      3.0
+    mean       2.0
+    std        1.0
+    min        1.0
+    25%        1.5
+    50%        2.0
+    75%        2.5
+    max        3.0
+    Including only string columns in a ``DataFrame`` description.
+    >>> df.describe(include=[object]).execute()  # doctest: +SKIP
+           object
+    count       3
+    unique      3
+    top         a
+    freq        1
+    """
+    # fixme add support for categorical columns once implemented
     if percentiles is False:
         percentiles = []
     elif percentiles is None:

maxframe/dataframe/misc/drop_duplicates.py CHANGED Viewed

@@ -19,10 +19,10 @@ from ... import opcodes
 from ...serialization.serializables import BoolField
 from ..operators import OutputType
 from ..utils import gen_unknown_index_value, parse_index
-from ._duplicate import DuplicateOperand, validate_subset
+from ._duplicate import BaseDuplicateOp, validate_subset
-class DataFrameDropDuplicates(DuplicateOperand):
+class DataFrameDropDuplicates(BaseDuplicateOp):
     _op_type_ = opcodes.DROP_DUPLICATES
     ignore_index = BoolField("ignore_index", default=True)

maxframe/dataframe/misc/duplicated.py CHANGED Viewed

@@ -16,10 +16,10 @@ import numpy as np
 from ... import opcodes
 from ...core import OutputType
-from ._duplicate import DuplicateOperand, validate_subset
+from ._duplicate import BaseDuplicateOp, validate_subset
-class DataFrameDuplicated(DuplicateOperand):
+class DataFrameDuplicated(BaseDuplicateOp):
     _op_type_ = opcodes.DUPLICATED
     def __init__(self, output_types=None, **kw):

maxframe/dataframe/misc/get_dummies.py CHANGED Viewed

@@ -25,12 +25,14 @@ from ...serialization.serializables import (
     ListField,
     StringField,
 )
+from ...utils import make_dtype, pd_release_version
 from ..datasource.dataframe import from_pandas as from_pandas_df
 from ..datasource.series import from_pandas as from_pandas_series
 from ..initializer import Series as asseries
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 _encoding_dtype_kind = ["O", "S", "U"]
+_ret_uint8 = pd_release_version < (2, 0, 0)
 class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
@@ -181,7 +183,9 @@ def get_dummies(
     elif isinstance(data, pd.DataFrame):
         data = from_pandas_df(data)
-    dtype = dtype if dtype is not None else np.dtype(bool)
+    dtype = make_dtype(
+        dtype if dtype is not None else np.dtype(np.uint8 if _ret_uint8 else bool)
+    )
     if prefix is not None:
         if isinstance(prefix, list):

maxframe/dataframe/misc/isin.py CHANGED Viewed

@@ -133,7 +133,7 @@ def series_isin(elements, values):
     5    False
     Name: animal, dtype: bool
     """
-    if is_list_like(values):
+    if is_list_like(values) and not isinstance(values, ENTITY_TYPE):
         values = list(values)
     elif not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
         raise TypeError(
@@ -207,7 +207,7 @@ def df_isin(df, values):
     falcon      True       True
     dog        False      False
     """
-    if is_list_like(values) and not isinstance(values, dict):
+    if is_list_like(values) and not isinstance(values, (dict, ENTITY_TYPE)):
         values = list(values)
     elif not isinstance(
         values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)

maxframe/dataframe/misc/map.py CHANGED Viewed

@@ -251,3 +251,97 @@ def index_map(
     """
     op = DataFrameMap(arg=mapper, na_action=na_action, memory_scale=memory_scale)
     return op(idx, dtype=dtype, skip_infer=skip_infer)
+def df_map(
+    df, func, na_action=None, dtypes=None, dtype=None, skip_infer=False, **kwargs
+):
+    """
+    Apply a function to a DataFrame elementwise.
+    This method applies a function that accepts and returns a scalar
+    to every element of a DataFrame.
+    Parameters
+    ----------
+    func : callable
+        Python function, returns a single value from a single value.
+    na_action : {None, 'ignore'}, default None
+        If 'ignore', propagate NaN values, without passing them to func.
+    dtypes : Series, default None
+        Specify dtypes of returned DataFrames.
+    dtype : np.dtype, default None
+        Specify dtypes of all columns of returned DataFrames, only
+        effective when dtypes is not specified.
+    skip_infer : bool, default False
+        Whether to skip dtype inference when neither dtypes nor dtype is specified.
+    **kwargs
+        Additional keyword arguments to pass as keyword arguments to
+        `func`.
+    Returns
+    -------
+    DataFrame
+        Transformed DataFrame.
+    See Also
+    --------
+    DataFrame.apply : Apply a function along input axis of DataFrame.
+    DataFrame.replace: Replace values given in `to_replace` with `value`.
+    Series.map : Apply a function elementwise on a Series.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame([[1, 2.12], [3.356, 4.567]])
+    >>> df.execute()
+           0      1
+    0  1.000  2.120
+    1  3.356  4.567
+    >>> df.map(lambda x: len(str(x))).execute()
+       0  1
+    0  3  4
+    1  5  5
+    Like Series.map, NA values can be ignored:
+    >>> df_copy = df.copy()
+    >>> df_copy.iloc[0, 0] = md.NA
+    >>> df_copy.map(lambda x: len(str(x)), na_action='ignore').execute()
+         0  1
+    0  NaN  4
+    1  5.0  5
+    It is also possible to use `map` with functions that are not
+    `lambda` functions:
+    >>> df.map(round, ndigits=1).execute()
+         0    1
+    0  1.0  2.1
+    1  3.4  4.6
+    Note that a vectorized version of `func` often exists, which will
+    be much faster. You could square each number elementwise.
+    >>> df.map(lambda x: x**2).execute()
+               0          1
+    0   1.000000   4.494400
+    1  11.262736  20.857489
+    But it's better to avoid map in that case.
+    >>> (df ** 2).execute()
+               0          1
+    0   1.000000   4.494400
+    1  11.262736  20.857489
+    """
+    if dtypes is None and dtype is not None:
+        dtypes = pd.Series([dtype] * df.shape[1], index=df.dtypes.index)
+    def _wrapper(row):
+        return row.map(func, na_action=na_action, **kwargs)
+    return df.apply(
+        _wrapper, axis=1, dtypes=dtypes, skip_infer=skip_infer, elementwise=True
+    )

maxframe/dataframe/misc/tests/test_misc.py CHANGED Viewed

@@ -16,6 +16,8 @@ import numpy as np
 import pandas as pd
 import pytest
+from maxframe import options
 from .... import opcodes
 from ....core import OutputType
 from ....dataframe import DataFrame
@@ -124,6 +126,7 @@ def test_dataframe_apply():
         dtypes=pd.Series([np.dtype(float)] * 3),
     )
     assert df2.ndim == 2
+    assert df2.op.expect_resources == options.function.default_running_options
 def test_series_apply():
@@ -180,6 +183,8 @@ def test_series_apply():
         pd.Series, output_type="dataframe", dtypes=dtypes, index=pd.RangeIndex(2)
     )
     assert r.ndim == 2
+    assert r.op.expect_resources == options.function.default_running_options
     pd.testing.assert_series_equal(r.dtypes, dtypes)
     assert r.shape == (2, 3)
@@ -305,6 +310,7 @@ def test_transform():
     assert r.shape == series.shape
     assert r.op._op_type_ == opcodes.TRANSFORM
     assert r.op.output_types[0] == OutputType.series
+    assert r.op.expect_resources == options.function.default_running_options
 def test_series_isin():
@@ -563,12 +569,17 @@ def test_apply():
     )
     assert apply_df.shape == (3, 2)
     assert apply_df.op.expect_engine == "SPE"
-    assert apply_df.op.expect_resources == {"cpu": 1, "memory": "40GB", "gpu": 0}
+    assert apply_df.op.expect_resources == {
+        "cpu": 4,
+        "memory": "40GB",
+        "gpu": 0,
+        "gu_quota": None,
+    }
 def test_pivot_table():
     from ...groupby.aggregation import DataFrameGroupByAgg
-    from ...misc.pivot_table import DataFramePivotTable
+    from ...reshape.pivot_table import DataFramePivotTable
     raw = pd.DataFrame(
         {

maxframe/dataframe/misc/to_numeric.py CHANGED Viewed

@@ -15,6 +15,7 @@
 import numpy as np
 import pandas as pd
+from ... import opcodes
 from ...core import ENTITY_TYPE, OutputType
 from ...serialization.serializables import StringField
 from ...tensor import tensor as astensor
@@ -23,6 +24,8 @@ from ..operators import DataFrameOperator, DataFrameOperatorMixin
 class DataFrameToNumeric(DataFrameOperator, DataFrameOperatorMixin):
+    _op_type_ = opcodes.TO_NUMERIC
     errors = StringField("errors")
     downcast = StringField("downcast")

maxframe/dataframe/misc/transform.py CHANGED Viewed

@@ -38,8 +38,9 @@ from ..utils import (
 _with_convert_dtype = pd_release_version < (1, 2, 0)
-class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
+class DataFrameTransform(DataFrameOperator, DataFrameOperatorMixin):
     _op_type_ = opcodes.TRANSFORM
+    _legacy_name = "TransformOperator"
     func = AnyField("func", default=None)
     axis = AnyField("axis", default=None)
@@ -141,13 +142,17 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
     @classmethod
     def estimate_size(
-        cls, ctx: MutableMapping[str, Union[int, float]], op: "TransformOperator"
+        cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameTransform"
     ) -> None:
         if isinstance(op.func, MarkedFunction):
             ctx[op.outputs[0].key] = float("inf")
         super().estimate_size(ctx, op)
+# keep for import compatibility
+TransformOperator = DataFrameTransform
 def get_packed_funcs(df, output_type, func, *args, **kwds) -> Any:
     stub_df = _build_stub_pandas_obj(df, output_type)
     n_args = copy_if_possible(args)
@@ -235,7 +240,7 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
     """
     call_agg = kwargs.pop("_call_agg", False)
     func = get_packed_funcs(df, OutputType.dataframe, func, *args, **kwargs)
-    op = TransformOperator(
+    op = DataFrameTransform(
         func=func,
         axis=axis,
         args=args,
@@ -327,13 +332,15 @@ def series_transform(
     """
     call_agg = kwargs.pop("_call_agg", False)
     func = get_packed_funcs(series, OutputType.series, func, *args, **kwargs)
-    op = TransformOperator(
+    op = DataFrameTransform(
         func=func,
         axis=axis,
         convert_dtype=convert_dtype,
         args=args,
         kwds=kwargs,
-        output_types=[OutputType.series],
+        output_types=[OutputType.series]
+        if not call_agg and not isinstance(func, list)
+        else None,
         call_agg=call_agg,
     )
     return op(series, dtype=dtype, name=series.name, skip_infer=skip_infer)

maxframe/dataframe/misc/transpose.py CHANGED Viewed

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import pandas as pd
 from ... import opcodes
 from ...core import OutputType
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -30,10 +32,20 @@ class DataFrameTranspose(DataFrameOperator, DataFrameOperatorMixin):
         new_shape = arg.shape[::-1]
         columns_value = arg.index_value
         index_value = parse_index(arg.dtypes.index)
+        if not arg.index_value.has_value:
+            dtypes = None
+        else:
+            from pandas.core.dtypes.cast import find_common_type
+            dtype = find_common_type(list(arg.dtypes))
+            pd_index = arg.index_value.to_pandas()
+            dtypes = pd.Series([dtype] * len(pd_index), index=pd_index)
         return self.new_dataframe(
             [arg],
             shape=new_shape,
-            dtypes=None,
+            dtypes=dtypes,
             columns_value=columns_value,
             index_value=index_value,
         )

maxframe/dataframe/misc/valid_index.py ADDED Viewed

@@ -0,0 +1,115 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ...udf import builtin_function
+@builtin_function
+def _item_or_none(item):
+    if len(item) > 0:
+        return item[0]
+    return None
+def _valid_index(df_or_series, slc: slice):
+    from ... import tensor as mt
+    idx = df_or_series.dropna(how="all").index[slc]
+    return mt.array(idx).mf.apply_chunk(_item_or_none, dtype=idx.dtype)
+_doc = """
+Return index for %(pos)s non-NA value or None, if no non-NA value is found.
+Returns
+-------
+type of index
+Examples
+--------
+For Series:
+>>> import maxframe.dataframe as md
+>>> s = md.Series([None, 3, 4])
+>>> s.first_valid_index().execute()
+1
+>>> s.last_valid_index().execute()
+2
+>>> s = md.Series([None, None])
+>>> print(s.first_valid_index().execute())
+None
+>>> print(s.last_valid_index().execute())
+None
+If all elements in Series are NA/null, returns None.
+>>> s = md.Series()
+>>> print(s.first_valid_index().execute())
+None
+>>> print(s.last_valid_index().execute())
+None
+If Series is empty, returns None.
+For DataFrame:
+>>> df = md.DataFrame({'A': [None, None, 2], 'B': [None, 3, 4]})
+>>> df.execute()
+     A      B
+0  NaN    NaN
+1  NaN    3.0
+2  2.0    4.0
+>>> df.first_valid_index().execute()
+1
+>>> df.last_valid_index().execute()
+2
+>>> df = md.DataFrame({'A': [None, None, None], 'B': [None, None, None]})
+>>> df.execute()
+     A      B
+0  None   None
+1  None   None
+2  None   None
+>>> print(df.first_valid_index().execute())
+None
+>>> print(df.last_valid_index().execute())
+None
+If all elements in DataFrame are NA/null, returns None.
+>>> df = md.DataFrame()
+>>> df.execute()
+Empty DataFrame
+Columns: []
+Index: []
+>>> print(df.first_valid_index().execute())
+None
+>>> print(df.last_valid_index().execute())
+None
+If DataFrame is empty, returns None.
+"""
+def first_valid_index(df_or_series):
+    return _valid_index(df_or_series, slice(None, 1))
+def last_valid_index(df_or_series):
+    return _valid_index(df_or_series, slice(-1, None))
+first_valid_index.__doc__ = _doc % dict(pos="first")
+last_valid_index.__doc__ = _doc % dict(pos="last")