PyPI - maxframe - Versions diffs - 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl - Mend

maxframe 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (391) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cpython-39-darwin.so +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +6 -6
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +34 -1
maxframe/codegen/spe/dataframe/misc.py +9 -33
maxframe/codegen/spe/dataframe/reduction.py +14 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +30 -17
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +70 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +44 -17
maxframe/core/accessor.py +2 -2
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +1 -1
maxframe/core/graph/core.cpython-39-darwin.so +0 -0
maxframe/core/graph/entity.py +1 -2
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +10 -3
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +14 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +63 -118
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +2 -3
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +5 -1
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +30 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +12 -1
maxframe/dataframe/groupby/aggregation.py +78 -45
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +18 -2
maxframe/dataframe/groupby/core.py +96 -12
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +20 -1
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +1 -1
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +12 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +16 -10
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +51 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +94 -0
maxframe/dataframe/misc/tests/test_misc.py +13 -2
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +13 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +29 -15
maxframe/dataframe/reduction/aggregation.py +38 -9
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +100 -0
maxframe/dataframe/reduction/argmin.py +100 -0
maxframe/dataframe/reduction/core.py +65 -18
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/nunique.py +9 -8
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +8 -0
maxframe/dataframe/sort/argsort.py +62 -0
maxframe/dataframe/sort/core.py +1 -0
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +95 -26
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +2 -0
maxframe/learn/contrib/xgboost/core.py +86 -1
maxframe/learn/contrib/xgboost/train.py +5 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/utils/__init__.py +1 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +37 -0
maxframe/learn/utils/odpsio.py +193 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +122 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +33 -15
maxframe/protocol.py +12 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cpython-39-darwin.so +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +29 -2
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
maxframe/tensor/core.py +3 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_utils.py +43 -1
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +27 -2
maxframe/utils.py +193 -19
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +4 -1
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/extensions/apply_chunk.py CHANGED Viewed

@@ -26,9 +26,10 @@ from ...serialization.serializables import (
     Int32Field,
     TupleField,
 )
+from ...typing_ import TileableType
 from ...udf import BuiltinFunction, MarkedFunction
 from ...utils import copy_if_possible, make_dtype, make_dtypes
-from ..core import DATAFRAME_TYPE, DataFrame, IndexValue, Series
+from ..core import DATAFRAME_TYPE, INDEX_TYPE, DataFrame, IndexValue, Series
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import (
     InferredDataFrameMeta,
@@ -43,7 +44,7 @@ from ..utils import (
 class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
     _op_type_ = opcodes.APPLY_CHUNK
-    _legacy_name = "DataFrameApplyChunkOperator"
+    _legacy_name = "DataFrameApplyChunkOperator"  # since v2.0.0
     func = FunctionField("func")
     batch_rows = Int32Field("batch_rows", default=None)
@@ -60,16 +61,26 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
     def has_custom_code(self) -> bool:
         return not isinstance(self.func, BuiltinFunction)
+    def check_inputs(self, inputs: List[TileableType]):
+        # for apply_chunk we allow called on non-deterministic tileables
+        pass
     def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
         # return dataframe
         if self.output_types[0] == OutputType.dataframe:
             dtypes = make_dtypes(dtypes)
+            if dtypes is not None:
+                shape = df.shape if element_wise else (np.nan, len(dtypes))
+                cols_value = parse_index(dtypes.index, store_data=True)
+            else:
+                shape = (np.nan, np.nan)
+                cols_value = None
             # apply_chunk will use generate new range index for results
             return self.new_dataframe(
                 [df],
-                shape=df.shape if element_wise else (np.nan, len(dtypes)),
+                shape=shape,
                 index_value=index_value,
-                columns_value=parse_index(dtypes.index, store_data=True),
+                columns_value=cols_value,
                 dtypes=dtypes,
             )
@@ -106,11 +117,17 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
         name: Any = None,
         output_type=None,
         index=None,
+        skip_infer=False,
     ):
         args = self.args or ()
         kwargs = self.kwargs or {}
         # if not dtypes and not skip_infer:
-        packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
+        try:
+            packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
+        except:
+            if not skip_infer:
+                raise
+            packed_func = self.func
         # if skip_infer, directly build a frame
         if self.output_types and self.output_types[0] == OutputType.df_or_series:
@@ -125,13 +142,15 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
             dtype=dtype,
             name=name,
             index=index,
+            skip_infer=skip_infer,
         )
         if inferred_meta.index_value is None:
             inferred_meta.index_value = parse_index(
                 None, (df_or_series.key, df_or_series.index_value.key, self.func)
             )
-        inferred_meta.check_absence("output_type", "dtypes", "dtype")
+        if not skip_infer:
+            inferred_meta.check_absence("output_type", "dtypes", "dtype")
         if isinstance(df_or_series, DATAFRAME_TYPE):
             return self._call_dataframe(
@@ -163,6 +182,7 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
         name: Any = None,
         index: Union[pd.Index, IndexValue] = None,
         elementwise: bool = None,
+        skip_infer: bool = False,
         **kwargs,
     ) -> InferredDataFrameMeta:
         inferred_meta = infer_dataframe_return_value(
@@ -174,7 +194,10 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
             name=name,
             index=index,
             elementwise=elementwise,
+            skip_infer=skip_infer,
         )
+        if skip_infer:
+            return inferred_meta
         # merge specified and inferred index, dtypes, output_type
         # elementwise used to decide shape
@@ -186,6 +209,8 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
         if self.output_types:
             inferred_meta.output_type = self.output_types[0]
         inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
+        if isinstance(index, INDEX_TYPE):
+            index = index.index_value
         if index is not None:
             inferred_meta.index_value = (
                 parse_index(index)
@@ -458,6 +483,7 @@ def df_apply_chunk(
         name=name,
         index=index,
         output_type=output_type,
+        skip_infer=skip_infer,
     )

maxframe/dataframe/extensions/cartesian_chunk.py ADDED Viewed

@@ -0,0 +1,153 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+import numpy as np
+import pandas as pd
+from ... import opcodes
+from ...core import EntityData, OutputType
+from ...serialization.serializables import (
+    DictField,
+    FunctionField,
+    KeyField,
+    TupleField,
+)
+from ...udf import BuiltinFunction
+from ...utils import quiet_stdio
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+from ..utils import (
+    build_df,
+    build_empty_df,
+    build_series,
+    parse_index,
+    validate_output_types,
+)
+class DataFrameCartesianChunk(DataFrameOperator, DataFrameOperatorMixin):
+    """
+    Operator applying ``func`` to a pair of inputs (``left`` and ``right``).
+    ``__call__`` derives the metadata of the output, where possible by
+    running ``func`` on test objects built from both inputs.
+    """
+    _op_type_ = opcodes.CARTESIAN_CHUNK
+    # both inputs; rebound to ``op.inputs[:2]`` in ``_set_inputs``
+    left = KeyField("left")
+    right = KeyField("right")
+    # callable invoked as ``func(left, right, *args, **kwargs)``
+    func = FunctionField("func")
+    # extra positional / keyword arguments forwarded to ``func``
+    args = TupleField("args")
+    kwargs = DictField("kwargs")
+    def __init__(self, output_types=None, **kw):
+        """Create the operator; ``memory_scale`` falls back to 2.0 when unset."""
+        super().__init__(_output_types=output_types, **kw)
+        if self.memory_scale is None:
+            self.memory_scale = 2.0
+    @classmethod
+    def _set_inputs(cls, op: "DataFrameCartesianChunk", inputs: List[EntityData]):
+        """Set inputs via the base class, then bind ``left`` / ``right`` to the first two."""
+        super()._set_inputs(op, inputs)
+        op.left, op.right = op.inputs[:2]
+    @staticmethod
+    def _build_test_obj(obj):
+        """
+        Build a test object (``size=2``) for ``obj``: ``build_df`` is used when
+        ``obj.ndim == 2``, otherwise ``build_series`` keeping ``obj.name``.
+        """
+        return (
+            build_df(obj, size=2)
+            if obj.ndim == 2
+            else build_series(obj, size=2, name=obj.name)
+        )
+    def has_custom_code(self) -> bool:
+        """Return True unless ``func`` is a ``BuiltinFunction``."""
+        return not isinstance(self.func, BuiltinFunction)
+    def __call__(self, left, right, index=None, dtypes=None):
+        """
+        Build the output tileable for ``func(left, right, *args, **kwargs)``.
+        Parameters
+        ----------
+        left, right
+            Inputs of the operator; test objects are built from both.
+        index : optional
+            Index used to build the output index value; when None the index
+            of the trial (or fallback) result is used.
+        dtypes : optional
+            Dtypes of a DataFrame output; takes precedence over the dtypes of
+            the trial result and is required when the trial run fails and the
+            declared output type is ``dataframe``.
+        Returns
+        -------
+        The result of ``new_df_or_series``, ``new_series`` or
+        ``new_dataframe``, depending on the declared / inferred output kind.
+        Raises
+        ------
+        TypeError
+            When the trial run fails and the output cannot be described from
+            the declared output type (and ``dtypes``).
+        """
+        test_left = self._build_test_obj(left)
+        test_right = self._build_test_obj(right)
+        # first declared output type, or None when nothing was declared
+        output_type = self._output_types[0] if self._output_types else None
+        # output kind deliberately left open: return without running ``func``
+        if output_type == OutputType.df_or_series:
+            return self.new_df_or_series([left, right])
+        # try run to infer meta
+        try:
+            with np.errstate(all="ignore"), quiet_stdio():
+                obj = self.func(test_left, test_right, *self.args, **self.kwargs)
+        except:  # noqa: E722  # nosec  # pylint: disable=bare-except
+            # any failure of the trial run falls back to the declared output
+            # type; ``obj`` then only serves as a placeholder whose
+            # dtype / name / index (or dtypes) are read below
+            if output_type == OutputType.series:
+                obj = pd.Series([], dtype=np.dtype(object))
+            elif output_type == OutputType.dataframe and dtypes is not None:
+                obj = build_empty_df(dtypes)
+            else:
+                raise TypeError(
+                    "Cannot determine `output_type`, "
+                    "you have to specify it as `dataframe` or `series`, "
+                    "for dataframe, `dtypes` is required as well "
+                    "if output_type='dataframe'"
+                )
+        # series output: either the result is 1-d or a series was declared
+        if getattr(obj, "ndim", 0) == 1 or output_type == OutputType.series:
+            # NOTE(review): this removes "shape" from the operator's own
+            # ``kwargs`` in place, and only after the trial run above has
+            # already received it as a keyword -- confirm both are intended
+            shape = self.kwargs.pop("shape", (np.nan,))
+            if index is None:
+                index = obj.index
+            index_value = parse_index(
+                index, left, right, self.func, self.args, self.kwargs
+            )
+            return self.new_series(
+                [left, right],
+                dtype=obj.dtype,
+                shape=shape,
+                index_value=index_value,
+                name=obj.name,
+            )
+        else:
+            # explicitly passed dtypes win over the ones of the trial result
+            dtypes = dtypes if dtypes is not None else obj.dtypes
+            # dataframe
+            # row count is left as NaN; column count comes from ``dtypes``
+            shape = (np.nan, len(dtypes))
+            columns_value = parse_index(dtypes.index, store_data=True)
+            if index is None:
+                index = obj.index
+            index_value = parse_index(
+                index, left, right, self.func, self.args, self.kwargs
+            )
+            return self.new_dataframe(
+                [left, right],
+                shape=shape,
+                dtypes=dtypes,
+                index_value=index_value,
+                columns_value=columns_value,
+            )
+def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
+    """
+    Create a ``DataFrameCartesianChunk`` operator for ``func`` and call it on
+    ``left`` and ``right``.
+    Parameters
+    ----------
+    left, right
+        Inputs handed to the operator.
+    func
+        Callable stored on the operator.
+    skip_infer : bool, default False
+        When no output type is given, mark the output as
+        ``OutputType.df_or_series`` so that the operator returns without
+        running ``func`` for inference.
+    args : tuple
+        Extra positional arguments stored on the operator for ``func``.
+    **kwargs
+        ``output_type``, ``output_types``, ``object_type``, ``index``,
+        ``dtypes`` and ``memory_scale`` are consumed here; everything else is
+        stored on the operator as keyword arguments for ``func``.
+    Returns
+    -------
+    Whatever calling the operator on ``left`` and ``right`` returns.
+    """
+    # strip the options handled here, the remainder of kwargs goes to func
+    index = kwargs.pop("index", None)
+    dtypes = kwargs.pop("dtypes", None)
+    memory_scale = kwargs.pop("memory_scale", None)
+    validated_types = validate_output_types(
+        output_type=kwargs.pop("output_type", None),
+        output_types=kwargs.pop("output_types", None),
+        object_type=kwargs.pop("object_type", None),
+    )
+    first_type = validated_types[0] if validated_types else None
+    if first_type:
+        # only the first declared type is kept
+        resolved_types = [first_type]
+    elif skip_infer:
+        resolved_types = [OutputType.df_or_series]
+    else:
+        resolved_types = validated_types
+    chunk_op = DataFrameCartesianChunk(
+        left=left,
+        right=right,
+        func=func,
+        args=args,
+        kwargs=kwargs,
+        output_types=resolved_types,
+        memory_scale=memory_scale,
+    )
+    return chunk_op(left, right, index=index, dtypes=dtypes)

maxframe/dataframe/extensions/collect_kv.py ADDED Viewed

@@ -0,0 +1,126 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import pandas as pd
+from ... import opcodes
+from ...serialization.serializables import AnyField, StringField
+from ...utils import no_default
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+from ..utils import make_column_list
+class DataFrameCollectKv(DataFrameOperator, DataFrameOperatorMixin):
+    """
+    Operator replacing a set of columns with a single object-typed column
+    named ``kv_col``; ``__call__`` computes the dtypes and shape of the result.
+    """
+    _op_type_ = opcodes.COLLECT_KV
+    # column(s) to merge; None selects every column of the input
+    columns = AnyField("columns", default=None)
+    # delimiters stored for the merge (not used when building metadata here)
+    kv_delim = StringField("kv_delim", default=None)
+    item_delim = StringField("item_delim", default=None)
+    # name of the column appended to the remaining columns
+    kv_col = StringField("kv_col", default=None)
+    def __call__(self, df):
+        """
+        Build the output DataFrame: the columns of ``df`` that are not merged,
+        followed by one object column named ``kv_col``. The row count and the
+        index value of ``df`` are kept.
+        """
+        source_dtypes = df.dtypes
+        if self.columns is None:
+            merged_cols = list(source_dtypes.index)
+        elif isinstance(self.columns, list):
+            merged_cols = self.columns
+        else:
+            merged_cols = [self.columns]
+        # labels missing from the input are skipped silently by ``drop``
+        kept_dtypes = source_dtypes.drop(merged_cols, errors="ignore")
+        kv_dtype = pd.Series([np.dtype("object")], index=[self.kv_col])
+        out_dtypes = pd.concat([kept_dtypes, kv_dtype])
+        # NOTE(review): ``columns_value`` receives a raw pandas Index here;
+        # confirm ``new_dataframe`` accepts it without ``parse_index``
+        return self.new_dataframe(
+            [df],
+            shape=(df.shape[0], len(out_dtypes)),
+            dtypes=out_dtypes,
+            index_value=df.index_value,
+            columns_value=out_dtypes.index,
+        )
+def collect_kv(
+    data,
+    columns=None,
+    kv_delim="=",
+    item_delim=",",
+    kv_col="kv_col",
+):
+    """
+    Collapse the values of the given columns into one key-value column.
+    Parameters
+    ----------
+    data : DataFrame
+        The data frame whose columns are merged.
+    columns : list, default None
+        Columns to merge into the key-value column.
+    kv_delim : str, default '='
+        Separator placed between a key and its value.
+    item_delim : str, default ','
+        Separator placed between key-value pairs.
+    kv_col : str, default 'kv_col'
+        Name of the resulting key-value column.
+    Returns
+    -------
+    DataFrame
+        The converted data frame.
+    Raises
+    ------
+    ValueError
+        If one of the requested columns does not exist in ``data``.
+    See Also
+    --------
+    DataFrame.mf.extract_kv
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
+    ...                    "k1": [1.0, np.nan, 7.1, np.nan, np.nan],
+    ...                    "k2": [3.0, 3.0, np.nan, 1.2, 1.0],
+    ...                    "k3": [np.nan, 5.1, np.nan, 1.5, np.nan],
+    ...                    "k5": [10.0, np.nan, np.nan, np.nan, np.nan],
+    ...                    "k7": [np.nan, np.nan, 8.2, np.nan, np.nan],
+    ...                    "k9": [np.nan, np.nan, np.nan, np.nan, 1.1]})
+    >>> df.execute()
+       name   k1   k2   k3   k5    k7   k9
+    0  name1  1.0  3.0  NaN  10.0  NaN  NaN
+    1  name2  NaN  3.0  5.1  NaN   NaN  NaN
+    2  name3  7.1  NaN  NaN  NaN   8.2  NaN
+    3  name4  NaN  1.2  1.5  NaN   NaN  NaN
+    4  name5  NaN  1.0  NaN  NaN   NaN  1.1
+    ``columns`` names the fields to merge, ``kv_delim`` separates a key from
+    its value ('=' by default), ``item_delim`` separates the key-value pairs
+    (',' by default) and ``kv_col`` names the new column ('kv_col' by default).
+    >>> df.mf.collect_kv(columns=['k1', 'k2', 'k3', 'k5', 'k7', 'k9']).execute()
+       name   kv_col
+    0  name1  k1=1.0,k2=3.0,k5=10.0
+    1  name2  k2=3.0,k3=5.1
+    2  name3  k1=7.1,k7=8.2
+    3  name4  k2=1.2,k3=1.5
+    4  name5  k2=1.0,k9=1.1
+    """
+    requested_cols = make_column_list(columns, data.dtypes) or []
+    known_cols = data.dtypes.index
+    # look for the first requested column that the input does not have
+    unknown_cols = (col for col in requested_cols if col not in known_cols)
+    first_unknown = next(unknown_cols, no_default)
+    if first_unknown is not no_default:
+        raise ValueError(f"Column {first_unknown} specified is not a valid column.")
+    # the operator receives the columns argument as passed by the caller
+    collect_op = DataFrameCollectKv(
+        columns=columns,
+        kv_delim=kv_delim,
+        item_delim=item_delim,
+        kv_col=kv_col,
+    )
+    return collect_op(data)

maxframe/dataframe/extensions/extract_kv.py ADDED Viewed

@@ -0,0 +1,177 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+import numpy as np
+import pandas as pd
+from ... import opcodes
+from ...core import EntityData, OutputType
+from ...serialization.serializables import AnyField, KeyField, StringField
+from ...utils import make_dtype, no_default
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+from ..utils import make_column_list
+class DataFrameExtractKv(DataFrameOperator, DataFrameOperatorMixin):
+    _op_type_ = opcodes.EXTRACT_KV
+    columns = AnyField("columns", default=None)
+    kv_delim = StringField("kv_delim", default="=")
+    item_delim = StringField("item_delim", default=",")
+    dtype = AnyField("dtype", default=None)
+    fill_value = AnyField("fill_value", default=None)
+    errors = StringField("errors", default="raise")
+    # intermediate agg data
+    agg_results = KeyField("agg_results", default=None)
+    def __init__(self, kv_delim="=", item_delim=",", **kw):
+        super().__init__(kv_delim=kv_delim, item_delim=item_delim, **kw)
+        self.output_types = [OutputType.dataframe]
+    @classmethod
+    def _set_inputs(cls, op: "DataFrameExtractKv", inputs: List[EntityData]):
+        super()._set_inputs(op, inputs)
+        if op.agg_results is not None:
+            op.agg_results = inputs[-1]
+    def __call__(self, df):
+        shape = (df.shape[0], np.nan)
+        errors_arg = self.errors
+        def get_keys(row, cols, kv_delim, item_delim):
+            for col in cols:
+                if row[col] is not None:
+                    pairs = row[col].split(item_delim)
+                else:
+                    pairs = []
+                for pair in pairs:
+                    result = pair.split(kv_delim, 1)
+                    if len(result) == 2:
+                        yield f"{col}_{result[0]}"
+                    elif errors_arg == "raise":
+                        raise ValueError(f"Malformed data {pair} in column '{col}'.")
+        all_keys = df.mf.flatmap(
+            get_keys,
+            dtypes=pd.Series([str], index=["keys_cols"]),
+            cols=self.columns,
+            kv_delim=self.kv_delim,
+            item_delim=self.item_delim,
+        )
+        self.agg_results = all_keys.drop_duplicates().sort_values(by="keys_cols")
+        inputs = [df]
+        inputs.append(self.agg_results)
+        return self.new_dataframe(
+            inputs,
+            shape=shape,
+            dtypes=None,
+            index_value=df.index_value,
+            columns_value=None,
+        )
+def extract_kv(
+    data,
+    columns=None,
+    kv_delim="=",
+    item_delim=",",
+    dtype="float",
+    fill_value=None,
+    errors="raise",
+):
+    """
+    Extract values in key-value represented columns into standalone columns.
+    New column names will be the name of the key-value column followed by
+    an underscore and the key.
+    Parameters
+    ----------
+    columns : list, default None
+        The key-value columns to be extracted.
+    kv_delim : str, default '='
+        Delimiter between key and value.
+    item_delim : str, default ','
+        Delimiter between key-value pairs.
+    dtype : str
+        Type of value columns to generate.
+    fill_value : object, default None
+        Default value for missing key-value pairs.
+    errors : {'ignore', 'raise'}, default 'raise'
+        * If 'raise', then invalid parsing will raise an exception.
+        * If 'ignore', then invalid parsing will return the input.
+    Returns
+    -------
+    DataFrame
+        extracted data frame
+    See Also
+    --------
+    DataFrame.mf.collect_kv
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
+    ...                    "kv": ["k1=1.0,k2=3.0,k5=10.0",
+    ...                           "k2=3.0,k3=5.1",
+    ...                           "k1=7.1,k7=8.2",
+    ...                           "k2=1.2,k3=1.5",
+    ...                           "k2=1.0,k9=1.1"]})
+    >>> df.execute()
+       name   kv
+    0  name1  k1=1.0,k2=3.0,k5=10.0
+    1  name2  k2=3.0,k3=5.1
+    2  name3  k1=7.1,k7=8.2
+    3  name4  k2=1.2,k3=1.5
+    4  name5  k2=1.0,k9=1.1
+    The field names to be expanded are specified by columns
+    kv_delim is to delimit the key and value and '=' is default
+    item_delim is to delimit the Key-Value pairs, ',' is default
+    The output field name is the original field name connect with the key by "_"
+    fill_value is used to fill missing values, None is default
+    >>> df.mf.extract_kv(columns=['kv'], kv_delim='=', item_delim=',').execute()
+       name   kv_k1   kv_k2   kv_k3   kv_k5   kv_k7   kv_k9
+    0  name1  1.0     3.0     NaN     10.0    NaN     NaN
+    1  name2  NaN     3.0     5.1     NaN     NaN     NaN
+    2  name3  7.1     NaN     NaN     NaN     8.2     NaN
+    3  name4  NaN     1.2     1.5     NaN     NaN     NaN
+    4  name5  NaN     1.0     NaN     NaN     NaN     1.1
+    """
+    if columns is None:
+        columns = data.dtypes.index.tolist()
+    columns_list = make_column_list(columns, data.dtypes)
+    non_exist_key = next(
+        (c for c in columns_list if c not in data.dtypes.index), no_default
+    )
+    if non_exist_key is not no_default:
+        raise ValueError(f"Column {non_exist_key} specified is not a valid column.")
+    for col in columns_list:
+        if str(data.dtypes[col]) not in ("object", "string"):
+            raise ValueError(f"Column '{col}' must be of string type.")
+    op = DataFrameExtractKv(
+        columns=columns,
+        kv_delim=kv_delim,
+        item_delim=item_delim,
+        dtype=make_dtype(dtype),
+        fill_value=fill_value,
+        errors=errors,
+    )
+    return op(data)