maxframe 2.0.0b2__cp38-cp38-win_amd64.whl → 2.2.0__cp38-cp38-win_amd64.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -26,7 +26,9 @@ from ...serialization.serializables import (
|
|
|
26
26
|
AnyField,
|
|
27
27
|
BoolField,
|
|
28
28
|
DataTypeField,
|
|
29
|
+
DictField,
|
|
29
30
|
Int32Field,
|
|
31
|
+
Serializable,
|
|
30
32
|
StringField,
|
|
31
33
|
)
|
|
32
34
|
from ...typing_ import TileableType
|
|
@@ -48,8 +50,12 @@ _level_reduction_keep_object = pd_release_version[:2] < (1, 3)
|
|
|
48
50
|
# results in object.
|
|
49
51
|
_reduce_bool_as_object = pd_release_version[:2] != (1, 2)
|
|
50
52
|
|
|
53
|
+
_idx_reduction_without_numeric_only = pd_release_version[:2] < (1, 5)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class DataFrameReduction(DataFrameOperator):
|
|
57
|
+
_legacy_name = "DataFrameReductionOperator" # since v2.2.0
|
|
51
58
|
|
|
52
|
-
class DataFrameReductionOperator(DataFrameOperator):
|
|
53
59
|
axis = AnyField("axis", default=None)
|
|
54
60
|
skipna = BoolField("skipna", default=True)
|
|
55
61
|
level = AnyField("level", default=None)
|
|
@@ -59,8 +65,13 @@ class DataFrameReductionOperator(DataFrameOperator):
|
|
|
59
65
|
method = StringField("method", default=None)
|
|
60
66
|
|
|
61
67
|
dtype = DataTypeField("dtype", default=None)
|
|
68
|
+
combine_size = Int32Field("combine_size", default=None)
|
|
69
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=None)
|
|
62
70
|
|
|
63
71
|
def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
|
|
72
|
+
kw["use_inf_as_na"] = kw.pop(
|
|
73
|
+
"use_inf_as_na", pd.get_option("mode.use_inf_as_na")
|
|
74
|
+
)
|
|
64
75
|
super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
|
|
65
76
|
|
|
66
77
|
@property
|
|
@@ -78,23 +89,28 @@ class DataFrameReductionOperator(DataFrameOperator):
|
|
|
78
89
|
return {k: v for k, v in args.items() if v is not None}
|
|
79
90
|
|
|
80
91
|
|
|
81
|
-
|
|
92
|
+
# Keep for import compatibility
|
|
93
|
+
DataFrameReductionOperator = DataFrameReduction
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class DataFrameCumReduction(DataFrameOperator):
|
|
97
|
+
_legacy_name = "DataFrameCumReductionOperator" # since v2.2.0
|
|
98
|
+
|
|
82
99
|
axis = AnyField("axis", default=None)
|
|
83
100
|
skipna = BoolField("skipna", default=None)
|
|
84
101
|
|
|
85
102
|
dtype = DataTypeField("dtype", default=None)
|
|
103
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=None)
|
|
86
104
|
|
|
87
105
|
def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
|
|
106
|
+
kw["use_inf_as_na"] = kw.pop(
|
|
107
|
+
"use_inf_as_na", pd.get_option("mode.use_inf_as_na")
|
|
108
|
+
)
|
|
88
109
|
super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
|
|
89
110
|
|
|
90
111
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
kw.pop("bool_only", None)
|
|
94
|
-
kw.pop("numeric_only", None)
|
|
95
|
-
return getattr(value, func_name)(**kw)
|
|
96
|
-
else:
|
|
97
|
-
return getattr(value, func_name)(**kw)
|
|
112
|
+
# Keep for import compatibility
|
|
113
|
+
DataFrameCumReductionOperator = DataFrameCumReduction
|
|
98
114
|
|
|
99
115
|
|
|
100
116
|
@functools.lru_cache(100)
|
|
@@ -117,6 +133,8 @@ def _get_series_reduction_dtype(
|
|
|
117
133
|
reduced = test_series.size
|
|
118
134
|
elif func_name == "str_concat":
|
|
119
135
|
reduced = pd.Series([test_series.str.cat()])
|
|
136
|
+
elif func_name in ("idxmin", "idxmax", "argmin", "argmax"):
|
|
137
|
+
reduced = getattr(test_series, func_name)(axis=axis, skipna=skipna)
|
|
120
138
|
else:
|
|
121
139
|
reduced = getattr(test_series, func_name)(
|
|
122
140
|
axis=axis, skipna=skipna, numeric_only=numeric_only
|
|
@@ -135,6 +153,8 @@ def _get_df_reduction_dtype(
|
|
|
135
153
|
reduced = getattr(test_df, func_name)(axis=axis)
|
|
136
154
|
elif func_name in ("all", "any"):
|
|
137
155
|
reduced = getattr(test_df, func_name)(axis=axis, bool_only=bool_only)
|
|
156
|
+
elif _idx_reduction_without_numeric_only and func_name in ("idxmin", "idxmax"):
|
|
157
|
+
reduced = getattr(test_df, func_name)(axis=axis, skipna=skipna)
|
|
138
158
|
elif func_name == "str_concat":
|
|
139
159
|
reduced = test_df.apply(lambda s: s.str.cat(), axis=axis)
|
|
140
160
|
else:
|
|
@@ -146,6 +166,27 @@ def _get_df_reduction_dtype(
|
|
|
146
166
|
return reduced.dtype
|
|
147
167
|
|
|
148
168
|
|
|
169
|
+
class ReductionCallable(Serializable):
|
|
170
|
+
func_name = StringField("func_name")
|
|
171
|
+
kwargs = DictField("kwargs", default=None)
|
|
172
|
+
|
|
173
|
+
def __name__(self):
|
|
174
|
+
return self.func_name
|
|
175
|
+
|
|
176
|
+
def __call__(self, value):
|
|
177
|
+
kw = self.kwargs.copy()
|
|
178
|
+
if value.ndim == 1:
|
|
179
|
+
kw.pop("bool_only", None)
|
|
180
|
+
kw.pop("numeric_only", None)
|
|
181
|
+
return getattr(value, self.func_name)(**kw)
|
|
182
|
+
else:
|
|
183
|
+
return getattr(value, self.func_name)(**kw)
|
|
184
|
+
|
|
185
|
+
def __maxframe_tokenize__(self):
|
|
186
|
+
# make sure compiled functions are correctly cached
|
|
187
|
+
return type(self), self.func_name, self.kwargs
|
|
188
|
+
|
|
189
|
+
|
|
149
190
|
class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
150
191
|
@classmethod
|
|
151
192
|
def get_reduction_callable(cls, op):
|
|
@@ -154,9 +195,7 @@ class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
|
154
195
|
skipna=op.skipna, numeric_only=op.numeric_only, bool_only=op.bool_only
|
|
155
196
|
)
|
|
156
197
|
kw = {k: v for k, v in kw.items() if v is not None}
|
|
157
|
-
|
|
158
|
-
fun.__name__ = func_name
|
|
159
|
-
return fun
|
|
198
|
+
return ReductionCallable(func_name=func_name, kwargs=kw)
|
|
160
199
|
|
|
161
200
|
def _call_groupby_level(self, df, level):
|
|
162
201
|
return df.groupby(level=level).agg(
|
|
@@ -426,6 +465,8 @@ _func_name_converts = dict(
|
|
|
426
465
|
true_divide="truediv",
|
|
427
466
|
floor_divide="floordiv",
|
|
428
467
|
power="pow",
|
|
468
|
+
subtract="sub",
|
|
469
|
+
multiply="mul",
|
|
429
470
|
)
|
|
430
471
|
_func_compile_cache = dict() # type: Dict[str, ReductionSteps]
|
|
431
472
|
|
|
@@ -442,8 +483,8 @@ _idl_primitive_types = (
|
|
|
442
483
|
|
|
443
484
|
IN_VAR_IDL_OP = "in_var"
|
|
444
485
|
OUT_VAR_IDL_OP = "out_var"
|
|
445
|
-
|
|
446
|
-
|
|
486
|
+
MASK_VAR_IDL_OP = "mask"
|
|
487
|
+
WHERE_VAR_IDL_OP = "where"
|
|
447
488
|
LET_VAR_OP = "let"
|
|
448
489
|
UNARY_IDL_OP_PREFIX = "unary:"
|
|
449
490
|
BINARY_IDL_OP_PREFIX = "bin:"
|
|
@@ -471,7 +512,7 @@ class ReductionCompiler:
|
|
|
471
512
|
def _check_function_valid(cls, func):
|
|
472
513
|
if isinstance(func, functools.partial):
|
|
473
514
|
return cls._check_function_valid(func.func)
|
|
474
|
-
elif isinstance(func, CustomReduction):
|
|
515
|
+
elif isinstance(func, (CustomReduction, ReductionCallable)):
|
|
475
516
|
return
|
|
476
517
|
|
|
477
518
|
func_code = func.__code__
|
|
@@ -569,6 +610,7 @@ class ReductionCompiler:
|
|
|
569
610
|
from ..datasource.dataframe import DataFrameDataSource
|
|
570
611
|
from ..datasource.series import SeriesDataSource
|
|
571
612
|
from ..indexing.where import DataFrameWhere
|
|
613
|
+
from .custom_reduction import DataFrameCustomReduction
|
|
572
614
|
|
|
573
615
|
func_token = tokenize(func, self._axis, func_name, ndim)
|
|
574
616
|
if func_token in _func_compile_cache:
|
|
@@ -639,6 +681,9 @@ class ReductionCompiler:
|
|
|
639
681
|
else:
|
|
640
682
|
map_func_name, agg_func_name = step_func_name, step_func_name
|
|
641
683
|
|
|
684
|
+
if isinstance(t.op, DataFrameCustomReduction):
|
|
685
|
+
custom_reduction = custom_reduction or t.op.custom_reduction
|
|
686
|
+
|
|
642
687
|
# build agg description
|
|
643
688
|
agg_funcs.append(
|
|
644
689
|
ReductionAggStep(
|
|
@@ -705,7 +750,7 @@ class ReductionCompiler:
|
|
|
705
750
|
input_op_types = (
|
|
706
751
|
DataFrameDataSource,
|
|
707
752
|
SeriesDataSource,
|
|
708
|
-
|
|
753
|
+
DataFrameReduction,
|
|
709
754
|
)
|
|
710
755
|
|
|
711
756
|
def _gen_expr_str(t):
|
|
@@ -776,9 +821,11 @@ class ReductionCompiler:
|
|
|
776
821
|
cond = _interpret_var(t.op.condition)
|
|
777
822
|
x = _interpret_var(t.op.x)
|
|
778
823
|
y = _interpret_var(t.op.y)
|
|
779
|
-
statements = [[
|
|
824
|
+
statements = [[WHERE_VAR_IDL_OP, var_name, [cond, x, y], {}]]
|
|
780
825
|
elif isinstance(t.op, DataFrameWhere):
|
|
781
|
-
func_name =
|
|
826
|
+
func_name = (
|
|
827
|
+
MASK_VAR_IDL_OP if t.op.replace_true else WHERE_VAR_IDL_OP
|
|
828
|
+
)
|
|
782
829
|
inp = _interpret_var(t.op.input)
|
|
783
830
|
cond = _interpret_var(t.op.cond)
|
|
784
831
|
other = _interpret_var(t.op.other)
|
|
@@ -14,10 +14,18 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import DataFrameReductionMixin, DataFrameReductionOperator
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFrameCount(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
20
|
+
class CountReductionCallable(ReductionCallable):
|
|
21
|
+
def __call__(self, value):
|
|
22
|
+
skipna, numeric_only = self.kwargs["skipna"], self.kwargs["numeric_only"]
|
|
23
|
+
if value.ndim == 1:
|
|
24
|
+
return value.count()
|
|
25
|
+
return value.count(skipna=skipna, numeric_only=numeric_only)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DataFrameCount(DataFrameReduction, DataFrameReductionMixin):
|
|
21
29
|
_op_type_ = opcodes.COUNT
|
|
22
30
|
_func_name = "count"
|
|
23
31
|
|
|
@@ -28,13 +36,9 @@ class DataFrameCount(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
28
36
|
@classmethod
|
|
29
37
|
def get_reduction_callable(cls, op):
|
|
30
38
|
skipna, numeric_only = op.skipna, op.numeric_only
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
return value.count()
|
|
35
|
-
return value.count(skipna=skipna, numeric_only=numeric_only)
|
|
36
|
-
|
|
37
|
-
return count
|
|
39
|
+
return CountReductionCallable(
|
|
40
|
+
func_name="count", kwargs={"skipna": skipna, "numeric_only": numeric_only}
|
|
41
|
+
)
|
|
38
42
|
|
|
39
43
|
|
|
40
44
|
def count_series(series, level=None, **kw):
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def cov_dataframe(df, min_periods=None, ddof=1, numeric_only=True):
    """
    Compute pairwise covariance of columns, excluding NA/null values.

    Compute the pairwise covariance among the series of a DataFrame.
    The returned data frame is the `covariance matrix
    <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
    of the DataFrame.

    Both NA and null values are automatically excluded from the
    calculation. (See the note below about bias from missing values.)
    A threshold can be set for the minimum number of
    observations for each value created. Comparisons with observations
    below this threshold will be returned as ``NaN``.

    This method is generally used for the analysis of time series data to
    understand the relationship between different measures
    across time.

    Parameters
    ----------
    min_periods : int, optional
        Minimum number of observations required per pair of columns
        to have a valid result.

    ddof : int, default 1
        Delta degrees of freedom.  The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of elements.
        This argument is applicable only when no ``nan`` is in the dataframe.

    numeric_only : bool, default True
        Include only `float`, `int` or `boolean` data. Only ``True`` is
        currently supported.

    Returns
    -------
    DataFrame
        The covariance matrix of the series of the DataFrame.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is false.

    See Also
    --------
    Series.cov : Compute covariance with another Series.
    core.window.ewm.ExponentialMovingWindow.cov : Exponential weighted sample
        covariance.
    core.window.expanding.Expanding.cov : Expanding sample covariance.
    core.window.rolling.Rolling.cov : Rolling sample covariance.

    Notes
    -----
    Returns the covariance matrix of the DataFrame's time series.
    The covariance is normalized by N-ddof.

    For DataFrames that have Series that are missing data (assuming that
    data is `missing at random
    <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
    the returned covariance matrix will be an unbiased estimate
    of the variance and covariance between the member Series.

    However, for many applications this estimate may not be acceptable
    because the estimate covariance matrix is not guaranteed to be positive
    semi-definite. This could lead to estimate correlations having
    absolute values which are greater than one, and/or a non-invertible
    covariance matrix. See `Estimation of covariance matrices
    <https://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_
    matrices>`__ for more details.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
    ...                   columns=['dogs', 'cats'])
    >>> df.cov().execute()
              dogs      cats
    dogs  0.666667 -1.000000
    cats -1.000000  1.666667

    >>> mt.random.seed(42)
    >>> df = md.DataFrame(mt.random.randn(1000, 5),
    ...                   columns=['a', 'b', 'c', 'd', 'e'])
    >>> df.cov().execute()
              a         b         c         d         e
    a  0.998438 -0.020161  0.059277 -0.008943  0.014144
    b -0.020161  1.059352 -0.008543 -0.024738  0.009826
    c  0.059277 -0.008543  1.010670 -0.001486 -0.000271
    d -0.008943 -0.024738 -0.001486  0.921297 -0.013692
    e  0.014144  0.009826 -0.000271 -0.013692  0.977795

    **Minimum number of periods**

    This method also supports an optional ``min_periods`` keyword
    that specifies the required minimum number of non-NA observations for
    each column pair in order to have a valid result:

    >>> mt.random.seed(42)
    >>> df = md.DataFrame(mt.random.randn(20, 3),
    ...                   columns=['a', 'b', 'c'])
    >>> df.loc[df.index[:5], 'a'] = mt.nan
    >>> df.loc[df.index[5:10], 'b'] = mt.nan
    >>> df.cov(min_periods=12).execute()
              a         b         c
    a  0.316741       NaN -0.150812
    b       NaN  1.248003  0.191417
    c -0.150812  0.191417  0.895202
    """
    # Reject the unsupported option before doing the lazy import so that
    # invalid input fails fast; the message names the value that was refused.
    if not numeric_only:
        raise NotImplementedError("numeric_only==False not supported")

    # Imported lazily inside the function, as in the original implementation.
    from ..statistics.corr import DataFrameCorr

    op = DataFrameCorr(method="cov", min_periods=min_periods, ddof=ddof)
    return op(df)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def cov_series(series, other, min_periods=None, ddof=1):
    """
    Compute the covariance of this Series with another one, ignoring
    missing values.

    The two `Series` do not have to be of equal length; they are aligned
    internally before the covariance is calculated.

    Parameters
    ----------
    other : Series
        The Series to compute the covariance against.
    min_periods : int, optional
        Minimum number of observations required for a valid result.
    ddof : int, default 1
        Delta degrees of freedom.  The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of elements.

    Returns
    -------
    float
        Covariance between Series and other normalized by N-1
        (unbiased estimator).

    See Also
    --------
    DataFrame.cov : Compute pairwise covariance of columns.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s1 = md.Series([0.90010907, 0.13484424, 0.62036035])
    >>> s2 = md.Series([0.12528585, 0.26962463, 0.51111198])
    >>> s1.cov(s2).execute()
    -0.01685762652715874
    """
    from ..statistics.corr import DataFrameCorr

    # The same operator class serves covariance when built with method="cov".
    return DataFrameCorr(
        other=other,
        method="cov",
        min_periods=min_periods,
        ddof=ddof,
    )(series)
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCummax(
|
|
19
|
+
class DataFrameCummax(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMMAX
|
|
21
21
|
_func_name = "cummax"
|
|
22
22
|
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCummin(
|
|
19
|
+
class DataFrameCummin(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMMIN
|
|
21
21
|
_func_name = "cummin"
|
|
22
22
|
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCumprod(
|
|
19
|
+
class DataFrameCumprod(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMPROD
|
|
21
21
|
_func_name = "cumprod"
|
|
22
22
|
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCumsum(
|
|
19
|
+
class DataFrameCumsum(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMSUM
|
|
21
21
|
_func_name = "cumsum"
|
|
22
22
|
|
|
@@ -15,10 +15,10 @@
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
17
|
from ...serialization.serializables import AnyField
|
|
18
|
-
from .core import
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class DataFrameCustomReduction(
|
|
21
|
+
class DataFrameCustomReduction(DataFrameReduction, DataFrameReductionMixin):
|
|
22
22
|
_op_type_ = opcodes.CUSTOM_REDUCTION
|
|
23
23
|
_func_name = "custom_reduction"
|
|
24
24
|
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameIdxMax(DataFrameReduction, DataFrameReductionMixin):
    """Reduction operator registered under ``opcodes.IDXMAX`` for ``idxmax``."""

    _op_type_ = opcodes.IDXMAX
    _func_name = "idxmax"

    @property
    def is_atomic(self):
        """Always ``True`` for this operator."""
        return True

    def get_reduction_args(self, axis=None):
        """
        Build the keyword arguments for the reduction call.

        ``skipna`` is always considered; ``axis`` is only considered when the
        first input has more than one dimension. Entries whose value is
        ``None`` are left out of the returned dict.
        """
        skipna = self.skipna
        reduction_args = {}
        if skipna is not None:
            reduction_args["skipna"] = skipna
        if self.inputs and self.inputs[0].ndim > 1 and axis is not None:
            reduction_args["axis"] = axis
        return reduction_args

    @classmethod
    def get_reduction_callable(cls, op):
        """
        Create the ``ReductionCallable`` for ``op``.

        The callable is named after ``op._func_name`` and carries ``skipna``
        as its only keyword argument, omitted when it is ``None``.
        """
        name = op._func_name
        skipna = op.skipna
        call_kwargs = {} if skipna is None else {"skipna": skipna}
        return ReductionCallable(func_name=name, kwargs=call_kwargs)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def idxmax_dataframe(df, axis=0, skipna=True):
    """
    Return index of first occurrence of maximum over requested axis.

    NA/null values are excluded.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for
        column-wise.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA, the result
        will be NA.

    Returns
    -------
    Series
        Indexes of maxima along the specified axis.

    Raises
    ------
    ValueError
        * If the row/column is empty

    See Also
    --------
    Series.idxmax : Return index of the maximum element.

    Notes
    -----
    This method is the DataFrame version of ``ndarray.argmax``.

    Examples
    --------
    Consider a dataset containing food consumption in Argentina.

    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'consumption': [10.51, 103.11, 55.48],
    ...                    'co2_emissions': [37.2, 19.66, 1712]},
    ...                   index=['Pork', 'Wheat Products', 'Beef'])

    >>> df.execute()
                    consumption  co2_emissions
    Pork                  10.51         37.20
    Wheat Products       103.11         19.66
    Beef                  55.48       1712.00

    By default, it returns the index for the maximum value in each column.

    >>> df.idxmax().execute()
    consumption     Wheat Products
    co2_emissions             Beef
    dtype: object

    To return the index for the maximum value in each row, use
    ``axis="columns"``.

    >>> df.idxmax(axis="columns").execute()
    Pork              co2_emissions
    Wheat Products      consumption
    Beef              co2_emissions
    dtype: object
    """
    # The validated axis is what the operator receives, not the raw argument.
    return DataFrameIdxMax(
        axis=validate_axis(axis, df),
        skipna=skipna,
        output_types=[OutputType.series],
    )(df)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def idxmax_series(series, axis=0, skipna=True):
    """
    Return the row label of the maximum value.

    If multiple values equal the maximum, the first row label with that
    value is returned.

    Parameters
    ----------
    axis : int, default 0
        For compatibility with DataFrame.idxmax. Redundant for application
        on Series.
    skipna : bool, default True
        Exclude NA/null values. If the entire Series is NA, the result
        will be NA.

    Returns
    -------
    Index
        Label of the maximum value.

    Raises
    ------
    ValueError
        If the Series is empty.

    See Also
    --------
    numpy.argmax : Return indices of the maximum values
        along the given axis.
    DataFrame.idxmax : Return index of first occurrence of maximum
        over requested axis.
    Series.idxmin : Return index *label* of the first occurrence
        of minimum of values.

    Notes
    -----
    This method is the Series version of ``ndarray.argmax``. This method
    returns the label of the maximum, while ``ndarray.argmax`` returns
    the position. To get the position, use ``series.values.argmax()``.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(data=[1, None, 4, 3, 4],
    ...               index=['A', 'B', 'C', 'D', 'E'])
    >>> s.execute()
    A    1.0
    B    NaN
    C    4.0
    D    3.0
    E    4.0
    dtype: float64

    >>> s.idxmax().execute()
    'C'

    If `skipna` is False and there is an NA value in the data,
    the function returns ``nan``.

    >>> s.idxmax(skipna=False).execute()
    nan
    """
    # Called for validation only; the returned value is intentionally unused
    # (presumably raises on an invalid axis -- confirm against validate_axis).
    validate_axis(axis, series)
    op = DataFrameIdxMax(
        # DataFrameIdxMax reads ``skipna`` when building the reduction
        # arguments and callable, so the option must be passed under that
        # name (it was previously passed as ``dropna`` and never picked up).
        skipna=skipna,
        output_types=[OutputType.scalar],
    )
    return op(series)
|