maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameIdxMax(DataFrameReduction, DataFrameReductionMixin):
    """Reduction operator registered under ``opcodes.IDXMAX``.

    The operator carries the function name ``"idxmax"`` and exposes its
    ``skipna`` setting (plus the axis for inputs with more than one
    dimension) as keyword arguments for the reduction.
    """

    _op_type_ = opcodes.IDXMAX
    _func_name = "idxmax"

    @property
    def is_atomic(self):
        # Always reported as atomic.
        # NOTE(review): how the base classes consume this flag is not
        # visible in this module -- confirm against the reduction core.
        return True

    def get_reduction_args(self, axis=None):
        """Collect keyword arguments for the reduction call.

        ``skipna`` is always considered; ``axis`` is only considered when
        the first input has more than one dimension.  Entries whose value
        is ``None`` are left out of the returned dict.
        """
        candidates = [("skipna", self.skipna)]
        if self.inputs and self.inputs[0].ndim > 1:
            candidates.append(("axis", axis))
        return {name: value for name, value in candidates if value is not None}

    @classmethod
    def get_reduction_callable(cls, op):
        """Build a ``ReductionCallable`` from ``op``'s function name and ``skipna``.

        ``skipna`` is omitted from the keyword arguments when it is ``None``.
        """
        func_name = op._func_name
        skipna = op.skipna
        kwargs = {} if skipna is None else {"skipna": skipna}
        return ReductionCallable(func_name=func_name, kwargs=kwargs)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def idxmax_dataframe(df, axis=0, skipna=True):
    """
    Return the index of the first occurrence of the maximum over an axis.

    NA/null values are excluded.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for
        column-wise.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA, the result
        will be NA.

    Returns
    -------
    Series
        Indexes of maxima along the specified axis.

    Raises
    ------
    ValueError
        * If the row/column is empty

    See Also
    --------
    Series.idxmax : Return index of the maximum element.

    Notes
    -----
    This method is the DataFrame version of ``ndarray.argmax``.

    Examples
    --------
    Consider a dataset containing food consumption in Argentina.

    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'consumption': [10.51, 103.11, 55.48],
    ...                    'co2_emissions': [37.2, 19.66, 1712]},
    ...                   index=['Pork', 'Wheat Products', 'Beef'])

    >>> df.execute()
                    consumption  co2_emissions
    Pork                  10.51          37.20
    Wheat Products       103.11          19.66
    Beef                  55.48        1712.00

    By default, it returns the index for the maximum value in each column.

    >>> df.idxmax().execute()
    consumption      Wheat Products
    co2_emissions              Beef
    dtype: object

    To return the index for the maximum value in each row, use
    ``axis="columns"``.

    >>> df.idxmax(axis="columns").execute()
    Pork              co2_emissions
    Wheat Products      consumption
    Beef              co2_emissions
    dtype: object
    """
    # The axis is passed through validate_axis before being stored on the
    # operator; the result is declared as a series.
    reduction = DataFrameIdxMax(
        axis=validate_axis(axis, df),
        skipna=skipna,
        output_types=[OutputType.series],
    )
    return reduction(df)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def idxmax_series(series, axis=0, skipna=True):
    """
    Return the row label of the maximum value.

    If multiple values equal the maximum, the first row label with that
    value is returned.

    Parameters
    ----------
    axis : int, default 0
        For compatibility with DataFrame.idxmax. Redundant for application
        on Series.
    skipna : bool, default True
        Exclude NA/null values. If the entire Series is NA, the result
        will be NA.

    Returns
    -------
    scalar
        Label of the maximum value.

    Raises
    ------
    ValueError
        If the Series is empty.

    See Also
    --------
    numpy.argmax : Return indices of the maximum values
        along the given axis.
    DataFrame.idxmax : Return index of first occurrence of maximum
        over requested axis.
    Series.idxmin : Return index *label* of the first occurrence
        of minimum of values.

    Notes
    -----
    This method is the Series version of ``ndarray.argmax``. This method
    returns the label of the maximum, while ``ndarray.argmax`` returns
    the position. To get the position, use ``series.values.argmax()``.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(data=[1, None, 4, 3, 4],
    ...               index=['A', 'B', 'C', 'D', 'E'])
    >>> s.execute()
    A    1.0
    B    NaN
    C    4.0
    D    3.0
    E    4.0
    dtype: float64

    >>> s.idxmax().execute()
    'C'

    If `skipna` is False and there is an NA value in the data,
    the function returns ``nan``.

    >>> s.idxmax(skipna=False).execute()
    nan
    """
    # Called for validation only; the returned value is not used here.
    validate_axis(axis, series)
    op = DataFrameIdxMax(
        # The operator reads ``skipna`` when it assembles the reduction
        # arguments, so the flag must be stored under that name.  Storing
        # it as ``dropna`` left ``skipna`` unset and the caller's choice
        # was never applied.
        skipna=skipna,
        output_types=[OutputType.scalar],
    )
    return op(series)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameIdxMin(DataFrameReduction, DataFrameReductionMixin):
    """Reduction operator registered under ``opcodes.IDXMIN``.

    The operator carries the function name ``"idxmin"`` and exposes its
    ``skipna`` setting (plus the axis for inputs with more than one
    dimension) as keyword arguments for the reduction.
    """

    _op_type_ = opcodes.IDXMIN
    _func_name = "idxmin"

    @property
    def is_atomic(self):
        # Always reported as atomic.
        # NOTE(review): how the base classes consume this flag is not
        # visible in this module -- confirm against the reduction core.
        return True

    def get_reduction_args(self, axis=None):
        """Collect keyword arguments for the reduction call.

        ``skipna`` is always considered; ``axis`` is only considered when
        the first input has more than one dimension.  Entries whose value
        is ``None`` are left out of the returned dict.
        """
        candidates = [("skipna", self.skipna)]
        if self.inputs and self.inputs[0].ndim > 1:
            candidates.append(("axis", axis))
        return {name: value for name, value in candidates if value is not None}

    @classmethod
    def get_reduction_callable(cls, op):
        """Build a ``ReductionCallable`` from ``op``'s function name and ``skipna``.

        ``skipna`` is omitted from the keyword arguments when it is ``None``.
        """
        func_name = op._func_name
        skipna = op.skipna
        kwargs = {} if skipna is None else {"skipna": skipna}
        return ReductionCallable(func_name=func_name, kwargs=kwargs)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def idxmin_dataframe(df, axis=0, skipna=True):
    """
    Return the index of the first occurrence of the minimum over an axis.

    NA/null values are excluded.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for
        column-wise.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA, the result
        will be NA.

    Returns
    -------
    Series
        Indexes of minima along the specified axis.

    Raises
    ------
    ValueError
        * If the row/column is empty

    See Also
    --------
    Series.idxmin : Return index of the minimum element.

    Notes
    -----
    This method is the DataFrame version of ``ndarray.argmin``.

    Examples
    --------
    Consider a dataset containing food consumption in Argentina.

    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'consumption': [10.51, 103.11, 55.48],
    ...                    'co2_emissions': [37.2, 19.66, 1712]},
    ...                   index=['Pork', 'Wheat Products', 'Beef'])

    >>> df.execute()
                    consumption  co2_emissions
    Pork                  10.51          37.20
    Wheat Products       103.11          19.66
    Beef                  55.48        1712.00

    By default, it returns the index for the minimum value in each column.

    >>> df.idxmin().execute()
    consumption                Pork
    co2_emissions    Wheat Products
    dtype: object

    To return the index for the minimum value in each row, use
    ``axis="columns"``.

    >>> df.idxmin(axis="columns").execute()
    Pork                consumption
    Wheat Products    co2_emissions
    Beef                consumption
    dtype: object
    """
    # The axis is passed through validate_axis before being stored on the
    # operator; the result is declared as a series.
    reduction = DataFrameIdxMin(
        axis=validate_axis(axis, df),
        skipna=skipna,
        output_types=[OutputType.series],
    )
    return reduction(df)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def idxmin_series(series, axis=0, skipna=True):
    """
    Return the row label of the minimum value.

    If multiple values equal the minimum, the first row label with that
    value is returned.

    Parameters
    ----------
    axis : int, default 0
        For compatibility with DataFrame.idxmin. Redundant for application
        on Series.
    skipna : bool, default True
        Exclude NA/null values. If the entire Series is NA, the result
        will be NA.

    Returns
    -------
    Index
        Label of the minimum value.

    Raises
    ------
    ValueError
        If the Series is empty.

    See Also
    --------
    numpy.argmin : Return indices of the minimum values
        along the given axis.
    DataFrame.idxmin : Return index of first occurrence of minimum
        over requested axis.
    Series.idxmin : Return index *label* of the first occurrence
        of minimum of values.

    Notes
    -----
    This method is the Series version of ``ndarray.argmin``. This method
    returns the label of the minimum, while ``ndarray.argmin`` returns
    the position. To get the position, use ``series.values.argmin()``.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(data=[1, None, 4, 3, 4],
    ...               index=['A', 'B', 'C', 'D', 'E'])
    >>> s.execute()
    A    1.0
    B    NaN
    C    4.0
    D    3.0
    E    4.0
    dtype: float64

    >>> s.idxmin().execute()
    'A'

    If `skipna` is False and there is an NA value in the data,
    the function returns ``nan``.

    >>> s.idxmin(skipna=False).execute()
    nan
    """
    # Called only for its validation side effect: the normalized axis is not
    # forwarded to the operator, exactly as before.
    validate_axis(axis, series)
    op = DataFrameIdxMin(
        # Forward the flag under the ``skipna`` name, the same keyword
        # ``idxmin_dataframe`` uses to configure this operator; it was
        # previously passed as ``dropna``.
        skipna=skipna,
        output_types=[OutputType.scalar],
    )
    return op(series)
|
|
@@ -17,10 +17,42 @@ import numpy as np
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import ENTITY_TYPE, OutputType
|
|
19
19
|
from ...serialization.serializables import BoolField
|
|
20
|
-
from .core import DataFrameReductionMixin,
|
|
20
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class
|
|
23
|
+
class KurtosisReductionCallable(ReductionCallable):
    """
    Reduction callable computing kurtosis from moment-style reductions.

    The options ``skipna``, ``bias`` and ``fisher`` are read from
    ``self.kwargs``.
    """

    def __call__(self, value):
        """
        Compute the kurtosis of ``value``.

        ``value`` must provide ``count``, ``mean`` and ``var`` reductions and
        support ``**`` with integer exponents.
        """
        # Imported at call time rather than module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from .aggregation import where_function

        skipna = self.kwargs["skipna"]
        bias = self.kwargs["bias"]
        fisher = self.kwargs["fisher"]

        cnt = value.count()
        mean = value.mean(skipna=skipna)
        # Fourth central moment, expanded in terms of raw moments:
        # E[x^4] - 4 E[x^3] m + 6 E[x^2] m^2 - 3 m^4.
        divided = (
            (value**4).mean(skipna=skipna)
            - 4 * (value**3).mean(skipna=skipna) * mean
            + 6 * (value**2).mean(skipna=skipna) * mean**2
            - 3 * mean**4
        )
        # Population variance (ddof=0), used as the normalizer below.
        var = value.var(skipna=skipna, ddof=0)
        # ENTITY_TYPE results go through where_function so that entries with
        # non-positive variance become NaN; any other result is compared
        # directly.
        if isinstance(var, ENTITY_TYPE) or var > 0:
            val = where_function(var > 0, divided / var**2, np.nan)
        else:
            val = np.nan
        if not bias:
            # Sample-size correction; yields NaN unless var > 0 and more than
            # three values were counted.
            val = where_function(
                (var > 0) & (cnt > 3),
                (val * (cnt**2 - 1) - 3 * (cnt - 1) ** 2) / (cnt - 2) / (cnt - 3),
                np.nan,
            )
        if not fisher:
            # Non-Fisher definition: shift the result by 3.
            val += 3
        return val
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class DataFrameKurtosis(DataFrameReduction, DataFrameReductionMixin):
|
|
24
56
|
_op_type_ = opcodes.KURTOSIS
|
|
25
57
|
_func_name = "kurt"
|
|
26
58
|
|
|
@@ -29,35 +61,10 @@ class DataFrameKurtosis(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
29
61
|
|
|
30
62
|
@classmethod
|
|
31
63
|
def get_reduction_callable(cls, op):
|
|
32
|
-
from .aggregation import where_function
|
|
33
|
-
|
|
34
64
|
skipna, bias, fisher = op.skipna, op.bias, op.fisher
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
mean = x.mean(skipna=skipna)
|
|
39
|
-
divided = (
|
|
40
|
-
(x**4).mean(skipna=skipna)
|
|
41
|
-
- 4 * (x**3).mean(skipna=skipna) * mean
|
|
42
|
-
+ 6 * (x**2).mean(skipna=skipna) * mean**2
|
|
43
|
-
- 3 * mean**4
|
|
44
|
-
)
|
|
45
|
-
var = x.var(skipna=skipna, ddof=0)
|
|
46
|
-
if isinstance(var, ENTITY_TYPE) or var > 0:
|
|
47
|
-
val = where_function(var > 0, divided / var**2, np.nan)
|
|
48
|
-
else:
|
|
49
|
-
val = np.nan
|
|
50
|
-
if not bias:
|
|
51
|
-
val = where_function(
|
|
52
|
-
(var > 0) & (cnt > 3),
|
|
53
|
-
(val * (cnt**2 - 1) - 3 * (cnt - 1) ** 2) / (cnt - 2) / (cnt - 3),
|
|
54
|
-
np.nan,
|
|
55
|
-
)
|
|
56
|
-
if not fisher:
|
|
57
|
-
val += 3
|
|
58
|
-
return val
|
|
59
|
-
|
|
60
|
-
return kurt
|
|
65
|
+
return KurtosisReductionCallable(
|
|
66
|
+
func_name="kurt", kwargs=dict(skipna=skipna, bias=bias, fisher=fisher)
|
|
67
|
+
)
|
|
61
68
|
|
|
62
69
|
|
|
63
70
|
def kurt_series(
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFrameMax(
|
|
20
|
+
class DataFrameMax(DataFrameReduction, DataFrameReductionMixin):
|
|
21
21
|
_op_type_ = opcodes.MAX
|
|
22
22
|
_func_name = "max"
|
|
23
23
|
|
|
@@ -14,21 +14,23 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import DataFrameReductionMixin,
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class
|
|
20
|
+
class MeanReductionCallable(ReductionCallable):
    """Reduction callable that derives the mean as ``sum / count``."""

    def __call__(self, value):
        """Return ``value.sum(skipna=...) / value.count()``."""
        # ``skipna`` (taken from ``self.kwargs``) only steers the sum; the
        # divisor is always the result of ``value.count()``.
        total = value.sum(skipna=self.kwargs["skipna"])
        return total / value.count()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DataFrameMean(DataFrameReduction, DataFrameReductionMixin):
|
|
21
27
|
_op_type_ = opcodes.MEAN
|
|
22
28
|
_func_name = "mean"
|
|
23
29
|
|
|
24
30
|
@classmethod
|
|
25
31
|
def get_reduction_callable(cls, op):
|
|
26
32
|
skipna = op.skipna
|
|
27
|
-
|
|
28
|
-
def mean(x):
|
|
29
|
-
return x.sum(skipna=skipna) / x.count()
|
|
30
|
-
|
|
31
|
-
return mean
|
|
33
|
+
return MeanReductionCallable(func_name="mean", kwargs=dict(skipna=skipna))
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
def mean_series(df, axis=None, skipna=True, level=None, method=None):
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFrameMedian(
|
|
20
|
+
class DataFrameMedian(DataFrameReduction, DataFrameReductionMixin):
|
|
21
21
|
_op_type_ = opcodes.MEDIAN
|
|
22
22
|
_func_name = "median"
|
|
23
23
|
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFrameMin(
|
|
20
|
+
class DataFrameMin(DataFrameReduction, DataFrameReductionMixin):
|
|
21
21
|
_op_type_ = opcodes.MIN
|
|
22
22
|
_func_name = "min"
|
|
23
23
|
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import OutputType, get_output_types
|
|
19
|
+
from ...serialization.serializables import BoolField, Int32Field
|
|
20
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
21
|
+
from ..utils import parse_index, validate_axis
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DataFrameMode(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing a ``mode`` computation on a DataFrame or Series."""

    _op_type_ = opcodes.MODE

    # Axis along which the mode is searched (used to index the input shape).
    axis = Int32Field("axis", default=None)
    # Stored from the ``numeric_only`` argument of ``mode_dataframe``.
    numeric_only = BoolField("numeric_only", default=False)
    # Stored from the ``dropna`` argument of ``mode_dataframe`` / ``mode_series``.
    dropna = BoolField("dropna", default=True)
    # Stored from the ``combine_size`` argument; not used within this class.
    combine_size = Int32Field("combine_size", default=None)

    def __call__(self, in_obj):
        """
        Build the output tileable for ``in_obj``.

        The output takes the input's output type and params, with the length
        along ``self.axis`` marked unknown (NaN) and the index along that axis
        replaced by an empty slice of the input's index.
        """
        self._output_types = get_output_types(in_obj)
        params = in_obj.params
        shape = list(in_obj.shape)
        # The number of modes is only known after execution.
        shape[self.axis] = np.nan
        params["shape"] = tuple(shape)

        if self.axis == 0:
            # ``[:0]`` yields an empty index derived from the input's index.
            pd_idx = in_obj.index_value.to_pandas()[:0]
            params["index_value"] = parse_index(pd_idx)
        else:
            pd_idx = in_obj.columns_value.to_pandas()[:0]
            params["columns_value"] = parse_index(pd_idx)
            # NOTE(review): the indentation of this statement was lost in the
            # rendered diff; it is assumed to belong to the ``else`` branch --
            # confirm against the original file.
            params["dtypes"] = None
        return self.new_tileable([in_obj], **params)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def mode_dataframe(df, axis=0, numeric_only=False, dropna=True, combine_size=None):
    """
    Get the mode(s) of each element along the selected axis.

    The mode of a set of values is the value that appears most often.
    It can be multiple values.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to iterate over while searching for the mode:

        * 0 or 'index' : get mode of each column
        * 1 or 'columns' : get mode of each row.

    numeric_only : bool, default False
        If True, only apply to numeric columns.
    dropna : bool, default True
        Don't consider counts of NaN/NaT.
    combine_size : int, optional
        Forwarded unchanged to the ``DataFrameMode`` operator.

    Returns
    -------
    DataFrame
        The modes of each column or row.

    See Also
    --------
    Series.mode : Return the highest frequency value in a Series.
    Series.value_counts : Return the counts of values in a Series.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([('bird', 2, 2),
    ...                    ('mammal', 4, mt.nan),
    ...                    ('arthropod', 8, 0),
    ...                    ('bird', 2, mt.nan)],
    ...                   index=('falcon', 'horse', 'spider', 'ostrich'),
    ...                   columns=('species', 'legs', 'wings'))
    >>> df.execute()
               species  legs  wings
    falcon        bird     2    2.0
    horse       mammal     4    NaN
    spider   arthropod     8    0.0
    ostrich       bird     2    NaN

    By default, missing values are not considered, and the mode of wings
    are both 0 and 2. Because the resulting DataFrame has two rows,
    the second row of ``species`` and ``legs`` contains ``NaN``.

    >>> df.mode().execute()
      species  legs  wings
    0    bird   2.0    0.0
    1     NaN   NaN    2.0

    Setting ``dropna=False`` ``NaN`` values are considered and they can be
    the mode (like for wings).

    >>> df.mode(dropna=False).execute()
      species  legs  wings
    0    bird     2    NaN

    Setting ``numeric_only=True``, only the mode of numeric columns is
    computed, and columns of other types are ignored.

    >>> df.mode(numeric_only=True).execute()
       legs  wings
    0   2.0    0.0
    1   NaN    2.0

    To compute the mode over columns and not rows, use the axis parameter:

    >>> df.mode(axis='columns', numeric_only=True).execute()
               0    1
    falcon   2.0  NaN
    horse    4.0  NaN
    spider   0.0  8.0
    ostrich  2.0  NaN
    """
    op = DataFrameMode(
        # Validate against ``df`` (as ``idxmin_dataframe`` does) so that the
        # axis is checked together with the object it applies to, before the
        # operator indexes ``df.shape`` with it.
        axis=validate_axis(axis, df),
        numeric_only=numeric_only,
        dropna=dropna,
        combine_size=combine_size,
        output_types=[OutputType.dataframe],
    )
    return op(df)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def mode_series(series, dropna=True, combine_size=None):
    """
    Return the mode(s) of the Series.

    The mode is the value that appears most often. There can be multiple modes.

    Always returns Series even if only one value is returned.

    Parameters
    ----------
    dropna : bool, default True
        Don't consider counts of NaN/NaT.
    combine_size : int, optional
        Forwarded unchanged to the ``DataFrameMode`` operator.

    Returns
    -------
    Series
        Modes of the Series in sorted order.
    """
    # A Series has a single axis, so the operator is always built with axis 0.
    mode_op = DataFrameMode(
        axis=0,
        dropna=dropna,
        combine_size=combine_size,
        output_types=[OutputType.series],
    )
    result = mode_op(series)
    return result
|