maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def series_argsort(series, axis=0, kind="quicksort", order=None, stable=None):
    """
    Compute the integer positions that would sort the values of a Series.

    The Series is converted to a tensor, arg-sorted along axis 0 with the
    requested algorithm, and the resulting positions are wrapped into a new
    Series that reuses the index of the input.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused; sorting always runs along axis 0. Accepted for
        compatibility with DataFrame.
    kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
        Sorting algorithm, forwarded to the tensor ``argsort``. See
        :func:`numpy.sort` for more information. 'mergesort' and 'stable'
        are the only stable algorithms.
    order : None
        Has no effect; accepted for compatibility with numpy.
    stable : None
        Has no effect; accepted for compatibility with numpy.

    Returns
    -------
    Series[np.intp]
        Positions of the values within the sort order.

    See Also
    --------
    maxframe.tensor.argsort : Returns the indices that would sort this array.

    Notes
    -----
    The pandas API this function mirrors documents NA/null values as being
    omitted from the sort and reported as -1. This function only delegates
    to the tensor ``argsort``, so the actual NA handling depends on that
    routine.
    NOTE(review): confirm NA behaviour against the tensor implementation.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([3, 2, 1])
    >>> s.argsort().execute()
    0    2
    1    1
    2    0
    dtype: int64
    """
    from ... import tensor as mt
    from ..datasource.from_tensor import series_from_tensor

    # These arguments exist purely for signature compatibility and are
    # deliberately discarded; the sort axis is fixed to 0 below.
    del axis, order, stable

    sorted_positions = mt.argsort(series.to_tensor(), axis=0, kind=kind)
    return series_from_tensor(sorted_positions, index=series.index)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def index_argsort(index, *args, **kwargs):
    """
    Return the positions that would sort the values of an Index.

    The index is converted to a tensor via ``index.to_tensor()``; every
    additional positional and keyword argument is forwarded unchanged to
    the tensor ``argsort``, whose result is returned as-is.
    """
    from ... import tensor as mt

    index_values = index.to_tensor()
    return mt.argsort(index_values, *args, **kwargs)
|
maxframe/dataframe/sort/core.py
CHANGED
|
@@ -32,5 +32,6 @@ class DataFrameSortOperator(DataFrameOperator):
|
|
|
32
32
|
na_position = StringField("na_position")
|
|
33
33
|
ignore_index = BoolField("ignore_index")
|
|
34
34
|
parallel_kind = StringField("parallel_kind")
|
|
35
|
-
psrs_kinds = ListField("psrs_kinds", FieldTypes.string)
|
|
35
|
+
psrs_kinds = ListField("psrs_kinds", FieldTypes.string, default=None)
|
|
36
36
|
nrows = Int64Field("nrows", default=None)
|
|
37
|
+
keep_kind = StringField("keep_kind", default="head")
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import OutputType
|
|
16
|
+
from .sort_values import DataFrameSortValues
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _nlargest(df, n, columns=None, keep="first"):
    """
    Build a descending ``DataFrameSortValues`` operator limited to ``n``
    rows and apply it to ``df``.

    ``columns`` is passed as the operator's ``by`` argument and ``keep`` as
    its ``keep_kind`` argument; the result of calling the operator on
    ``df`` is returned.
    """
    sort_options = dict(
        output_types=[OutputType.dataframe],
        axis=0,
        by=columns,
        ignore_index=False,
        # "largest" means a descending sort
        ascending=False,
        nrows=n,
        keep_kind=keep,
    )
    top_n_op = DataFrameSortValues(**sort_options)
    return top_n_op(df)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def df_nlargest(df, n, columns, keep="first"):
    """
    Return the first `n` rows ordered by `columns` in descending order.

    The rows holding the `n` largest values in `columns` are returned in
    descending order. Columns that are not listed in `columns` are kept in
    the result but play no part in the ordering.

    This method is equivalent to
    ``df.sort_values(columns, ascending=False).head(n)``, but more
    performant.

    Parameters
    ----------
    n : int
        Number of rows to return.
    columns : label or list of labels
        Column label(s) to order by.
    keep : {'first', 'last', 'all'}, default 'first'
        How to treat duplicate values:

        - ``first`` : prioritize the first occurrence(s)
        - ``last`` : prioritize the last occurrence(s)
        - ``all`` : do not drop any duplicates, even if it means
          selecting more than `n` items.

    Returns
    -------
    DataFrame
        The first `n` rows ordered by the given columns in descending
        order.

    See Also
    --------
    DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
        ascending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Notes
    -----
    This function cannot be used with all column types. For example, when
    specifying columns with `object` or `category` dtypes, ``TypeError`` is
    raised.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
    ...                                   434000, 434000, 337000, 11300,
    ...                                   11300, 11300],
    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
    ...                            17036, 182, 38, 311],
    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
    ...                                "IS", "NR", "TV", "AI"]},
    ...                   index=["Italy", "France", "Malta",
    ...                          "Maldives", "Brunei", "Iceland",
    ...                          "Nauru", "Tuvalu", "Anguilla"])
    >>> df.execute()
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru          11300      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    In the following example, we will use ``nlargest`` to select the three
    rows having the largest values in column "population".

    >>> df.nlargest(3, 'population').execute()
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Malta       434000    12011      MT

    When using ``keep='last'``, ties are resolved in reverse order:

    >>> df.nlargest(3, 'population', keep='last').execute()
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Brunei      434000    12128      BN

    When using ``keep='all'``, all duplicate items are maintained:

    >>> df.nlargest(3, 'population', keep='all').execute()
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN

    To order by the largest values in column "population" and then "GDP",
    we can specify multiple columns like in the next example.

    >>> df.nlargest(3, ['population', 'GDP']).execute()
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Brunei      434000    12128      BN
    """
    # All the work happens in the shared private helper.
    return _nlargest(df, n, columns=columns, keep=keep)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def series_nlargest(df, n=5, keep="first"):
    """
    Return the largest `n` elements.

    Parameters
    ----------
    n : int, default 5
        Return this many descending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` largest values in the Series, sorted in decreasing order.

    See Also
    --------
    Series.nsmallest: Get the `n` smallest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
    relative to the size of the ``Series`` object.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Malta": 434000, "Maldives": 434000,
    ...                         "Brunei": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = md.Series(countries_population)
    >>> s.execute()
    Italy         59000000
    France        65000000
    Malta           434000
    Maldives        434000
    Brunei          434000
    Iceland         337000
    Nauru            11300
    Tuvalu           11300
    Anguilla         11300
    Montserrat        5200
    dtype: int64

    The `n` largest elements where ``n=5`` by default.

    >>> s.nlargest().execute()
    France      65000000
    Italy       59000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    dtype: int64

    The `n` largest elements where ``n=3``. Default `keep` value is 'first'
    so Malta will be kept.

    >>> s.nlargest(3).execute()
    France    65000000
    Italy     59000000
    Malta       434000
    dtype: int64

    The `n` largest elements where ``n=3`` and keeping the last duplicates.
    Brunei will be kept since it is the last with value 434000 based on
    the index order.

    >>> s.nlargest(3, keep='last').execute()
    France    65000000
    Italy     59000000
    Brunei      434000
    dtype: int64

    The `n` largest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has five elements due to the three duplicates.

    >>> s.nlargest(3, keep='all').execute()
    France      65000000
    Italy       59000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    dtype: int64
    """
    # ``df`` is the object the call is made on (presumably a Series bound as
    # ``Series.nlargest`` -- confirm against the registration site).
    # ``n`` carries the documented default of 5 so ``s.nlargest()`` works.
    return _nlargest(df, n, keep=keep)
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import OutputType
|
|
16
|
+
from .sort_values import DataFrameSortValues
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _nsmallest(df, n, columns=None, keep="first"):
    """
    Create the sort operator behind the ``nsmallest`` entry points and
    apply it to ``df``.

    The operator is a ``DataFrameSortValues`` configured for an ascending
    sort along axis 0 that keeps the original index, with ``nrows`` set to
    ``n`` and ``keep_kind`` set to ``keep``.

    Parameters
    ----------
    df
        Object the operator is called on.
    n : int
        Value forwarded as ``nrows``.
    columns : list or str, optional
        Value forwarded as ``by``; ``None`` when the caller gives no columns.
    keep : {'first', 'last', 'all'}, default 'first'
        Value forwarded as ``keep_kind``.

    Returns
    -------
    Whatever calling the operator on ``df`` returns.
    """
    # NOTE(review): output_types is always ``OutputType.dataframe``, also when
    # this helper is reached from ``series_nsmallest`` -- confirm this is
    # intended.
    sort_kwargs = dict(
        output_types=[OutputType.dataframe],
        axis=0,
        by=columns,
        ignore_index=False,
        ascending=True,
        nrows=n,
        keep_kind=keep,
    )
    sort_op = DataFrameSortValues(**sort_kwargs)
    return sort_op(df)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def df_nsmallest(df, n, columns, keep="first"):
    """
    Return the first `n` rows ordered by `columns` in ascending order.

    The `n` rows holding the smallest values in `columns` are returned,
    sorted in ascending order. Columns that are not listed in `columns`
    are part of the result as well, but they take no part in the ordering.

    The result is equivalent to
    ``df.sort_values(columns, ascending=True).head(n)``, but this method
    is more performant.

    Parameters
    ----------
    n : int
        Number of items to retrieve.
    columns : list or str
        Column name or names to order by.
    keep : {'first', 'last', 'all'}, default 'first'
        How duplicate values are handled:

        - ``first`` : take the first occurrence.
        - ``last`` : take the last occurrence.
        - ``all`` : do not drop any duplicates, even if it means
          selecting more than `n` items.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
        descending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
    ...                                   434000, 434000, 337000, 337000,
    ...                                   11300, 11300],
    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
    ...                            17036, 182, 38, 311],
    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
    ...                                "IS", "NR", "TV", "AI"]},
    ...                   index=["Italy", "France", "Malta",
    ...                          "Maldives", "Brunei", "Iceland",
    ...                          "Nauru", "Tuvalu", "Anguilla"])
    >>> df.execute()
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru         337000      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    Select the three rows with the smallest values in column
    "population" using ``nsmallest``.

    >>> df.nsmallest(3, 'population').execute()
              population    GDP alpha-2
    Tuvalu         11300     38      TV
    Anguilla       11300    311      AI
    Iceland       337000  17036      IS

    With ``keep='last'``, ties are resolved in reverse order:

    >>> df.nsmallest(3, 'population', keep='last').execute()
              population  GDP alpha-2
    Anguilla       11300  311      AI
    Tuvalu         11300   38      TV
    Nauru         337000  182      NR

    With ``keep='all'``, all duplicate items are maintained:

    >>> df.nsmallest(3, 'population', keep='all').execute()
              population    GDP alpha-2
    Tuvalu         11300     38      TV
    Anguilla       11300    311      AI
    Iceland       337000  17036      IS
    Nauru         337000    182      NR

    Several columns can be given to order by the smallest values in
    column "population" first and then by "GDP":

    >>> df.nsmallest(3, ['population', 'GDP']).execute()
              population  GDP alpha-2
    Tuvalu         11300   38      TV
    Anguilla       11300  311      AI
    Nauru         337000  182      NR
    """
    # Delegate to the shared helper; arguments are passed by keyword so the
    # mapping onto its parameters is explicit.
    result = _nsmallest(df, n, columns=columns, keep=keep)
    return result
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def series_nsmallest(df, n=5, keep="first"):
    """
    Return the smallest `n` elements.

    Parameters
    ----------
    n : int, default 5
        Return this many ascending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` smallest values in the Series, sorted in increasing order.

    See Also
    --------
    Series.nlargest: Get the `n` largest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values().head(n)`` for small `n` relative to
    the size of the ``Series`` object.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Brunei": 434000, "Malta": 434000,
    ...                         "Maldives": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = md.Series(countries_population)
    >>> s.execute()
    Italy         59000000
    France        65000000
    Brunei          434000
    Malta           434000
    Maldives        434000
    Iceland         337000
    Nauru            11300
    Tuvalu           11300
    Anguilla         11300
    Montserrat        5200
    dtype: int64

    The `n` smallest elements where ``n=5`` by default.

    >>> s.nsmallest().execute()
    Montserrat      5200
    Nauru          11300
    Tuvalu         11300
    Anguilla       11300
    Iceland       337000
    dtype: int64

    The `n` smallest elements where ``n=3``. Default `keep` value is
    'first' so Nauru and Tuvalu will be kept.

    >>> s.nsmallest(3).execute()
    Montserrat     5200
    Nauru         11300
    Tuvalu        11300
    dtype: int64

    The `n` smallest elements where ``n=3`` and keeping the last
    duplicates. Anguilla and Tuvalu will be kept since they are the last
    with value 11300 based on the index order.

    >>> s.nsmallest(3, keep='last').execute()
    Montserrat     5200
    Anguilla      11300
    Tuvalu        11300
    dtype: int64

    The `n` smallest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has four elements due to the three duplicates.

    >>> s.nsmallest(3, keep='all').execute()
    Montserrat     5200
    Nauru         11300
    Tuvalu        11300
    Anguilla      11300
    dtype: int64
    """
    # ``df`` is the object the call is made on (presumably a Series bound as
    # ``Series.nsmallest`` -- confirm against the registration site).
    # ``n`` carries the documented default of 5 so ``s.nsmallest()`` works.
    return _nsmallest(df, n, keep=keep)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ...serialization.serializables import BoolField, StringField
|
|
19
|
+
from ..operators import DataFrameOperatorMixin
|
|
20
|
+
from .core import DataFrameSortOperator
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DataFrameRank(DataFrameSortOperator, DataFrameOperatorMixin):
    """
    Operator describing a ``rank`` computation on a DataFrame or a Series.

    Calling the operator on an object builds the output object whose
    value dtype(s) are float.
    """

    # tie-breaking method name as given to ``rank``
    method = StringField("method", default=None)
    # when truthy, only numeric columns are listed in a DataFrame result
    numeric_only = BoolField("numeric_only", default=None)
    # percentile-form flag as given to ``rank``
    pct = BoolField("pct", default=False)

    @property
    def na_option(self):
        """Return ``na_position``, the field ``rank`` stores ``na_option`` in."""
        return self.na_position

    def __call__(self, df_obj):
        """
        Build the output of the rank operation for ``df_obj``.

        Parameters
        ----------
        df_obj
            Input object; ``ndim == 2`` is treated as a DataFrame, anything
            else as a Series.

        Returns
        -------
        The object created by ``new_dataframe`` or ``new_series`` with
        ``df_obj`` as its only input.
        """
        params = df_obj.params
        float_dtype = np.dtype(float)

        if df_obj.ndim == 2:  # dataframe
            if self.numeric_only:
                sel_df = df_obj.select_dtypes(include=[np.number])
                cols = sel_df.dtypes.index
            else:
                cols = df_obj.dtypes.index
            # NOTE(review): with numeric_only the remaining entries of
            # ``params`` still come from ``df_obj`` (all columns) -- confirm
            # that shape/column metadata does not need narrowing as well.
            params["dtypes"] = pd.Series([float_dtype] * len(cols), index=cols)
            return self.new_dataframe([df_obj], **params)

        # A Series carries a single ``dtype``; writing the float dtype under
        # the DataFrame-only ``dtypes`` key would leave the input dtype in
        # place for the result.
        params["dtype"] = float_dtype
        return self.new_series([df_obj], **params)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def rank(
    df,
    axis=0,
    method="average",
    numeric_only=False,
    na_option="keep",
    ascending=True,
    pct=False,
):
    """
    Compute numerical data ranks (1 through n) along axis.

    By default, equal values are assigned a rank that is the average of the
    ranks of those values.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Index to direct ranking.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        How to rank the group of records that have the same value (i.e. ties):

        * average: average rank of the group
        * min: lowest rank in the group
        * max: highest rank in the group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups.

    numeric_only : bool, optional
        For DataFrame objects, rank only numeric columns if set to True.
    na_option : {'keep', 'top', 'bottom'}, default 'keep'
        How to rank NaN values:

        * keep: assign NaN rank to NaN values
        * top: assign lowest rank to NaN values
        * bottom: assign highest rank to NaN values

    ascending : bool, default True
        Whether or not the elements should be ranked in ascending order.
    pct : bool, default False
        Whether or not to display the returned rankings in percentile
        form.

    Returns
    -------
    same type as caller
        Return a Series or DataFrame with data ranks as values.

    Raises
    ------
    ValueError
        If `method` or `na_option` is not one of the values listed above.

    See Also
    --------
    core.groupby.GroupBy.rank : Rank of values within each group.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame(data={'Animal': ['cat', 'penguin', 'dog',
    ...                                    'spider', 'snake'],
    ...                         'Number_legs': [4, 2, 4, 8, mt.nan]})
    >>> df.execute()
        Animal  Number_legs
    0      cat          4.0
    1  penguin          2.0
    2      dog          4.0
    3   spider          8.0
    4    snake          NaN

    The following example shows how the method behaves with the above
    parameters:

    * default_rank: this is the default behaviour obtained without using
      any parameter.
    * max_rank: setting ``method = 'max'`` the records that have the
      same values are ranked using the highest rank (e.g.: since 'cat'
      and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.)
    * NA_bottom: choosing ``na_option = 'bottom'``, if there are records
      with NaN values they are placed at the bottom of the ranking.
    * pct_rank: when setting ``pct = True``, the ranking is expressed as
      percentile rank.

    >>> df['default_rank'] = df['Number_legs'].rank()
    >>> df['max_rank'] = df['Number_legs'].rank(method='max')
    >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom')
    >>> df['pct_rank'] = df['Number_legs'].rank(pct=True)
    >>> df.execute()
        Animal  Number_legs  default_rank  max_rank  NA_bottom  pct_rank
    0      cat          4.0           2.5       3.0        2.5     0.625
    1  penguin          2.0           1.0       1.0        1.0     0.250
    2      dog          4.0           2.5       3.0        2.5     0.625
    3   spider          8.0           4.0       4.0        4.0     1.000
    4    snake          NaN           NaN       NaN        5.0       NaN
    """
    # Reject values outside the documented sets right away: the operator is
    # only built here, so a bad value would otherwise surface much later.
    valid_methods = ("average", "min", "max", "first", "dense")
    if method not in valid_methods:
        raise ValueError(
            "method must be one of %s, got %r" % (", ".join(valid_methods), method)
        )
    valid_na_options = ("keep", "top", "bottom")
    if na_option not in valid_na_options:
        raise ValueError(
            "na_option must be one of %s, got %r"
            % (", ".join(valid_na_options), na_option)
        )

    # Map the documented string aliases to their integer form so the operator
    # always receives an integer axis; other values pass through untouched.
    if isinstance(axis, str):
        axis = {"index": 0, "columns": 1}.get(axis, axis)

    op = DataFrameRank(
        axis=axis,
        method=method,
        numeric_only=numeric_only,
        # the sort operator base stores this option under ``na_position``
        na_position=na_option,
        ascending=ascending,
        pct=pct,
    )
    return op(df)
|