maxframe 2.0.0b2__cp311-cp311-win_amd64.whl → 2.2.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win_amd64.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -28,12 +28,19 @@ from ...tensor.core import TensorOrder
|
|
|
28
28
|
from ...utils import lazy_import
|
|
29
29
|
from ..core import DATAFRAME_TYPE
|
|
30
30
|
from ..initializer import Series as asseries
|
|
31
|
-
from .core import
|
|
31
|
+
from .core import (
|
|
32
|
+
CustomReduction,
|
|
33
|
+
DataFrameReduction,
|
|
34
|
+
DataFrameReductionMixin,
|
|
35
|
+
ReductionCallable,
|
|
36
|
+
)
|
|
32
37
|
|
|
33
38
|
cudf = lazy_import("cudf")
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
class UniqueReduction(CustomReduction):
|
|
42
|
+
_func_name = "unique"
|
|
43
|
+
|
|
37
44
|
def agg(self, data): # noqa: W0221 # pylint: disable=arguments-differ
|
|
38
45
|
xdf = cudf if self.is_gpu() else pd
|
|
39
46
|
# convert to series data
|
|
@@ -43,7 +50,12 @@ class UniqueReduction(CustomReduction):
|
|
|
43
50
|
return data.unique()
|
|
44
51
|
|
|
45
52
|
|
|
46
|
-
class
|
|
53
|
+
class UniqueReductionCallable(ReductionCallable):
    """Reduction callable that applies a ``UniqueReduction`` to its input."""

    def __call__(self, value):
        # The GPU flag travels in the kwargs this callable was built with.
        on_gpu = self.kwargs["is_gpu"]
        reducer = UniqueReduction(name="unique", is_gpu=on_gpu)
        return reducer(value)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DataFrameUnique(DataFrameReduction, DataFrameReductionMixin):
|
|
47
59
|
_op_type_ = opcodes.UNIQUE
|
|
48
60
|
_func_name = "unique"
|
|
49
61
|
|
|
@@ -53,9 +65,14 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
53
65
|
def is_atomic(self):
|
|
54
66
|
return True
|
|
55
67
|
|
|
68
|
+
def get_reduction_args(self, axis=None):
|
|
69
|
+
return {}
|
|
70
|
+
|
|
56
71
|
@classmethod
|
|
57
72
|
def get_reduction_callable(cls, op):
|
|
58
|
-
return
|
|
73
|
+
return UniqueReductionCallable(
|
|
74
|
+
func_name=cls._func_name, kwargs=dict(is_gpu=op.is_gpu())
|
|
75
|
+
)
|
|
59
76
|
|
|
60
77
|
def __call__(self, a):
|
|
61
78
|
if not isinstance(a, ENTITY_TYPE):
|
|
@@ -15,10 +15,21 @@
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
17
|
from ...serialization.serializables import Int32Field
|
|
18
|
-
from .core import DataFrameReductionMixin,
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class
|
|
21
|
+
class VarReductionCallable(ReductionCallable):
    """Reduction callable computing variance from counts, sums and means.

    ``skipna`` and ``ddof`` are read from ``self.kwargs``.
    """

    def __call__(self, value):
        options = self.kwargs
        skipna, ddof = options["skipna"], options["ddof"]
        cnt = value.count()
        if ddof == 0:
            # Population variance: E[x^2] - E[x]^2
            mean_of_squares = (value**2).mean(skipna=skipna)
            return mean_of_squares - (value.mean(skipna=skipna)) ** 2
        # General case: (sum(x^2) - sum(x)^2 / n) / (n - ddof)
        sum_of_squares = (value**2).sum(skipna=skipna)
        numerator = sum_of_squares - value.sum(skipna=skipna) ** 2 / cnt
        return numerator / (cnt - ddof)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DataFrameVar(DataFrameReduction, DataFrameReductionMixin):
|
|
22
33
|
_op_type_ = opcodes.VAR
|
|
23
34
|
_func_name = "var"
|
|
24
35
|
|
|
@@ -27,16 +38,9 @@ class DataFrameVar(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
27
38
|
@classmethod
|
|
28
39
|
def get_reduction_callable(cls, op: "DataFrameVar"):
|
|
29
40
|
skipna, ddof = op.skipna, op.ddof
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
if ddof == 0:
|
|
34
|
-
return (x**2).mean(skipna=skipna) - (x.mean(skipna=skipna)) ** 2
|
|
35
|
-
return ((x**2).sum(skipna=skipna) - x.sum(skipna=skipna) ** 2 / cnt) / (
|
|
36
|
-
cnt - ddof
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
return var
|
|
41
|
+
return VarReductionCallable(
|
|
42
|
+
func_name="var", kwargs={"skipna": skipna, "ddof": ddof}
|
|
43
|
+
)
|
|
40
44
|
|
|
41
45
|
|
|
42
46
|
def var_series(series, axis=None, skipna=True, level=None, ddof=1, method=None):
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from .melt import melt
|
|
17
|
+
from .pivot import pivot
|
|
18
|
+
from .pivot_table import pivot_table
|
|
19
|
+
from .stack import stack
|
|
20
|
+
from .unstack import unstack
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _install():
    """Attach the reshape functions as methods on DataFrame / Series types."""
    from ..core import DATAFRAME_TYPE, SERIES_TYPE

    # method name -> implementation, in the order they are installed
    frame_methods = (
        ("melt", melt),
        ("pivot", pivot),
        ("pivot_table", pivot_table),
        ("stack", stack),
        ("unstack", unstack),
    )
    for frame_cls in DATAFRAME_TYPE:
        for method_name, impl in frame_methods:
            setattr(frame_cls, method_name, impl)

    # Series objects only receive ``unstack``
    for series_cls in SERIES_TYPE:
        setattr(series_cls, "unstack", unstack)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
_install()
|
|
38
|
+
del _install
|
|
@@ -35,6 +35,7 @@ class DataFramePivot(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
35
35
|
columns = AnyField("columns", default=None)
|
|
36
36
|
|
|
37
37
|
agg_results = KeyField("agg_results", default=None)
|
|
38
|
+
fill_value = AnyField("fill_value", default=None)
|
|
38
39
|
|
|
39
40
|
def __init__(self, aggfunc=None, **kw):
|
|
40
41
|
if aggfunc is None:
|
|
@@ -39,6 +39,7 @@ class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
39
39
|
margins_name = StringField("margins_name", default=None)
|
|
40
40
|
sort = BoolField("sort", default=False)
|
|
41
41
|
|
|
42
|
+
# generate intermediate agg result to infer value of columns
|
|
42
43
|
agg_results = KeyField("agg_results", default=None)
|
|
43
44
|
|
|
44
45
|
def __init__(self, aggfunc=None, **kw):
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...udf import builtin_function
|
|
16
|
+
from ..core import SERIES_TYPE
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@builtin_function
def _adjust_names(pivoted, idx_names=None, col_names=None, remove_col_level=False):
    """Fix up axis names (and optionally drop the top column level) of a
    pivoted chunk.

    Parameters
    ----------
    pivoted
        Object supporting ``droplevel`` and ``rename_axis``.
    idx_names, col_names
        Names applied to axis 0 / axis 1 via ``rename_axis``; skipped when
        falsy.
    remove_col_level : bool
        When true, level 0 of the column axis is dropped first.
    """
    result = pivoted.droplevel(0, axis=1) if remove_col_level else pivoted
    for axis_names, axis in ((idx_names, 0), (col_names, 1)):
        if axis_names:
            result = result.rename_axis(axis_names, axis=axis)
    return result
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def unstack(df_or_series, level=-1, fill_value=None):
    """
    Unstack, also known as pivot, Series or DataFrame with MultiIndex.

    The selected index level(s) are moved to the column axis.

    Parameters
    ----------
    df_or_series : Series or DataFrame
        Object whose index level(s) should be unstacked.
    level : int, str, or list of these, default last level
        Level(s) to unstack, can pass level name.
    fill_value : scalar value, default None
        Value to use when replacing NaN values.

    Returns
    -------
    DataFrame
        Unstacked data. When a DataFrame with a single-level index is
        passed, the input is stacked instead and the result has the
        column level(s) placed ahead of the original index level(s).

    Raises
    ------
    ValueError
        If a Series whose index has a single level is passed.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3, 4],
    ...               index=md.MultiIndex.from_product([['one', 'two'],
    ...                                                 ['a', 'b']]))
    >>> s.execute()
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64

    >>> s.unstack(level=-1).execute()
         a  b
    one  1  2
    two  3  4

    >>> s.unstack(level=0).execute()
       one  two
    a    1    3
    b    2    4
    """
    if df_or_series.index.nlevels == 1:
        if isinstance(df_or_series, SERIES_TYPE):
            raise ValueError("index must be a MultiIndex to unstack")
        raw_idx_nlevels = df_or_series.index.nlevels
        stacked = df_or_series.stack(level=level, fill_value=fill_value)
        # ``nlevels`` is an int, so the level positions must be enumerated
        # with range(); list(<int>) would raise TypeError.
        orig_order = list(range(stacked.index.nlevels))
        # rotate so that the levels coming from the columns lead the index
        new_order = orig_order[raw_idx_nlevels:] + orig_order[:raw_idx_nlevels]
        return stacked.reorder_levels(new_order)

    nlevels = df_or_series.index.nlevels
    idx_names = list(df_or_series.index.names)
    if df_or_series.ndim == 1:
        col_names = []
    else:
        col_names = list(df_or_series.columns.names)

    # resolve level names / negative positions into non-negative positions
    name_to_idx = {name: i for i, name in enumerate(idx_names)}
    level = level if isinstance(level, list) else [level]
    level_idxes = [(nlevels + name_to_idx.get(lv, lv)) % nlevels for lv in level]
    level_idx_set = set(level_idxes)
    # temporary column names used once index levels become ordinary columns
    level_cols = [f"__level_{idx}" for idx in range(nlevels)]

    if df_or_series.ndim == 1:
        data = df_or_series.to_frame(name="__data")
    else:
        data = df_or_series
    pivot_val_col = list(data.dtypes.index)
    pivot_col_col = [level_cols[i] for i in level_idxes]
    pivot_idx_col = [level_cols[i] for i in range(nlevels) if i not in level_idx_set]

    new_idx_names = [idx_names[i] for i in range(nlevels) if i not in level_idx_set]
    new_col_names = col_names + [idx_names[i] for i in level_idxes]

    data = data.reset_index(names=level_cols)
    pivoted = data.pivot(
        index=pivot_idx_col, columns=pivot_col_col, values=pivot_val_col
    )
    pivoted.op.fill_value = fill_value
    # restore the original axis names; for Series input also drop the
    # placeholder "__data" column level
    return pivoted.mf.apply_chunk(
        _adjust_names,
        col_names=new_col_names,
        idx_names=new_idx_names,
        remove_col_level=df_or_series.ndim == 1,
        skip_infer=True,
        output_type="dataframe",
    )
|
|
@@ -18,14 +18,22 @@ from .sort_values import DataFrameSortValues
|
|
|
18
18
|
|
|
19
19
|
def _install():
|
|
20
20
|
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
21
|
+
from .argsort import series_argsort
|
|
22
|
+
from .nlargest import df_nlargest, series_nlargest
|
|
23
|
+
from .nsmallest import df_nsmallest, series_nsmallest
|
|
21
24
|
from .sort_index import sort_index
|
|
22
25
|
from .sort_values import dataframe_sort_values, series_sort_values
|
|
23
26
|
|
|
24
27
|
for cls in DATAFRAME_TYPE:
|
|
28
|
+
setattr(cls, "nlargest", df_nlargest)
|
|
29
|
+
setattr(cls, "nsmallest", df_nsmallest)
|
|
25
30
|
setattr(cls, "sort_values", dataframe_sort_values)
|
|
26
31
|
setattr(cls, "sort_index", sort_index)
|
|
27
32
|
|
|
28
33
|
for cls in SERIES_TYPE:
|
|
34
|
+
setattr(cls, "argsort", series_argsort)
|
|
35
|
+
setattr(cls, "nlargest", series_nlargest)
|
|
36
|
+
setattr(cls, "nsmallest", series_nsmallest)
|
|
29
37
|
setattr(cls, "sort_values", series_sort_values)
|
|
30
38
|
setattr(cls, "sort_index", sort_index)
|
|
31
39
|
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def series_argsort(series, axis=0, kind="quicksort", order=None, stable=None):
    """
    Return the integer positions that would sort the values of a Series.

    The Series is converted to a tensor, ``argsort`` is applied along
    axis 0, and the result is wrapped into a new Series that reuses the
    index of the input.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Accepted for compatibility with DataFrame; sorting is
        always performed along axis 0.
    kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
        Sorting algorithm, forwarded to the tensor ``argsort``. See
        :func:`numpy.sort` for details. 'mergesort' and 'stable' are the
        only stable algorithms.
    order : None
        Ignored. Accepted for compatibility with numpy.
    stable : None
        Ignored. Accepted for compatibility with numpy.

    Returns
    -------
    Series
        Integer positions of the values within the sort order, indexed
        like the input Series.

    See Also
    --------
    numpy.ndarray.argsort : Returns the indices that would sort this array.

    Notes
    -----
    NOTE(review): pandas marks positions of NA values with -1. This
    implementation hands the values straight to the tensor ``argsort``,
    so NA handling should be confirmed before relying on that behavior.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s = md.Series([3, 2, 1])
    >>> s.argsort().execute()
    0    2
    1    1
    2    0
    dtype: int64
    """
    from ... import tensor as mt
    from ..datasource.from_tensor import series_from_tensor

    # These arguments exist only for signature compatibility.
    del axis, order, stable

    values = series.to_tensor()
    sorted_positions = mt.argsort(values, axis=0, kind=kind)
    return series_from_tensor(sorted_positions, index=series.index)
|
maxframe/dataframe/sort/core.py
CHANGED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import OutputType
|
|
16
|
+
from .sort_values import DataFrameSortValues
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _nlargest(df, n, columns=None, keep="first"):
    """Build and apply a descending, row-limited sort-values operand.

    Shared implementation behind ``df_nlargest`` and ``series_nlargest``.

    Parameters
    ----------
    df : DataFrame or Series
        Object the operand is applied to.
    n : int
        Row limit, passed to the operand as ``nrows``.
    columns : label or list of labels, optional
        Sort keys, passed to the operand as ``by``.
    keep : str, default 'first'
        Tie-handling policy, passed to the operand as ``keep_kind``.

    Returns
    -------
    The result of calling the configured ``DataFrameSortValues`` on ``df``.
    """
    sort_options = {
        "output_types": [OutputType.dataframe],
        "axis": 0,
        "by": columns,
        "ignore_index": False,
        # Descending order puts the largest values first.
        "ascending": False,
        "nrows": n,
        "keep_kind": keep,
    }
    sort_op = DataFrameSortValues(**sort_options)
    return sort_op(df)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def df_nlargest(df, n, columns, keep="first"):
    """
    Return the first `n` rows ordered by `columns` in descending order.

    The rows holding the largest values in `columns` are returned in
    descending order. Columns that are not listed in `columns` are kept
    in the result, but they do not take part in the ordering.

    This method is equivalent to
    ``df.sort_values(columns, ascending=False).head(n)``, but more
    performant.

    Parameters
    ----------
    n : int
        Number of rows to return.
    columns : label or list of labels
        Column label(s) to order by.
    keep : {'first', 'last', 'all'}, default 'first'
        Where there are duplicate values:

        - ``first`` : prioritize the first occurrence(s)
        - ``last`` : prioritize the last occurrence(s)
        - ``all`` : do not drop any duplicates, even if it means
          selecting more than `n` items.

    Returns
    -------
    DataFrame
        The first `n` rows ordered by the given columns in descending
        order.

    See Also
    --------
    DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
        ascending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Notes
    -----
    This function cannot be used with all column types. For example, when
    specifying columns with `object` or `category` dtypes, ``TypeError`` is
    raised.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
    ...                                   434000, 434000, 337000, 11300,
    ...                                   11300, 11300],
    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
    ...                            17036, 182, 38, 311],
    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
    ...                                "IS", "NR", "TV", "AI"]},
    ...                   index=["Italy", "France", "Malta",
    ...                          "Maldives", "Brunei", "Iceland",
    ...                          "Nauru", "Tuvalu", "Anguilla"])
    >>> df.execute()
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru          11300      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    In the following example, we will use ``nlargest`` to select the three
    rows having the largest values in column "population".

    >>> df.nlargest(3, 'population').execute()
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Malta       434000    12011      MT

    When using ``keep='last'``, ties are resolved in reverse order:

    >>> df.nlargest(3, 'population', keep='last').execute()
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Brunei      434000    12128      BN

    When using ``keep='all'``, all duplicate items are maintained:

    >>> df.nlargest(3, 'population', keep='all').execute()
              population      GDP alpha-2
    France      65000000  2583560      FR
    Italy       59000000  1937894      IT
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN

    To order by the largest values in column "population" and then "GDP",
    we can specify multiple columns like in the next example.

    >>> df.nlargest(3, ['population', 'GDP']).execute()
            population      GDP alpha-2
    France    65000000  2583560      FR
    Italy     59000000  1937894      IT
    Brunei      434000    12128      BN
    """
    # All of the work happens in the shared helper.
    result = _nlargest(df, n, columns=columns, keep=keep)
    return result
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def series_nlargest(df, n=5, keep="first"):
    """
    Return the largest `n` elements.

    Parameters
    ----------
    df : Series
        The Series to select from. The parameter keeps the name ``df``
        because the function is installed as a bound method and shares
        its implementation with the DataFrame variant.
    n : int, default 5
        Return this many descending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` largest values in the Series, sorted in decreasing order.

    See Also
    --------
    Series.nsmallest: Get the `n` smallest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
    relative to the size of the ``Series`` object.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Malta": 434000, "Maldives": 434000,
    ...                         "Brunei": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = md.Series(countries_population)
    >>> s.execute()
    Italy       59000000
    France      65000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    Iceland       337000
    Nauru          11300
    Tuvalu         11300
    Anguilla       11300
    Montserrat      5200
    dtype: int64

    The `n` largest elements where ``n=5`` by default.

    >>> s.nlargest().execute()
    France      65000000
    Italy       59000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    dtype: int64

    The `n` largest elements where ``n=3``. Default `keep` value is 'first'
    so Malta will be kept.

    >>> s.nlargest(3).execute()
    France    65000000
    Italy     59000000
    Malta       434000
    dtype: int64

    The `n` largest elements where ``n=3`` and keeping the last duplicates.
    Brunei will be kept since it is the last with value 434000 based on
    the index order.

    >>> s.nlargest(3, keep='last').execute()
    France      65000000
    Italy       59000000
    Brunei        434000
    dtype: int64

    The `n` largest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has five elements due to the three duplicates.

    >>> s.nlargest(3, keep='all').execute()
    France      65000000
    Italy       59000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    dtype: int64
    """
    # ``n`` defaults to 5 so that the documented ``s.nlargest()`` call works;
    # a Series has no columns to order by, so none are passed to the helper.
    return _nlargest(df, n, keep=keep)
|