maxframe 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import OutputType
|
|
16
|
+
from .sort_values import DataFrameSortValues
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _nsmallest(df, n, columns=None, keep="first"):
    """
    Apply a row-limited, ascending ``DataFrameSortValues`` operator to ``df``.

    Parameters
    ----------
    df : DataFrame or Series
        Object the operator is called on.
    n : int
        Forwarded to the operator as ``nrows``.
    columns : list or str, optional
        Forwarded to the operator as ``by``.
    keep : str, default 'first'
        Forwarded to the operator as ``keep_kind``.

    Returns
    -------
    The result of calling the configured operator on ``df``.
    """
    # NOTE(review): ``output_types`` is fixed to ``OutputType.dataframe`` even
    # when a Series is routed through this helper -- confirm the operator
    # derives the actual output type from its input.
    sort_kwargs = dict(
        output_types=[OutputType.dataframe],
        axis=0,
        by=columns,
        ignore_index=False,
        ascending=True,
        nrows=n,
        keep_kind=keep,
    )
    sort_op = DataFrameSortValues(**sort_kwargs)
    return sort_op(df)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def df_nsmallest(df, n, columns, keep="first"):
    """
    Return the first `n` rows ordered by `columns` in ascending order.

    The `n` rows holding the smallest values in `columns` are returned,
    sorted in ascending order. Columns that are not listed in `columns`
    are returned too, but they take no part in the ordering.

    The result matches
    ``df.sort_values(columns, ascending=True).head(n)``, but this method
    is more performant.

    Parameters
    ----------
    n : int
        Number of items to retrieve.
    columns : list or str
        Column name or names to order by.
    keep : {'first', 'last', 'all'}, default 'first'
        Where there are duplicate values:

        - ``first`` : take the first occurrence.
        - ``last`` : take the last occurrence.
        - ``all`` : do not drop any duplicates, even if it means
          selecting more than `n` items.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
        descending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
    ...                                   434000, 434000, 337000, 337000,
    ...                                   11300, 11300],
    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
    ...                            17036, 182, 38, 311],
    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
    ...                                "IS", "NR", "TV", "AI"]},
    ...                   index=["Italy", "France", "Malta",
    ...                          "Maldives", "Brunei", "Iceland",
    ...                          "Nauru", "Tuvalu", "Anguilla"])
    >>> df.execute()
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru         337000      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    In the following example, we will use ``nsmallest`` to select the
    three rows having the smallest values in column "population".

    >>> df.nsmallest(3, 'population').execute()
              population    GDP alpha-2
    Tuvalu         11300     38      TV
    Anguilla       11300    311      AI
    Iceland       337000  17036      IS

    When using ``keep='last'``, ties are resolved in reverse order:

    >>> df.nsmallest(3, 'population', keep='last').execute()
              population  GDP alpha-2
    Anguilla       11300  311      AI
    Tuvalu         11300   38      TV
    Nauru         337000  182      NR

    When using ``keep='all'``, all duplicate items are maintained:

    >>> df.nsmallest(3, 'population', keep='all').execute()
              population    GDP alpha-2
    Tuvalu         11300     38      TV
    Anguilla       11300    311      AI
    Iceland       337000  17036      IS
    Nauru         337000    182      NR

    To order by the smallest values in column "population" and then "GDP", we can
    specify multiple columns like in the next example.

    >>> df.nsmallest(3, ['population', 'GDP']).execute()
              population  GDP alpha-2
    Tuvalu         11300   38      TV
    Anguilla       11300  311      AI
    Nauru         337000  182      NR
    """
    # Delegate to the shared helper, naming the optional arguments explicitly.
    return _nsmallest(df, n, columns=columns, keep=keep)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def series_nsmallest(df, n=5, keep="first"):
    """
    Return the smallest `n` elements.

    Parameters
    ----------
    n : int, default 5
        Return this many ascending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` smallest values in the Series, sorted in increasing order.

    See Also
    --------
    Series.nlargest: Get the `n` largest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values().head(n)`` for small `n` relative to
    the size of the ``Series`` object.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Brunei": 434000, "Malta": 434000,
    ...                         "Maldives": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = md.Series(countries_population)
    >>> s.execute()
    Italy       59000000
    France      65000000
    Brunei        434000
    Malta         434000
    Maldives      434000
    Iceland       337000
    Nauru          11300
    Tuvalu         11300
    Anguilla       11300
    Montserrat      5200
    dtype: int64

    The `n` smallest elements where ``n=5`` by default.

    >>> s.nsmallest().execute()
    Montserrat    5200
    Nauru        11300
    Tuvalu       11300
    Anguilla     11300
    Iceland     337000
    dtype: int64

    The `n` smallest elements where ``n=3``. Default `keep` value is
    'first' so Nauru and Tuvalu will be kept.

    >>> s.nsmallest(3).execute()
    Montserrat   5200
    Nauru       11300
    Tuvalu      11300
    dtype: int64

    The `n` smallest elements where ``n=3`` and keeping the last
    duplicates. Anguilla and Tuvalu will be kept since they are the last
    with value 11300 based on the index order.

    >>> s.nsmallest(3, keep='last').execute()
    Montserrat   5200
    Anguilla    11300
    Tuvalu      11300
    dtype: int64

    The `n` smallest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has four elements due to the three duplicates.

    >>> s.nsmallest(3, keep='all').execute()
    Montserrat   5200
    Nauru       11300
    Tuvalu      11300
    Anguilla    11300
    dtype: int64
    """
    # ``n`` defaults to 5 so that the documented ``s.nsmallest()`` call works;
    # no ``columns`` are passed because a Series has a single value column.
    return _nsmallest(df, n, keep=keep)
|
|
@@ -16,15 +16,15 @@
|
|
|
16
16
|
def _install():
|
|
17
17
|
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
18
18
|
from .corr import df_corr, df_corrwith, series_autocorr, series_corr
|
|
19
|
-
from .quantile import
|
|
19
|
+
from .quantile import dataframe_quantile, series_quantile
|
|
20
20
|
|
|
21
21
|
for t in SERIES_TYPE:
|
|
22
|
-
t.quantile =
|
|
22
|
+
t.quantile = series_quantile
|
|
23
23
|
t.corr = series_corr
|
|
24
24
|
t.autocorr = series_autocorr
|
|
25
25
|
|
|
26
26
|
for t in DATAFRAME_TYPE:
|
|
27
|
-
t.quantile =
|
|
27
|
+
t.quantile = dataframe_quantile
|
|
28
28
|
t.corr = df_corr
|
|
29
29
|
t.corrwith = df_corrwith
|
|
30
30
|
|
|
@@ -34,6 +34,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
34
34
|
min_periods = Int32Field("min_periods", default=None)
|
|
35
35
|
axis = Int32Field("axis", default=None)
|
|
36
36
|
drop = BoolField("drop", default=None)
|
|
37
|
+
ddof = Int32Field("ddof", default=0)
|
|
37
38
|
|
|
38
39
|
@classmethod
|
|
39
40
|
def _set_inputs(cls, op: "DataFrameCorr", inputs: List[EntityData]):
|
|
@@ -213,7 +213,7 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
213
213
|
return self._call_series(a, inputs)
|
|
214
214
|
|
|
215
215
|
|
|
216
|
-
def
|
|
216
|
+
def series_quantile(series, q=0.5, interpolation="linear"):
|
|
217
217
|
"""
|
|
218
218
|
Return value at the given quantile.
|
|
219
219
|
|
|
@@ -268,7 +268,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):
|
|
|
268
268
|
return op(series, q_input=q_input)
|
|
269
269
|
|
|
270
270
|
|
|
271
|
-
def
|
|
271
|
+
def dataframe_quantile(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
|
|
272
272
|
# FIXME: Timedelta not support. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
|
|
273
273
|
"""
|
|
274
274
|
Return values at the given quantile over requested axis.
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ...core import OutputType
|
|
19
|
+
from ..typing_ import get_function_output_meta
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_dataframe_type_annotation():
    """Check the metadata derived from ``pd.DataFrame[...]`` return annotations."""

    # Case 1: a single bare dtype yields exactly one column of that dtype.
    def single_dtype() -> pd.DataFrame[int]:
        pass

    out_meta = get_function_output_meta(single_dtype)
    assert out_meta is not None
    assert out_meta.output_type.name == "dataframe"
    assert len(out_meta.dtypes) == 1
    assert out_meta.dtypes[0] == np.dtype(int)

    # Case 2: a name -> dtype mapping yields one column per entry, in order.
    def named_columns() -> pd.DataFrame[{"col1": int, "col2": float}]:  # noqa: F821
        pass

    out_meta = get_function_output_meta(named_columns)
    assert out_meta is not None
    assert out_meta.output_type.name == "dataframe"
    assert len(out_meta.dtypes) == 2
    assert out_meta.dtypes[0] == np.dtype(int)
    assert out_meta.dtypes[1] == np.dtype(float)

    # Case 3: a leading type ahead of the mapping is reflected in the index dtype.
    def indexed_columns() -> pd.DataFrame[str, {"col1": int, "col2": float}]:  # noqa: F821
        pass

    out_meta = get_function_output_meta(indexed_columns)
    assert out_meta is not None
    assert out_meta.output_type.name == "dataframe"
    assert len(out_meta.dtypes) == 2
    assert out_meta.index_value.value.dtype == np.dtype("O")
    assert list(out_meta.dtypes.index) == ["col1", "col2"]
    assert list(out_meta.dtypes) == [np.dtype(int), np.dtype(float)]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_series_type_annotation():
    """Check metadata inferred from ``pd.Series[...]`` return annotations."""

    # Case 1: only the value dtype is given.
    def dtype_only() -> pd.Series[np.str_]:
        pass

    inferred = get_function_output_meta(dtype_only)
    assert inferred is not None
    assert inferred.output_type == OutputType.series
    assert inferred.dtype == np.dtype(np.str_)

    # Case 2: a named index field followed by a named value field.
    def named_index_and_value() -> pd.Series[("idx_name", str), ("series_name", np.int64)]:  # noqa: F821
        pass

    inferred = get_function_output_meta(named_index_and_value)
    assert inferred is not None
    assert inferred.output_type == OutputType.series
    assert inferred.name == "series_name"
    assert inferred.dtype == np.dtype(np.int64)
    assert inferred.index_value.value._name == "idx_name"
    assert inferred.index_value.value.dtype == np.dtype("O")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_index_type_annotation():
    """Check metadata inferred from ``pd.Index[...]`` return annotations."""

    # Case 1: a single bare dtype.
    def flat_index() -> pd.Index[np.int64]:
        pass

    inferred = get_function_output_meta(flat_index)
    assert inferred is not None
    assert inferred.output_type == OutputType.index

    # Case 2: a list of (name, dtype) pairs, i.e. several index levels.
    def multi_level_index() -> pd.Index[[("ix1", str), ("ix2", np.int64)]]:  # noqa: F821
        pass

    inferred = get_function_output_meta(multi_level_index)
    assert inferred is not None
    assert inferred.output_type == OutputType.index

    parsed_index = inferred.index_value.value
    assert parsed_index.names == ["ix1", "ix2"]
    assert list(parsed_index.dtypes) == [np.dtype("O"), np.dtype("int64")]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_function_output_meta_corner_cases():
    """Cover inputs without a usable annotation and plain scalar annotations."""

    # No return annotation at all: nothing can be inferred.
    def unannotated():
        pass

    assert get_function_output_meta(unannotated) is None
    # A non-callable object cannot be inspected either.
    assert get_function_output_meta("non-func-obj") is None

    # A plain Python type is interpreted as a scalar dtype.
    def returns_int() -> int:
        pass

    inferred = get_function_output_meta(returns_int)
    assert inferred.dtype == np.dtype("int64")
|
|
@@ -17,9 +17,16 @@ import pandas as pd
|
|
|
17
17
|
import pyarrow as pa
|
|
18
18
|
import pytest
|
|
19
19
|
|
|
20
|
-
from ...
|
|
20
|
+
from ...config import option_context
|
|
21
|
+
from ...core.operator import Operator
|
|
22
|
+
from ...udf import (
|
|
23
|
+
MarkedFunction,
|
|
24
|
+
with_python_requirements,
|
|
25
|
+
with_resources,
|
|
26
|
+
with_running_options,
|
|
27
|
+
)
|
|
21
28
|
from ...utils import ARROW_DTYPE_NOT_SUPPORTED
|
|
22
|
-
from ..utils import _generate_value, pack_func_args
|
|
29
|
+
from ..utils import _generate_value, copy_func_scheduling_hints, pack_func_args
|
|
23
30
|
|
|
24
31
|
try:
|
|
25
32
|
from pandas import ArrowDtype
|
|
@@ -84,6 +91,20 @@ def test_pack_function(df1):
|
|
|
84
91
|
[(np.int32(1), "1")],
|
|
85
92
|
),
|
|
86
93
|
(pa.map_(pa.int32(), pa.string()), 1, [(np.int32(1), "1")]),
|
|
94
|
+
(
|
|
95
|
+
ArrowDtype(
|
|
96
|
+
pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())])
|
|
97
|
+
)
|
|
98
|
+
if ArrowDtype
|
|
99
|
+
else None,
|
|
100
|
+
1,
|
|
101
|
+
{"a": np.int32(1), "b": "1"},
|
|
102
|
+
),
|
|
103
|
+
(
|
|
104
|
+
pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())]),
|
|
105
|
+
1,
|
|
106
|
+
{"a": np.int32(1), "b": "1"},
|
|
107
|
+
),
|
|
87
108
|
(pa.int32(), 1, np.int32(1)),
|
|
88
109
|
(np.datetime64, "2023-01-01", pd.Timestamp("2023-01-01")),
|
|
89
110
|
(np.timedelta64, "1D", pd.Timedelta("1D")),
|
|
@@ -99,3 +120,46 @@ def test_pack_function(df1):
|
|
|
99
120
|
def test_generate_value(dtype, fill_value, expected):
|
|
100
121
|
result = _generate_value(dtype, fill_value)
|
|
101
122
|
assert result == expected
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_copy_func_scheduling_hints():
    """Check that scheduling hints on a function are copied onto an operator."""
    # Scenario 1: a regular function (no scheduling hints).
    # NOTE(review): the option override is assumed to wrap only this first
    # scenario, because the expectations further down include system default
    # resources -- confirm against the upstream file.
    with option_context() as options:
        options.function.default_running_options = {}  # No default options

        def plain_func(x):
            return x + 1

        plain_op = Operator()
        copy_func_scheduling_hints(plain_func, plain_op)
        # Should not set any attributes since regular function has no hints
        for hint_attr in ("expect_engine", "expect_resources", "gpu"):
            assert getattr(plain_op, hint_attr, None) is None

    # Scenario 2: MarkedFunction with scheduling hints

    @with_running_options(engine="DPE", cpu=4, memory="8GiB")
    def hinted_func(x):
        return x + 1

    hinted_op = Operator()
    copy_func_scheduling_hints(hinted_func, hinted_op)
    assert hinted_op.expect_engine == "DPE"
    # The expect_resources will include default values for gpu and gu_quota
    assert hinted_op.expect_resources == {
        "cpu": 4,
        "memory": "8GiB",
        "gpu": 0,
        "gu_quota": None,
    }

    # Scenario 3: MarkedFunction with GPU
    @with_running_options(gu=2)
    def gpu_func(x):
        return x + 1

    gpu_op = Operator()
    copy_func_scheduling_hints(gpu_func, gpu_op)
    assert gpu_op.gpu is True
    # The expect_resources will include the gu value and default values
    # System has default options: {'cpu': 1, 'memory': '4GiB', 'gpu': 0}
    # The with_running_options decorator will override the gpu value with the gu value
    assert gpu_op.expect_resources == {
        "gpu": 2,
        "gu_quota": None,
        "cpu": 1,
        "memory": "4GiB",
    }
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import dataclasses
|
|
16
|
+
import functools
|
|
17
|
+
import inspect
|
|
18
|
+
from typing import Any, Callable, Generic, List, Optional, TypeVar
|
|
19
|
+
|
|
20
|
+
import pandas as pd
|
|
21
|
+
|
|
22
|
+
from ..core import OutputType
|
|
23
|
+
from ..typing_ import PandasDType
|
|
24
|
+
from ..utils import make_dtype
|
|
25
|
+
from .utils import InferredDataFrameMeta, parse_index
|
|
26
|
+
|
|
27
|
+
# Generic type parameter for the annotation helper classes in this module.
T = TypeVar("T")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclasses.dataclass
class _FieldDef:
    """Name/dtype pair describing a single index level or data field."""

    # Label of the field; ``None`` when the annotation supplied no name.
    name: Any
    # Dtype of the field.
    dtype: PandasDType
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _item_to_field_def(item_):
    """Convert one annotation item into a ``_FieldDef``.

    A tuple is read as ``(name, dtype spec)``; any other object is treated as
    the dtype spec of an unnamed field.  The dtype spec is passed through
    ``make_dtype`` in both cases.
    """
    if not isinstance(item_, tuple):
        return _FieldDef(name=None, dtype=make_dtype(item_))

    field_dtype = make_dtype(item_[1])
    field_name = item_[0]
    return _FieldDef(name=field_name, dtype=field_dtype)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class IndexType:
    """Description of an index as a list of ``_FieldDef`` entries."""

    def __init__(self, index_fields: List[_FieldDef]):
        self.index_fields = index_fields

    def __repr__(self):
        field_dtypes = [field.dtype for field in self.index_fields]
        return f"IndexType({field_dtypes})"

    @classmethod
    def from_getitem_args(cls, item) -> "IndexType":
        """Build an ``IndexType`` from the argument of a ``[...]`` subscript.

        A dict or ``pd.Series`` is expanded into its ``(key, value)`` pairs,
        a list contributes one field per element, and any other object
        becomes a single field.
        """
        if isinstance(item, (dict, pd.Series)):
            item = list(item.items())

        entries = item if isinstance(item, list) else [item]
        return IndexType([_item_to_field_def(entry) for entry in entries])
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class SeriesType(Generic[T]):
    """Description of a series: optional index fields plus one value field."""

    def __init__(
        self, index_fields: Optional[List[_FieldDef]], name_and_dtype: _FieldDef
    ):
        # ``None`` means the annotation did not describe the index.
        self.index_fields = index_fields
        self.name_and_dtype = name_and_dtype

    def __repr__(self) -> str:
        return f"SeriesType[{self.name_and_dtype.dtype}]"

    @classmethod
    def from_getitem_args(cls, item) -> "SeriesType":
        """Build a ``SeriesType`` from the argument of a ``[...]`` subscript.

        A single argument describes the value field only.  Otherwise the
        first argument describes the index and the second the value field.
        """
        args = item if isinstance(item, tuple) else (item,)
        if len(args) == 1:
            return SeriesType(None, _item_to_field_def(args[0]))

        value_field = _item_to_field_def(args[1])
        idx_fields = IndexType.from_getitem_args(args[0]).index_fields
        return SeriesType(idx_fields, value_field)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class DataFrameType:
    """Description of a dataframe: optional index fields plus data fields."""

    def __init__(
        self,
        index_fields: Optional[List[_FieldDef]],
        data_fields: List[_FieldDef],
    ):
        # ``None`` means the annotation did not describe the index.
        self.index_fields = index_fields
        self.data_fields = data_fields

    def __repr__(self) -> str:
        column_dtypes = [column.dtype for column in self.data_fields]
        return f"DataFrameType[{column_dtypes}]"

    @classmethod
    def from_getitem_args(cls, item) -> "DataFrameType":
        """Build a ``DataFrameType`` from the argument of a ``[...]`` subscript.

        The last argument always describes the data fields.  When more than
        one argument is given, the first one describes the index.
        """
        args = item if isinstance(item, tuple) else (item,)
        data_fields = IndexType.from_getitem_args(args[-1]).index_fields
        if len(args) == 1:
            return DataFrameType(None, data_fields)

        idx_fields = IndexType.from_getitem_args(args[0]).index_fields
        return DataFrameType(idx_fields, data_fields)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def get_function_output_meta(
    func: Callable, df_obj=None
) -> Optional[InferredDataFrameMeta]:
    """Infer output metadata from the return annotation of ``func``.

    Parameters
    ----------
    func
        Object whose return annotation is inspected.
    df_obj
        Forwarded unchanged to ``parse_index`` when the annotation describes
        an index.

    Returns
    -------
    Optional[InferredDataFrameMeta]
        ``None`` when ``func`` cannot be inspected, has no return annotation,
        or carries a scalar annotation that ``make_dtype`` rejects.
        Otherwise the metadata derived from the annotation:

        * ``DataFrameType`` -> dataframe output with ``dtypes`` built from
          the data fields;
        * ``SeriesType`` -> series output with ``dtype`` and ``name``;
        * ``IndexType`` -> index output;
        * anything else -> scalar output with ``dtype`` from ``make_dtype``.
    """
    # Only the inspection itself is guarded.  ``Exception`` (rather than a
    # bare ``except``) lets KeyboardInterrupt / SystemExit propagate.
    try:
        func_argspec = inspect.getfullargspec(func)
    except Exception:
        return None

    ret_type = (func_argspec.annotations or {}).get("return")
    if ret_type is None:
        return None

    dtypes = dtype = name = None
    index_fields = None
    if isinstance(ret_type, DataFrameType):
        output_type = OutputType.dataframe
        dtypes = pd.Series(
            [fd.dtype for fd in ret_type.data_fields],
            index=[fd.name for fd in ret_type.data_fields],
        )
        index_fields = ret_type.index_fields
    elif isinstance(ret_type, SeriesType):
        output_type = OutputType.series
        dtype = ret_type.name_and_dtype.dtype
        name = ret_type.name_and_dtype.name
        index_fields = ret_type.index_fields
    elif isinstance(ret_type, IndexType):
        output_type = OutputType.index
        index_fields = ret_type.index_fields
    else:
        output_type = OutputType.scalar
        try:
            dtype = make_dtype(ret_type)
        except Exception:
            # The annotation is not something that maps to a dtype.
            return None

    if index_fields is not None:
        # Build an empty pandas index with the annotated names and dtypes so
        # that ``parse_index`` can derive the index metadata from it.
        if len(index_fields) == 1:
            mock_idx = pd.Index(
                [], dtype=index_fields[0].dtype, name=index_fields[0].name
            )
        else:
            col_names = [index_field.name for index_field in index_fields]
            col_dtypes = pd.Series(
                [index_field.dtype for index_field in index_fields], index=col_names
            )
            mock_df = pd.DataFrame([], columns=col_names).astype(col_dtypes)
            mock_idx = pd.MultiIndex.from_frame(mock_df)
        index_value = parse_index(mock_idx, df_obj, store_data=False)
    else:
        index_value = None

    return InferredDataFrameMeta(
        output_type=output_type,
        index_value=index_value,
        dtypes=dtypes,
        dtype=dtype,
        name=name,
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def register_pandas_typing_funcs():
    """Install ``__class_getitem__`` on pandas classes for type annotations.

    After registration, subscripting ``pd.DataFrame``, ``pd.Series`` or
    ``pd.Index`` returns the result of the matching type class's
    ``from_getitem_args``.  A class that already defines
    ``__class_getitem__`` is left untouched.
    """

    def _dispatch_getitem(cls, item, type_cls):
        return type_cls.from_getitem_args(item)

    pandas_to_type_cls = {
        pd.DataFrame: DataFrameType,
        pd.Series: SeriesType,
        pd.Index: IndexType,
    }
    for pd_cls, type_cls in pandas_to_type_cls.items():
        if hasattr(pd_cls, "__class_getitem__"):  # pragma: no cover
            continue
        getitem_impl = functools.partial(_dispatch_getitem, type_cls=type_cls)
        pd_cls.__class_getitem__ = classmethod(getitem_impl)
|