maxframe 2.0.0b2__cp38-cp38-win32.whl → 2.2.0__cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField, Int64Field
|
|
17
|
+
from .core import BaseGroupByWindowOp
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GroupByShift(BaseGroupByWindowOp):
    """Operator that records the arguments of a per-group ``shift`` call.

    The class only stores the arguments and replays them on a mock groupby
    object; how that mock result is consumed is decided by
    ``BaseGroupByWindowOp`` (presumably to derive output metadata -- confirm
    in the base class).
    """

    _op_type_ = opcodes.SHIFT

    # Arguments forwarded to ``shift`` on the mock groupby.
    # NOTE(review): ``periods`` is declared as an Int64 field, while the
    # public ``shift`` docstring also advertises a sequence of ints --
    # confirm whether sequences are really supported.
    periods = Int64Field("periods", default=None)
    freq = AnyField("freq", default=None)
    fill_value = AnyField("fill_value", default=None)

    def _calc_mock_result_df(self, mock_groupby):
        """Call ``shift`` on ``mock_groupby`` with the stored arguments.

        Parameters
        ----------
        mock_groupby
            Object exposing a ``shift(periods, freq=..., fill_value=...)``
            method.

        Returns
        -------
        Whatever ``mock_groupby.shift`` returns.
        """
        shift_kwargs = dict(freq=self.freq, fill_value=self.fill_value)
        return mock_groupby.shift(self.periods, **shift_kwargs)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def shift(
    groupby, periods=1, freq=None, fill_value=None, order_cols=None, ascending=True
):
    """
    Shift each group by periods observations.

    If freq is passed, the index will be increased using the periods and the freq.

    Parameters
    ----------
    periods : int | Sequence[int], default 1
        Number of periods to shift. If a list of values, shift each group by
        each period.
    freq : str, optional
        Frequency string.
    fill_value : optional
        The scalar value to use for newly introduced missing values.
    order_cols : optional
        Passed to the operator as part of its window parameters.
        NOTE(review): presumably the column(s) defining row order inside
        each group -- confirm against ``BaseGroupByWindowOp``.
    ascending : bool or list of bool, default True
        Passed to the operator as part of its window parameters. A value
        that is not a ``list`` is wrapped into a one-element list first.

    Returns
    -------
    Series or DataFrame
        Object shifted within each group.

    See Also
    --------
    Index.shift : Shift values of Index.

    Examples
    --------

    For SeriesGroupBy:

    >>> import maxframe.dataframe as md
    >>> lst = ['a', 'a', 'b', 'b']
    >>> ser = md.Series([1, 2, 3, 4], index=lst)
    >>> ser.execute()
    a    1
    a    2
    b    3
    b    4
    dtype: int64
    >>> ser.groupby(level=0).shift(1).execute()
    a    NaN
    a    1.0
    b    NaN
    b    3.0
    dtype: float64

    For DataFrameGroupBy:

    >>> data = [[1, 2, 3], [1, 5, 6], [2, 5, 8], [2, 6, 9]]
    >>> df = md.DataFrame(data, columns=["a", "b", "c"],
    ...                   index=["tuna", "salmon", "catfish", "goldfish"])
    >>> df.execute()
              a  b  c
    tuna      1  2  3
    salmon    1  5  6
    catfish   2  5  8
    goldfish  2  6  9
    >>> df.groupby("a").shift(1).execute()
                b    c
    tuna      NaN  NaN
    salmon    2.0  3.0
    catfish   NaN  NaN
    goldfish  5.0  8.0
    """
    # The operator expects ``ascending`` as a list; wrap any non-list value.
    ascending_flags = ascending if isinstance(ascending, list) else [ascending]

    shift_op = GroupByShift(
        periods=periods,
        freq=freq,
        fill_value=fill_value,
        groupby_params=groupby.op.groupby_params,
        window_params={"order_cols": order_cols, "ascending": ascending_flags},
    )
    return shift_op(groupby)
|
|
@@ -284,11 +284,6 @@ def test_groupby_cum():
|
|
|
284
284
|
assert r.shape == (len(df1), 2)
|
|
285
285
|
assert r.index_value.key == mdf.index_value.key
|
|
286
286
|
|
|
287
|
-
r = getattr(mdf.groupby("b"), fun)(axis=1)
|
|
288
|
-
assert r.op.output_types[0] == OutputType.dataframe
|
|
289
|
-
assert r.shape == (len(df1), 3)
|
|
290
|
-
assert r.index_value.key == mdf.index_value.key
|
|
291
|
-
|
|
292
287
|
r = mdf.groupby("b").cumcount()
|
|
293
288
|
assert r.op.output_types[0] == OutputType.series
|
|
294
289
|
assert r.shape == (len(df1),)
|
|
@@ -23,6 +23,9 @@ def _install():
|
|
|
23
23
|
)
|
|
24
24
|
from .align import align
|
|
25
25
|
from .at import at
|
|
26
|
+
from .droplevel import df_series_droplevel, index_droplevel
|
|
27
|
+
from .filter import filter_dataframe
|
|
28
|
+
from .get_level_values import get_level_values
|
|
26
29
|
from .getitem import dataframe_getitem, series_getitem
|
|
27
30
|
from .iat import iat
|
|
28
31
|
from .iloc import head, iloc, index_getitem, index_setitem, tail
|
|
@@ -31,15 +34,22 @@ def _install():
|
|
|
31
34
|
from .reindex import reindex, reindex_like
|
|
32
35
|
from .rename import df_rename, index_rename, index_set_names, series_rename
|
|
33
36
|
from .rename_axis import rename_axis
|
|
37
|
+
from .reorder_levels import df_reorder_levels, series_reorder_levels
|
|
34
38
|
from .reset_index import df_reset_index, series_reset_index
|
|
35
39
|
from .sample import sample
|
|
36
40
|
from .set_axis import df_set_axis, series_set_axis
|
|
37
41
|
from .set_index import set_index
|
|
38
42
|
from .setitem import dataframe_setitem
|
|
43
|
+
from .swaplevel import df_swaplevel, series_swaplevel
|
|
44
|
+
from .take import take
|
|
45
|
+
from .truncate import truncate
|
|
39
46
|
from .where import mask, where
|
|
47
|
+
from .xs import xs
|
|
40
48
|
|
|
41
49
|
for cls in DATAFRAME_TYPE + SERIES_TYPE:
|
|
42
50
|
setattr(cls, "at", property(fget=at))
|
|
51
|
+
setattr(cls, "droplevel", df_series_droplevel)
|
|
52
|
+
setattr(cls, "filter", filter_dataframe)
|
|
43
53
|
setattr(cls, "head", head)
|
|
44
54
|
setattr(cls, "iat", property(fget=iat))
|
|
45
55
|
setattr(cls, "iloc", property(fget=iloc))
|
|
@@ -50,7 +60,10 @@ def _install():
|
|
|
50
60
|
setattr(cls, "rename_axis", rename_axis)
|
|
51
61
|
setattr(cls, "sample", sample)
|
|
52
62
|
setattr(cls, "tail", tail)
|
|
63
|
+
setattr(cls, "take", take)
|
|
64
|
+
setattr(cls, "truncate", truncate)
|
|
53
65
|
setattr(cls, "where", where)
|
|
66
|
+
setattr(cls, "xs", xs)
|
|
54
67
|
|
|
55
68
|
for cls in DATAFRAME_TYPE:
|
|
56
69
|
setattr(cls, "add_prefix", df_add_prefix)
|
|
@@ -59,10 +72,12 @@ def _install():
|
|
|
59
72
|
setattr(cls, "__getitem__", dataframe_getitem)
|
|
60
73
|
setattr(cls, "insert", df_insert)
|
|
61
74
|
setattr(cls, "rename", df_rename)
|
|
75
|
+
setattr(cls, "reorder_levels", df_reorder_levels)
|
|
62
76
|
setattr(cls, "reset_index", df_reset_index)
|
|
63
77
|
setattr(cls, "set_axis", df_set_axis)
|
|
64
78
|
setattr(cls, "set_index", set_index)
|
|
65
79
|
setattr(cls, "__setitem__", dataframe_setitem)
|
|
80
|
+
setattr(cls, "swaplevel", df_swaplevel)
|
|
66
81
|
|
|
67
82
|
for cls in SERIES_TYPE:
|
|
68
83
|
setattr(cls, "add_prefix", series_add_prefix)
|
|
@@ -70,13 +85,17 @@ def _install():
|
|
|
70
85
|
setattr(cls, "align", align)
|
|
71
86
|
setattr(cls, "__getitem__", series_getitem)
|
|
72
87
|
setattr(cls, "rename", series_rename)
|
|
88
|
+
setattr(cls, "reorder_levels", series_reorder_levels)
|
|
73
89
|
setattr(cls, "reset_index", series_reset_index)
|
|
74
90
|
setattr(cls, "set_axis", series_set_axis)
|
|
91
|
+
setattr(cls, "swaplevel", series_swaplevel)
|
|
75
92
|
|
|
76
93
|
for cls in INDEX_TYPE:
|
|
94
|
+
setattr(cls, "droplevel", index_droplevel)
|
|
95
|
+
setattr(cls, "get_level_values", get_level_values)
|
|
77
96
|
setattr(cls, "__getitem__", index_getitem)
|
|
78
|
-
setattr(cls, "__setitem__", index_setitem)
|
|
79
97
|
setattr(cls, "rename", index_rename)
|
|
98
|
+
setattr(cls, "__setitem__", index_setitem)
|
|
80
99
|
setattr(cls, "set_names", index_set_names)
|
|
81
100
|
|
|
82
101
|
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField, Int32Field
|
|
17
|
+
from ..core import INDEX_TYPE
|
|
18
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
19
|
+
from ..utils import build_df, build_series, parse_index, validate_axis
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DataFrameDropLevel(DataFrameOperator, DataFrameOperatorMixin):
    """Operator removing one or more levels from an index or column labels.

    Calling the operator on an Index, Series or DataFrame object computes the
    metadata of the result (index value, dtypes, shape) by applying pandas'
    ``droplevel`` to a local pandas stand-in for the input.
    """

    _op_type_ = opcodes.DROPLEVEL

    # Level name(s) / position(s) to remove; handed to pandas ``droplevel``.
    level = AnyField("level")
    # 0 drops from the row index, any other value drops from the columns
    # (only consulted for DataFrame inputs).
    axis = Int32Field("axis", default=0)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def __call__(self, df_obj):
        """Create the output object for ``df_obj`` with the levels removed.

        Parameters
        ----------
        df_obj
            An Index, Series or DataFrame object of this package.

        Returns
        -------
        A new object of the same kind as ``df_obj`` whose index (or, for a
        DataFrame with ``axis != 0``, whose columns) has ``self.level``
        dropped.
        """
        if isinstance(df_obj, INDEX_TYPE):
            # For Index objects
            pd_index = df_obj.index_value.to_pandas()
            result_index = pd_index.droplevel(self.level)
            return self.new_index(
                [df_obj],
                shape=(df_obj.shape[0],),
                dtype=result_index.dtype,
                index_value=parse_index(result_index, store_data=False),
                name=result_index.name,
            )

        if df_obj.ndim == 1:
            # For Series objects: only the index changes, ``axis`` is ignored.
            pd_series = build_series(df_obj)
            result_index = pd_series.index.droplevel(self.level)
            return self.new_series(
                [df_obj],
                shape=df_obj.shape,
                dtype=df_obj.dtype,
                index_value=parse_index(result_index, store_data=False),
                name=df_obj.name,
            )

        # For DataFrame objects
        result_shape = (df_obj.shape[0], df_obj.shape[1])
        pd_df = build_df(df_obj)
        if self.axis == 0:
            # Dropping levels from index: columns and dtypes are untouched.
            result_index = pd_df.index.droplevel(self.level)
            result_index_value = parse_index(result_index, store_data=False)
            result_columns_value = df_obj.columns_value
            result_dtypes = df_obj.dtypes
        else:
            # Dropping levels from columns
            result_columns = pd_df.columns.droplevel(self.level)
            result_columns_value = parse_index(result_columns, store_data=True)
            result_index_value = df_obj.index_value
            # Re-label a copy of the dtypes with the reduced column labels so
            # that ``dtypes.index`` agrees with ``columns_value``; reusing the
            # input dtypes as-is would keep the old multi-level labels.
            result_dtypes = df_obj.dtypes.copy()
            result_dtypes.index = result_columns

        return self.new_dataframe(
            [df_obj],
            shape=result_shape,
            dtypes=result_dtypes,
            index_value=result_index_value,
            columns_value=result_columns_value,
        )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _droplevel(df_obj, level, axis=0):
    """Validate ``axis`` for ``df_obj`` and apply a ``DataFrameDropLevel`` operator.

    Returns the object produced by calling the operator on ``df_obj``.
    """
    checked_axis = validate_axis(axis, df_obj)
    return DataFrameDropLevel(level=level, axis=checked_axis)(df_obj)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def df_series_droplevel(df_or_series, level, axis=0):
    """
    Return Series/DataFrame with the requested index / column level(s) removed.

    Parameters
    ----------
    level : int, str, or list-like
        A string is interpreted as the name of a level. For a list-like,
        every element must be a level name or a positional level index.

    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis whose labels lose the level(s):

        * 0 or 'index': drop the level(s) from the index.
        * 1 or 'columns': drop the level(s) from the columns.

        For `Series` this parameter is unused and defaults to 0.

    Returns
    -------
    Series/DataFrame
        Series/DataFrame with the requested index / column level(s) removed.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([
    ...     [1, 2, 3, 4],
    ...     [5, 6, 7, 8],
    ...     [9, 10, 11, 12]
    ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

    >>> df.columns = md.MultiIndex.from_tuples([
    ...     ('c', 'e'), ('d', 'f')
    ... ], names=['level_1', 'level_2'])

    >>> df.execute()
    level_1   c   d
    level_2   e   f
    a b
    1 2      3   4
    5 6      7   8
    9 10    11  12

    >>> df.droplevel('a').execute()
    level_1   c   d
    level_2   e   f
    b
    2        3   4
    6        7   8
    10      11  12

    >>> df.droplevel('level_2', axis=1).execute()
    level_1   c   d
    a b
    1 2      3   4
    5 6      7   8
    9 10    11  12
    """
    return _droplevel(df_or_series, level, axis=axis)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def index_droplevel(idx, level=0):
    """
    Return index with requested level(s) removed.

    If resulting index has only 1 level left, the result will be
    of Index type, not MultiIndex. The original index is not modified inplace.

    Parameters
    ----------
    level : int, str, or list-like, default 0
        If a string is given, must be the name of a level
        If list-like, elements must be names or indexes of levels.

    Returns
    -------
    Index or MultiIndex

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> mi = md.MultiIndex.from_arrays(
    ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
    >>> mi.execute()
    MultiIndex([(1, 3, 5),
                (2, 4, 6)],
               names=['x', 'y', 'z'])

    >>> mi.droplevel().execute()
    MultiIndex([(3, 5),
                (4, 6)],
               names=['y', 'z'])

    >>> mi.droplevel(2).execute()
    MultiIndex([(1, 3),
                (2, 4)],
               names=['x', 'y'])

    >>> mi.droplevel('z').execute()
    MultiIndex([(1, 3),
                (2, 4)],
               names=['x', 'y'])

    >>> mi.droplevel(['x', 'y']).execute()
    Index([5, 6], dtype='int64', name='z')
    """
    # ``level`` defaults to 0 so that the documented no-argument call
    # ``mi.droplevel()`` works instead of raising a missing-argument TypeError.
    return _droplevel(idx, level)
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import get_output_types
|
|
22
|
+
from ...serialization.serializables import Int32Field, ListField, StringField
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
from ..utils import parse_index
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameFilter(DataFrameOperatorMixin, DataFrameOperator):
    """Operator that keeps only the rows or columns whose labels match.

    One of ``items``, ``like`` or ``regex`` decides which labels are kept;
    ``axis`` decides whether index labels (0) or column labels (any other
    value) are examined.
    """

    _op_type_ = opcodes.DATAFRAME_FILTER

    # explicit labels to keep
    items = ListField("items", default=None)
    # substring a label's string form must contain
    like = StringField("like", default=None)
    # regular expression searched in a label's string form
    regex = StringField("regex", default=None)
    # 0 selects the index, otherwise the columns are filtered
    axis = Int32Field("axis", default=None)

    def __call__(self, df_or_series):
        """Infer the output metadata and create the output tileable.

        Raises
        ------
        ValueError
            If ``axis`` is not 0 and the input has no ``columns`` attribute.
        """
        self._output_types = get_output_types(df_or_series)

        # Get axis labels to filter
        if self.axis == 0:
            # Filter by index
            labels = df_or_series.index_value.to_pandas()
        else:
            # Filter by columns (DataFrame only)
            if not hasattr(df_or_series, "columns"):
                raise ValueError("axis=1 (columns) not valid for Series")
            labels = df_or_series.columns_value.to_pandas()

        # Apply filter criteria
        filtered_labels = self._apply_filter_criteria(labels)

        # Calculate output shape and metadata
        out_params = self._calculate_output_metadata(df_or_series, filtered_labels)
        return self.new_tileable([df_or_series], **out_params)

    def _apply_filter_criteria(self, labels):
        """Return the list of labels selected by the configured criterion.

        The criteria are checked in the order ``items``, ``like``, ``regex``;
        when none is set every label is returned.
        """
        if self.items is not None:
            # Exact match filter. pandas returns the kept labels in the order
            # of ``items`` (not in the order of the axis), so the inferred
            # metadata has to follow ``items`` as well.
            available = list(labels)
            return [item for item in self.items if item in available]
        elif self.like is not None:
            # Substring match filter
            return [label for label in labels if self.like in str(label)]
        elif self.regex is not None:
            # Regex match filter
            pattern = re.compile(self.regex)
            return [label for label in labels if pattern.search(str(label))]
        else:
            return list(labels)

    def _calculate_output_metadata(self, input_tileable, filtered_labels):
        """Derive the output params from the input params and kept labels."""
        input_shape = input_tileable.shape

        out_params = input_tileable.params
        if self.axis == 0:
            # An empty match list produces NaN as the row count rather than 0.
            # NOTE(review): presumably because the index labels may not be
            # available at this point, making the real count unknown - confirm.
            out_params["shape"] = (len(filtered_labels) or np.nan,) + input_shape[1:]
            out_params["index_value"] = parse_index(
                pd.Index(filtered_labels), input_tileable.index_value
            )
        else:
            out_params["shape"] = (input_shape[0], len(filtered_labels))
            # NOTE(review): only ``columns_value`` is narrowed here; any dtypes
            # entry carried over from the input params is left untouched -
            # confirm whether it should be restricted to the kept columns.
            out_params["columns_value"] = parse_index(
                input_tileable.dtypes[filtered_labels].index, store_data=True
            )
        return out_params
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def filter_dataframe(df_or_series, items=None, like=None, regex=None, axis=None):
    """
    Subset the dataframe rows or columns according to the specified index labels.

    Note that this routine does not filter a dataframe on its
    contents. The filter is applied to the labels of the index.

    Parameters
    ----------
    items : list-like
        Keep labels from axis which are in items.
    like : str
        Keep labels from axis for which "like in label == True".
    regex : str (regular expression)
        Keep labels from axis for which re.search(regex, label) == True.
    axis : {0 or 'index', 1 or 'columns', None}, default None
        The axis to filter on, expressed either as an index (int)
        or axis name (str). By default this is the info axis, 'columns' for
        DataFrame. For `Series` this parameter is unused and defaults to `None`.

    Returns
    -------
    same type as input object

    Raises
    ------
    TypeError
        If none of, or more than one of, ``items``, ``like`` and ``regex``
        is given.
    ValueError
        If ``axis`` is not one of the accepted values.

    See Also
    --------
    DataFrame.loc : Access a group of rows and columns
        by label(s) or a boolean array.

    Notes
    -----
    The ``items``, ``like``, and ``regex`` parameters are
    enforced to be mutually exclusive.

    ``axis`` defaults to the info axis that is used when indexing
    with ``[]``.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame(mt.array(([1, 2, 3], [4, 5, 6])),
    ...                   index=['mouse', 'rabbit'],
    ...                   columns=['one', 'two', 'three'])
    >>> df.execute()
            one  two  three
    mouse     1    2      3
    rabbit    4    5      6

    >>> # select columns by name
    >>> df.filter(items=['one', 'three']).execute()
             one  three
    mouse     1      3
    rabbit    4      6

    >>> # select columns by regular expression
    >>> df.filter(regex='e$', axis=1).execute()
             one  three
    mouse     1      3
    rabbit    4      6

    >>> # select rows containing 'bbi'
    >>> df.filter(like='bbi', axis=0).execute()
             one  two  three
    rabbit    4    5      6
    """
    param_count = sum(x is not None for x in [items, like, regex])
    if param_count == 0:
        raise TypeError("Must pass either `items`, `like`, or `regex`")
    if param_count > 1:
        raise TypeError(
            "keyword arguments `items`, `like`, `regex` are mutually exclusive"
        )

    if axis is None:
        # For Series, axis is always 0 (index)
        # For DataFrame, default is 1 (columns)
        axis = 1 if hasattr(df_or_series, "columns") else 0
    else:
        # The operator compares ``axis`` against the integer 0, so an axis
        # name passed through unchanged would silently select the columns.
        # Translate names to numbers and reject anything else up front.
        requested_axis = axis
        if isinstance(axis, str):
            axis = {"index": 0, "columns": 1}.get(axis)
        if axis is None or axis not in (0, 1):
            raise ValueError(
                f"No axis named {requested_axis} "
                f"for object type {type(df_or_series).__name__}"
            )
        axis = int(axis)

    op = DataFrameFilter(items=items, like=like, regex=regex, axis=axis)
    return op(df_or_series)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField
|
|
17
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
18
|
+
from ..utils import parse_index
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class IndexGetLevelValues(DataFrameOperator, DataFrameOperatorMixin):
    """Operator producing the values of a single level of an index."""

    _op_type_ = opcodes.GET_LEVEL_VALUES

    # level selector handed to pandas' ``get_level_values``
    level = AnyField("level")

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def __call__(self, index):
        """Create the output index for the requested level of ``index``.

        The dtype, index value and names of the output are taken from calling
        ``get_level_values`` on the pandas index that ``index.index_value``
        converts to; the output length equals the input length.
        """
        level_values = index.index_value.to_pandas().get_level_values(self.level)
        out_kwargs = dict(
            shape=(index.shape[0],),
            dtype=level_values.dtype,
            index_value=parse_index(level_values, store_data=False),
            names=level_values.names,
        )
        return self.new_index([index], **out_kwargs)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_level_values(index, level):
    """
    Return the label values of the requested level as an index.

    The returned vector has the same length as the index itself.

    Parameters
    ----------
    level : int or str
        Either the integer position of the level inside the MultiIndex
        or the name of that level.

    Returns
    -------
    values : Index
        The selected level of this MultiIndex, converted to a single
        :class:`Index` (or subclass thereof).

    Examples
    --------
    Create a MultiIndex:

    >>> import maxframe.dataframe as md
    >>> import pandas as pd
    >>> mi = md.Index(pd.MultiIndex.from_arrays((list('abc'), list('def')), names=['level_1', 'level_2']))

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0).execute()
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2').execute()
    Index(['d', 'e', 'f'], dtype='object', name='level_2')
    """
    return IndexGetLevelValues(level=level)(index)
|
|
@@ -34,4 +34,49 @@ class DataFrameIat:
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def iat(a):
    """
    Access one value of a row/column pair by its integer position.

    Like ``iloc``, ``iat`` performs integer-based lookups. Prefer ``iat``
    when only a single value of a DataFrame or Series has to be read or
    written.

    Raises
    ------
    IndexError
        When integer position is out of bounds.

    See Also
    --------
    DataFrame.at : Access a single value for a row/column label pair.
    DataFrame.loc : Access a group of rows and columns by label(s).
    DataFrame.iloc : Access a group of rows and columns by integer position(s).

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
    ...                   columns=['A', 'B', 'C'])
    >>> df.execute()
        A   B   C
    0   0   2   3
    1   0   4   1
    2  10  20  30

    Get value at specified row/column pair

    >>> df.iat[1, 2].execute()
    1

    Set value at specified row/column pair

    >>> df.iat[1, 2] = 10
    >>> df.iat[1, 2].execute()
    10

    Get value within a series

    >>> df.loc[0].iat[1].execute()
    2
    """
    accessor = DataFrameIat(a)
    return accessor
|