maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -28,20 +28,25 @@ from ...tensor.utils import calc_sliced_size, filter_inputs
|
|
|
28
28
|
from ...utils import is_full_slice, lazy_import, pd_release_version
|
|
29
29
|
from ..core import DATAFRAME_TYPE, IndexValue
|
|
30
30
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
31
|
-
from ..utils import parse_index
|
|
31
|
+
from ..utils import parse_index, validate_axis
|
|
32
32
|
from .iloc import DataFrameIlocSetItem
|
|
33
33
|
|
|
34
34
|
cudf = lazy_import("cudf")
|
|
35
35
|
with_slice_locs_kind = pd_release_version < (1, 4, 0)
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
def process_loc_indexes(inp, indexes, fetch_index: bool = True):
|
|
38
|
+
def process_loc_indexes(inp, indexes, fetch_index: bool = True, axis=None):
|
|
39
39
|
ndim = inp.ndim
|
|
40
40
|
|
|
41
41
|
if not isinstance(indexes, tuple):
|
|
42
42
|
indexes = (indexes,)
|
|
43
|
+
if axis is not None and inp.axes[axis].nlevels > 1:
|
|
44
|
+
indexes = (indexes,)
|
|
43
45
|
if len(indexes) < ndim:
|
|
44
|
-
|
|
46
|
+
if axis == 0 or axis is None:
|
|
47
|
+
indexes += (slice(None),) * (ndim - len(indexes))
|
|
48
|
+
else:
|
|
49
|
+
indexes = (slice(None),) * (ndim - len(indexes)) + indexes
|
|
45
50
|
if len(indexes) > ndim:
|
|
46
51
|
raise IndexingError("Too many indexers")
|
|
47
52
|
|
|
@@ -67,8 +72,9 @@ def process_loc_indexes(inp, indexes, fetch_index: bool = True):
|
|
|
67
72
|
|
|
68
73
|
|
|
69
74
|
class DataFrameLoc:
|
|
70
|
-
def __init__(self, obj):
|
|
75
|
+
def __init__(self, obj, axis=None):
|
|
71
76
|
self._obj = obj
|
|
77
|
+
self._axis = axis
|
|
72
78
|
|
|
73
79
|
def _use_iloc(self, indexes):
|
|
74
80
|
# for RangeIndex from 0, use iloc instead of loc
|
|
@@ -105,7 +111,7 @@ class DataFrameLoc:
|
|
|
105
111
|
return True, None
|
|
106
112
|
|
|
107
113
|
def __getitem__(self, indexes):
|
|
108
|
-
indexes = process_loc_indexes(self._obj, indexes)
|
|
114
|
+
indexes = process_loc_indexes(self._obj, indexes, axis=self._axis)
|
|
109
115
|
|
|
110
116
|
use_iloc, new_indexes = self._use_iloc(indexes)
|
|
111
117
|
if use_iloc:
|
|
@@ -120,7 +126,9 @@ class DataFrameLoc:
|
|
|
120
126
|
raise NotImplementedError("Only scalar value is supported to set by loc")
|
|
121
127
|
if not isinstance(self._obj, DATAFRAME_TYPE):
|
|
122
128
|
raise NotImplementedError("Only DataFrame is supported to set by loc")
|
|
123
|
-
indexes = process_loc_indexes(
|
|
129
|
+
indexes = process_loc_indexes(
|
|
130
|
+
self._obj, indexes, fetch_index=False, axis=self._axis
|
|
131
|
+
)
|
|
124
132
|
use_iloc, new_indexes = self._use_iloc(indexes)
|
|
125
133
|
if use_iloc:
|
|
126
134
|
op = DataFrameIlocSetItem(indexes=new_indexes, value=value)
|
|
@@ -137,9 +145,13 @@ class DataFrameLoc:
|
|
|
137
145
|
ret = op([self._obj] + indices_tileable)
|
|
138
146
|
self._obj.data = ret.data
|
|
139
147
|
|
|
148
|
+
def __call__(self, axis):
|
|
149
|
+
axis = validate_axis(axis, self._obj)
|
|
150
|
+
return DataFrameLoc(self._obj, axis)
|
|
151
|
+
|
|
140
152
|
|
|
141
153
|
class DataFrameLocSetItem(DataFrameOperator, DataFrameOperatorMixin):
|
|
142
|
-
_op_type_ = opcodes.
|
|
154
|
+
_op_type_ = opcodes.DATAFRAME_LOC_SETITEM
|
|
143
155
|
|
|
144
156
|
indexes = ListField("indexes", default=None)
|
|
145
157
|
value = AnyField("value", default=None)
|
|
@@ -336,6 +348,17 @@ class DataFrameLocGetItem(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
336
348
|
else:
|
|
337
349
|
# append None to indicate returning Series
|
|
338
350
|
param["shape"] = None
|
|
351
|
+
elif isinstance(index, tuple):
|
|
352
|
+
has_ranges = any(
|
|
353
|
+
isinstance(i, (slice, np.ndarray))
|
|
354
|
+
or (hasattr(i, "dtype") and index.ndim == 1)
|
|
355
|
+
for i in index
|
|
356
|
+
)
|
|
357
|
+
if has_ranges:
|
|
358
|
+
param["shape"] = np.nan
|
|
359
|
+
param["index_value"] = parse_index(pd_index, inp, index)
|
|
360
|
+
else:
|
|
361
|
+
param["shape"] = None
|
|
339
362
|
else:
|
|
340
363
|
param["shape"] = None
|
|
341
364
|
return param
|
|
@@ -411,4 +434,261 @@ class DataFrameLocGetItem(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
411
434
|
|
|
412
435
|
|
|
413
436
|
def loc(a):
|
|
437
|
+
"""
|
|
438
|
+
Access a group of rows and columns by label(s) or a boolean array.
|
|
439
|
+
|
|
440
|
+
``.loc[]`` is primarily label based, but may also be used with a
|
|
441
|
+
boolean array.
|
|
442
|
+
|
|
443
|
+
Allowed inputs are:
|
|
444
|
+
|
|
445
|
+
- A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
|
|
446
|
+
interpreted as a *label* of the index, and **never** as an
|
|
447
|
+
integer position along the index).
|
|
448
|
+
- A list or array of labels, e.g. ``['a', 'b', 'c']``.
|
|
449
|
+
- A slice object with labels, e.g. ``'a':'f'``.
|
|
450
|
+
|
|
451
|
+
.. warning:: Note that contrary to usual python slices, **both** the
|
|
452
|
+
start and the stop are included
|
|
453
|
+
|
|
454
|
+
- A boolean array of the same length as the axis being sliced,
|
|
455
|
+
e.g. ``[True, False, True]``.
|
|
456
|
+
- An alignable boolean Series. The index of the key will be aligned before
|
|
457
|
+
masking.
|
|
458
|
+
- An alignable Index. The Index of the returned selection will be the input.
|
|
459
|
+
- A ``callable`` function with one argument (the calling Series or
|
|
460
|
+
DataFrame) and that returns valid output for indexing (one of the above)
|
|
461
|
+
|
|
462
|
+
See more at :ref:`Selection by Label <indexing.label>`.
|
|
463
|
+
|
|
464
|
+
Raises
|
|
465
|
+
------
|
|
466
|
+
KeyError
|
|
467
|
+
If any items are not found.
|
|
468
|
+
IndexingError
|
|
469
|
+
If an indexed key is passed and its index is unalignable to the frame index.
|
|
470
|
+
|
|
471
|
+
See Also
|
|
472
|
+
--------
|
|
473
|
+
DataFrame.at : Access a single value for a row/column label pair.
|
|
474
|
+
DataFrame.iloc : Access group of rows and columns by integer position(s).
|
|
475
|
+
DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
|
|
476
|
+
Series/DataFrame.
|
|
477
|
+
Series.loc : Access group of values using labels.
|
|
478
|
+
|
|
479
|
+
Examples
|
|
480
|
+
--------
|
|
481
|
+
**Getting values**
|
|
482
|
+
|
|
483
|
+
>>> import maxframe.dataframe as md
|
|
484
|
+
>>> df = md.DataFrame([[1, 2], [4, 5], [7, 8]],
|
|
485
|
+
... index=['cobra', 'viper', 'sidewinder'],
|
|
486
|
+
... columns=['max_speed', 'shield'])
|
|
487
|
+
>>> df.execute()
|
|
488
|
+
max_speed shield
|
|
489
|
+
cobra 1 2
|
|
490
|
+
viper 4 5
|
|
491
|
+
sidewinder 7 8
|
|
492
|
+
|
|
493
|
+
Single label. Note this returns the row as a Series.
|
|
494
|
+
|
|
495
|
+
>>> df.loc['viper'].execute()
|
|
496
|
+
max_speed 4
|
|
497
|
+
shield 5
|
|
498
|
+
Name: viper, dtype: int64
|
|
499
|
+
|
|
500
|
+
List of labels. Note using ``[[]]`` returns a DataFrame.
|
|
501
|
+
|
|
502
|
+
>>> df.loc[['viper', 'sidewinder']].execute()
|
|
503
|
+
max_speed shield
|
|
504
|
+
viper 4 5
|
|
505
|
+
sidewinder 7 8
|
|
506
|
+
|
|
507
|
+
Single label for row and column
|
|
508
|
+
|
|
509
|
+
>>> df.loc['cobra', 'shield'].execute()
|
|
510
|
+
2
|
|
511
|
+
|
|
512
|
+
Slice with labels for row and single label for column. As mentioned
|
|
513
|
+
above, note that both the start and stop of the slice are included.
|
|
514
|
+
|
|
515
|
+
>>> df.loc['cobra':'viper', 'max_speed'].execute()
|
|
516
|
+
cobra 1
|
|
517
|
+
viper 4
|
|
518
|
+
Name: max_speed, dtype: int64
|
|
519
|
+
|
|
520
|
+
Boolean list with the same length as the row axis
|
|
521
|
+
|
|
522
|
+
>>> df.loc[[False, False, True]].execute()
|
|
523
|
+
max_speed shield
|
|
524
|
+
sidewinder 7 8
|
|
525
|
+
|
|
526
|
+
Alignable boolean Series:
|
|
527
|
+
|
|
528
|
+
>>> df.loc[md.Series([False, True, False],
|
|
529
|
+
... index=['viper', 'sidewinder', 'cobra'])].execute()
|
|
530
|
+
max_speed shield
|
|
531
|
+
sidewinder 7 8
|
|
532
|
+
|
|
533
|
+
Index (same behavior as ``df.reindex``)
|
|
534
|
+
|
|
535
|
+
>>> df.loc[md.Index(["cobra", "viper"], name="foo")].execute()
|
|
536
|
+
max_speed shield
|
|
537
|
+
foo
|
|
538
|
+
cobra 1 2
|
|
539
|
+
viper 4 5
|
|
540
|
+
|
|
541
|
+
Conditional that returns a boolean Series
|
|
542
|
+
|
|
543
|
+
>>> df.loc[df['shield'] > 6].execute()
|
|
544
|
+
max_speed shield
|
|
545
|
+
sidewinder 7 8
|
|
546
|
+
|
|
547
|
+
Conditional that returns a boolean Series with column labels specified
|
|
548
|
+
|
|
549
|
+
>>> df.loc[df['shield'] > 6, ['max_speed']].execute()
|
|
550
|
+
max_speed
|
|
551
|
+
sidewinder 7
|
|
552
|
+
|
|
553
|
+
Callable that returns a boolean Series
|
|
554
|
+
|
|
555
|
+
>>> df.loc[lambda df: df['shield'] == 8].execute()
|
|
556
|
+
max_speed shield
|
|
557
|
+
sidewinder 7 8
|
|
558
|
+
|
|
559
|
+
**Setting values**
|
|
560
|
+
|
|
561
|
+
Set value for all items matching the list of labels
|
|
562
|
+
|
|
563
|
+
>>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
|
|
564
|
+
>>> df.execute()
|
|
565
|
+
max_speed shield
|
|
566
|
+
cobra 1 2
|
|
567
|
+
viper 4 50
|
|
568
|
+
sidewinder 7 50
|
|
569
|
+
|
|
570
|
+
Set value for an entire row
|
|
571
|
+
|
|
572
|
+
>>> df.loc['cobra'] = 10
|
|
573
|
+
>>> df.execute()
|
|
574
|
+
max_speed shield
|
|
575
|
+
cobra 10 10
|
|
576
|
+
viper 4 50
|
|
577
|
+
sidewinder 7 50
|
|
578
|
+
|
|
579
|
+
Set value for an entire column
|
|
580
|
+
|
|
581
|
+
>>> df.loc[:, 'max_speed'] = 30
|
|
582
|
+
>>> df.execute()
|
|
583
|
+
max_speed shield
|
|
584
|
+
cobra 30 10
|
|
585
|
+
viper 30 50
|
|
586
|
+
sidewinder 30 50
|
|
587
|
+
|
|
588
|
+
Set value for rows matching callable condition
|
|
589
|
+
|
|
590
|
+
>>> df.loc[df['shield'] > 35] = 0
|
|
591
|
+
>>> df.execute()
|
|
592
|
+
max_speed shield
|
|
593
|
+
cobra 30 10
|
|
594
|
+
viper 0 0
|
|
595
|
+
sidewinder 0 0
|
|
596
|
+
|
|
597
|
+
**Getting values on a DataFrame with an index that has integer labels**
|
|
598
|
+
|
|
599
|
+
Another example using integers for the index
|
|
600
|
+
|
|
601
|
+
>>> df = md.DataFrame([[1, 2], [4, 5], [7, 8]],
|
|
602
|
+
... index=[7, 8, 9], columns=['max_speed', 'shield'])
|
|
603
|
+
>>> df.execute()
|
|
604
|
+
max_speed shield
|
|
605
|
+
7 1 2
|
|
606
|
+
8 4 5
|
|
607
|
+
9 7 8
|
|
608
|
+
|
|
609
|
+
Slice with integer labels for rows. As mentioned above, note that both
|
|
610
|
+
the start and stop of the slice are included.
|
|
611
|
+
|
|
612
|
+
>>> df.loc[7:9].execute()
|
|
613
|
+
max_speed shield
|
|
614
|
+
7 1 2
|
|
615
|
+
8 4 5
|
|
616
|
+
9 7 8
|
|
617
|
+
|
|
618
|
+
**Getting values with a MultiIndex**
|
|
619
|
+
|
|
620
|
+
A number of examples using a DataFrame with a MultiIndex
|
|
621
|
+
|
|
622
|
+
>>> tuples = [
|
|
623
|
+
... ('cobra', 'mark i'), ('cobra', 'mark ii'),
|
|
624
|
+
... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
|
|
625
|
+
... ('viper', 'mark ii'), ('viper', 'mark iii')
|
|
626
|
+
... ]
|
|
627
|
+
>>> index = md.MultiIndex.from_tuples(tuples)
|
|
628
|
+
>>> values = [[12, 2], [0, 4], [10, 20],
|
|
629
|
+
... [1, 4], [7, 1], [16, 36]]
|
|
630
|
+
>>> df = md.DataFrame(values, columns=['max_speed', 'shield'], index=index)
|
|
631
|
+
>>> df.execute()
|
|
632
|
+
max_speed shield
|
|
633
|
+
cobra mark i 12 2
|
|
634
|
+
mark ii 0 4
|
|
635
|
+
sidewinder mark i 10 20
|
|
636
|
+
mark ii 1 4
|
|
637
|
+
viper mark ii 7 1
|
|
638
|
+
mark iii 16 36
|
|
639
|
+
|
|
640
|
+
Single label. Note this returns a DataFrame with a single index.
|
|
641
|
+
|
|
642
|
+
>>> df.loc['cobra'].execute()
|
|
643
|
+
max_speed shield
|
|
644
|
+
mark i 12 2
|
|
645
|
+
mark ii 0 4
|
|
646
|
+
|
|
647
|
+
Single index tuple. Note this returns a Series.
|
|
648
|
+
|
|
649
|
+
>>> df.loc[('cobra', 'mark ii')].execute()
|
|
650
|
+
max_speed 0
|
|
651
|
+
shield 4
|
|
652
|
+
Name: (cobra, mark ii), dtype: int64
|
|
653
|
+
|
|
654
|
+
Single label for row and column. Similar to passing in a tuple, this
|
|
655
|
+
returns a Series.
|
|
656
|
+
|
|
657
|
+
>>> df.loc['cobra', 'mark i'].execute()
|
|
658
|
+
max_speed 12
|
|
659
|
+
shield 2
|
|
660
|
+
Name: (cobra, mark i), dtype: int64
|
|
661
|
+
|
|
662
|
+
Single tuple. Note using ``[[]]`` returns a DataFrame.
|
|
663
|
+
|
|
664
|
+
>>> df.loc[[('cobra', 'mark ii')]].execute()
|
|
665
|
+
max_speed shield
|
|
666
|
+
cobra mark ii 0 4
|
|
667
|
+
|
|
668
|
+
Single tuple for the index with a single label for the column
|
|
669
|
+
|
|
670
|
+
>>> df.loc[('cobra', 'mark i'), 'shield'].execute()
|
|
671
|
+
2
|
|
672
|
+
|
|
673
|
+
Slice from index tuple to single label
|
|
674
|
+
|
|
675
|
+
>>> df.loc[('cobra', 'mark i'):'viper'].execute()
|
|
676
|
+
max_speed shield
|
|
677
|
+
cobra mark i 12 2
|
|
678
|
+
mark ii 0 4
|
|
679
|
+
sidewinder mark i 10 20
|
|
680
|
+
mark ii 1 4
|
|
681
|
+
viper mark ii 7 1
|
|
682
|
+
mark iii 16 36
|
|
683
|
+
|
|
684
|
+
Slice from index tuple to index tuple
|
|
685
|
+
|
|
686
|
+
>>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')].execute()
|
|
687
|
+
max_speed shield
|
|
688
|
+
cobra mark i 12 2
|
|
689
|
+
mark ii 0 4
|
|
690
|
+
sidewinder mark i 10 20
|
|
691
|
+
mark ii 1 4
|
|
692
|
+
viper mark ii 7 1
|
|
693
|
+
"""
|
|
414
694
|
return DataFrameLoc(a)
|
|
@@ -16,6 +16,8 @@ from typing import List
|
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
19
|
+
from ...core.operator import OperatorStage
|
|
20
|
+
|
|
19
21
|
try:
|
|
20
22
|
import scipy.sparse as sps
|
|
21
23
|
except ImportError: # pragma: no cover
|
|
@@ -31,7 +33,7 @@ from ...serialization.serializables import (
|
|
|
31
33
|
StringField,
|
|
32
34
|
)
|
|
33
35
|
from ...tensor import tensor as astensor
|
|
34
|
-
from ...utils import lazy_import, pd_release_version
|
|
36
|
+
from ...utils import is_full_slice, lazy_import, pd_release_version
|
|
35
37
|
from ..core import INDEX_TYPE
|
|
36
38
|
from ..core import Index as DataFrameIndexType
|
|
37
39
|
from ..initializer import Index as asindex
|
|
@@ -92,12 +94,19 @@ class DataFrameReindex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
92
94
|
@classmethod
|
|
93
95
|
def _set_inputs(cls, op: "DataFrameReindex", inputs: List[EntityData]):
|
|
94
96
|
super()._set_inputs(op, inputs)
|
|
95
|
-
|
|
97
|
+
if getattr(op, "indexes", None):
|
|
98
|
+
op.index, op.columns = [
|
|
99
|
+
None if is_full_slice(idx) else idx for idx in list(op.indexes) + [None]
|
|
100
|
+
][:2]
|
|
101
|
+
inputs_iter = iter(inputs)
|
|
96
102
|
op._input = next(inputs_iter)
|
|
97
103
|
if op.index is not None and isinstance(op.index, ENTITY_TYPE):
|
|
98
104
|
op.index = next(inputs_iter)
|
|
99
|
-
if op.fill_value is not None
|
|
100
|
-
op.
|
|
105
|
+
if op.fill_value is not None:
|
|
106
|
+
if op.stage == OperatorStage.agg:
|
|
107
|
+
op.fill_value = None
|
|
108
|
+
elif isinstance(op.fill_value, ENTITY_TYPE):
|
|
109
|
+
op.fill_value = next(inputs_iter)
|
|
101
110
|
|
|
102
111
|
def __call__(self, df_or_series):
|
|
103
112
|
inputs = [df_or_series]
|
|
@@ -363,7 +372,7 @@ def reindex(
|
|
|
363
372
|
axes_kwargs = dict(index=index, columns=columns, axis=axis)
|
|
364
373
|
axes = validate_axis_style_args(
|
|
365
374
|
df_or_series,
|
|
366
|
-
(labels,),
|
|
375
|
+
(labels,) if labels is not None else (),
|
|
367
376
|
{k: v for k, v in axes_kwargs.items() if v is not None},
|
|
368
377
|
"labels",
|
|
369
378
|
"reindex",
|
|
@@ -16,6 +16,7 @@ import warnings
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import get_output_types
|
|
19
|
+
from ...serialization import PickleContainer
|
|
19
20
|
from ...serialization.serializables import AnyField, StringField
|
|
20
21
|
from ..core import INDEX_TYPE, SERIES_TYPE
|
|
21
22
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -34,6 +35,11 @@ class DataFrameRename(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
34
35
|
def __init__(self, output_types=None, **kw):
|
|
35
36
|
super().__init__(_output_types=output_types, **kw)
|
|
36
37
|
|
|
38
|
+
def has_custom_code(self) -> bool:
|
|
39
|
+
return isinstance(self.columns_mapper, PickleContainer) or isinstance(
|
|
40
|
+
self.index_mapper, PickleContainer
|
|
41
|
+
)
|
|
42
|
+
|
|
37
43
|
def _calc_renamed_df(self, df, errors="ignore"):
|
|
38
44
|
empty_df = build_df(df)
|
|
39
45
|
return empty_df.rename(
|
|
@@ -85,8 +85,8 @@ def rename_axis_with_level(
|
|
|
85
85
|
else:
|
|
86
86
|
columns = mapper
|
|
87
87
|
op = DataFrameRenameAxis(
|
|
88
|
-
index=
|
|
89
|
-
columns=
|
|
88
|
+
index=index,
|
|
89
|
+
columns=columns,
|
|
90
90
|
copy_value=copy,
|
|
91
91
|
level=level,
|
|
92
92
|
axis=0 if index is not no_default else 1,
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import get_output_types
|
|
19
|
+
from ...serialization.serializables import AnyField, Int32Field
|
|
20
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
21
|
+
from ..utils import parse_index, validate_axis
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DataFrameReorderLevels(DataFrameOperator, DataFrameOperatorMixin):
|
|
25
|
+
_op_type_ = opcodes.REORDER_LEVELS
|
|
26
|
+
|
|
27
|
+
order = AnyField("order")
|
|
28
|
+
axis = Int32Field("axis", default=0)
|
|
29
|
+
|
|
30
|
+
def __call__(self, df_or_series):
|
|
31
|
+
# Determine output type
|
|
32
|
+
self._output_types = get_output_types(df_or_series)
|
|
33
|
+
|
|
34
|
+
if self.axis == 0:
|
|
35
|
+
src_idx_value = df_or_series.index_value
|
|
36
|
+
else:
|
|
37
|
+
src_idx_value = df_or_series.columns_value
|
|
38
|
+
|
|
39
|
+
# Create reordered index
|
|
40
|
+
pd_index = src_idx_value.to_pandas()
|
|
41
|
+
if not isinstance(pd_index, pd.MultiIndex):
|
|
42
|
+
raise ValueError("reorder_levels can only be used with MultiIndex")
|
|
43
|
+
pd_index = pd_index.reorder_levels(self.order)
|
|
44
|
+
|
|
45
|
+
params = df_or_series.params
|
|
46
|
+
if self.axis == 0:
|
|
47
|
+
params["index_value"] = parse_index(pd_index)
|
|
48
|
+
else:
|
|
49
|
+
params["columns_value"] = parse_index(pd_index, store_data=True)
|
|
50
|
+
return self.new_tileable([df_or_series], **params)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _reorder_levels(df_or_series, order, axis=0):
|
|
54
|
+
axis = validate_axis(axis, df_or_series)
|
|
55
|
+
op = DataFrameReorderLevels(order=order, axis=axis)
|
|
56
|
+
return op(df_or_series)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def df_reorder_levels(df, order, axis=0):
|
|
60
|
+
"""
|
|
61
|
+
Rearrange index levels using input order. May not drop or duplicate levels.
|
|
62
|
+
|
|
63
|
+
Parameters
|
|
64
|
+
----------
|
|
65
|
+
order : list of int or list of str
|
|
66
|
+
List representing new level order. Reference level by number
|
|
67
|
+
(position) or by key (label).
|
|
68
|
+
axis : {0 or 'index', 1 or 'columns'}, default 0
|
|
69
|
+
Where to reorder levels.
|
|
70
|
+
|
|
71
|
+
Returns
|
|
72
|
+
-------
|
|
73
|
+
DataFrame
|
|
74
|
+
|
|
75
|
+
Examples
|
|
76
|
+
--------
|
|
77
|
+
>>> import maxframe.dataframe as md
|
|
78
|
+
>>> data = {
|
|
79
|
+
... "class": ["Mammals", "Mammals", "Reptiles"],
|
|
80
|
+
... "diet": ["Omnivore", "Carnivore", "Carnivore"],
|
|
81
|
+
... "species": ["Humans", "Dogs", "Snakes"],
|
|
82
|
+
... }
|
|
83
|
+
>>> df = md.DataFrame(data, columns=["class", "diet", "species"])
|
|
84
|
+
>>> df = df.set_index(["class", "diet"])
|
|
85
|
+
>>> df.execute()
|
|
86
|
+
species
|
|
87
|
+
class diet
|
|
88
|
+
Mammals Omnivore Humans
|
|
89
|
+
Carnivore Dogs
|
|
90
|
+
Reptiles Carnivore Snakes
|
|
91
|
+
|
|
92
|
+
Let's reorder the levels of the index:
|
|
93
|
+
|
|
94
|
+
>>> df.reorder_levels(["diet", "class"]).execute()
|
|
95
|
+
species
|
|
96
|
+
diet class
|
|
97
|
+
Omnivore Mammals Humans
|
|
98
|
+
Carnivore Mammals Dogs
|
|
99
|
+
Reptiles Snakes
|
|
100
|
+
"""
|
|
101
|
+
return _reorder_levels(df, order, axis=axis)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def series_reorder_levels(series, order):
|
|
105
|
+
"""
|
|
106
|
+
Rearrange index levels using input order.
|
|
107
|
+
|
|
108
|
+
May not drop or duplicate levels.
|
|
109
|
+
|
|
110
|
+
Parameters
|
|
111
|
+
----------
|
|
112
|
+
order : list of int representing new level order
|
|
113
|
+
Reference level by number or key.
|
|
114
|
+
|
|
115
|
+
Returns
|
|
116
|
+
-------
|
|
117
|
+
type of caller (new object)
|
|
118
|
+
|
|
119
|
+
Examples
|
|
120
|
+
--------
|
|
121
|
+
>>> import maxframe.tensor as mt
|
|
122
|
+
>>> import maxframe.dataframe as md
|
|
123
|
+
>>> arrays = [mt.array(["dog", "dog", "cat", "cat", "bird", "bird"]),
|
|
124
|
+
... mt.array(["white", "black", "white", "black", "white", "black"])]
|
|
125
|
+
>>> s = md.Series([1, 2, 3, 3, 5, 2], index=arrays)
|
|
126
|
+
>>> s.execute()
|
|
127
|
+
dog white 1
|
|
128
|
+
black 2
|
|
129
|
+
cat white 3
|
|
130
|
+
black 3
|
|
131
|
+
bird white 5
|
|
132
|
+
black 2
|
|
133
|
+
dtype: int64
|
|
134
|
+
>>> s.reorder_levels([1, 0]).execute()
|
|
135
|
+
white dog 1
|
|
136
|
+
black dog 2
|
|
137
|
+
white cat 3
|
|
138
|
+
black cat 3
|
|
139
|
+
white bird 5
|
|
140
|
+
black bird 2
|
|
141
|
+
dtype: int64
|
|
142
|
+
"""
|
|
143
|
+
return _reorder_levels(series, order)
|
|
@@ -18,10 +18,12 @@ import pandas as pd
|
|
|
18
18
|
from ... import opcodes
|
|
19
19
|
from ...core import OutputType
|
|
20
20
|
from ...serialization.serializables import AnyField, BoolField
|
|
21
|
-
from ...utils import no_default
|
|
21
|
+
from ...utils import no_default, pd_release_version
|
|
22
22
|
from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
23
23
|
from ..utils import build_empty_df, build_empty_series, parse_index
|
|
24
24
|
|
|
25
|
+
_reset_index_has_names = pd_release_version >= (1, 5)
|
|
26
|
+
|
|
25
27
|
|
|
26
28
|
class DataFrameResetIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
27
29
|
_op_type_ = opcodes.RESET_INDEX
|
|
@@ -29,8 +31,10 @@ class DataFrameResetIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
29
31
|
level = AnyField("level", default=None)
|
|
30
32
|
drop = BoolField("drop", default=False)
|
|
31
33
|
name = AnyField("name", default=None)
|
|
32
|
-
col_level = AnyField("col_level", default=
|
|
33
|
-
col_fill = AnyField("col_fill", default=
|
|
34
|
+
col_level = AnyField("col_level", default=None)
|
|
35
|
+
col_fill = AnyField("col_fill", default=None)
|
|
36
|
+
incremental_index = BoolField("incremental_index", default=False)
|
|
37
|
+
names = AnyField("names", default=None)
|
|
34
38
|
|
|
35
39
|
def __init__(self, output_types=None, **kwargs):
|
|
36
40
|
super().__init__(_output_types=output_types, **kwargs)
|
|
@@ -76,9 +80,26 @@ class DataFrameResetIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
76
80
|
else:
|
|
77
81
|
empty_df = build_empty_df(a.dtypes)
|
|
78
82
|
empty_df.index = a.index_value.to_pandas()[:0]
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
83
|
+
|
|
84
|
+
if self.names and _reset_index_has_names:
|
|
85
|
+
empty_df = empty_df.reset_index(
|
|
86
|
+
level=self.level,
|
|
87
|
+
col_level=self.col_level,
|
|
88
|
+
col_fill=self.col_fill,
|
|
89
|
+
names=self.names,
|
|
90
|
+
)
|
|
91
|
+
else:
|
|
92
|
+
empty_df = empty_df.reset_index(
|
|
93
|
+
level=self.level, col_level=self.col_level, col_fill=self.col_fill
|
|
94
|
+
)
|
|
95
|
+
if self.names:
|
|
96
|
+
names = (
|
|
97
|
+
[self.names] if not isinstance(self.names, list) else self.names
|
|
98
|
+
)
|
|
99
|
+
cols = list(empty_df.columns)
|
|
100
|
+
cols[: len(names)] = names
|
|
101
|
+
empty_df.columns = pd.Index(cols, name=empty_df.columns.name)
|
|
102
|
+
|
|
82
103
|
shape = (a.shape[0], len(empty_df.columns))
|
|
83
104
|
columns_value = parse_index(empty_df.columns, store_data=True)
|
|
84
105
|
dtypes = empty_df.dtypes
|
|
@@ -105,6 +126,8 @@ def df_reset_index(
|
|
|
105
126
|
inplace=False,
|
|
106
127
|
col_level=0,
|
|
107
128
|
col_fill="",
|
|
129
|
+
names=None,
|
|
130
|
+
incremental_index=False,
|
|
108
131
|
):
|
|
109
132
|
"""
|
|
110
133
|
Reset the index, or a level of it.
|
|
@@ -255,6 +278,8 @@ def df_reset_index(
|
|
|
255
278
|
drop=drop,
|
|
256
279
|
col_level=col_level,
|
|
257
280
|
col_fill=col_fill,
|
|
281
|
+
names=names,
|
|
282
|
+
incremental_index=incremental_index,
|
|
258
283
|
output_types=[OutputType.dataframe],
|
|
259
284
|
)
|
|
260
285
|
ret = op(df)
|
|
@@ -270,6 +295,7 @@ def series_reset_index(
|
|
|
270
295
|
drop=False,
|
|
271
296
|
name=no_default,
|
|
272
297
|
inplace=False,
|
|
298
|
+
incremental_index=False,
|
|
273
299
|
):
|
|
274
300
|
"""
|
|
275
301
|
Generate a new DataFrame or Series with the index reset.
|
|
@@ -389,6 +415,7 @@ def series_reset_index(
|
|
|
389
415
|
level=level,
|
|
390
416
|
drop=drop,
|
|
391
417
|
name=name,
|
|
418
|
+
incremental_index=incremental_index,
|
|
392
419
|
output_types=[OutputType.series if drop else OutputType.dataframe],
|
|
393
420
|
)
|
|
394
421
|
ret = op(series)
|
|
@@ -25,6 +25,7 @@ from ...serialization.serializables import (
|
|
|
25
25
|
Float64Field,
|
|
26
26
|
Int8Field,
|
|
27
27
|
Int64Field,
|
|
28
|
+
KeyField,
|
|
28
29
|
)
|
|
29
30
|
from ...tensor.random import RandomStateField
|
|
30
31
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -41,6 +42,11 @@ class DataFrameSample(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
41
42
|
axis = Int8Field("axis", default=None)
|
|
42
43
|
seed = Int64Field("seed", default=None)
|
|
43
44
|
random_state = RandomStateField("random_state", default=None)
|
|
45
|
+
always_multinomial = BoolField("always_multinomial", default=None)
|
|
46
|
+
|
|
47
|
+
# for chunks
|
|
48
|
+
# num of instances for chunks
|
|
49
|
+
chunk_samples = KeyField("chunk_samples", default=None)
|
|
44
50
|
|
|
45
51
|
def __init__(self, random_state=None, seed=None, **kw):
|
|
46
52
|
if random_state is None:
|
|
@@ -54,6 +60,8 @@ class DataFrameSample(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
54
60
|
next(it)
|
|
55
61
|
if isinstance(op.weights, ENTITY_TYPE):
|
|
56
62
|
op.weights = next(it)
|
|
63
|
+
if isinstance(op.chunk_samples, ENTITY_TYPE):
|
|
64
|
+
op.chunk_samples = next(it)
|
|
57
65
|
|
|
58
66
|
def __call__(self, df):
|
|
59
67
|
params = df.params
|