maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import get_output_types
|
|
17
|
+
from ...serialization.serializables import AnyField, BoolField, StringField
|
|
18
|
+
from ...udf import BuiltinFunction
|
|
19
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
20
|
+
from ..utils import parse_index
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DataFrameUpdate(DataFrameOperator, DataFrameOperatorMixin):
    """Operator node describing ``update`` of a DataFrame/Series from ``other``.

    The operator takes two inputs, the object being updated and ``other``,
    and produces one output whose shape, dtypes/dtype, columns/name are
    copied from the object being updated.
    """

    _op_type_ = opcodes.DATAFRAME_UPDATE

    # Options forwarded verbatim from the public ``update`` API.
    join = StringField("join", default=None)
    overwrite = BoolField("overwrite", default=None)
    # ``filter_func`` holds an optional callable (or BuiltinFunction), not a
    # boolean, so it must not be declared as a BoolField.
    filter_func = AnyField("filter_func", default=None)
    errors = StringField("errors", default=None)

    def __init__(self, output_types=None, **kwargs):
        super().__init__(_output_types=output_types, **kwargs)

    def has_custom_code(self) -> bool:
        """Tell whether a user-supplied filter function is attached.

        Returns
        -------
        bool
            ``False`` when no ``filter_func`` was given or when it is a
            ``BuiltinFunction``; ``True`` for any other filter function.
        """
        if self.filter_func is None:
            # No filter function at all means there is no user code to run.
            return False
        return not isinstance(self.filter_func, BuiltinFunction)

    def __call__(self, df_or_series, other):
        """Build the output tileable for updating ``df_or_series`` with ``other``.

        Parameters
        ----------
        df_or_series
            Object being updated; a 2-d input yields a DataFrame output,
            anything else yields a Series output.
        other
            Object supplying the new values; registered as the second input.

        Returns
        -------
        A new DataFrame or Series tileable. The caller is responsible for
        rebinding the original object's data to emulate in-place semantics.
        """
        self._output_types = get_output_types(df_or_series)

        # The index keeps the labels of the updated object, but its token is
        # derived from both inputs and every option of this operator.
        index_value = parse_index(
            df_or_series.index_value.to_pandas(),
            df_or_series,
            other,
            self.join,
            self.overwrite,
            self.filter_func,
            self.errors,
        )
        inputs = [df_or_series, other]

        if df_or_series.ndim == 2:
            return self.new_dataframe(
                inputs,
                shape=df_or_series.shape,
                dtypes=df_or_series.dtypes,
                index_value=index_value,
                columns_value=df_or_series.columns_value,
            )
        return self.new_series(
            inputs,
            shape=df_or_series.shape,
            dtype=df_or_series.dtype,
            index_value=index_value,
            name=df_or_series.name,
        )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _update(
|
|
73
|
+
df_or_series, other, join="left", overwrite=True, filter_func=None, errors="ignore"
|
|
74
|
+
):
|
|
75
|
+
op = DataFrameUpdate(
|
|
76
|
+
join=join,
|
|
77
|
+
overwrite=overwrite,
|
|
78
|
+
filter_func=filter_func,
|
|
79
|
+
errors=errors,
|
|
80
|
+
)
|
|
81
|
+
result = op(df_or_series, other)
|
|
82
|
+
df_or_series.data = result.data
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def df_update(
    df, other, join="left", overwrite=True, filter_func=None, errors="ignore"
):
    """
    Update a DataFrame in place with the non-NA values of another DataFrame.

    Rows and columns are aligned on their labels. Nothing is returned.

    Parameters
    ----------
    other : DataFrame, or object coercible into a DataFrame
        Must share at least one index/column label with the original
        DataFrame. When a Series is given, its name attribute must be set;
        the name is used as the column label to align with the original
        DataFrame.
    join : {'left'}, default 'left'
        Only left join is implemented, which keeps the index and columns
        of the original object.
    overwrite : bool, default True
        Controls what happens to non-NA values for overlapping keys:

        * True: values of the original DataFrame are overwritten
          with values from `other`.
        * False: only values that are NA in the original DataFrame
          are updated.

    filter_func : callable(1d-array) -> bool 1d-array, optional
        Allows replacing values other than NA. Should return True for
        values that are to be updated.
    errors : {'raise', 'ignore'}, default 'ignore'
        With 'raise', a ValueError is raised when the DataFrame and `other`
        both hold non-NA data in the same place.

    Returns
    -------
    None
        The calling object is changed directly.

    Raises
    ------
    ValueError
        * When `errors='raise'` and there's overlapping non-NA data.
        * When `errors` is not either `'ignore'` or `'raise'`
    NotImplementedError
        * If `join != 'left'`

    See Also
    --------
    dict.update : Similar method for dictionaries.
    DataFrame.merge : For column(s)-on-column(s) operations.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400, 500, 600]})
    >>> new_df = md.DataFrame({'B': [4, 5, 6],
    ...                        'C': [7, 8, 9]})
    >>> df.update(new_df)
    >>> df.execute()
       A  B
    0  1  4
    1  2  5
    2  3  6

    Updating never makes the DataFrame longer; only values at matching
    index/column labels are replaced.

    >>> df = md.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = md.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
    >>> df.update(new_df)
    >>> df.execute()
       A  B
    0  a  d
    1  b  e
    2  c  f

    >>> df = md.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = md.DataFrame({'B': ['d', 'f']}, index=[0, 2])
    >>> df.update(new_df)
    >>> df.execute()
       A  B
    0  a  d
    1  b  y
    2  c  f

    A Series passed as `other` must have its name attribute set.

    >>> df = md.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_column = md.Series(['d', 'e', 'f'], name='B')
    >>> df.update(new_column)
    >>> df.execute()
       A  B
    0  a  d
    1  b  e
    2  c  f

    Where `other` holds NaNs, the corresponding values of the original
    dataframe are left untouched.

    >>> df = md.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400., 500., 600.]})
    >>> new_df = md.DataFrame({'B': [4, mt.nan, 6]})
    >>> df.update(new_df)
    >>> df.execute()
       A      B
    0  1    4.0
    1  2  500.0
    2  3    6.0
    """
    # Delegate to the shared implementation, naming every option explicitly.
    return _update(
        df,
        other,
        join=join,
        overwrite=overwrite,
        filter_func=filter_func,
        errors=errors,
    )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def series_update(series, other):
    """
    Update a Series in place with values from the passed Series.

    Only non-NA values of the passed Series are used for the update, and
    the two objects are aligned on index.

    Parameters
    ----------
    other : Series, or object coercible into Series

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3])
    >>> s.update(md.Series([4, 5, 6]))
    >>> s.execute()
    0    4
    1    5
    2    6
    dtype: int64

    >>> s = md.Series(['a', 'b', 'c'])
    >>> s.update(md.Series(['d', 'e'], index=[0, 2]))
    >>> s.execute()
    0    d
    1    b
    2    e
    dtype: object

    >>> s = md.Series([1, 2, 3])
    >>> s.update(md.Series([4, 5, 6, 7, 8]))
    >>> s.execute()
    0    4
    1    5
    2    6
    dtype: int64

    Where ``other`` holds NaNs, the corresponding values of the original
    Series are left untouched.

    >>> s = md.Series([1, 2, 3])
    >>> s.update(md.Series([4, mt.nan, 6]))
    >>> s.execute()
    0    4
    1    2
    2    6
    dtype: int64

    ``other`` may also be an object that is not a Series but can be
    coerced into one

    >>> s = md.Series([1, 2, 3])
    >>> s.update([4, mt.nan, 6])
    >>> s.execute()
    0    4
    1    2
    2    6
    dtype: int64

    >>> s = md.Series([1, 2, 3])
    >>> s.update({1: 9})
    >>> s.execute()
    0    1
    1    9
    2    3
    dtype: int64
    """
    # All remaining options keep the defaults of the shared implementation.
    return _update(df_or_series=series, other=other)
|
|
@@ -21,10 +21,12 @@ from .check_monotonic import (
|
|
|
21
21
|
is_monotonic_decreasing,
|
|
22
22
|
is_monotonic_increasing,
|
|
23
23
|
)
|
|
24
|
+
from .check_unique import index_is_unique, series_is_unique
|
|
25
|
+
from .clip import clip
|
|
24
26
|
from .cut import cut
|
|
25
27
|
from .describe import describe
|
|
26
28
|
from .diff import df_diff, series_diff
|
|
27
|
-
from .drop import df_drop, df_pop, index_drop, series_drop
|
|
29
|
+
from .drop import df_drop, df_pop, index_drop, series_drop, series_pop
|
|
28
30
|
from .drop_duplicates import (
|
|
29
31
|
df_drop_duplicates,
|
|
30
32
|
index_drop_duplicates,
|
|
@@ -33,21 +35,20 @@ from .drop_duplicates import (
|
|
|
33
35
|
from .duplicated import df_duplicated, index_duplicated, series_duplicated
|
|
34
36
|
from .eval import df_eval, df_query
|
|
35
37
|
from .explode import df_explode, series_explode
|
|
38
|
+
from .infer_dtypes import convert_dtypes, infer_objects
|
|
36
39
|
from .isin import df_isin, series_isin
|
|
37
|
-
from .map import index_map, series_map
|
|
38
|
-
from .melt import melt
|
|
40
|
+
from .map import df_map, index_map, series_map
|
|
39
41
|
from .memory_usage import df_memory_usage, index_memory_usage, series_memory_usage
|
|
40
42
|
from .pct_change import pct_change
|
|
41
|
-
from .pivot import pivot
|
|
42
|
-
from .pivot_table import pivot_table
|
|
43
43
|
from .qcut import qcut
|
|
44
44
|
from .rechunk import rechunk
|
|
45
|
+
from .repeat import index_repeat, series_repeat
|
|
45
46
|
from .select_dtypes import select_dtypes
|
|
46
47
|
from .shift import shift, tshift
|
|
47
|
-
from .stack import stack
|
|
48
48
|
from .transform import df_transform, series_transform
|
|
49
49
|
from .transpose import transpose
|
|
50
|
-
from .
|
|
50
|
+
from .valid_index import first_valid_index, last_valid_index
|
|
51
|
+
from .value_counts import df_value_counts, value_counts
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
def _install():
|
|
@@ -55,7 +56,10 @@ def _install():
|
|
|
55
56
|
|
|
56
57
|
for t in DATAFRAME_TYPE:
|
|
57
58
|
setattr(t, "apply", df_apply)
|
|
59
|
+
setattr(t, "applymap", df_map)
|
|
58
60
|
setattr(t, "astype", astype)
|
|
61
|
+
setattr(t, "clip", clip)
|
|
62
|
+
setattr(t, "convert_dtypes", convert_dtypes)
|
|
59
63
|
setattr(t, "describe", describe)
|
|
60
64
|
setattr(
|
|
61
65
|
t, "__delitem__", lambda df, items: df_drop(df, items, axis=1, inplace=True)
|
|
@@ -66,41 +70,50 @@ def _install():
|
|
|
66
70
|
setattr(t, "drop", df_drop)
|
|
67
71
|
setattr(t, "eval", df_eval)
|
|
68
72
|
setattr(t, "explode", df_explode)
|
|
73
|
+
setattr(t, "first_valid_index", first_valid_index)
|
|
74
|
+
setattr(t, "infer_objects", infer_objects)
|
|
69
75
|
setattr(t, "isin", df_isin)
|
|
70
|
-
setattr(t, "
|
|
76
|
+
setattr(t, "last_valid_index", last_valid_index)
|
|
77
|
+
setattr(t, "map", df_map)
|
|
71
78
|
setattr(t, "memory_usage", df_memory_usage)
|
|
72
79
|
setattr(t, "pct_change", pct_change)
|
|
73
|
-
setattr(t, "pivot", pivot)
|
|
74
|
-
setattr(t, "pivot_table", pivot_table)
|
|
75
80
|
setattr(t, "pop", df_pop)
|
|
76
81
|
setattr(t, "query", df_query)
|
|
77
82
|
setattr(t, "rechunk", rechunk)
|
|
78
83
|
setattr(t, "select_dtypes", select_dtypes)
|
|
79
84
|
setattr(t, "shift", shift)
|
|
80
|
-
setattr(t, "stack", stack)
|
|
81
85
|
setattr(t, "transform", df_transform)
|
|
82
86
|
setattr(t, "transpose", transpose)
|
|
83
87
|
setattr(t, "tshift", tshift)
|
|
88
|
+
setattr(t, "value_counts", df_value_counts)
|
|
84
89
|
|
|
85
90
|
for t in SERIES_TYPE:
|
|
86
91
|
setattr(t, "apply", series_apply)
|
|
87
92
|
setattr(t, "astype", astype)
|
|
88
93
|
setattr(t, "case_when", case_when)
|
|
89
94
|
setattr(t, "check_monotonic", check_monotonic)
|
|
95
|
+
setattr(t, "clip", clip)
|
|
96
|
+
setattr(t, "convert_dtypes", convert_dtypes)
|
|
90
97
|
setattr(t, "describe", describe)
|
|
91
98
|
setattr(t, "diff", series_diff)
|
|
92
99
|
setattr(t, "drop", series_drop)
|
|
93
100
|
setattr(t, "drop_duplicates", series_drop_duplicates)
|
|
94
101
|
setattr(t, "duplicated", series_duplicated)
|
|
95
102
|
setattr(t, "explode", series_explode)
|
|
103
|
+
setattr(t, "first_valid_index", first_valid_index)
|
|
104
|
+
setattr(t, "infer_objects", infer_objects)
|
|
96
105
|
setattr(t, "is_monotonic", property(fget=is_monotonic))
|
|
97
106
|
setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
|
|
98
107
|
setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
|
|
99
108
|
setattr(t, "isin", series_isin)
|
|
109
|
+
setattr(t, "is_unique", property(fget=series_is_unique))
|
|
110
|
+
setattr(t, "last_valid_index", last_valid_index)
|
|
100
111
|
setattr(t, "map", series_map)
|
|
101
112
|
setattr(t, "memory_usage", series_memory_usage)
|
|
102
113
|
setattr(t, "pct_change", pct_change)
|
|
114
|
+
setattr(t, "pop", series_pop)
|
|
103
115
|
setattr(t, "rechunk", rechunk)
|
|
116
|
+
setattr(t, "repeat", series_repeat)
|
|
104
117
|
setattr(t, "shift", shift)
|
|
105
118
|
setattr(t, "transform", series_transform)
|
|
106
119
|
setattr(t, "tshift", tshift)
|
|
@@ -109,15 +122,19 @@ def _install():
|
|
|
109
122
|
for t in INDEX_TYPE:
|
|
110
123
|
setattr(t, "astype", index_astype)
|
|
111
124
|
setattr(t, "check_monotonic", check_monotonic)
|
|
125
|
+
setattr(t, "clip", clip)
|
|
112
126
|
setattr(t, "drop", index_drop)
|
|
113
127
|
setattr(t, "drop_duplicates", index_drop_duplicates)
|
|
114
128
|
setattr(t, "duplicated", index_duplicated)
|
|
129
|
+
setattr(t, "has_duplicates", property(fget=lambda x: not index_is_unique(x)))
|
|
115
130
|
setattr(t, "is_monotonic", property(fget=is_monotonic))
|
|
116
131
|
setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
|
|
117
132
|
setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
|
|
133
|
+
setattr(t, "is_unique", property(fget=index_is_unique))
|
|
118
134
|
setattr(t, "map", index_map)
|
|
119
135
|
setattr(t, "memory_usage", index_memory_usage)
|
|
120
136
|
setattr(t, "rechunk", rechunk)
|
|
137
|
+
setattr(t, "repeat", index_repeat)
|
|
121
138
|
setattr(t, "value_counts", value_counts)
|
|
122
139
|
|
|
123
140
|
|
|
@@ -17,28 +17,34 @@ from typing import List
|
|
|
17
17
|
import pandas as pd
|
|
18
18
|
from pandas.api.types import is_list_like
|
|
19
19
|
|
|
20
|
-
from ...core import EntityData
|
|
20
|
+
from ...core import ENTITY_TYPE, EntityData
|
|
21
21
|
from ...core.operator import MapReduceOperator
|
|
22
22
|
from ...serialization.serializables import AnyField, KeyField, StringField
|
|
23
23
|
from ..operators import DataFrameOperatorMixin
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class
|
|
26
|
+
class BaseDuplicateOp(MapReduceOperator, DataFrameOperatorMixin):
|
|
27
|
+
_legacy_name = "DuplicateOperand" # since 2.2.0
|
|
28
|
+
|
|
27
29
|
input = KeyField("input")
|
|
28
30
|
subset = AnyField("subset", default=None)
|
|
29
31
|
keep = AnyField("keep", default="first")
|
|
30
32
|
method = StringField("method", default=None)
|
|
31
33
|
|
|
32
34
|
@classmethod
|
|
33
|
-
def _set_inputs(cls, op: "
|
|
35
|
+
def _set_inputs(cls, op: "BaseDuplicateOp", inputs: List[EntityData]):
|
|
34
36
|
super()._set_inputs(op, inputs)
|
|
35
37
|
op.input = op._inputs[0]
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
# keep for import compatibility
|
|
41
|
+
DuplicateOperand = BaseDuplicateOp
|
|
42
|
+
|
|
43
|
+
|
|
38
44
|
def validate_subset(df, subset):
|
|
39
45
|
if subset is None:
|
|
40
46
|
return subset
|
|
41
|
-
if not is_list_like(subset):
|
|
47
|
+
if not is_list_like(subset) or isinstance(subset, ENTITY_TYPE):
|
|
42
48
|
subset = [subset]
|
|
43
49
|
else:
|
|
44
50
|
subset = list(subset)
|
maxframe/dataframe/misc/apply.py
CHANGED
|
@@ -64,7 +64,7 @@ class DataFrameApply(
|
|
|
64
64
|
DataFrameOperator, DataFrameOperatorMixin, ApplyOperandLogicKeyGeneratorMixin
|
|
65
65
|
):
|
|
66
66
|
_op_type_ = opcodes.APPLY
|
|
67
|
-
_legacy_name = "ApplyOperator"
|
|
67
|
+
_legacy_name = "ApplyOperator" # since v2.0.0
|
|
68
68
|
|
|
69
69
|
func = FunctionField("func")
|
|
70
70
|
axis = AnyField("axis", default=0)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ...udf import builtin_function
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@builtin_function
def _tailor_unique(series_or_idx):
    """
    Pass a chunk through only when it holds no duplicated values.

    Parameters
    ----------
    series_or_idx : pandas.Series or index-like
        Object exposing ``is_unique``; a Series is sliced positionally
        with ``iloc``, anything else with plain ``[]``.

    Returns
    -------
    same type as ``series_or_idx``
        The input itself when ``is_unique`` is true, otherwise an empty
        slice of it.
    """
    if series_or_idx.is_unique:
        return series_or_idx

    # duplicates present: return a zero-length slice of the same object
    nothing = slice(0)
    if isinstance(series_or_idx, pd.Series):
        return series_or_idx.iloc[nothing]
    return series_or_idx[nothing]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _is_unique(series_or_index):
    """
    Build a lazy check that all values of ``series_or_index`` are unique.

    Every chunk is first passed through ``_tailor_unique``, which replaces
    a chunk that contains duplicates with an empty slice.  The number of
    distinct values that remain is then compared with the length of the
    input; the two are equal only when no value is repeated.

    Missing values are counted as ordinary values (``dropna=False``) so
    that the result agrees with pandas ``is_unique``: ``[1, NaN]`` is
    unique while ``[NaN, NaN]`` is not.  With the default ``dropna=True``
    any input containing a NaN would be reported as non-unique.
    NOTE(review): relies on ``nunique`` accepting ``dropna`` as in
    pandas -- confirm against the reduction implementation.

    Parameters
    ----------
    series_or_index
        Object providing the ``mf.apply_chunk`` accessor and ``dtype``.

    Returns
    -------
    The result of ``mt.equal`` on the distinct count and the length.
    """
    from ... import tensor as mt

    distinct_count = series_or_index.mf.apply_chunk(
        _tailor_unique, dtype=series_or_index.dtype
    ).nunique(dropna=False)
    total_count = mt.shape(series_or_index)[0]
    return mt.equal(distinct_count, total_count)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def series_is_unique(series):
    """
    Tell whether every value of the Series occurs exactly once.

    Returns
    -------
    bool
        Lazy result; call ``execute()`` to obtain the value.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3])
    >>> s.is_unique.execute()
    True

    >>> s = md.Series([1, 2, 3, 1])
    >>> s.is_unique.execute()
    False
    """
    uniqueness = _is_unique(series)
    return uniqueness
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def index_is_unique(index):
    """
    Tell whether every value of the index occurs exactly once.

    The index is converted with ``to_series()`` and the ``is_unique``
    attribute of the resulting Series is returned.

    Returns
    -------
    bool

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> index = md.Index([1, 2, 3])
    >>> index.is_unique.execute()
    True

    >>> index = md.Index([1, 2, 3, 1])
    >>> index.is_unique.execute()
    False
    """
    as_series = index.to_series()
    return as_series.is_unique
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from pandas.api.types import is_list_like
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import ENTITY_TYPE, get_output_types
|
|
21
|
+
from ...serialization.serializables import Int8Field, TupleField
|
|
22
|
+
from ...typing_ import EntityType
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
from ..utils import validate_axis
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameClip(DataFrameOperatorMixin, DataFrameOperator):
    """
    Operator describing a ``clip`` call on a DataFrame, Series or Index.
    """

    _op_type_ = opcodes.CLIP

    # (lower, upper) thresholds as passed by ``clip``; an item may be a
    # scalar, a list-like, an entity or None
    bounds = TupleField("bounds", default=None)
    # validated axis, or None when no axis applies
    axis = Int8Field("axis", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @classmethod
    def _set_inputs(cls, op: "DataFrameClip", inputs: List[EntityType]):
        """
        Re-bind entity bounds to the operator's current inputs.

        ``inputs[0]`` is the clipped object; the remaining inputs are the
        entity bounds in the order they appear in ``op.bounds``.
        """
        super()._set_inputs(op, inputs)

        bounds = op.bounds
        if bounds is None:
            # field left at its default: there is nothing to re-bind
            return

        if len(inputs) > 1:
            extra_inputs = iter(inputs[1:])
            # list comprehension (not a generator) so that an exhausted
            # iterator still surfaces as StopIteration
            bounds = [
                next(extra_inputs) if isinstance(bound, ENTITY_TYPE) else bound
                for bound in bounds
            ]
        op.bounds = tuple(bounds)

    def __call__(self, df):
        """
        Create the output tileable for ``df``.

        The output takes its output types and params from ``df``; entity
        bounds are appended to the inputs after ``df``.
        """
        self._output_types = get_output_types(df)
        # treat an unset ``bounds`` like a pair of absent thresholds
        bound_inputs = [
            bd for bd in (self.bounds or ()) if isinstance(bd, ENTITY_TYPE)
        ]
        return self.new_tileable([df] + bound_inputs, **df.params)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def clip(df, lower=None, upper=None, *, axis=None, inplace=False):
    """
    Trim values at input threshold(s).

    Assigns values outside boundary to boundary values. Thresholds
    can be singular values or array like, and in the latter case
    the clipping is performed element-wise in the specified axis.

    Parameters
    ----------
    lower : float or array-like, default None
        Minimum threshold value. All values below this
        threshold will be set to it. If None, no lower clipping is performed.
    upper : float or array-like, default None
        Maximum threshold value. All values above this
        threshold will be set to it. If None, no upper clipping is performed.
    axis : int or str axis name, optional
        Align object with lower and upper along the given axis.
        Required for DataFrames when ``lower`` or ``upper`` is
        array-like; defaults to 0 for one-dimensional objects.
    inplace : bool, default False
        Whether to perform the operation in place on the data.

    Returns
    -------
    Series or DataFrame or None
        Same type as calling object with the values outside the
        clip boundaries replaced or None if ``inplace=True``.

    Raises
    ------
    ValueError
        If ``lower`` or ``upper`` is array-like, the calling object is
        not one-dimensional and ``axis`` is not given.

    See Also
    --------
    Series.clip : Trim values at input threshold in series.
    DataFrame.clip : Trim values at input threshold in dataframe.
    numpy.clip : Clip (limit) the values in an array.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
    >>> df = md.DataFrame(data)
    >>> df.execute()
       col_0  col_1
    0      9     -2
    1     -3     -7
    2      0      6
    3     -1      8
    4      5     -5

    Clips per column using lower and upper thresholds:

    >>> df.clip(lower=-4, upper=7).execute()
       col_0  col_1
    0      7     -2
    1     -3     -4
    2      0      6
    3     -1      7
    4      5     -4

    Clips using specific lower and upper thresholds per column element:

    >>> t = md.Series([2, -4, -1, 6, 3])
    >>> t.execute()
    0    2
    1   -4
    2   -1
    3    6
    4    3
    dtype: int64

    >>> df.clip(lower=t, upper=t + 4, axis=0).execute()
       col_0  col_1
    0      6      2
    1     -3     -4
    2      0      3
    3      6      8
    4      5      3
    """
    if axis is not None:
        axis = validate_axis(axis, df)
    elif any(isinstance(x, ENTITY_TYPE) or is_list_like(x) for x in (lower, upper)):
        # array-like thresholds need an axis to align with; only
        # one-dimensional objects have an unambiguous default
        if df.ndim != 1:
            raise ValueError("Must specify axis=0 or 1")
        axis = 0

    op = DataFrameClip(bounds=(lower, upper), axis=axis)
    out = op(df)
    if inplace:
        # rebind the caller's object to the clipped data and, as
        # documented, return nothing
        df.data = out.data
        return None
    return out
|