maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/dataframe/misc/map.py
CHANGED
|
@@ -21,8 +21,8 @@ import pandas as pd
|
|
|
21
21
|
from ... import opcodes
|
|
22
22
|
from ...core import EntityData, OutputType
|
|
23
23
|
from ...serialization.serializables import AnyField, KeyField, StringField
|
|
24
|
-
from ...udf import BuiltinFunction, MarkedFunction
|
|
25
|
-
from ...utils import quiet_stdio
|
|
24
|
+
from ...udf import BuiltinFunction, MarkedFunction, ODPSFunction
|
|
25
|
+
from ...utils import make_dtype, quiet_stdio
|
|
26
26
|
from ..core import SERIES_TYPE
|
|
27
27
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
28
28
|
from ..utils import build_series, copy_func_scheduling_hints
|
|
@@ -40,6 +40,7 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
40
40
|
if not self.output_types:
|
|
41
41
|
self.output_types = [OutputType.series]
|
|
42
42
|
if hasattr(self, "arg"):
|
|
43
|
+
self.arg = ODPSFunction.wrap(self.arg)
|
|
43
44
|
copy_func_scheduling_hints(self.arg, self)
|
|
44
45
|
|
|
45
46
|
@classmethod
|
|
@@ -55,25 +56,34 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
55
56
|
) and not isinstance(self.arg, BuiltinFunction)
|
|
56
57
|
|
|
57
58
|
def __call__(self, series, dtype, skip_infer=False):
|
|
58
|
-
if dtype is
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
if dtype is not None:
|
|
60
|
+
dtype = make_dtype(dtype)
|
|
61
|
+
else:
|
|
62
|
+
# obtain dtype from existing hints
|
|
63
|
+
if isinstance(self.arg, ODPSFunction):
|
|
64
|
+
if self.arg.result_dtype is not None:
|
|
65
|
+
dtype = self.arg.result_dtype
|
|
66
|
+
elif callable(self.arg):
|
|
61
67
|
# arg is a function, try to inspect the signature
|
|
62
68
|
sig = inspect.signature(self.arg)
|
|
63
69
|
return_type = sig.return_annotation
|
|
64
70
|
if return_type is not inspect._empty:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
71
|
+
dtype = np.dtype(return_type)
|
|
72
|
+
|
|
73
|
+
err_prefix = None
|
|
74
|
+
if dtype is None and not skip_infer:
|
|
75
|
+
inferred_dtype = None
|
|
76
|
+
if callable(self.arg):
|
|
77
|
+
try:
|
|
78
|
+
with quiet_stdio():
|
|
79
|
+
# try to infer dtype by calling the function
|
|
80
|
+
inferred_dtype = (
|
|
81
|
+
build_series(series)
|
|
82
|
+
.map(self.arg, na_action=self.na_action)
|
|
83
|
+
.dtype
|
|
84
|
+
)
|
|
85
|
+
except: # noqa: E722 # nosec
|
|
86
|
+
pass
|
|
77
87
|
else:
|
|
78
88
|
if isinstance(self.arg, MutableMapping):
|
|
79
89
|
inferred_dtype = pd.Series(self.arg).dtype
|
|
@@ -86,13 +96,16 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
86
96
|
# but for int, due to the nan which may occur,
|
|
87
97
|
# we cannot infer the dtype
|
|
88
98
|
dtype = inferred_dtype
|
|
99
|
+
else:
|
|
100
|
+
err_prefix = "int type may not be exact"
|
|
89
101
|
else:
|
|
90
102
|
dtype = inferred_dtype
|
|
91
103
|
|
|
92
104
|
if dtype is None:
|
|
93
105
|
if not skip_infer:
|
|
106
|
+
err_prefix = err_prefix or "cannot infer dtype"
|
|
94
107
|
raise ValueError(
|
|
95
|
-
"
|
|
108
|
+
f"{err_prefix}, it needs to be specified manually for `map`"
|
|
96
109
|
)
|
|
97
110
|
else:
|
|
98
111
|
dtype = np.int64 if dtype is int else dtype
|
|
@@ -251,3 +264,97 @@ def index_map(
|
|
|
251
264
|
"""
|
|
252
265
|
op = DataFrameMap(arg=mapper, na_action=na_action, memory_scale=memory_scale)
|
|
253
266
|
return op(idx, dtype=dtype, skip_infer=skip_infer)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def df_map(
|
|
270
|
+
df, func, na_action=None, dtypes=None, dtype=None, skip_infer=False, **kwargs
|
|
271
|
+
):
|
|
272
|
+
"""
|
|
273
|
+
Apply a function to a DataFrame elementwise.
|
|
274
|
+
|
|
275
|
+
This method applies a function that accepts and returns a scalar
|
|
276
|
+
to every element of a DataFrame.
|
|
277
|
+
|
|
278
|
+
Parameters
|
|
279
|
+
----------
|
|
280
|
+
func : callable
|
|
281
|
+
Python function, returns a single value from a single value.
|
|
282
|
+
na_action : {None, 'ignore'}, default None
|
|
283
|
+
If 'ignore', propagate NaN values, without passing them to func.
|
|
284
|
+
dtypes : Series, default None
|
|
285
|
+
Specify dtypes of returned DataFrames.
|
|
286
|
+
dtype : np.dtype, default None
|
|
287
|
+
Specify dtypes of all columns of returned DataFrames, only
|
|
288
|
+
effective when dtypes is not specified.
|
|
289
|
+
skip_infer: bool, default False
|
|
290
|
+
Whether to skip inferring dtypes when neither dtypes nor dtype is specified.
|
|
291
|
+
**kwargs
|
|
292
|
+
Additional keyword arguments to pass as keyword arguments to
|
|
293
|
+
`func`.
|
|
294
|
+
|
|
295
|
+
Returns
|
|
296
|
+
-------
|
|
297
|
+
DataFrame
|
|
298
|
+
Transformed DataFrame.
|
|
299
|
+
|
|
300
|
+
See Also
|
|
301
|
+
--------
|
|
302
|
+
DataFrame.apply : Apply a function along input axis of DataFrame.
|
|
303
|
+
DataFrame.replace : Replace values given in `to_replace` with `value`.
|
|
304
|
+
Series.map : Apply a function elementwise on a Series.
|
|
305
|
+
|
|
306
|
+
Examples
|
|
307
|
+
--------
|
|
308
|
+
>>> import maxframe.dataframe as md
|
|
309
|
+
>>> df = md.DataFrame([[1, 2.12], [3.356, 4.567]])
|
|
310
|
+
>>> df.execute()
|
|
311
|
+
0 1
|
|
312
|
+
0 1.000 2.120
|
|
313
|
+
1 3.356 4.567
|
|
314
|
+
|
|
315
|
+
>>> df.map(lambda x: len(str(x))).execute()
|
|
316
|
+
0 1
|
|
317
|
+
0 3 4
|
|
318
|
+
1 5 5
|
|
319
|
+
|
|
320
|
+
Like Series.map, NA values can be ignored:
|
|
321
|
+
|
|
322
|
+
>>> df_copy = df.copy()
|
|
323
|
+
>>> df_copy.iloc[0, 0] = md.NA
|
|
324
|
+
>>> df_copy.map(lambda x: len(str(x)), na_action='ignore').execute()
|
|
325
|
+
0 1
|
|
326
|
+
0 NaN 4
|
|
327
|
+
1 5.0 5
|
|
328
|
+
|
|
329
|
+
It is also possible to use `map` with functions that are not
|
|
330
|
+
`lambda` functions:
|
|
331
|
+
|
|
332
|
+
>>> df.map(round, ndigits=1).execute()
|
|
333
|
+
0 1
|
|
334
|
+
0 1.0 2.1
|
|
335
|
+
1 3.4 4.6
|
|
336
|
+
|
|
337
|
+
Note that a vectorized version of `func` often exists, which will
|
|
338
|
+
be much faster. You could square each number elementwise.
|
|
339
|
+
|
|
340
|
+
>>> df.map(lambda x: x**2).execute()
|
|
341
|
+
0 1
|
|
342
|
+
0 1.000000 4.494400
|
|
343
|
+
1 11.262736 20.857489
|
|
344
|
+
|
|
345
|
+
But it's better to avoid map in that case.
|
|
346
|
+
|
|
347
|
+
>>> (df ** 2).execute()
|
|
348
|
+
0 1
|
|
349
|
+
0 1.000000 4.494400
|
|
350
|
+
1 11.262736 20.857489
|
|
351
|
+
"""
|
|
352
|
+
if dtypes is None and dtype is not None:
|
|
353
|
+
dtypes = pd.Series([dtype] * df.shape[1], index=df.dtypes.index)
|
|
354
|
+
|
|
355
|
+
def _wrapper(row):
|
|
356
|
+
return row.map(func, na_action=na_action, **kwargs)
|
|
357
|
+
|
|
358
|
+
return df.apply(
|
|
359
|
+
_wrapper, axis=1, dtypes=dtypes, skip_infer=skip_infer, elementwise=True
|
|
360
|
+
)
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
from pandas.api.types import is_list_like
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import ENTITY_TYPE, EntityData, get_output_types
|
|
22
|
+
from ...serialization.serializables import AnyField
|
|
23
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
|
+
from ..utils import parse_index, validate_axis
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataFrameRepeat(DataFrameOperator, DataFrameOperatorMixin):
|
|
28
|
+
_op_type_ = opcodes.REPEAT
|
|
29
|
+
|
|
30
|
+
repeats = AnyField("repeats", default=None)
|
|
31
|
+
|
|
32
|
+
def __init__(self, output_types=None, **kw):
|
|
33
|
+
super().__init__(_output_types=output_types, **kw)
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def _set_inputs(cls, op: "DataFrameRepeat", inputs: List[EntityData]):
|
|
37
|
+
super()._set_inputs(op, inputs)
|
|
38
|
+
if isinstance(op.repeats, ENTITY_TYPE):
|
|
39
|
+
op.repeats = inputs[1]
|
|
40
|
+
|
|
41
|
+
def __call__(self, obj, repeats):
|
|
42
|
+
self._output_types = get_output_types(obj)
|
|
43
|
+
test_index = obj.index_value.to_pandas()[:0]
|
|
44
|
+
|
|
45
|
+
params = obj.params
|
|
46
|
+
params["index_value"] = parse_index(test_index, obj, type(self), self.repeats)
|
|
47
|
+
params["shape"] = (np.nan,)
|
|
48
|
+
|
|
49
|
+
inputs = [obj]
|
|
50
|
+
if isinstance(repeats, ENTITY_TYPE):
|
|
51
|
+
inputs.append(repeats)
|
|
52
|
+
return self.new_tileable(inputs, **params)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _repeat(obj, repeats, axis=None):
|
|
56
|
+
from ...tensor.datasource import tensor
|
|
57
|
+
|
|
58
|
+
axis = validate_axis(axis or 0, obj)
|
|
59
|
+
if is_list_like(repeats):
|
|
60
|
+
repeats = tensor(repeats)
|
|
61
|
+
op = DataFrameRepeat(repeats=repeats, axis=axis)
|
|
62
|
+
return op(obj, repeats)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def series_repeat(obj, repeats, axis=None):
    """
    Build a Series in which every element is repeated consecutively.

    Each element of the source Series appears in the result as many times
    in a row as ``repeats`` requests.

    Parameters
    ----------
    repeats : int or array of ints
        Number of repetitions for each element. Must be a non-negative
        integer; a value of 0 produces an empty Series.
    axis : None
        Must be ``None``. It has no effect and exists only for
        compatibility with numpy.

    Returns
    -------
    Series
        A new Series holding the repeated elements.

    See Also
    --------
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(['a', 'b', 'c'])
    >>> s.execute()
    0    a
    1    b
    2    c
    dtype: object
    >>> s.repeat(2).execute()
    0    a
    0    a
    1    b
    1    b
    2    c
    2    c
    dtype: object
    >>> s.repeat([1, 2, 3]).execute()
    0    a
    1    b
    1    b
    2    c
    2    c
    2    c
    dtype: object
    """
    # All of the work happens in the helper shared with ``index_repeat``.
    return _repeat(obj, repeats, axis)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def index_repeat(obj, repeats, axis=None):
    """
    Build an Index in which every element is repeated consecutively.

    Each element of the source Index appears in the result as many times
    in a row as ``repeats`` requests.

    Parameters
    ----------
    repeats : int or array of ints
        Number of repetitions for each element. Must be a non-negative
        integer; a value of 0 produces an empty Index.
    axis : None
        Must be ``None``. It has no effect and exists only for
        compatibility with numpy.

    Returns
    -------
    repeated_index : Index
        A new Index holding the repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> idx = md.Index(['a', 'b', 'c'])
    >>> idx.execute()
    Index(['a', 'b', 'c'], dtype='object')
    >>> idx.repeat(2).execute()
    Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
    >>> idx.repeat([1, 2, 3]).execute()
    Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
    """
    # All of the work happens in the helper shared with ``series_repeat``.
    return _repeat(obj, repeats, axis)
|
|
@@ -16,11 +16,13 @@ import numpy as np
|
|
|
16
16
|
import pandas as pd
|
|
17
17
|
import pytest
|
|
18
18
|
|
|
19
|
+
from maxframe import options
|
|
20
|
+
|
|
19
21
|
from .... import opcodes
|
|
20
22
|
from ....core import OutputType
|
|
21
23
|
from ....dataframe import DataFrame
|
|
22
24
|
from ....tensor.core import TENSOR_TYPE
|
|
23
|
-
from ....udf import with_running_options
|
|
25
|
+
from ....udf import ODPSFunction, with_running_options
|
|
24
26
|
from ... import eval as maxframe_eval
|
|
25
27
|
from ... import get_dummies, to_numeric
|
|
26
28
|
from ...arithmetic import DataFrameGreater, DataFrameLess
|
|
@@ -124,6 +126,7 @@ def test_dataframe_apply():
|
|
|
124
126
|
dtypes=pd.Series([np.dtype(float)] * 3),
|
|
125
127
|
)
|
|
126
128
|
assert df2.ndim == 2
|
|
129
|
+
assert df2.op.expect_resources == options.function.default_running_options
|
|
127
130
|
|
|
128
131
|
|
|
129
132
|
def test_series_apply():
|
|
@@ -180,6 +183,8 @@ def test_series_apply():
|
|
|
180
183
|
pd.Series, output_type="dataframe", dtypes=dtypes, index=pd.RangeIndex(2)
|
|
181
184
|
)
|
|
182
185
|
assert r.ndim == 2
|
|
186
|
+
assert r.op.expect_resources == options.function.default_running_options
|
|
187
|
+
|
|
183
188
|
pd.testing.assert_series_equal(r.dtypes, dtypes)
|
|
184
189
|
assert r.shape == (2, 3)
|
|
185
190
|
|
|
@@ -305,6 +310,7 @@ def test_transform():
|
|
|
305
310
|
assert r.shape == series.shape
|
|
306
311
|
assert r.op._op_type_ == opcodes.TRANSFORM
|
|
307
312
|
assert r.op.output_types[0] == OutputType.series
|
|
313
|
+
assert r.op.expect_resources == options.function.default_running_options
|
|
308
314
|
|
|
309
315
|
|
|
310
316
|
def test_series_isin():
|
|
@@ -563,12 +569,17 @@ def test_apply():
|
|
|
563
569
|
)
|
|
564
570
|
assert apply_df.shape == (3, 2)
|
|
565
571
|
assert apply_df.op.expect_engine == "SPE"
|
|
566
|
-
assert apply_df.op.expect_resources == {
|
|
572
|
+
assert apply_df.op.expect_resources == {
|
|
573
|
+
"cpu": 4,
|
|
574
|
+
"memory": "40GB",
|
|
575
|
+
"gpu": 0,
|
|
576
|
+
"gu_quota": None,
|
|
577
|
+
}
|
|
567
578
|
|
|
568
579
|
|
|
569
580
|
def test_pivot_table():
|
|
570
581
|
from ...groupby.aggregation import DataFrameGroupByAgg
|
|
571
|
-
from ...
|
|
582
|
+
from ...reshape.pivot_table import DataFramePivotTable
|
|
572
583
|
|
|
573
584
|
raw = pd.DataFrame(
|
|
574
585
|
{
|
|
@@ -602,3 +613,37 @@ def test_pivot_table():
|
|
|
602
613
|
t = df.pivot_table(index=["A", "B"], columns="C", aggfunc="sum")
|
|
603
614
|
assert isinstance(t.op, DataFramePivotTable)
|
|
604
615
|
assert t.shape == (np.nan, np.nan)
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def test_map_with_functions():
    """Check dtype handling of ``Series.map`` for several kinds of callables."""
    pd_series = pd.Series([1, 2, 3], name="s_name")
    series = from_pandas_series(pd_series, chunk_size=2)
    float64 = np.dtype("float64")

    # inferred type may not be exact
    def passthrough(val):
        return val

    with pytest.raises(ValueError, match="int type"):
        series.map(passthrough)
    explicit_result = series.map(passthrough, dtype="float64", skip_infer=True)
    assert explicit_result.dtype == float64

    # test when type infer is valid
    def to_float(val):
        return val * 1.0

    inferred_result = series.map(to_float)
    assert inferred_result.dtype == float64

    # test function with type annotations
    def annotated(val) -> int:
        return val

    annotated_result = series.map(annotated)
    assert annotated_result.dtype == np.dtype("int64")

    # test odps function
    udf = ODPSFunction("test_odps_udf", dtype=np.float64)
    udf_result = series.map(udf)
    assert isinstance(udf_result.op.arg, ODPSFunction)
    assert udf_result.dtype == float64
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
import pandas as pd
|
|
17
17
|
|
|
18
|
+
from ... import opcodes
|
|
18
19
|
from ...core import ENTITY_TYPE, OutputType
|
|
19
20
|
from ...serialization.serializables import StringField
|
|
20
21
|
from ...tensor import tensor as astensor
|
|
@@ -23,6 +24,8 @@ from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class DataFrameToNumeric(DataFrameOperator, DataFrameOperatorMixin):
|
|
27
|
+
_op_type_ = opcodes.TO_NUMERIC
|
|
28
|
+
|
|
26
29
|
errors = StringField("errors")
|
|
27
30
|
downcast = StringField("downcast")
|
|
28
31
|
|
|
@@ -38,8 +38,9 @@ from ..utils import (
|
|
|
38
38
|
_with_convert_dtype = pd_release_version < (1, 2, 0)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
class
|
|
41
|
+
class DataFrameTransform(DataFrameOperator, DataFrameOperatorMixin):
|
|
42
42
|
_op_type_ = opcodes.TRANSFORM
|
|
43
|
+
_legacy_name = "TransformOperator"
|
|
43
44
|
|
|
44
45
|
func = AnyField("func", default=None)
|
|
45
46
|
axis = AnyField("axis", default=None)
|
|
@@ -141,13 +142,17 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
141
142
|
|
|
142
143
|
@classmethod
|
|
143
144
|
def estimate_size(
|
|
144
|
-
cls, ctx: MutableMapping[str, Union[int, float]], op: "
|
|
145
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameTransform"
|
|
145
146
|
) -> None:
|
|
146
147
|
if isinstance(op.func, MarkedFunction):
|
|
147
148
|
ctx[op.outputs[0].key] = float("inf")
|
|
148
149
|
super().estimate_size(ctx, op)
|
|
149
150
|
|
|
150
151
|
|
|
152
|
+
# keep for import compatibility
|
|
153
|
+
TransformOperator = DataFrameTransform
|
|
154
|
+
|
|
155
|
+
|
|
151
156
|
def get_packed_funcs(df, output_type, func, *args, **kwds) -> Any:
|
|
152
157
|
stub_df = _build_stub_pandas_obj(df, output_type)
|
|
153
158
|
n_args = copy_if_possible(args)
|
|
@@ -235,7 +240,7 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
|
|
|
235
240
|
"""
|
|
236
241
|
call_agg = kwargs.pop("_call_agg", False)
|
|
237
242
|
func = get_packed_funcs(df, OutputType.dataframe, func, *args, **kwargs)
|
|
238
|
-
op =
|
|
243
|
+
op = DataFrameTransform(
|
|
239
244
|
func=func,
|
|
240
245
|
axis=axis,
|
|
241
246
|
args=args,
|
|
@@ -327,13 +332,15 @@ def series_transform(
|
|
|
327
332
|
"""
|
|
328
333
|
call_agg = kwargs.pop("_call_agg", False)
|
|
329
334
|
func = get_packed_funcs(series, OutputType.series, func, *args, **kwargs)
|
|
330
|
-
op =
|
|
335
|
+
op = DataFrameTransform(
|
|
331
336
|
func=func,
|
|
332
337
|
axis=axis,
|
|
333
338
|
convert_dtype=convert_dtype,
|
|
334
339
|
args=args,
|
|
335
340
|
kwds=kwargs,
|
|
336
|
-
output_types=[OutputType.series]
|
|
341
|
+
output_types=[OutputType.series]
|
|
342
|
+
if not call_agg and not isinstance(func, list)
|
|
343
|
+
else None,
|
|
337
344
|
call_agg=call_agg,
|
|
338
345
|
)
|
|
339
346
|
return op(series, dtype=dtype, name=series.name, skip_infer=skip_infer)
|
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
15
17
|
from ... import opcodes
|
|
16
18
|
from ...core import OutputType
|
|
17
19
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -30,10 +32,20 @@ class DataFrameTranspose(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
30
32
|
new_shape = arg.shape[::-1]
|
|
31
33
|
columns_value = arg.index_value
|
|
32
34
|
index_value = parse_index(arg.dtypes.index)
|
|
35
|
+
|
|
36
|
+
if not arg.index_value.has_value:
|
|
37
|
+
dtypes = None
|
|
38
|
+
else:
|
|
39
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
40
|
+
|
|
41
|
+
dtype = find_common_type(list(arg.dtypes))
|
|
42
|
+
pd_index = arg.index_value.to_pandas()
|
|
43
|
+
dtypes = pd.Series([dtype] * len(pd_index), index=pd_index)
|
|
44
|
+
|
|
33
45
|
return self.new_dataframe(
|
|
34
46
|
[arg],
|
|
35
47
|
shape=new_shape,
|
|
36
|
-
dtypes=
|
|
48
|
+
dtypes=dtypes,
|
|
37
49
|
columns_value=columns_value,
|
|
38
50
|
index_value=index_value,
|
|
39
51
|
)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...udf import builtin_function
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@builtin_function
def _item_or_none(item):
    """Return the first element of ``item``, or ``None`` when it is empty."""
    # ``len`` is used rather than truthiness so array-like inputs are handled.
    return item[0] if len(item) > 0 else None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _valid_index(df_or_series, slc: slice):
    """
    Pick index labels of the non-NA part of ``df_or_series``.

    Entries dropped by ``dropna(how="all")`` are removed, the remaining
    index is sliced with ``slc``, and ``_item_or_none`` is applied to the
    resulting tensor so that an empty selection yields ``None``.
    """
    from ... import tensor as mt

    remaining = df_or_series.dropna(how="all")
    selected = remaining.index[slc]
    as_tensor = mt.array(selected)
    return as_tensor.mf.apply_chunk(_item_or_none, dtype=selected.dtype)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_doc = """
|
|
33
|
+
Return index for %(pos)s non-NA value or None, if no non-NA value is found.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
type of index
|
|
38
|
+
|
|
39
|
+
Examples
|
|
40
|
+
--------
|
|
41
|
+
For Series:
|
|
42
|
+
|
|
43
|
+
>>> import maxframe.dataframe as md
|
|
44
|
+
>>> s = md.Series([None, 3, 4])
|
|
45
|
+
>>> s.first_valid_index().execute()
|
|
46
|
+
1
|
|
47
|
+
>>> s.last_valid_index().execute()
|
|
48
|
+
2
|
|
49
|
+
|
|
50
|
+
>>> s = md.Series([None, None])
|
|
51
|
+
>>> print(s.first_valid_index()).execute()
|
|
52
|
+
None
|
|
53
|
+
>>> print(s.last_valid_index()).execute()
|
|
54
|
+
None
|
|
55
|
+
|
|
56
|
+
If all elements in Series are NA/null, returns None.
|
|
57
|
+
|
|
58
|
+
>>> s = md.Series()
|
|
59
|
+
>>> print(s.first_valid_index()).execute()
|
|
60
|
+
None
|
|
61
|
+
>>> print(s.last_valid_index()).execute()
|
|
62
|
+
None
|
|
63
|
+
|
|
64
|
+
If Series is empty, returns None.
|
|
65
|
+
|
|
66
|
+
For DataFrame:
|
|
67
|
+
|
|
68
|
+
>>> df = md.DataFrame({'A': [None, None, 2], 'B': [None, 3, 4]})
|
|
69
|
+
>>> df.execute()
|
|
70
|
+
A B
|
|
71
|
+
0 NaN NaN
|
|
72
|
+
1 NaN 3.0
|
|
73
|
+
2 2.0 4.0
|
|
74
|
+
>>> df.first_valid_index().execute()
|
|
75
|
+
1
|
|
76
|
+
>>> df.last_valid_index().execute()
|
|
77
|
+
2
|
|
78
|
+
|
|
79
|
+
>>> df = md.DataFrame({'A': [None, None, None], 'B': [None, None, None]})
|
|
80
|
+
>>> df.execute()
|
|
81
|
+
A B
|
|
82
|
+
0 None None
|
|
83
|
+
1 None None
|
|
84
|
+
2 None None
|
|
85
|
+
>>> print(df.first_valid_index()).execute()
|
|
86
|
+
None
|
|
87
|
+
>>> print(df.last_valid_index()).execute()
|
|
88
|
+
None
|
|
89
|
+
|
|
90
|
+
If all elements in DataFrame are NA/null, returns None.
|
|
91
|
+
|
|
92
|
+
>>> df = md.DataFrame()
|
|
93
|
+
>>> df.execute()
|
|
94
|
+
Empty DataFrame
|
|
95
|
+
Columns: []
|
|
96
|
+
Index: []
|
|
97
|
+
>>> print(df.first_valid_index()).execute()
|
|
98
|
+
None
|
|
99
|
+
>>> print(df.last_valid_index()).execute()
|
|
100
|
+
None
|
|
101
|
+
|
|
102
|
+
If DataFrame is empty, returns None.
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def first_valid_index(df_or_series):
    # The public docstring is attached from the shared ``_doc`` template at
    # module level, so none is declared here.
    leading = slice(1)  # same slice as slice(None, 1): the first entry, if any
    return _valid_index(df_or_series, leading)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def last_valid_index(df_or_series):
    # The public docstring is attached from the shared ``_doc`` template at
    # module level, so none is declared here.
    trailing = slice(-1, None)  # the final entry, if any
    return _valid_index(df_or_series, trailing)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Fill the shared docstring template for each public function.
first_valid_index.__doc__ = _doc % {"pos": "first"}
last_valid_index.__doc__ = _doc % {"pos": "last"}
|