maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -20,35 +20,43 @@ except ImportError: # pragma: no cover
|
|
|
20
20
|
from ... import opcodes
|
|
21
21
|
from ...config import options
|
|
22
22
|
from ...core import OutputType
|
|
23
|
-
from ...serialization.serializables import BoolField
|
|
23
|
+
from ...serialization.serializables import BoolField, StringField
|
|
24
24
|
from ...utils import lazy_import
|
|
25
|
-
from
|
|
25
|
+
from ..utils import validate_dtype_backend
|
|
26
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
26
27
|
|
|
27
28
|
cudf = lazy_import("cudf")
|
|
28
29
|
|
|
29
30
|
|
|
30
|
-
class DataFrameNunique(
|
|
31
|
+
class DataFrameNunique(DataFrameReduction, DataFrameReductionMixin):
|
|
31
32
|
_op_type_ = opcodes.NUNIQUE
|
|
32
33
|
_func_name = "nunique"
|
|
33
34
|
|
|
34
35
|
dropna = BoolField("dropna", default=None)
|
|
35
|
-
|
|
36
|
+
dtype_backend = StringField(
|
|
37
|
+
"dtype_backend", on_deserialize=validate_dtype_backend, default=None
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def __init__(self, dtype_backend=None, **kw):
|
|
41
|
+
dtype_backend = validate_dtype_backend(dtype_backend)
|
|
42
|
+
super().__init__(dtype_backend=dtype_backend, **kw)
|
|
36
43
|
|
|
37
44
|
@property
|
|
38
45
|
def is_atomic(self):
|
|
39
46
|
return True
|
|
40
47
|
|
|
48
|
+
def get_reduction_args(self, axis=None):
|
|
49
|
+
args = dict(dropna=self.dropna)
|
|
50
|
+
if self.inputs and self.inputs[0].ndim > 1:
|
|
51
|
+
args["axis"] = axis
|
|
52
|
+
return {k: v for k, v in args.items() if v is not None}
|
|
53
|
+
|
|
41
54
|
@classmethod
|
|
42
55
|
def get_reduction_callable(cls, op):
|
|
43
56
|
func_name = getattr(op, "_func_name")
|
|
44
57
|
kw = dict(dropna=op.dropna)
|
|
45
58
|
kw = {k: v for k, v in kw.items() if v is not None}
|
|
46
|
-
|
|
47
|
-
def fun(value):
|
|
48
|
-
return value.nunique(**kw)
|
|
49
|
-
|
|
50
|
-
fun.__name__ = func_name
|
|
51
|
-
return fun
|
|
59
|
+
return ReductionCallable(func_name=func_name, kwargs=kw)
|
|
52
60
|
|
|
53
61
|
|
|
54
62
|
def nunique_dataframe(df, axis=0, dropna=True):
|
|
@@ -136,6 +144,6 @@ def nunique_series(series, dropna=True):
|
|
|
136
144
|
op = DataFrameNunique(
|
|
137
145
|
dropna=dropna,
|
|
138
146
|
output_types=[OutputType.scalar],
|
|
139
|
-
|
|
147
|
+
dtype_backend=options.dataframe.dtype_backend,
|
|
140
148
|
)
|
|
141
149
|
return op(series)
|
|
@@ -16,11 +16,23 @@ import numpy as np
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import OutputType
|
|
19
|
-
from .
|
|
20
|
-
from .core import DataFrameReductionMixin, DataFrameReductionOperator
|
|
19
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
class
|
|
22
|
+
class ProdReductionCallable(ReductionCallable):
|
|
23
|
+
def __call__(self, value):
|
|
24
|
+
from .aggregation import where_function
|
|
25
|
+
|
|
26
|
+
skipna, min_count = self.kwargs["skipna"], self.kwargs["min_count"]
|
|
27
|
+
if min_count == 0:
|
|
28
|
+
return value.prod(skipna=skipna)
|
|
29
|
+
else:
|
|
30
|
+
return where_function(
|
|
31
|
+
value.count() >= min_count, value.prod(skipna=skipna), np.nan
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DataFrameProd(DataFrameReduction, DataFrameReductionMixin):
|
|
24
36
|
_op_type_ = opcodes.PROD
|
|
25
37
|
_func_name = "prod"
|
|
26
38
|
|
|
@@ -31,16 +43,9 @@ class DataFrameProd(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
31
43
|
@classmethod
|
|
32
44
|
def get_reduction_callable(cls, op):
|
|
33
45
|
skipna, min_count = op.skipna, op.min_count
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
return value.prod(skipna=skipna)
|
|
38
|
-
else:
|
|
39
|
-
return where_function(
|
|
40
|
-
value.count() >= min_count, value.prod(skipna=skipna), np.nan
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
return prod
|
|
46
|
+
return ProdReductionCallable(
|
|
47
|
+
func_name="prod", kwargs=dict(skipna=skipna, min_count=min_count)
|
|
48
|
+
)
|
|
44
49
|
|
|
45
50
|
|
|
46
51
|
def prod_series(df, axis=None, skipna=True, level=None, min_count=0, method=None):
|
|
@@ -14,10 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFrameSize(
|
|
20
|
+
class DataFrameSize(DataFrameReduction, DataFrameReductionMixin):
|
|
21
21
|
_op_type_ = opcodes.REDUCTION_SIZE
|
|
22
22
|
_func_name = "size"
|
|
23
23
|
|
|
@@ -15,10 +15,18 @@
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
17
|
from ...serialization.serializables import Int32Field
|
|
18
|
-
from .core import DataFrameReductionMixin,
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class
|
|
21
|
+
class SemReductionCallable(ReductionCallable):
|
|
22
|
+
def __call__(self, value):
|
|
23
|
+
skipna, ddof = self.kwargs["skipna"], self.kwargs["ddof"]
|
|
24
|
+
var = value.var(skipna=skipna, ddof=ddof)
|
|
25
|
+
cnt = value.count()
|
|
26
|
+
return (var / cnt) ** 0.5
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DataFrameSem(DataFrameReduction, DataFrameReductionMixin):
|
|
22
30
|
_op_type_ = opcodes.SEM
|
|
23
31
|
_func_name = "sem"
|
|
24
32
|
|
|
@@ -27,13 +35,9 @@ class DataFrameSem(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
27
35
|
@classmethod
|
|
28
36
|
def get_reduction_callable(cls, op: "DataFrameSem"):
|
|
29
37
|
skipna, ddof = op.skipna, op.ddof
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
cnt = x.count()
|
|
34
|
-
return (var / cnt) ** 0.5
|
|
35
|
-
|
|
36
|
-
return sem
|
|
38
|
+
return SemReductionCallable(
|
|
39
|
+
func_name="sem", kwargs={"skipna": skipna, "ddof": ddof}
|
|
40
|
+
)
|
|
37
41
|
|
|
38
42
|
|
|
39
43
|
def sem_series(series, axis=None, skipna=True, level=None, ddof=1, method=None):
|
|
@@ -17,10 +17,36 @@ import numpy as np
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import ENTITY_TYPE, OutputType
|
|
19
19
|
from ...serialization.serializables import BoolField
|
|
20
|
-
from .core import DataFrameReductionMixin,
|
|
20
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class
|
|
23
|
+
class SkewReductionCallable(ReductionCallable):
|
|
24
|
+
def __call__(self, value):
|
|
25
|
+
from .aggregation import where_function
|
|
26
|
+
|
|
27
|
+
skipna, bias = self.kwargs["skipna"], self.kwargs["bias"]
|
|
28
|
+
cnt = value.count()
|
|
29
|
+
mean = value.mean(skipna=skipna)
|
|
30
|
+
divided = (
|
|
31
|
+
(value**3).mean(skipna=skipna)
|
|
32
|
+
- 3 * (value**2).mean(skipna=skipna) * mean
|
|
33
|
+
+ 2 * mean**3
|
|
34
|
+
)
|
|
35
|
+
var = value.var(skipna=skipna, ddof=0)
|
|
36
|
+
if isinstance(var, ENTITY_TYPE) or var > 0:
|
|
37
|
+
val = where_function(var > 0, divided / var**1.5, np.nan)
|
|
38
|
+
else:
|
|
39
|
+
val = np.nan
|
|
40
|
+
if not bias:
|
|
41
|
+
val = where_function(
|
|
42
|
+
(var > 0) & (cnt > 2),
|
|
43
|
+
val * ((cnt * (cnt - 1)) ** 0.5 / (cnt - 2)),
|
|
44
|
+
np.nan,
|
|
45
|
+
)
|
|
46
|
+
return val
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DataFrameSkew(DataFrameReduction, DataFrameReductionMixin):
|
|
24
50
|
_op_type_ = opcodes.SKEW
|
|
25
51
|
_func_name = "skew"
|
|
26
52
|
|
|
@@ -28,32 +54,10 @@ class DataFrameSkew(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
28
54
|
|
|
29
55
|
@classmethod
|
|
30
56
|
def get_reduction_callable(cls, op: "DataFrameSkew"):
|
|
31
|
-
from .aggregation import where_function
|
|
32
|
-
|
|
33
57
|
skipna, bias = op.skipna, op.bias
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
mean = x.mean(skipna=skipna)
|
|
38
|
-
divided = (
|
|
39
|
-
(x**3).mean(skipna=skipna)
|
|
40
|
-
- 3 * (x**2).mean(skipna=skipna) * mean
|
|
41
|
-
+ 2 * mean**3
|
|
42
|
-
)
|
|
43
|
-
var = x.var(skipna=skipna, ddof=0)
|
|
44
|
-
if isinstance(var, ENTITY_TYPE) or var > 0:
|
|
45
|
-
val = where_function(var > 0, divided / var**1.5, np.nan)
|
|
46
|
-
else:
|
|
47
|
-
val = np.nan
|
|
48
|
-
if not bias:
|
|
49
|
-
val = where_function(
|
|
50
|
-
(var > 0) & (cnt > 2),
|
|
51
|
-
val * ((cnt * (cnt - 1)) ** 0.5 / (cnt - 2)),
|
|
52
|
-
np.nan,
|
|
53
|
-
)
|
|
54
|
-
return val
|
|
55
|
-
|
|
56
|
-
return skew
|
|
58
|
+
return SkewReductionCallable(
|
|
59
|
+
func_name="skew", kwargs=dict(skipna=skipna, bias=bias)
|
|
60
|
+
)
|
|
57
61
|
|
|
58
62
|
|
|
59
63
|
def skew_series(df, axis=None, skipna=True, level=None, bias=False, method=None):
|
|
@@ -15,10 +15,15 @@
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
17
|
from ...serialization.serializables import StringField
|
|
18
|
-
from .core import DataFrameReductionMixin,
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class
|
|
21
|
+
class StrLenReductionCallable(ReductionCallable):
|
|
22
|
+
def __call__(self, value):
|
|
23
|
+
return build_str_concat_object(value, **self.kwargs)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DataFrameStrConcat(DataFrameReduction, DataFrameReductionMixin):
|
|
22
27
|
_op_type_ = opcodes.STR_CONCAT
|
|
23
28
|
_func_name = "str_concat"
|
|
24
29
|
|
|
@@ -35,11 +40,9 @@ class DataFrameStrConcat(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
35
40
|
@classmethod
|
|
36
41
|
def get_reduction_callable(cls, op: "DataFrameStrConcat"):
|
|
37
42
|
sep, na_rep = op.sep, op.na_rep
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
return str_concat
|
|
43
|
+
return StrLenReductionCallable(
|
|
44
|
+
func_name="str_concat", kwargs=dict(sep=sep, na_rep=na_rep)
|
|
45
|
+
)
|
|
43
46
|
|
|
44
47
|
|
|
45
48
|
def build_str_concat_object(df, sep=None, na_rep=None):
|
|
@@ -16,10 +16,23 @@ import numpy as np
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import OutputType
|
|
19
|
-
from .core import DataFrameReductionMixin,
|
|
19
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class
|
|
22
|
+
class SumReductionCallable(ReductionCallable):
    """Reduction callable computing ``sum`` with ``min_count`` support.

    Reads ``skipna`` and ``min_count`` from ``self.kwargs``.
    """

    def __call__(self, value):
        """Return the sum of ``value``.

        With ``min_count == 0`` the plain ``value.sum(skipna=skipna)`` is
        returned.  Otherwise the condition ``value.count() >= min_count``,
        the sum and ``np.nan`` are handed to ``where_function``
        (presumably yielding NaN where the count is too small -- confirm
        against ``aggregation.where_function``).
        """
        # imported lazily inside the call, as in the original implementation
        from .aggregation import where_function

        skipna = self.kwargs["skipna"]
        min_count = self.kwargs["min_count"]

        if min_count != 0:
            return where_function(
                value.count() >= min_count, value.sum(skipna=skipna), np.nan
            )
        return value.sum(skipna=skipna)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DataFrameSum(DataFrameReduction, DataFrameReductionMixin):
|
|
23
36
|
_op_type_ = opcodes.SUM
|
|
24
37
|
_func_name = "sum"
|
|
25
38
|
|
|
@@ -29,19 +42,10 @@ class DataFrameSum(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
29
42
|
|
|
30
43
|
@classmethod
|
|
31
44
|
def get_reduction_callable(cls, op):
|
|
32
|
-
from .aggregation import where_function
|
|
33
|
-
|
|
34
45
|
skipna, min_count = op.skipna, op.min_count
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
return value.sum(skipna=skipna)
|
|
39
|
-
else:
|
|
40
|
-
return where_function(
|
|
41
|
-
value.count() >= min_count, value.sum(skipna=skipna), np.nan
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
return sum_
|
|
46
|
+
return SumReductionCallable(
|
|
47
|
+
func_name="sum", kwargs=dict(skipna=skipna, min_count=min_count)
|
|
48
|
+
)
|
|
45
49
|
|
|
46
50
|
|
|
47
51
|
def sum_series(df, axis=None, skipna=True, level=None, min_count=0, method=None):
|
|
@@ -26,6 +26,7 @@ from .... import dataframe as md
|
|
|
26
26
|
from ....lib.dtypes_extension import ArrowDtype
|
|
27
27
|
from ....tensor import Tensor
|
|
28
28
|
from ....tests.utils import assert_mf_index_dtype
|
|
29
|
+
from ....udf import ODPSFunction
|
|
29
30
|
from ...core import DataFrame, IndexValue, OutputType, Series
|
|
30
31
|
from ...datasource.dataframe import from_pandas as from_pandas_df
|
|
31
32
|
from ...datasource.series import from_pandas as from_pandas_series
|
|
@@ -527,3 +528,14 @@ def test_custom_aggregation():
|
|
|
527
528
|
assert result.agg_funcs[0].agg_func_name == "custom_reduction"
|
|
528
529
|
assert isinstance(result.agg_funcs[0].custom_reduction, MockReduction2)
|
|
529
530
|
assert result.agg_funcs[0].output_limit == 2
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def test_aggregation_with_odps_function():
    """An ``ODPSFunction`` added to a ``ReductionCompiler`` must be compiled
    as a custom reduction for both 1-d and 2-d inputs."""
    odps_func = ODPSFunction("test_odps_udaf", dtype=np.float64)

    for ndim in (1, 2):
        compiler = ReductionCompiler()
        compiler.add_function(odps_func, ndim=ndim)
        first_agg = compiler.compile().agg_funcs[0]

        assert first_agg.map_func_name == "custom_reduction"
        assert first_agg.agg_func_name == "custom_reduction"
        assert isinstance(first_agg.custom_reduction, ODPSFunction)
|
|
@@ -28,12 +28,19 @@ from ...tensor.core import TensorOrder
|
|
|
28
28
|
from ...utils import lazy_import
|
|
29
29
|
from ..core import DATAFRAME_TYPE
|
|
30
30
|
from ..initializer import Series as asseries
|
|
31
|
-
from .core import
|
|
31
|
+
from .core import (
|
|
32
|
+
CustomReduction,
|
|
33
|
+
DataFrameReduction,
|
|
34
|
+
DataFrameReductionMixin,
|
|
35
|
+
ReductionCallable,
|
|
36
|
+
)
|
|
32
37
|
|
|
33
38
|
cudf = lazy_import("cudf")
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
class UniqueReduction(CustomReduction):
|
|
42
|
+
_func_name = "unique"
|
|
43
|
+
|
|
37
44
|
def agg(self, data): # noqa: W0221 # pylint: disable=arguments-differ
|
|
38
45
|
xdf = cudf if self.is_gpu() else pd
|
|
39
46
|
# convert to series data
|
|
@@ -43,7 +50,12 @@ class UniqueReduction(CustomReduction):
|
|
|
43
50
|
return data.unique()
|
|
44
51
|
|
|
45
52
|
|
|
46
|
-
class
|
|
53
|
+
class UniqueReductionCallable(ReductionCallable):
    """Reduction callable that applies a ``UniqueReduction`` to its input.

    Reads the ``is_gpu`` flag from ``self.kwargs``.
    """

    def __call__(self, value):
        """Build a ``UniqueReduction`` named ``"unique"`` and call it on ``value``."""
        reducer = UniqueReduction(name="unique", is_gpu=self.kwargs["is_gpu"])
        return reducer(value)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DataFrameUnique(DataFrameReduction, DataFrameReductionMixin):
|
|
47
59
|
_op_type_ = opcodes.UNIQUE
|
|
48
60
|
_func_name = "unique"
|
|
49
61
|
|
|
@@ -53,9 +65,14 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
53
65
|
def is_atomic(self):
|
|
54
66
|
return True
|
|
55
67
|
|
|
68
|
+
def get_reduction_args(self, axis=None):
    """Return the keyword arguments for the reduction: always an empty dict.

    ``axis`` is accepted but not used.
    """
    return dict()
|
|
70
|
+
|
|
56
71
|
@classmethod
|
|
57
72
|
def get_reduction_callable(cls, op):
|
|
58
|
-
return
|
|
73
|
+
return UniqueReductionCallable(
|
|
74
|
+
func_name=cls._func_name, kwargs=dict(is_gpu=op.is_gpu())
|
|
75
|
+
)
|
|
59
76
|
|
|
60
77
|
def __call__(self, a):
|
|
61
78
|
if not isinstance(a, ENTITY_TYPE):
|
|
@@ -15,10 +15,21 @@
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
17
|
from ...serialization.serializables import Int32Field
|
|
18
|
-
from .core import DataFrameReductionMixin,
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class
|
|
21
|
+
class VarReductionCallable(ReductionCallable):
    """Reduction callable computing variance from sums / means of powers.

    Reads ``skipna`` and ``ddof`` from ``self.kwargs``.
    """

    def __call__(self, value):
        """Return the variance of ``value``.

        * ``ddof == 0``: ``mean(value**2) - mean(value)**2``.
        * otherwise: ``(sum(value**2) - sum(value)**2 / cnt) / (cnt - ddof)``
          where ``cnt`` is ``value.count()``.
        """
        skipna = self.kwargs["skipna"]
        ddof = self.kwargs["ddof"]
        # count is taken up front, before branching, exactly as before
        cnt = value.count()

        if ddof == 0:
            mean_of_squares = (value**2).mean(skipna=skipna)
            square_of_mean = (value.mean(skipna=skipna)) ** 2
            return mean_of_squares - square_of_mean

        sum_of_squares = (value**2).sum(skipna=skipna)
        square_of_sum = value.sum(skipna=skipna) ** 2
        return (sum_of_squares - square_of_sum / cnt) / (cnt - ddof)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DataFrameVar(DataFrameReduction, DataFrameReductionMixin):
|
|
22
33
|
_op_type_ = opcodes.VAR
|
|
23
34
|
_func_name = "var"
|
|
24
35
|
|
|
@@ -27,16 +38,9 @@ class DataFrameVar(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
27
38
|
@classmethod
|
|
28
39
|
def get_reduction_callable(cls, op: "DataFrameVar"):
|
|
29
40
|
skipna, ddof = op.skipna, op.ddof
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
if ddof == 0:
|
|
34
|
-
return (x**2).mean(skipna=skipna) - (x.mean(skipna=skipna)) ** 2
|
|
35
|
-
return ((x**2).sum(skipna=skipna) - x.sum(skipna=skipna) ** 2 / cnt) / (
|
|
36
|
-
cnt - ddof
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
return var
|
|
41
|
+
return VarReductionCallable(
|
|
42
|
+
func_name="var", kwargs={"skipna": skipna, "ddof": ddof}
|
|
43
|
+
)
|
|
40
44
|
|
|
41
45
|
|
|
42
46
|
def var_series(series, axis=None, skipna=True, level=None, ddof=1, method=None):
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
from .melt import melt
|
|
17
|
+
from .pivot import pivot
|
|
18
|
+
from .pivot_table import pivot_table
|
|
19
|
+
from .stack import stack
|
|
20
|
+
from .unstack import unstack
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _install():
    """Attach the reshape functions as methods on the entity classes.

    Every class in ``DATAFRAME_TYPE`` receives ``melt``, ``pivot``,
    ``pivot_table``, ``stack`` and ``unstack``; every class in
    ``SERIES_TYPE`` receives ``unstack`` only.
    """
    from ..core import DATAFRAME_TYPE, SERIES_TYPE

    dataframe_methods = {
        "melt": melt,
        "pivot": pivot,
        "pivot_table": pivot_table,
        "stack": stack,
        "unstack": unstack,
    }
    for df_cls in DATAFRAME_TYPE:
        for attr_name, func in dataframe_methods.items():
            setattr(df_cls, attr_name, func)

    for series_cls in SERIES_TYPE:
        series_cls.unstack = unstack


# run the installation once at import time, then drop the helper name
_install()
del _install
|
|
@@ -35,6 +35,7 @@ class DataFramePivot(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
35
35
|
columns = AnyField("columns", default=None)
|
|
36
36
|
|
|
37
37
|
agg_results = KeyField("agg_results", default=None)
|
|
38
|
+
fill_value = AnyField("fill_value", default=None)
|
|
38
39
|
|
|
39
40
|
def __init__(self, aggfunc=None, **kw):
|
|
40
41
|
if aggfunc is None:
|
|
@@ -39,6 +39,7 @@ class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
39
39
|
margins_name = StringField("margins_name", default=None)
|
|
40
40
|
sort = BoolField("sort", default=False)
|
|
41
41
|
|
|
42
|
+
# generate intermediate agg result to infer value of columns
|
|
42
43
|
agg_results = KeyField("agg_results", default=None)
|
|
43
44
|
|
|
44
45
|
def __init__(self, aggfunc=None, **kw):
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...udf import builtin_function
|
|
16
|
+
from ..core import SERIES_TYPE
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@builtin_function
def _adjust_names(pivoted, idx_names=None, col_names=None, remove_col_level=False):
    """Post-process a pivoted frame: drop a column level and rename axes.

    Parameters
    ----------
    pivoted
        Object supporting ``droplevel`` and ``rename_axis``.
    idx_names : list, optional
        When truthy, passed to ``rename_axis(..., axis=0)``.
    col_names : list, optional
        When truthy, passed to ``rename_axis(..., axis=1)``.
    remove_col_level : bool, default False
        When true, column level 0 is dropped first.

    Returns
    -------
    The adjusted object.
    """
    result = pivoted.droplevel(0, axis=1) if remove_col_level else pivoted
    if idx_names:
        result = result.rename_axis(idx_names, axis=0)
    if col_names:
        result = result.rename_axis(col_names, axis=1)
    return result
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def unstack(df_or_series, level=-1, fill_value=None):
    """
    Unstack, also known as pivot, Series or DataFrame with MultiIndex
    to move the given index level(s) into the columns.

    A DataFrame whose index has a single level is handled by stacking
    its columns and moving the original index level to the end.

    Parameters
    ----------
    level : int, str, or list of these, default last level
        Level(s) to unstack, can pass level name.
    fill_value : scalar value, default None
        Value to use when replacing NaN values.

    Returns
    -------
    DataFrame
        Unstacked Series or DataFrame. For a DataFrame with a
        single-level index, the reordered result of ``stack`` is
        returned instead.

    Raises
    ------
    ValueError
        If a Series whose index has only one level is passed.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3, 4],
    ...               index=md.MultiIndex.from_product([['one', 'two'],
    ...                                                 ['a', 'b']]))
    >>> s.execute()
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64

    >>> s.unstack(level=-1).execute()
         a  b
    one  1  2
    two  3  4

    >>> s.unstack(level=0).execute()
       one  two
    a    1    3
    b    2    4
    """
    if df_or_series.index.nlevels == 1:
        if isinstance(df_or_series, SERIES_TYPE):
            raise ValueError("index must be a MultiIndex to unstack")
        raw_idx_nlevels = df_or_series.index.nlevels
        stacked = df_or_series.stack(level=level, fill_value=fill_value)
        # ``nlevels`` is an int, so the level positions must be enumerated
        # with ``range``; ``list(int)`` raises TypeError.
        orig_order = list(range(stacked.index.nlevels))
        # rotate the original index level(s) behind the stacked ones
        new_order = orig_order[raw_idx_nlevels:] + orig_order[:raw_idx_nlevels]
        return stacked.reorder_levels(new_order)

    nlevels = df_or_series.index.nlevels
    idx_names = list(df_or_series.index.names)
    if df_or_series.ndim == 1:
        col_names = []
    else:
        col_names = list(df_or_series.columns.names)

    # resolve level names to positions; negative positions wrap around
    name_to_idx = {name: i for i, name in enumerate(idx_names)}
    level = level if isinstance(level, list) else [level]
    level_idxes = [(nlevels + name_to_idx.get(lv, lv)) % nlevels for lv in level]
    level_idx_set = set(level_idxes)
    # temporary column names holding the index levels after reset_index
    level_cols = [f"__level_{idx}" for idx in range(nlevels)]

    if df_or_series.ndim == 1:
        # a Series is pivoted through a one-column frame; the extra column
        # level this introduces is dropped again in ``_adjust_names``
        data = df_or_series.to_frame(name="__data")
    else:
        data = df_or_series
    pivot_val_col = list(data.dtypes.index)
    pivot_col_col = [level_cols[i] for i in level_idxes]
    pivot_idx_col = [level_cols[i] for i in range(nlevels) if i not in level_idx_set]

    new_idx_names = [idx_names[i] for i in range(nlevels) if i not in level_idx_set]
    new_col_names = col_names + [idx_names[i] for i in level_idxes]

    data = data.reset_index(names=level_cols)
    pivoted = data.pivot(
        index=pivot_idx_col, columns=pivot_col_col, values=pivot_val_col
    )
    pivoted.op.fill_value = fill_value
    # restore the original axis names that were replaced by ``__level_*``
    return pivoted.mf.apply_chunk(
        _adjust_names,
        col_names=new_col_names,
        idx_names=new_idx_names,
        remove_col_level=df_or_series.ndim == 1,
        skip_infer=True,
        output_type="dataframe",
    )
|
|
@@ -12,23 +12,38 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from .rank import DataFrameRank
|
|
15
16
|
from .sort_index import DataFrameSortIndex
|
|
16
17
|
from .sort_values import DataFrameSortValues
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
def _install():
|
|
20
|
-
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
21
|
+
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
22
|
+
from .argsort import index_argsort, series_argsort
|
|
23
|
+
from .nlargest import df_nlargest, series_nlargest
|
|
24
|
+
from .nsmallest import df_nsmallest, series_nsmallest
|
|
25
|
+
from .rank import rank
|
|
21
26
|
from .sort_index import sort_index
|
|
22
27
|
from .sort_values import dataframe_sort_values, series_sort_values
|
|
23
28
|
|
|
24
29
|
for cls in DATAFRAME_TYPE:
|
|
30
|
+
setattr(cls, "nlargest", df_nlargest)
|
|
31
|
+
setattr(cls, "nsmallest", df_nsmallest)
|
|
32
|
+
setattr(cls, "rank", rank)
|
|
25
33
|
setattr(cls, "sort_values", dataframe_sort_values)
|
|
26
34
|
setattr(cls, "sort_index", sort_index)
|
|
27
35
|
|
|
28
36
|
for cls in SERIES_TYPE:
|
|
37
|
+
setattr(cls, "argsort", series_argsort)
|
|
38
|
+
setattr(cls, "nlargest", series_nlargest)
|
|
39
|
+
setattr(cls, "nsmallest", series_nsmallest)
|
|
40
|
+
setattr(cls, "rank", rank)
|
|
29
41
|
setattr(cls, "sort_values", series_sort_values)
|
|
30
42
|
setattr(cls, "sort_index", sort_index)
|
|
31
43
|
|
|
44
|
+
for cls in INDEX_TYPE:
|
|
45
|
+
setattr(cls, "argsort", index_argsort)
|
|
46
|
+
|
|
32
47
|
|
|
33
48
|
_install()
|
|
34
49
|
del _install
|