maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -22,9 +22,10 @@ from ...core import EntityData, OutputType
|
|
|
22
22
|
from ...serialization.serializables import BoolField, Int64Field, KeyField, StringField
|
|
23
23
|
from ...utils import pd_release_version
|
|
24
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
|
-
from ..utils import build_series, parse_index
|
|
25
|
+
from ..utils import build_df, build_series, parse_index
|
|
26
26
|
|
|
27
27
|
_keep_original_order = pd_release_version >= (1, 3, 0)
|
|
28
|
+
_name_count_or_proportion = pd_release_version >= (2, 0, 0)
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class DataFrameValueCounts(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -50,7 +51,16 @@ class DataFrameValueCounts(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
50
51
|
op.input = op._inputs[0]
|
|
51
52
|
|
|
52
53
|
def __call__(self, inp):
|
|
53
|
-
|
|
54
|
+
if inp.ndim == 2:
|
|
55
|
+
idx = pd.MultiIndex.from_frame(build_df(inp))
|
|
56
|
+
test_series = build_series(index=idx, dtype=int)
|
|
57
|
+
if _name_count_or_proportion:
|
|
58
|
+
out_name = "proportion" if self.normalize else "count"
|
|
59
|
+
else:
|
|
60
|
+
out_name = None
|
|
61
|
+
else:
|
|
62
|
+
test_series = build_series(inp).value_counts(normalize=self.normalize)
|
|
63
|
+
out_name = test_series.name
|
|
54
64
|
if self.bins is not None:
|
|
55
65
|
from .cut import cut
|
|
56
66
|
|
|
@@ -66,7 +76,7 @@ class DataFrameValueCounts(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
66
76
|
[inp],
|
|
67
77
|
shape=(np.nan,),
|
|
68
78
|
index_value=parse_index(pd.CategoricalIndex([]), inp, store_data=False),
|
|
69
|
-
name=
|
|
79
|
+
name=out_name,
|
|
70
80
|
dtype=test_series.dtype,
|
|
71
81
|
)
|
|
72
82
|
else:
|
|
@@ -74,7 +84,7 @@ class DataFrameValueCounts(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
74
84
|
[inp],
|
|
75
85
|
shape=(np.nan,),
|
|
76
86
|
index_value=parse_index(test_series.index, store_data=False),
|
|
77
|
-
name=
|
|
87
|
+
name=out_name,
|
|
78
88
|
dtype=test_series.dtype,
|
|
79
89
|
)
|
|
80
90
|
|
|
@@ -170,3 +180,27 @@ def value_counts(
|
|
|
170
180
|
method=method,
|
|
171
181
|
)
|
|
172
182
|
return op(series)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def df_value_counts(
|
|
186
|
+
df,
|
|
187
|
+
subset=None,
|
|
188
|
+
normalize=False,
|
|
189
|
+
sort=True,
|
|
190
|
+
ascending=False,
|
|
191
|
+
dropna=True,
|
|
192
|
+
method="auto",
|
|
193
|
+
):
|
|
194
|
+
if not subset:
|
|
195
|
+
df_to_count = df
|
|
196
|
+
else:
|
|
197
|
+
df_to_count = df[subset]
|
|
198
|
+
|
|
199
|
+
op = DataFrameValueCounts(
|
|
200
|
+
normalize=normalize,
|
|
201
|
+
sort=sort,
|
|
202
|
+
ascending=ascending,
|
|
203
|
+
dropna=dropna,
|
|
204
|
+
method=method,
|
|
205
|
+
)
|
|
206
|
+
return op(df_to_count)
|
|
@@ -22,6 +22,7 @@ from ... import tensor as mt
|
|
|
22
22
|
from ...core import ENTITY_TYPE, OutputType
|
|
23
23
|
from ...serialization.serializables import BoolField
|
|
24
24
|
from ...tensor.core import TENSOR_TYPE
|
|
25
|
+
from ...utils import get_pd_option
|
|
25
26
|
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE, MultiIndex
|
|
26
27
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
27
28
|
|
|
@@ -30,6 +31,7 @@ class DataFrameCheckNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
30
31
|
_op_type_ = opcodes.CHECK_NA
|
|
31
32
|
|
|
32
33
|
positive = BoolField("positive", default=None)
|
|
34
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=None)
|
|
33
35
|
|
|
34
36
|
def __call__(self, df):
|
|
35
37
|
if isinstance(df, DATAFRAME_TYPE):
|
|
@@ -45,11 +47,15 @@ class DataFrameCheckNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
45
47
|
f"Expecting maxframe dataframe, series, index, or tensor, got {type(df)}"
|
|
46
48
|
)
|
|
47
49
|
|
|
48
|
-
params = df.params
|
|
50
|
+
params = df.params
|
|
49
51
|
if self.output_types[0] == OutputType.dataframe:
|
|
50
|
-
|
|
51
|
-
[
|
|
52
|
-
|
|
52
|
+
if df.dtypes is None:
|
|
53
|
+
params["dtypes"] = None
|
|
54
|
+
else:
|
|
55
|
+
params["dtypes"] = pd.Series(
|
|
56
|
+
[np.dtype("bool")] * len(df.dtypes),
|
|
57
|
+
index=df.columns_value.to_pandas(),
|
|
58
|
+
)
|
|
53
59
|
else:
|
|
54
60
|
params["dtype"] = np.dtype("bool")
|
|
55
61
|
return self.new_tileable([df], **params)
|
|
@@ -133,13 +139,14 @@ def isna(obj):
|
|
|
133
139
|
2 True
|
|
134
140
|
dtype: bool
|
|
135
141
|
"""
|
|
142
|
+
use_inf_as_na = get_pd_option("mode.use_inf_as_na", False)
|
|
136
143
|
if isinstance(obj, MultiIndex):
|
|
137
144
|
raise NotImplementedError("isna is not defined for MultiIndex")
|
|
138
145
|
elif isinstance(obj, ENTITY_TYPE):
|
|
139
146
|
if isinstance(obj, TENSOR_TYPE):
|
|
140
147
|
return mt.isnan(obj)
|
|
141
148
|
else:
|
|
142
|
-
op = DataFrameCheckNA(positive=True)
|
|
149
|
+
op = DataFrameCheckNA(positive=True, use_inf_as_na=use_inf_as_na)
|
|
143
150
|
return op(obj)
|
|
144
151
|
else:
|
|
145
152
|
return _from_pandas(pd.isna(obj))
|
|
@@ -207,13 +214,14 @@ def notna(obj):
|
|
|
207
214
|
2 False
|
|
208
215
|
dtype: bool
|
|
209
216
|
"""
|
|
217
|
+
use_inf_as_na = get_pd_option("mode.use_inf_as_na", False)
|
|
210
218
|
if isinstance(obj, MultiIndex):
|
|
211
219
|
raise NotImplementedError("isna is not defined for MultiIndex")
|
|
212
220
|
elif isinstance(obj, ENTITY_TYPE):
|
|
213
221
|
if isinstance(obj, TENSOR_TYPE):
|
|
214
222
|
return ~mt.isnan(obj)
|
|
215
223
|
else:
|
|
216
|
-
op = DataFrameCheckNA(positive=False)
|
|
224
|
+
op = DataFrameCheckNA(positive=False, use_inf_as_na=use_inf_as_na)
|
|
217
225
|
return op(obj)
|
|
218
226
|
else:
|
|
219
227
|
return _from_pandas(pd.notna(obj))
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
16
17
|
|
|
17
18
|
from ... import opcodes
|
|
18
19
|
from ...core import OutputType
|
|
@@ -40,6 +41,8 @@ class DataFrameDropNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
40
41
|
# if True, drop index
|
|
41
42
|
ignore_index = BoolField("ignore_index", default=False)
|
|
42
43
|
|
|
44
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=None)
|
|
45
|
+
|
|
43
46
|
def __init__(self, sparse=None, output_types=None, **kw):
|
|
44
47
|
super().__init__(_output_types=output_types, sparse=sparse, **kw)
|
|
45
48
|
|
|
@@ -155,6 +158,7 @@ def df_dropna(
|
|
|
155
158
|
1 Batman Batmobile 1940-04-25
|
|
156
159
|
"""
|
|
157
160
|
axis = validate_axis(axis, df)
|
|
161
|
+
use_inf_as_na = pd.get_option("mode.use_inf_as_na")
|
|
158
162
|
if axis != 0:
|
|
159
163
|
raise NotImplementedError("Does not support dropna on DataFrame when axis=1")
|
|
160
164
|
if (
|
|
@@ -174,6 +178,7 @@ def df_dropna(
|
|
|
174
178
|
thresh=thresh,
|
|
175
179
|
subset=subset,
|
|
176
180
|
ignore_index=ignore_index,
|
|
181
|
+
use_inf_as_na=use_inf_as_na,
|
|
177
182
|
output_types=[OutputType.dataframe],
|
|
178
183
|
)
|
|
179
184
|
out_df = op(df)
|
|
@@ -26,7 +26,7 @@ from ..utils import validate_axis
|
|
|
26
26
|
|
|
27
27
|
class DataFrameFillNA(DataFrameOperator, DataFrameOperatorMixin):
|
|
28
28
|
_op_type_ = opcodes.FILL_NA
|
|
29
|
-
_legacy_name = "FillNA"
|
|
29
|
+
_legacy_name = "FillNA" # since v2.0.0
|
|
30
30
|
|
|
31
31
|
value = AnyField(
|
|
32
32
|
"value", on_serialize=lambda x: x.data if isinstance(x, Entity) else x
|
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
from typing import List
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
|
-
from ...core import EntityData
|
|
19
|
-
from ...serialization.serializables import AnyField, Int32Field
|
|
18
|
+
from ...core import ENTITY_TYPE, EntityData
|
|
19
|
+
from ...serialization.serializables import AnyField, FieldTypes, Int32Field, ListField
|
|
20
20
|
from ...utils import no_default
|
|
21
21
|
from ..operators import SERIES_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
22
22
|
from ..utils import build_df, build_series
|
|
@@ -31,15 +31,18 @@ class DataFrameReplace(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
31
31
|
regex = AnyField("regex", default=None)
|
|
32
32
|
method = AnyField("method", default=no_default)
|
|
33
33
|
|
|
34
|
+
fill_chunks = ListField("fill_chunks", FieldTypes.key, default=None)
|
|
35
|
+
|
|
34
36
|
@classmethod
|
|
35
37
|
def _set_inputs(cls, op: "DataFrameReplace", inputs: List[EntityData]):
|
|
36
38
|
super()._set_inputs(op, inputs)
|
|
37
39
|
input_iter = iter(inputs)
|
|
38
40
|
next(input_iter)
|
|
39
|
-
if isinstance(op.to_replace,
|
|
41
|
+
if isinstance(op.to_replace, ENTITY_TYPE):
|
|
40
42
|
op.to_replace = next(input_iter)
|
|
41
|
-
if isinstance(op.value,
|
|
43
|
+
if isinstance(op.value, ENTITY_TYPE):
|
|
42
44
|
op.value = next(input_iter)
|
|
45
|
+
op.fill_chunks = list(input_iter)
|
|
43
46
|
|
|
44
47
|
def __call__(self, df_or_series):
|
|
45
48
|
inputs = [df_or_series]
|
|
@@ -15,18 +15,23 @@
|
|
|
15
15
|
from .aggregation import DataFrameAggregate
|
|
16
16
|
from .all import DataFrameAll
|
|
17
17
|
from .any import DataFrameAny
|
|
18
|
-
from .
|
|
18
|
+
from .argmax import DataFrameArgMax
|
|
19
|
+
from .argmin import DataFrameArgMin
|
|
20
|
+
from .core import CustomReduction, NamedAgg
|
|
19
21
|
from .count import DataFrameCount
|
|
20
22
|
from .cummax import DataFrameCummax
|
|
21
23
|
from .cummin import DataFrameCummin
|
|
22
24
|
from .cumprod import DataFrameCumprod
|
|
23
25
|
from .cumsum import DataFrameCumsum
|
|
24
26
|
from .custom_reduction import DataFrameCustomReduction
|
|
27
|
+
from .idxmax import DataFrameIdxMax
|
|
28
|
+
from .idxmin import DataFrameIdxMin
|
|
25
29
|
from .kurtosis import DataFrameKurtosis
|
|
26
30
|
from .max import DataFrameMax
|
|
27
31
|
from .mean import DataFrameMean
|
|
28
32
|
from .median import DataFrameMedian
|
|
29
33
|
from .min import DataFrameMin
|
|
34
|
+
from .mode import DataFrameMode
|
|
30
35
|
from .nunique import DataFrameNunique
|
|
31
36
|
from .prod import DataFrameProd
|
|
32
37
|
from .reduction_size import DataFrameSize
|
|
@@ -43,16 +48,22 @@ def _install():
|
|
|
43
48
|
from .aggregation import aggregate
|
|
44
49
|
from .all import all_dataframe, all_index, all_series
|
|
45
50
|
from .any import any_dataframe, any_index, any_series
|
|
51
|
+
from .argmax import argmax_series_index
|
|
52
|
+
from .argmin import argmin_series_index
|
|
46
53
|
from .count import count_dataframe, count_series
|
|
54
|
+
from .cov import cov_dataframe, cov_series
|
|
47
55
|
from .cummax import cummax
|
|
48
56
|
from .cummin import cummin
|
|
49
57
|
from .cumprod import cumprod
|
|
50
58
|
from .cumsum import cumsum
|
|
59
|
+
from .idxmax import idxmax_dataframe, idxmax_series
|
|
60
|
+
from .idxmin import idxmin_dataframe, idxmin_series
|
|
51
61
|
from .kurtosis import kurt_dataframe, kurt_series
|
|
52
62
|
from .max import max_dataframe, max_index, max_series
|
|
53
63
|
from .mean import mean_dataframe, mean_series
|
|
54
64
|
from .median import median_dataframe, median_series
|
|
55
65
|
from .min import min_dataframe, min_index, min_series
|
|
66
|
+
from .mode import mode_dataframe, mode_series
|
|
56
67
|
from .nunique import nunique_dataframe, nunique_series
|
|
57
68
|
from .prod import prod_dataframe, prod_series
|
|
58
69
|
from .reduction_size import size_dataframe, size_series
|
|
@@ -63,31 +74,37 @@ def _install():
|
|
|
63
74
|
from .var import var_dataframe, var_series
|
|
64
75
|
|
|
65
76
|
funcs = [
|
|
66
|
-
("
|
|
67
|
-
("
|
|
68
|
-
("product", prod_series, prod_dataframe),
|
|
69
|
-
("max", max_series, max_dataframe),
|
|
70
|
-
("min", min_series, min_dataframe),
|
|
71
|
-
("count", count_series, count_dataframe),
|
|
72
|
-
("mean", mean_series, mean_dataframe),
|
|
73
|
-
("median", median_series, median_dataframe),
|
|
74
|
-
("var", var_series, var_dataframe),
|
|
75
|
-
("std", std_series, std_dataframe),
|
|
77
|
+
("agg", aggregate, aggregate),
|
|
78
|
+
("aggregate", aggregate, aggregate),
|
|
76
79
|
("all", all_series, all_dataframe),
|
|
77
80
|
("any", any_series, any_dataframe),
|
|
81
|
+
("argmax", argmax_series_index, None),
|
|
82
|
+
("argmin", argmin_series_index, None),
|
|
83
|
+
("count", count_series, count_dataframe),
|
|
84
|
+
("cov", cov_series, cov_dataframe),
|
|
78
85
|
("cummax", cummax, cummax),
|
|
79
86
|
("cummin", cummin, cummin),
|
|
80
87
|
("cumprod", cumprod, cumprod),
|
|
81
88
|
("cumsum", cumsum, cumsum),
|
|
82
|
-
("
|
|
83
|
-
("
|
|
89
|
+
("idxmax", idxmax_series, idxmax_dataframe),
|
|
90
|
+
("idxmin", idxmin_series, idxmin_dataframe),
|
|
91
|
+
("kurt", kurt_series, kurt_dataframe),
|
|
92
|
+
("kurtosis", kurt_series, kurt_dataframe),
|
|
93
|
+
("max", max_series, max_dataframe),
|
|
94
|
+
("mean", mean_series, mean_dataframe),
|
|
95
|
+
("median", median_series, median_dataframe),
|
|
96
|
+
("min", min_series, min_dataframe),
|
|
97
|
+
("mode", mode_series, mode_dataframe),
|
|
84
98
|
("nunique", nunique_series, nunique_dataframe),
|
|
99
|
+
("prod", prod_series, prod_dataframe),
|
|
100
|
+
("product", prod_series, prod_dataframe),
|
|
85
101
|
("sem", sem_series, sem_dataframe),
|
|
86
102
|
("skew", skew_series, skew_dataframe),
|
|
87
|
-
("
|
|
88
|
-
("
|
|
103
|
+
("std", std_series, std_dataframe),
|
|
104
|
+
("sum", sum_series, sum_dataframe),
|
|
89
105
|
("unique", unique, None),
|
|
90
|
-
("
|
|
106
|
+
("var", var_series, var_dataframe),
|
|
107
|
+
("_reduction_size", size_series, size_dataframe),
|
|
91
108
|
]
|
|
92
109
|
for func_name, series_func, df_func in funcs:
|
|
93
110
|
if df_func is not None: # pragma: no branch
|
|
@@ -104,6 +121,8 @@ def _install():
|
|
|
104
121
|
setattr(t, "any", any_index)
|
|
105
122
|
setattr(t, "min", min_index)
|
|
106
123
|
setattr(t, "max", max_index)
|
|
124
|
+
setattr(t, "argmin", argmin_series_index)
|
|
125
|
+
setattr(t, "argmax", argmax_series_index)
|
|
107
126
|
|
|
108
127
|
|
|
109
128
|
_install()
|
|
@@ -29,9 +29,16 @@ from ... import tensor as maxframe_tensor
|
|
|
29
29
|
from ...core import ENTITY_TYPE, OutputType, enter_mode
|
|
30
30
|
from ...io.odpsio.schema import pandas_dtype_to_arrow_type
|
|
31
31
|
from ...lib.dtypes_extension import ArrowDtype
|
|
32
|
-
from ...serialization.serializables import
|
|
32
|
+
from ...serialization.serializables import (
|
|
33
|
+
AnyField,
|
|
34
|
+
BoolField,
|
|
35
|
+
DictField,
|
|
36
|
+
Int32Field,
|
|
37
|
+
ListField,
|
|
38
|
+
)
|
|
33
39
|
from ...typing_ import TileableType
|
|
34
|
-
from ...
|
|
40
|
+
from ...udf import BuiltinFunction
|
|
41
|
+
from ...utils import get_pd_option, lazy_import, pd_release_version
|
|
35
42
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
36
43
|
from ..utils import build_df, build_empty_df, build_series, parse_index, validate_axis
|
|
37
44
|
from .core import (
|
|
@@ -85,16 +92,25 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
85
92
|
_op_type_ = opcodes.AGGREGATE
|
|
86
93
|
|
|
87
94
|
raw_func = AnyField("raw_func")
|
|
88
|
-
raw_func_kw = DictField("raw_func_kw")
|
|
89
|
-
func = AnyField("func")
|
|
95
|
+
raw_func_kw = DictField("raw_func_kw", default=None)
|
|
96
|
+
func = AnyField("func", default=None)
|
|
90
97
|
func_rename = ListField("func_rename", default=None)
|
|
91
98
|
axis = AnyField("axis", default=0)
|
|
92
|
-
numeric_only = BoolField("numeric_only")
|
|
93
|
-
bool_only = BoolField("bool_only")
|
|
94
|
-
|
|
95
|
-
pre_funcs: List[ReductionPreStep] = ListField("pre_funcs")
|
|
96
|
-
agg_funcs: List[ReductionAggStep] = ListField("agg_funcs")
|
|
97
|
-
post_funcs: List[ReductionPostStep] = ListField("post_funcs")
|
|
99
|
+
numeric_only = BoolField("numeric_only", default=None)
|
|
100
|
+
bool_only = BoolField("bool_only", default=None)
|
|
101
|
+
|
|
102
|
+
pre_funcs: List[ReductionPreStep] = ListField("pre_funcs", default=None)
|
|
103
|
+
agg_funcs: List[ReductionAggStep] = ListField("agg_funcs", default=None)
|
|
104
|
+
post_funcs: List[ReductionPostStep] = ListField("post_funcs", default=None)
|
|
105
|
+
combine_size = Int32Field("combine_size", default=None)
|
|
106
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=False)
|
|
107
|
+
|
|
108
|
+
def has_custom_code(self) -> bool:
|
|
109
|
+
return any(
|
|
110
|
+
fun.custom_reduction
|
|
111
|
+
and not isinstance(fun.custom_reduction, BuiltinFunction)
|
|
112
|
+
for fun in self.agg_funcs or ()
|
|
113
|
+
)
|
|
98
114
|
|
|
99
115
|
@staticmethod
|
|
100
116
|
def _filter_dtypes(op: "DataFrameAggregate", dtypes):
|
|
@@ -183,7 +199,7 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
183
199
|
normalize_reduction_funcs(self, ndim=df.ndim)
|
|
184
200
|
compile_reduction_funcs(self, df)
|
|
185
201
|
if output_type is None or dtypes is None:
|
|
186
|
-
with enter_mode(kernel=False, build=False):
|
|
202
|
+
with enter_mode(kernel=False, build=False, mock=True):
|
|
187
203
|
dtypes, index = self._calc_result_shape(df)
|
|
188
204
|
else:
|
|
189
205
|
self.output_types = [output_type]
|
|
@@ -215,7 +231,7 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
215
231
|
return self.new_series(
|
|
216
232
|
[df],
|
|
217
233
|
shape=new_shape,
|
|
218
|
-
dtype=dtypes[0],
|
|
234
|
+
dtype=dtypes.iloc[0],
|
|
219
235
|
name=dtypes.index[0],
|
|
220
236
|
index_value=new_index,
|
|
221
237
|
)
|
|
@@ -237,7 +253,11 @@ def is_funcs_aggregate(func, func_kw=None, ndim=2):
|
|
|
237
253
|
elif isinstance(func, dict):
|
|
238
254
|
if ndim == 2:
|
|
239
255
|
for f in func.values():
|
|
240
|
-
if
|
|
256
|
+
if (
|
|
257
|
+
isinstance(f, Iterable)
|
|
258
|
+
and not isinstance(f, ENTITY_TYPE)
|
|
259
|
+
and not isinstance(f, str)
|
|
260
|
+
):
|
|
241
261
|
to_check.extend(f)
|
|
242
262
|
else:
|
|
243
263
|
to_check.append(f)
|
|
@@ -293,7 +313,11 @@ def normalize_reduction_funcs(op, ndim=None):
|
|
|
293
313
|
else:
|
|
294
314
|
op.func = list(raw_func.values())
|
|
295
315
|
op.func_rename = list(raw_func.keys())
|
|
296
|
-
elif
|
|
316
|
+
elif (
|
|
317
|
+
isinstance(raw_func, Iterable)
|
|
318
|
+
and not isinstance(raw_func, ENTITY_TYPE)
|
|
319
|
+
and not isinstance(raw_func, str)
|
|
320
|
+
):
|
|
297
321
|
op.func = list(raw_func)
|
|
298
322
|
else:
|
|
299
323
|
op.func = [raw_func]
|
|
@@ -432,6 +456,7 @@ def aggregate(df, func=None, axis=0, **kw):
|
|
|
432
456
|
min 1
|
|
433
457
|
"""
|
|
434
458
|
axis = validate_axis(axis, df)
|
|
459
|
+
use_inf_as_na = kw.pop("_use_inf_as_na", get_pd_option("mode.use_inf_as_na", False))
|
|
435
460
|
if func == "unique":
|
|
436
461
|
# workaround for direct call of unique function which
|
|
437
462
|
# returns a tensor directly
|
|
@@ -456,6 +481,8 @@ def aggregate(df, func=None, axis=0, **kw):
|
|
|
456
481
|
dtypes = kw.pop("_dtypes", None)
|
|
457
482
|
index = kw.pop("_index", None)
|
|
458
483
|
|
|
484
|
+
combine_size = kw.pop("_combine_size", None) or kw.pop("combine_size", None)
|
|
485
|
+
|
|
459
486
|
if not is_funcs_aggregate(func, func_kw=kw, ndim=df.ndim):
|
|
460
487
|
return df.transform(func, axis=axis, _call_agg=True)
|
|
461
488
|
|
|
@@ -465,6 +492,8 @@ def aggregate(df, func=None, axis=0, **kw):
|
|
|
465
492
|
axis=axis,
|
|
466
493
|
numeric_only=numeric_only,
|
|
467
494
|
bool_only=bool_only,
|
|
495
|
+
combine_size=combine_size,
|
|
496
|
+
use_inf_as_na=use_inf_as_na,
|
|
468
497
|
)
|
|
469
498
|
|
|
470
499
|
return op(df, output_type=output_type, dtypes=dtypes, index=index)
|
|
@@ -16,10 +16,10 @@ import numpy as np
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import OutputType
|
|
19
|
-
from .core import DATAFRAME_TYPE,
|
|
19
|
+
from .core import DATAFRAME_TYPE, DataFrameReduction, DataFrameReductionMixin
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class DataFrameAll(
|
|
22
|
+
class DataFrameAll(DataFrameReduction, DataFrameReductionMixin):
|
|
23
23
|
_op_type_ = opcodes.ALL
|
|
24
24
|
_func_name = "all"
|
|
25
25
|
|
|
@@ -16,10 +16,10 @@ import numpy as np
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import OutputType
|
|
19
|
-
from .core import DATAFRAME_TYPE,
|
|
19
|
+
from .core import DATAFRAME_TYPE, DataFrameReduction, DataFrameReductionMixin
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class DataFrameAny(
|
|
22
|
+
class DataFrameAny(DataFrameReduction, DataFrameReductionMixin):
|
|
23
23
|
_op_type_ = opcodes.ANY
|
|
24
24
|
_func_name = "any"
|
|
25
25
|
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameArgMax(DataFrameReduction, DataFrameReductionMixin):
|
|
22
|
+
_op_type_ = opcodes.ARGMAX
|
|
23
|
+
_func_name = "argmax"
|
|
24
|
+
|
|
25
|
+
@property
|
|
26
|
+
def is_atomic(self):
|
|
27
|
+
return True
|
|
28
|
+
|
|
29
|
+
def get_reduction_args(self, axis=None):
|
|
30
|
+
args = dict(skipna=self.skipna)
|
|
31
|
+
if self.inputs and self.inputs[0].ndim > 1:
|
|
32
|
+
args["axis"] = axis
|
|
33
|
+
return {k: v for k, v in args.items() if v is not None}
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def get_reduction_callable(cls, op):
|
|
37
|
+
func_name = getattr(op, "_func_name")
|
|
38
|
+
kw = dict(skipna=op.skipna)
|
|
39
|
+
kw = {k: v for k, v in kw.items() if v is not None}
|
|
40
|
+
return ReductionCallable(func_name=func_name, kwargs=kw)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def argmax_series_index(series_or_index, axis=0, skipna=True, *args, **kwargs):
|
|
44
|
+
"""
|
|
45
|
+
Return int position of the largest value in the Series.
|
|
46
|
+
|
|
47
|
+
If the maximum is achieved in multiple locations,
|
|
48
|
+
the first row position is returned.
|
|
49
|
+
|
|
50
|
+
Parameters
|
|
51
|
+
----------
|
|
52
|
+
axis : {None}
|
|
53
|
+
Unused. Parameter needed for compatibility with DataFrame.
|
|
54
|
+
skipna : bool, default True
|
|
55
|
+
Exclude NA/null values when showing the result.
|
|
56
|
+
*args, **kwargs
|
|
57
|
+
Additional arguments and keywords for compatibility with NumPy.
|
|
58
|
+
|
|
59
|
+
Returns
|
|
60
|
+
-------
|
|
61
|
+
int
|
|
62
|
+
Row position of the maximum value.
|
|
63
|
+
|
|
64
|
+
See Also
|
|
65
|
+
--------
|
|
66
|
+
Series.argmin : Return position of the minimum value.
|
|
67
|
+
Series.argmax : Return position of the maximum value.
|
|
68
|
+
maxframe.tensor.argmax : Equivalent method for tensors.
|
|
69
|
+
Series.idxmax : Return index label of the maximum values.
|
|
70
|
+
Series.idxmin : Return index label of the minimum values.
|
|
71
|
+
|
|
72
|
+
Examples
|
|
73
|
+
--------
|
|
74
|
+
Consider dataset containing cereal calories
|
|
75
|
+
|
|
76
|
+
>>> import maxframe.dataframe as md
|
|
77
|
+
>>> s = md.Series({'Corn Flakes': 100.0, 'Almond Delight': 110.0,
|
|
78
|
+
... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0})
|
|
79
|
+
>>> s.execute()
|
|
80
|
+
Corn Flakes 100.0
|
|
81
|
+
Almond Delight 110.0
|
|
82
|
+
Cinnamon Toast Crunch 120.0
|
|
83
|
+
Cocoa Puff 110.0
|
|
84
|
+
dtype: float64
|
|
85
|
+
|
|
86
|
+
>>> s.argmax().execute()
|
|
87
|
+
2
|
|
88
|
+
>>> s.argmin().execute()
|
|
89
|
+
0
|
|
90
|
+
|
|
91
|
+
The maximum cereal calories is the third element and
|
|
92
|
+
the minimum cereal calories is the first element,
|
|
93
|
+
since series is zero-indexed.
|
|
94
|
+
"""
|
|
95
|
+
# args not implemented, just ignore
|
|
96
|
+
_ = args, kwargs
|
|
97
|
+
|
|
98
|
+
validate_axis(axis, series_or_index)
|
|
99
|
+
op = DataFrameArgMax(
|
|
100
|
+
dropna=skipna,
|
|
101
|
+
output_types=[OutputType.scalar],
|
|
102
|
+
)
|
|
103
|
+
return op(series_or_index)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...core import OutputType
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameArgMin(DataFrameReduction, DataFrameReductionMixin):
|
|
22
|
+
_op_type_ = opcodes.ARGMIN
|
|
23
|
+
_func_name = "argmin"
|
|
24
|
+
|
|
25
|
+
@property
|
|
26
|
+
def is_atomic(self):
|
|
27
|
+
return True
|
|
28
|
+
|
|
29
|
+
def get_reduction_args(self, axis=None):
|
|
30
|
+
args = dict(skipna=self.skipna)
|
|
31
|
+
if self.inputs and self.inputs[0].ndim > 1:
|
|
32
|
+
args["axis"] = axis
|
|
33
|
+
return {k: v for k, v in args.items() if v is not None}
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def get_reduction_callable(cls, op):
|
|
37
|
+
func_name = getattr(op, "_func_name")
|
|
38
|
+
kw = dict(skipna=op.skipna)
|
|
39
|
+
kw = {k: v for k, v in kw.items() if v is not None}
|
|
40
|
+
return ReductionCallable(func_name=func_name, kwargs=kw)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def argmin_series_index(series_or_index, axis=0, skipna=True, *args, **kwargs):
|
|
44
|
+
"""
|
|
45
|
+
Return int position of the smallest value in the Series.
|
|
46
|
+
|
|
47
|
+
If the minimum is achieved in multiple locations,
|
|
48
|
+
the first row position is returned.
|
|
49
|
+
|
|
50
|
+
Parameters
|
|
51
|
+
----------
|
|
52
|
+
axis : {None}
|
|
53
|
+
Unused. Parameter needed for compatibility with DataFrame.
|
|
54
|
+
skipna : bool, default True
|
|
55
|
+
Exclude NA/null values when showing the result.
|
|
56
|
+
*args, **kwargs
|
|
57
|
+
Additional arguments and keywords for compatibility with NumPy.
|
|
58
|
+
|
|
59
|
+
Returns
|
|
60
|
+
-------
|
|
61
|
+
int
|
|
62
|
+
Row position of the minimum value.
|
|
63
|
+
|
|
64
|
+
See Also
|
|
65
|
+
--------
|
|
66
|
+
Series.argmin : Return position of the minimum value.
|
|
67
|
+
Series.argmax : Return position of the maximum value.
|
|
68
|
+
maxframe.tensor.argmin : Equivalent method for tensors.
|
|
69
|
+
Series.idxmax : Return index label of the maximum values.
|
|
70
|
+
Series.idxmin : Return index label of the minimum values.
|
|
71
|
+
|
|
72
|
+
Examples
|
|
73
|
+
--------
|
|
74
|
+
Consider dataset containing cereal calories
|
|
75
|
+
|
|
76
|
+
>>> import maxframe.dataframe as md
|
|
77
|
+
>>> s = md.Series({'Corn Flakes': 100.0, 'Almond Delight': 110.0,
|
|
78
|
+
... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0})
|
|
79
|
+
>>> s.execute()
|
|
80
|
+
Corn Flakes 100.0
|
|
81
|
+
Almond Delight 110.0
|
|
82
|
+
Cinnamon Toast Crunch 120.0
|
|
83
|
+
Cocoa Puff 110.0
|
|
84
|
+
dtype: float64
|
|
85
|
+
|
|
86
|
+
>>> s.argmax().execute()
|
|
87
|
+
2
|
|
88
|
+
>>> s.argmin().execute()
|
|
89
|
+
0
|
|
90
|
+
|
|
91
|
+
The maximum cereal calories is the third element and
|
|
92
|
+
the minimum cereal calories is the first element,
|
|
93
|
+
since series is zero-indexed.
|
|
94
|
+
"""
|
|
95
|
+
# args not implemented, just ignore
|
|
96
|
+
_ = args, kwargs
|
|
97
|
+
|
|
98
|
+
validate_axis(axis, series_or_index)
|
|
99
|
+
op = DataFrameArgMin(
|
|
100
|
+
dropna=skipna,
|
|
101
|
+
output_types=[OutputType.scalar],
|
|
102
|
+
)
|
|
103
|
+
return op(series_or_index)
|