PyPI - maxframe - Versions diffs - 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl - Mend

maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cp37-win32.pyd +0 -0
maxframe/_utils.pyx +14 -1
maxframe/codegen/core.py +9 -8
maxframe/codegen/spe/core.py +1 -1
maxframe/codegen/spe/dataframe/__init__.py +1 -0
maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
maxframe/codegen/spe/dataframe/groupby.py +88 -0
maxframe/codegen/spe/dataframe/indexing.py +99 -4
maxframe/codegen/spe/dataframe/merge.py +38 -1
maxframe/codegen/spe/dataframe/misc.py +11 -33
maxframe/codegen/spe/dataframe/reduction.py +32 -9
maxframe/codegen/spe/dataframe/reshape.py +46 -0
maxframe/codegen/spe/dataframe/sort.py +39 -18
maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
maxframe/codegen/spe/dataframe/tseries.py +9 -0
maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
maxframe/codegen/spe/tensor/__init__.py +3 -0
maxframe/codegen/spe/tensor/datasource.py +1 -0
maxframe/codegen/spe/tensor/fft.py +74 -0
maxframe/codegen/spe/tensor/linalg.py +29 -2
maxframe/codegen/spe/tensor/misc.py +79 -25
maxframe/codegen/spe/tensor/spatial.py +45 -0
maxframe/codegen/spe/tensor/statistics.py +44 -0
maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
maxframe/codegen/spe/utils.py +2 -0
maxframe/config/config.py +73 -9
maxframe/config/tests/test_validators.py +13 -1
maxframe/config/validators.py +49 -0
maxframe/conftest.py +54 -17
maxframe/core/accessor.py +2 -2
maxframe/core/base.py +2 -1
maxframe/core/entity/core.py +5 -0
maxframe/core/entity/tileables.py +3 -1
maxframe/core/graph/core.cp37-win32.pyd +0 -0
maxframe/core/graph/entity.py +8 -3
maxframe/core/mode.py +6 -1
maxframe/core/operator/base.py +9 -2
maxframe/core/operator/core.py +10 -2
maxframe/core/operator/utils.py +13 -0
maxframe/dataframe/__init__.py +12 -5
maxframe/dataframe/accessors/__init__.py +1 -1
maxframe/dataframe/accessors/compat.py +45 -0
maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
maxframe/dataframe/accessors/dict_/contains.py +7 -16
maxframe/dataframe/accessors/dict_/core.py +48 -0
maxframe/dataframe/accessors/dict_/getitem.py +17 -21
maxframe/dataframe/accessors/dict_/length.py +7 -16
maxframe/dataframe/accessors/dict_/remove.py +6 -18
maxframe/dataframe/accessors/dict_/setitem.py +8 -18
maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
maxframe/dataframe/accessors/list_/__init__.py +2 -2
maxframe/dataframe/accessors/list_/core.py +48 -0
maxframe/dataframe/accessors/list_/getitem.py +12 -19
maxframe/dataframe/accessors/list_/length.py +7 -16
maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
maxframe/dataframe/accessors/string_/__init__.py +4 -1
maxframe/dataframe/accessors/struct_/__init__.py +37 -0
maxframe/dataframe/accessors/struct_/accessor.py +39 -0
maxframe/dataframe/accessors/struct_/core.py +43 -0
maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
maxframe/dataframe/accessors/struct_/field.py +123 -0
maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
maxframe/dataframe/arithmetic/__init__.py +18 -4
maxframe/dataframe/arithmetic/between.py +106 -0
maxframe/dataframe/arithmetic/dot.py +237 -0
maxframe/dataframe/arithmetic/maximum.py +33 -0
maxframe/dataframe/arithmetic/minimum.py +33 -0
maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
maxframe/dataframe/core.py +161 -224
maxframe/dataframe/datasource/__init__.py +18 -0
maxframe/dataframe/datasource/core.py +6 -0
maxframe/dataframe/datasource/direct.py +57 -0
maxframe/dataframe/datasource/from_dict.py +124 -0
maxframe/dataframe/datasource/from_index.py +1 -1
maxframe/dataframe/datasource/from_records.py +77 -0
maxframe/dataframe/datasource/from_tensor.py +109 -41
maxframe/dataframe/datasource/read_csv.py +21 -14
maxframe/dataframe/datasource/read_odps_query.py +29 -6
maxframe/dataframe/datasource/read_odps_table.py +32 -10
maxframe/dataframe/datasource/read_parquet.py +38 -39
maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
maxframe/dataframe/datastore/__init__.py +11 -1
maxframe/dataframe/datastore/direct.py +268 -0
maxframe/dataframe/datastore/to_csv.py +29 -41
maxframe/dataframe/datastore/to_odps.py +36 -4
maxframe/dataframe/extensions/__init__.py +20 -4
maxframe/dataframe/extensions/apply_chunk.py +32 -6
maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
maxframe/dataframe/extensions/collect_kv.py +126 -0
maxframe/dataframe/extensions/extract_kv.py +177 -0
maxframe/dataframe/extensions/flatjson.py +2 -1
maxframe/dataframe/extensions/map_reduce.py +263 -0
maxframe/dataframe/extensions/rebalance.py +62 -0
maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
maxframe/dataframe/groupby/__init__.py +17 -2
maxframe/dataframe/groupby/aggregation.py +86 -49
maxframe/dataframe/groupby/apply.py +1 -1
maxframe/dataframe/groupby/apply_chunk.py +19 -5
maxframe/dataframe/groupby/core.py +116 -16
maxframe/dataframe/groupby/cum.py +4 -25
maxframe/dataframe/groupby/expanding.py +264 -0
maxframe/dataframe/groupby/fill.py +1 -1
maxframe/dataframe/groupby/getitem.py +12 -5
maxframe/dataframe/groupby/head.py +11 -1
maxframe/dataframe/groupby/rank.py +136 -0
maxframe/dataframe/groupby/rolling.py +206 -0
maxframe/dataframe/groupby/shift.py +114 -0
maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
maxframe/dataframe/indexing/__init__.py +22 -2
maxframe/dataframe/indexing/droplevel.py +195 -0
maxframe/dataframe/indexing/filter.py +169 -0
maxframe/dataframe/indexing/get_level_values.py +76 -0
maxframe/dataframe/indexing/iat.py +45 -0
maxframe/dataframe/indexing/iloc.py +152 -12
maxframe/dataframe/indexing/insert.py +46 -18
maxframe/dataframe/indexing/loc.py +287 -7
maxframe/dataframe/indexing/reindex.py +14 -5
maxframe/dataframe/indexing/rename.py +6 -0
maxframe/dataframe/indexing/rename_axis.py +2 -2
maxframe/dataframe/indexing/reorder_levels.py +143 -0
maxframe/dataframe/indexing/reset_index.py +33 -6
maxframe/dataframe/indexing/sample.py +8 -0
maxframe/dataframe/indexing/setitem.py +3 -3
maxframe/dataframe/indexing/swaplevel.py +185 -0
maxframe/dataframe/indexing/take.py +99 -0
maxframe/dataframe/indexing/truncate.py +140 -0
maxframe/dataframe/indexing/where.py +0 -11
maxframe/dataframe/indexing/xs.py +148 -0
maxframe/dataframe/merge/__init__.py +15 -1
maxframe/dataframe/merge/append.py +97 -98
maxframe/dataframe/merge/combine.py +244 -0
maxframe/dataframe/merge/combine_first.py +120 -0
maxframe/dataframe/merge/compare.py +387 -0
maxframe/dataframe/merge/concat.py +183 -0
maxframe/dataframe/merge/update.py +271 -0
maxframe/dataframe/misc/__init__.py +28 -11
maxframe/dataframe/misc/_duplicate.py +10 -4
maxframe/dataframe/misc/apply.py +1 -1
maxframe/dataframe/misc/check_unique.py +82 -0
maxframe/dataframe/misc/clip.py +145 -0
maxframe/dataframe/misc/describe.py +175 -9
maxframe/dataframe/misc/drop.py +31 -0
maxframe/dataframe/misc/drop_duplicates.py +2 -2
maxframe/dataframe/misc/duplicated.py +2 -2
maxframe/dataframe/misc/get_dummies.py +5 -1
maxframe/dataframe/misc/infer_dtypes.py +251 -0
maxframe/dataframe/misc/isin.py +2 -2
maxframe/dataframe/misc/map.py +125 -18
maxframe/dataframe/misc/repeat.py +159 -0
maxframe/dataframe/misc/tests/test_misc.py +48 -3
maxframe/dataframe/misc/to_numeric.py +3 -0
maxframe/dataframe/misc/transform.py +12 -5
maxframe/dataframe/misc/transpose.py +13 -1
maxframe/dataframe/misc/valid_index.py +115 -0
maxframe/dataframe/misc/value_counts.py +38 -4
maxframe/dataframe/missing/checkna.py +14 -6
maxframe/dataframe/missing/dropna.py +5 -0
maxframe/dataframe/missing/fillna.py +1 -1
maxframe/dataframe/missing/replace.py +7 -4
maxframe/dataframe/reduction/__init__.py +35 -16
maxframe/dataframe/reduction/aggregation.py +43 -14
maxframe/dataframe/reduction/all.py +2 -2
maxframe/dataframe/reduction/any.py +2 -2
maxframe/dataframe/reduction/argmax.py +103 -0
maxframe/dataframe/reduction/argmin.py +103 -0
maxframe/dataframe/reduction/core.py +80 -24
maxframe/dataframe/reduction/count.py +13 -9
maxframe/dataframe/reduction/cov.py +166 -0
maxframe/dataframe/reduction/cummax.py +2 -2
maxframe/dataframe/reduction/cummin.py +2 -2
maxframe/dataframe/reduction/cumprod.py +2 -2
maxframe/dataframe/reduction/cumsum.py +2 -2
maxframe/dataframe/reduction/custom_reduction.py +2 -2
maxframe/dataframe/reduction/idxmax.py +185 -0
maxframe/dataframe/reduction/idxmin.py +185 -0
maxframe/dataframe/reduction/kurtosis.py +37 -30
maxframe/dataframe/reduction/max.py +2 -2
maxframe/dataframe/reduction/mean.py +9 -7
maxframe/dataframe/reduction/median.py +2 -2
maxframe/dataframe/reduction/min.py +2 -2
maxframe/dataframe/reduction/mode.py +144 -0
maxframe/dataframe/reduction/nunique.py +19 -11
maxframe/dataframe/reduction/prod.py +18 -13
maxframe/dataframe/reduction/reduction_size.py +2 -2
maxframe/dataframe/reduction/sem.py +13 -9
maxframe/dataframe/reduction/skew.py +31 -27
maxframe/dataframe/reduction/str_concat.py +10 -7
maxframe/dataframe/reduction/sum.py +18 -14
maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
maxframe/dataframe/reduction/unique.py +20 -3
maxframe/dataframe/reduction/var.py +16 -12
maxframe/dataframe/reshape/__init__.py +38 -0
maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
maxframe/dataframe/reshape/unstack.py +114 -0
maxframe/dataframe/sort/__init__.py +16 -1
maxframe/dataframe/sort/argsort.py +68 -0
maxframe/dataframe/sort/core.py +2 -1
maxframe/dataframe/sort/nlargest.py +238 -0
maxframe/dataframe/sort/nsmallest.py +228 -0
maxframe/dataframe/sort/rank.py +147 -0
maxframe/dataframe/statistics/__init__.py +3 -3
maxframe/dataframe/statistics/corr.py +1 -0
maxframe/dataframe/statistics/quantile.py +2 -2
maxframe/dataframe/tests/test_typing.py +104 -0
maxframe/dataframe/tests/test_utils.py +66 -2
maxframe/dataframe/tseries/__init__.py +19 -0
maxframe/dataframe/tseries/at_time.py +61 -0
maxframe/dataframe/tseries/between_time.py +122 -0
maxframe/dataframe/typing_.py +185 -0
maxframe/dataframe/utils.py +125 -52
maxframe/dataframe/window/aggregation.py +8 -4
maxframe/dataframe/window/core.py +14 -1
maxframe/dataframe/window/ewm.py +1 -3
maxframe/dataframe/window/expanding.py +37 -35
maxframe/dataframe/window/rolling.py +49 -39
maxframe/dataframe/window/tests/test_expanding.py +1 -7
maxframe/dataframe/window/tests/test_rolling.py +1 -1
maxframe/env.py +7 -4
maxframe/errors.py +2 -2
maxframe/io/odpsio/schema.py +9 -3
maxframe/io/odpsio/tableio.py +7 -2
maxframe/io/odpsio/tests/test_schema.py +198 -83
maxframe/learn/__init__.py +10 -2
maxframe/learn/cluster/__init__.py +15 -0
maxframe/learn/cluster/_kmeans.py +782 -0
maxframe/learn/contrib/llm/core.py +18 -7
maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
maxframe/learn/contrib/llm/deploy/config.py +221 -0
maxframe/learn/contrib/llm/deploy/core.py +247 -0
maxframe/learn/contrib/llm/deploy/framework.py +35 -0
maxframe/learn/contrib/llm/deploy/loader.py +360 -0
maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
maxframe/learn/contrib/llm/models/__init__.py +1 -0
maxframe/learn/contrib/llm/models/dashscope.py +12 -6
maxframe/learn/contrib/llm/models/managed.py +76 -11
maxframe/learn/contrib/llm/models/openai.py +72 -0
maxframe/learn/contrib/llm/tests/__init__.py +13 -0
maxframe/learn/contrib/llm/tests/test_core.py +34 -0
maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
maxframe/learn/contrib/llm/text.py +348 -42
maxframe/learn/contrib/models.py +4 -1
maxframe/learn/contrib/xgboost/classifier.py +2 -0
maxframe/learn/contrib/xgboost/core.py +113 -4
maxframe/learn/contrib/xgboost/predict.py +4 -2
maxframe/learn/contrib/xgboost/regressor.py +5 -0
maxframe/learn/contrib/xgboost/train.py +7 -2
maxframe/learn/core.py +66 -0
maxframe/learn/linear_model/_base.py +58 -1
maxframe/learn/linear_model/_lin_reg.py +1 -1
maxframe/learn/metrics/__init__.py +6 -0
maxframe/learn/metrics/_classification.py +145 -0
maxframe/learn/metrics/_ranking.py +477 -0
maxframe/learn/metrics/_scorer.py +60 -0
maxframe/learn/metrics/pairwise/__init__.py +21 -0
maxframe/learn/metrics/pairwise/core.py +77 -0
maxframe/learn/metrics/pairwise/cosine.py +115 -0
maxframe/learn/metrics/pairwise/euclidean.py +176 -0
maxframe/learn/metrics/pairwise/haversine.py +96 -0
maxframe/learn/metrics/pairwise/manhattan.py +80 -0
maxframe/learn/metrics/pairwise/pairwise.py +127 -0
maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
maxframe/learn/metrics/tests/__init__.py +13 -0
maxframe/learn/metrics/tests/test_scorer.py +26 -0
maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
maxframe/learn/utils/__init__.py +2 -1
maxframe/learn/utils/checks.py +1 -2
maxframe/learn/utils/core.py +59 -0
maxframe/learn/utils/extmath.py +79 -9
maxframe/learn/utils/odpsio.py +262 -0
maxframe/learn/utils/validation.py +2 -2
maxframe/lib/compat.py +40 -0
maxframe/lib/dtypes_extension/__init__.py +16 -1
maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
maxframe/lib/dtypes_extension/blob.py +304 -0
maxframe/lib/dtypes_extension/dtypes.py +40 -0
maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
maxframe/lib/filesystem/_oss_lib/common.py +124 -50
maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
maxframe/lib/filesystem/base.py +1 -1
maxframe/lib/filesystem/core.py +1 -1
maxframe/lib/filesystem/oss.py +115 -46
maxframe/lib/filesystem/tests/test_oss.py +74 -36
maxframe/lib/mmh3.cp37-win32.pyd +0 -0
maxframe/lib/wrapped_pickle.py +10 -0
maxframe/opcodes.py +41 -15
maxframe/protocol.py +12 -0
maxframe/remote/core.py +4 -0
maxframe/serialization/__init__.py +11 -2
maxframe/serialization/arrow.py +38 -13
maxframe/serialization/blob.py +32 -0
maxframe/serialization/core.cp37-win32.pyd +0 -0
maxframe/serialization/core.pyx +39 -1
maxframe/serialization/exception.py +2 -4
maxframe/serialization/numpy.py +11 -0
maxframe/serialization/pandas.py +46 -9
maxframe/serialization/serializables/core.py +2 -2
maxframe/serialization/tests/test_serial.py +31 -4
maxframe/tensor/__init__.py +38 -8
maxframe/tensor/arithmetic/__init__.py +19 -10
maxframe/tensor/arithmetic/core.py +2 -2
maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
maxframe/tensor/core.py +6 -2
maxframe/tensor/datasource/tests/test_datasource.py +2 -1
maxframe/tensor/extensions/__init__.py +2 -0
maxframe/tensor/extensions/apply_chunk.py +3 -3
maxframe/tensor/extensions/rebalance.py +65 -0
maxframe/tensor/fft/__init__.py +32 -0
maxframe/tensor/fft/core.py +168 -0
maxframe/tensor/fft/fft.py +112 -0
maxframe/tensor/fft/fft2.py +118 -0
maxframe/tensor/fft/fftfreq.py +80 -0
maxframe/tensor/fft/fftn.py +123 -0
maxframe/tensor/fft/fftshift.py +79 -0
maxframe/tensor/fft/hfft.py +112 -0
maxframe/tensor/fft/ifft.py +114 -0
maxframe/tensor/fft/ifft2.py +115 -0
maxframe/tensor/fft/ifftn.py +123 -0
maxframe/tensor/fft/ifftshift.py +73 -0
maxframe/tensor/fft/ihfft.py +93 -0
maxframe/tensor/fft/irfft.py +118 -0
maxframe/tensor/fft/irfft2.py +62 -0
maxframe/tensor/fft/irfftn.py +114 -0
maxframe/tensor/fft/rfft.py +116 -0
maxframe/tensor/fft/rfft2.py +63 -0
maxframe/tensor/fft/rfftfreq.py +87 -0
maxframe/tensor/fft/rfftn.py +113 -0
maxframe/tensor/indexing/fill_diagonal.py +1 -7
maxframe/tensor/linalg/__init__.py +7 -0
maxframe/tensor/linalg/_einsumfunc.py +1025 -0
maxframe/tensor/linalg/cholesky.py +117 -0
maxframe/tensor/linalg/einsum.py +339 -0
maxframe/tensor/linalg/lstsq.py +100 -0
maxframe/tensor/linalg/matrix_norm.py +75 -0
maxframe/tensor/linalg/norm.py +249 -0
maxframe/tensor/linalg/solve.py +72 -0
maxframe/tensor/linalg/solve_triangular.py +2 -2
maxframe/tensor/linalg/vector_norm.py +113 -0
maxframe/tensor/misc/__init__.py +24 -1
maxframe/tensor/misc/argwhere.py +72 -0
maxframe/tensor/misc/array_split.py +46 -0
maxframe/tensor/misc/broadcast_arrays.py +57 -0
maxframe/tensor/misc/copyto.py +130 -0
maxframe/tensor/misc/delete.py +104 -0
maxframe/tensor/misc/dsplit.py +68 -0
maxframe/tensor/misc/ediff1d.py +74 -0
maxframe/tensor/misc/expand_dims.py +85 -0
maxframe/tensor/misc/flip.py +90 -0
maxframe/tensor/misc/fliplr.py +64 -0
maxframe/tensor/misc/flipud.py +68 -0
maxframe/tensor/misc/hsplit.py +85 -0
maxframe/tensor/misc/insert.py +139 -0
maxframe/tensor/misc/moveaxis.py +83 -0
maxframe/tensor/misc/result_type.py +88 -0
maxframe/tensor/misc/roll.py +124 -0
maxframe/tensor/misc/rollaxis.py +77 -0
maxframe/tensor/misc/shape.py +89 -0
maxframe/tensor/misc/split.py +190 -0
maxframe/tensor/misc/tile.py +109 -0
maxframe/tensor/misc/vsplit.py +74 -0
maxframe/tensor/reduction/array_equal.py +2 -1
maxframe/tensor/sort/__init__.py +2 -0
maxframe/tensor/sort/argpartition.py +98 -0
maxframe/tensor/sort/partition.py +228 -0
maxframe/tensor/spatial/__init__.py +15 -0
maxframe/tensor/spatial/distance/__init__.py +17 -0
maxframe/tensor/spatial/distance/cdist.py +421 -0
maxframe/tensor/spatial/distance/pdist.py +398 -0
maxframe/tensor/spatial/distance/squareform.py +153 -0
maxframe/tensor/special/__init__.py +159 -21
maxframe/tensor/special/airy.py +55 -0
maxframe/tensor/special/bessel.py +199 -0
maxframe/tensor/special/core.py +65 -4
maxframe/tensor/special/ellip_func_integrals.py +155 -0
maxframe/tensor/special/ellip_harm.py +55 -0
maxframe/tensor/special/err_fresnel.py +223 -0
maxframe/tensor/special/gamma_funcs.py +303 -0
maxframe/tensor/special/hypergeometric_funcs.py +69 -0
maxframe/tensor/special/info_theory.py +189 -0
maxframe/tensor/special/misc.py +21 -0
maxframe/tensor/statistics/__init__.py +6 -0
maxframe/tensor/statistics/corrcoef.py +77 -0
maxframe/tensor/statistics/cov.py +222 -0
maxframe/tensor/statistics/digitize.py +126 -0
maxframe/tensor/statistics/histogram.py +520 -0
maxframe/tensor/statistics/median.py +85 -0
maxframe/tensor/statistics/ptp.py +89 -0
maxframe/tensor/utils.py +3 -3
maxframe/tests/test_udf.py +61 -0
maxframe/tests/test_utils.py +51 -6
maxframe/tests/utils.py +0 -2
maxframe/typing_.py +2 -0
maxframe/udf.py +130 -9
maxframe/utils.py +254 -27
{maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
{maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
maxframe_client/fetcher.py +35 -4
maxframe_client/session/odps.py +7 -2
maxframe_client/session/task.py +8 -1
maxframe_client/tests/test_fetcher.py +76 -3
maxframe_client/tests/test_session.py +28 -1
maxframe/dataframe/arrays.py +0 -864
/maxframe/dataframe/{misc → reshape}/melt.py +0 -0
/maxframe/dataframe/{misc → reshape}/stack.py +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
{maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0

maxframe/dataframe/misc/describe.py CHANGED Viewed

@@ -22,7 +22,7 @@ from ...core import EntityData
 from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
 from ..core import SERIES_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
-from ..utils import build_empty_df, parse_index
+from ..utils import build_df, parse_index
 class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
@@ -43,8 +43,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
     def __call__(self, df_or_series):
         if isinstance(df_or_series, SERIES_TYPE):
-            if not np.issubdtype(df_or_series.dtype, np.number):
-                raise NotImplementedError("non-numeric type is not supported for now")
             test_series = pd.Series([], dtype=df_or_series.dtype).describe(
                 percentiles=self.percentiles,
                 include=self.include,
@@ -57,7 +55,7 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
                 index_value=parse_index(test_series.index, store_data=True),
             )
         else:
-            test_inp_df = build_empty_df(df_or_series.dtypes)
+            test_inp_df = build_df(df_or_series)
             test_df = test_inp_df.describe(
                 percentiles=self.percentiles,
                 include=self.include,
@@ -69,11 +67,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
                 # MaxFrame DataFrame allows user to specify percentiles=False
                 # to skip computation about percentiles
                 test_df.drop(["50%"], axis=0, inplace=True)
-            for dtype in test_df.dtypes:
-                if not np.issubdtype(dtype, np.number):
-                    raise NotImplementedError(
-                        "non-numeric type is not supported for now"
-                    )
             return self.new_dataframe(
                 [df_or_series],
                 shape=test_df.shape,
@@ -84,6 +77,179 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
 def describe(df_or_series, percentiles=None, include=None, exclude=None):
+    """
+    Generate descriptive statistics.
+    Descriptive statistics include those that summarize the central
+    tendency, dispersion and shape of a
+    dataset's distribution, excluding ``NaN`` values.
+    Analyzes both numeric and object series, as well
+    as ``DataFrame`` column sets of mixed data types. The output
+    will vary depending on what is provided. Refer to the notes
+    below for more detail.
+    Parameters
+    ----------
+    percentiles : list-like of numbers, optional
+        The percentiles to include in the output. All should
+        fall between 0 and 1. The default is
+        ``[.25, .5, .75]``, which returns the 25th, 50th, and
+        75th percentiles.
+    include : 'all', list-like of dtypes or None (default), optional
+        A white list of data types to include in the result. Ignored
+        for ``Series``. Here are the options:
+        - 'all' : All columns of the input will be included in the output.
+        - A list-like of dtypes : Limits the results to the
+          provided data types.
+          To limit the result to numeric types submit
+          ``numpy.number``. To limit it instead to object columns submit
+          the ``object`` data type. Strings
+          can also be used in the style of
+          ``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
+        - None (default) : The result will include all numeric columns.
+    exclude : list-like of dtypes or None (default), optional
+        A black list of data types to omit from the result. Ignored
+        for ``Series``. Here are the options:
+        - A list-like of dtypes : Excludes the provided data types
+          from the result. To exclude numeric types submit
+          ``numpy.number``. To exclude object columns submit the data
+          type ``object``. Strings can also be used in the style of
+          ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
+        - None (default) : The result will exclude nothing.
+    Returns
+    -------
+    Series or DataFrame
+        Summary statistics of the Series or Dataframe provided.
+    See Also
+    --------
+    DataFrame.count: Count number of non-NA/null observations.
+    DataFrame.max: Maximum of the values in the object.
+    DataFrame.min: Minimum of the values in the object.
+    DataFrame.mean: Mean of the values.
+    DataFrame.std: Standard deviation of the observations.
+    DataFrame.select_dtypes: Subset of a DataFrame including/excluding
+        columns based on their dtype.
+    Notes
+    -----
+    For numeric data, the result's index will include ``count``,
+    ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
+    upper percentiles. By default the lower percentile is ``25`` and the
+    upper percentile is ``75``. The ``50`` percentile is the
+    same as the median.
+    For object data (e.g. strings or timestamps), the result's index
+    will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
+    is the most common value. The ``freq`` is the most common value's
+    frequency. Timestamps also include the ``first`` and ``last`` items.
+    If multiple object values have the highest count, then the
+    ``top`` result will be arbitrarily chosen from
+    among those with the highest count.
+    For mixed data types provided via a ``DataFrame``, the default is to
+    return only an analysis of numeric columns. If the dataframe consists
+    only of object data without any numeric columns, the default is to
+    return an analysis of object columns. If ``include='all'`` is provided
+    as an option, the result will include a union of attributes of each type.
+    The `include` and `exclude` parameters can be used to limit
+    which columns in a ``DataFrame`` are analyzed for the output.
+    The parameters are ignored when analyzing a ``Series``.
+    Examples
+    --------
+    Describing a numeric ``Series``.
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series([1, 2, 3])
+    >>> s.describe().execute()
+    count    3.0
+    mean     2.0
+    std      1.0
+    min      1.0
+    25%      1.5
+    50%      2.0
+    75%      2.5
+    max      3.0
+    dtype: float64
+    Describing a ``DataFrame``. By default only numeric fields
+    are returned.
+    >>> df = md.DataFrame({'numeric': [1, 2, 3],
+    ...                    'object': ['a', 'b', 'c']
+    ...                    })
+    >>> df.describe().execute()
+           numeric
+    count      3.0
+    mean       2.0
+    std        1.0
+    min        1.0
+    25%        1.5
+    50%        2.0
+    75%        2.5
+    max        3.0
+    Describing all columns of a ``DataFrame`` regardless of data type.
+    >>> df.describe(include='all').execute()  # doctest: +SKIP
+           numeric object
+    count      3.0      3
+    unique     NaN      3
+    top        NaN      a
+    freq       NaN      1
+    mean       2.0    NaN
+    std        1.0    NaN
+    min        1.0    NaN
+    25%        1.5    NaN
+    50%        2.0    NaN
+    75%        2.5    NaN
+    max        3.0    NaN
+    Describing a column from a ``DataFrame`` by accessing it as
+    an attribute.
+    >>> df.numeric.describe().execute()
+    count    3.0
+    mean     2.0
+    std      1.0
+    min      1.0
+    25%      1.5
+    50%      2.0
+    75%      2.5
+    max      3.0
+    Name: numeric, dtype: float64
+    Including only numeric columns in a ``DataFrame`` description.
+    >>> df.describe(include=[mt.number]).execute()
+           numeric
+    count      3.0
+    mean       2.0
+    std        1.0
+    min        1.0
+    25%        1.5
+    50%        2.0
+    75%        2.5
+    max        3.0
+    Including only string columns in a ``DataFrame`` description.
+    >>> df.describe(include=[object]).execute()  # doctest: +SKIP
+           object
+    count       3
+    unique      3
+    top         a
+    freq        1
+    """
+    # fixme add support for categorical columns once implemented
     if percentiles is False:
         percentiles = []
     elif percentiles is None:

maxframe/dataframe/misc/drop.py CHANGED Viewed

@@ -419,6 +419,37 @@ def series_drop(
     )
+def series_pop(series, item):
+    """
+    Return item and drop it from the series. Raise KeyError if not found.
+    Parameters
+    ----------
+    item : label
+        Index of the element that needs to be removed.
+    Returns
+    -------
+    Value that is popped from series.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> ser = md.Series([1,2,3])
+    >>> ser.pop(0).execute()
+    1
+    >>> ser.execute()
+    1    2
+    2    3
+    dtype: int64
+    """
+    scalar = series.data[item]
+    series_drop(series, item, inplace=True)
+    return scalar
 def index_drop(index, labels, errors="raise"):
     """
     Make new Index with passed list of labels deleted.

maxframe/dataframe/misc/drop_duplicates.py CHANGED Viewed

@@ -19,10 +19,10 @@ from ... import opcodes
 from ...serialization.serializables import BoolField
 from ..operators import OutputType
 from ..utils import gen_unknown_index_value, parse_index
-from ._duplicate import DuplicateOperand, validate_subset
+from ._duplicate import BaseDuplicateOp, validate_subset
-class DataFrameDropDuplicates(DuplicateOperand):
+class DataFrameDropDuplicates(BaseDuplicateOp):
     _op_type_ = opcodes.DROP_DUPLICATES
     ignore_index = BoolField("ignore_index", default=True)

maxframe/dataframe/misc/duplicated.py CHANGED Viewed

@@ -16,10 +16,10 @@ import numpy as np
 from ... import opcodes
 from ...core import OutputType
-from ._duplicate import DuplicateOperand, validate_subset
+from ._duplicate import BaseDuplicateOp, validate_subset
-class DataFrameDuplicated(DuplicateOperand):
+class DataFrameDuplicated(BaseDuplicateOp):
     _op_type_ = opcodes.DUPLICATED
     def __init__(self, output_types=None, **kw):

maxframe/dataframe/misc/get_dummies.py CHANGED Viewed

@@ -25,12 +25,14 @@ from ...serialization.serializables import (
     ListField,
     StringField,
 )
+from ...utils import make_dtype, pd_release_version
 from ..datasource.dataframe import from_pandas as from_pandas_df
 from ..datasource.series import from_pandas as from_pandas_series
 from ..initializer import Series as asseries
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 _encoding_dtype_kind = ["O", "S", "U"]
+_ret_uint8 = pd_release_version < (2, 0, 0)
 class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
@@ -181,7 +183,9 @@ def get_dummies(
     elif isinstance(data, pd.DataFrame):
         data = from_pandas_df(data)
-    dtype = dtype if dtype is not None else np.dtype(bool)
+    dtype = make_dtype(
+        dtype if dtype is not None else np.dtype(np.uint8 if _ret_uint8 else bool)
+    )
     if prefix is not None:
         if isinstance(prefix, list):

maxframe/dataframe/misc/infer_dtypes.py ADDED Viewed

@@ -0,0 +1,251 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ... import opcodes
+from ...serialization.serializables import AnyField, StringField
+from ..core import DATAFRAME_TYPE, SERIES_TYPE
+from ..operators import DataFrameOperator, DataFrameOperatorMixin
+class DataFrameInferDtypes(DataFrameOperator, DataFrameOperatorMixin):
+    _op_type_ = opcodes.DATAFRAME_INFER_DTYPES
+    infer_method = StringField("infer_method")
+    infer_kwargs = AnyField("infer_kwargs")
+    infer_stage = StringField("infer_stage", default=None)
+    def __init__(self, output_types=None, **kw):
+        super().__init__(_output_types=output_types, **kw)
+    def __call__(self, df):
+        if isinstance(df, DATAFRAME_TYPE):
+            return self.new_dataframe(
+                [df],
+                shape=df.shape,
+                dtypes=None,
+                index_value=df.index_value,
+                columns_value=df.columns_value,
+            )
+        else:
+            assert isinstance(df, SERIES_TYPE)
+            return self.new_series(
+                [df],
+                shape=df.shape,
+                dtype=None,
+                name=df.name,
+                index_value=df.index_value,
+            )
+def convert_dtypes(
+    df_or_series,
+    infer_objects=True,
+    convert_string=True,
+    convert_integer=True,
+    convert_boolean=True,
+    convert_floating=True,
+    dtype_backend="numpy",
+):
+    """
+    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
+    Parameters
+    ----------
+    infer_objects : bool, default True
+        Whether object dtypes should be converted to the best possible types.
+    convert_string : bool, default True
+        Whether object dtypes should be converted to ``StringDtype()``.
+    convert_integer : bool, default True
+        Whether, if possible, conversion can be done to integer extension types.
+    convert_boolean : bool, default True
+        Whether object dtypes should be converted to ``BooleanDtype()``.
+    convert_floating : bool, default True
+        Whether, if possible, conversion can be done to floating extension types.
+        If `convert_integer` is also True, preference will be given to integer
+        dtypes if the floats can be faithfully cast to integers.
+    Returns
+    -------
+    Series or DataFrame
+        Copy of input object with new dtype.
+    See Also
+    --------
+    infer_objects : Infer dtypes of objects.
+    to_datetime : Convert argument to datetime.
+    to_timedelta : Convert argument to timedelta.
+    to_numeric : Convert argument to a numeric type.
+    Notes
+    -----
+    By default, ``convert_dtypes`` will attempt to convert a Series (or each
+    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
+    ``convert_string``, ``convert_integer``, ``convert_boolean`` and
+    ``convert_floating``, it is possible to turn off individual conversions
+    to ``StringDtype``, the integer extension types, ``BooleanDtype``
+    or floating extension types, respectively.
+    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
+    rules as during normal Series/DataFrame construction.  Then, if possible,
+    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
+    or floating extension type, otherwise leave as ``object``.
+    If the dtype is integer, convert to an appropriate integer extension type.
+    If the dtype is numeric, and consists of all integers, convert to an
+    appropriate integer extension type. Otherwise, convert to an
+    appropriate floating extension type.
+    .. versionchanged:: 1.2
+        Starting with pandas 1.2, this method also converts float columns
+        to the nullable floating extension type.
+    In the future, as new dtypes are added that support ``pd.NA``, the results
+    of this method will change to support those new dtypes.
+    Examples
+    --------
+    >>> import maxframe.tensor as mt
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame(
+    ...     {
+    ...         "a": md.Series([1, 2, 3], dtype=mt.dtype("int32")),
+    ...         "b": md.Series(["x", "y", "z"], dtype=mt.dtype("O")),
+    ...         "c": md.Series([True, False, mt.nan], dtype=mt.dtype("O")),
+    ...         "d": md.Series(["h", "i", mt.nan], dtype=mt.dtype("O")),
+    ...         "e": md.Series([10, mt.nan, 20], dtype=mt.dtype("float")),
+    ...         "f": md.Series([mt.nan, 100.5, 200], dtype=mt.dtype("float")),
+    ...     }
+    ... )
+    Start with a DataFrame with default dtypes.
+    >>> df.execute()
+       a  b      c    d     e      f
+    0  1  x   True    h  10.0    NaN
+    1  2  y  False    i   NaN  100.5
+    2  3  z    NaN  NaN  20.0  200.0
+    >>> df.dtypes.execute()
+    a      int32
+    b     object
+    c     object
+    d     object
+    e    float64
+    f    float64
+    dtype: object
+    Convert the DataFrame to use best possible dtypes.
+    >>> dfn = df.convert_dtypes()
+    >>> dfn.execute()
+       a  b      c     d     e      f
+    0  1  x   True     h    10   <NA>
+    1  2  y  False     i  <NA>  100.5
+    2  3  z   <NA>  <NA>    20  200.0
+    >>> dfn.dtypes.execute()
+    a      Int32
+    b     string
+    c    boolean
+    d     string
+    e      Int64
+    f    Float64
+    dtype: object
+    Start with a Series of strings and missing data represented by ``np.nan``.
+    >>> s = md.Series(["a", "b", mt.nan])
+    >>> s.execute()
+    0      a
+    1      b
+    2    NaN
+    dtype: object
+    Obtain a Series with dtype ``StringDtype``.
+    >>> s.convert_dtypes().execute()
+    0       a
+    1       b
+    2    <NA>
+    dtype: string
+    """
+    dtype_backend = "numpy" if dtype_backend == "numpy_nullable" else dtype_backend
+    op = DataFrameInferDtypes(
+        infer_method="convert_dtypes",
+        infer_kwargs=dict(
+            infer_objects=infer_objects,
+            convert_string=convert_string,
+            convert_integer=convert_integer,
+            convert_boolean=convert_boolean,
+            convert_floating=convert_floating,
+            dtype_backend=dtype_backend,
+        ),
+    )
+    return op(df_or_series)
+def infer_objects(df_or_series, copy=True):
+    """
+    Attempt to infer better dtypes for object columns.
+    Attempts soft conversion of object-dtyped
+    columns, leaving non-object and unconvertible
+    columns unchanged. The inference rules are the
+    same as during normal Series/DataFrame construction.
+    Returns
+    -------
+    converted : same type as input object
+    See Also
+    --------
+    to_datetime : Convert argument to datetime.
+    to_timedelta : Convert argument to timedelta.
+    to_numeric : Convert argument to numeric type.
+    convert_dtypes : Convert argument to best possible dtype.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({"A": ["a", 1, 2, 3]})
+    >>> df = df.iloc[1:]
+    >>> df.execute()
+       A
+    1  1
+    2  2
+    3  3
+    >>> df.dtypes.execute()
+    A    object
+    dtype: object
+    >>> df.infer_objects().dtypes.execute()
+    A    int64
+    dtype: object
+    """
+    if (isinstance(df_or_series, SERIES_TYPE) and df_or_series.dtype != "O") or (
+        isinstance(df_or_series, DATAFRAME_TYPE)
+        and all(dt != "O" for dt in df_or_series.dtypes)
+    ):
+        # no objects to cast
+        return df_or_series
+    _ = copy  # in MaxFrame data are immutable, thus ignore the parameter
+    op = DataFrameInferDtypes(
+        infer_method="infer_objects",
+        infer_kwargs={},
+    )
+    return op(df_or_series)

maxframe/dataframe/misc/isin.py CHANGED Viewed

@@ -133,7 +133,7 @@ def series_isin(elements, values):
     5    False
     Name: animal, dtype: bool
     """
-    if is_list_like(values):
+    if is_list_like(values) and not isinstance(values, ENTITY_TYPE):
         values = list(values)
     elif not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
         raise TypeError(
@@ -207,7 +207,7 @@ def df_isin(df, values):
     falcon      True       True
     dog        False      False
     """
-    if is_list_like(values) and not isinstance(values, dict):
+    if is_list_like(values) and not isinstance(values, (dict, ENTITY_TYPE)):
         values = list(values)
     elif not isinstance(
         values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)

maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

Potentially problematic release.

maxframe 2.0.0b2cp37-cp37m-win32.whl → 2.3.0rc1cp37-cp37m-win32.whl