maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -22,7 +22,7 @@ from ...core import EntityData
|
|
|
22
22
|
from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
|
|
23
23
|
from ..core import SERIES_TYPE
|
|
24
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
|
-
from ..utils import
|
|
25
|
+
from ..utils import build_df, parse_index
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -43,8 +43,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
43
43
|
|
|
44
44
|
def __call__(self, df_or_series):
|
|
45
45
|
if isinstance(df_or_series, SERIES_TYPE):
|
|
46
|
-
if not np.issubdtype(df_or_series.dtype, np.number):
|
|
47
|
-
raise NotImplementedError("non-numeric type is not supported for now")
|
|
48
46
|
test_series = pd.Series([], dtype=df_or_series.dtype).describe(
|
|
49
47
|
percentiles=self.percentiles,
|
|
50
48
|
include=self.include,
|
|
@@ -57,7 +55,7 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
57
55
|
index_value=parse_index(test_series.index, store_data=True),
|
|
58
56
|
)
|
|
59
57
|
else:
|
|
60
|
-
test_inp_df =
|
|
58
|
+
test_inp_df = build_df(df_or_series)
|
|
61
59
|
test_df = test_inp_df.describe(
|
|
62
60
|
percentiles=self.percentiles,
|
|
63
61
|
include=self.include,
|
|
@@ -69,11 +67,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
69
67
|
# MaxFrame DataFrame allows user to specify percentiles=False
|
|
70
68
|
# to skip computation about percentiles
|
|
71
69
|
test_df.drop(["50%"], axis=0, inplace=True)
|
|
72
|
-
for dtype in test_df.dtypes:
|
|
73
|
-
if not np.issubdtype(dtype, np.number):
|
|
74
|
-
raise NotImplementedError(
|
|
75
|
-
"non-numeric type is not supported for now"
|
|
76
|
-
)
|
|
77
70
|
return self.new_dataframe(
|
|
78
71
|
[df_or_series],
|
|
79
72
|
shape=test_df.shape,
|
|
@@ -84,6 +77,179 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
84
77
|
|
|
85
78
|
|
|
86
79
|
def describe(df_or_series, percentiles=None, include=None, exclude=None):
|
|
80
|
+
"""
|
|
81
|
+
Generate descriptive statistics.
|
|
82
|
+
|
|
83
|
+
Descriptive statistics include those that summarize the central
|
|
84
|
+
tendency, dispersion and shape of a
|
|
85
|
+
dataset's distribution, excluding ``NaN`` values.
|
|
86
|
+
|
|
87
|
+
Analyzes both numeric and object series, as well
|
|
88
|
+
as ``DataFrame`` column sets of mixed data types. The output
|
|
89
|
+
will vary depending on what is provided. Refer to the notes
|
|
90
|
+
below for more detail.
|
|
91
|
+
|
|
92
|
+
Parameters
|
|
93
|
+
----------
|
|
94
|
+
percentiles : list-like of numbers, optional
|
|
95
|
+
The percentiles to include in the output. All should
|
|
96
|
+
fall between 0 and 1. The default is
|
|
97
|
+
``[.25, .5, .75]``, which returns the 25th, 50th, and
|
|
98
|
+
75th percentiles.
|
|
99
|
+
include : 'all', list-like of dtypes or None (default), optional
|
|
100
|
+
A white list of data types to include in the result. Ignored
|
|
101
|
+
for ``Series``. Here are the options:
|
|
102
|
+
|
|
103
|
+
- 'all' : All columns of the input will be included in the output.
|
|
104
|
+
- A list-like of dtypes : Limits the results to the
|
|
105
|
+
provided data types.
|
|
106
|
+
To limit the result to numeric types submit
|
|
107
|
+
``numpy.number``. To limit it instead to object columns submit
|
|
108
|
+
the ``numpy.object`` data type. Strings
|
|
109
|
+
can also be used in the style of
|
|
110
|
+
``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
|
|
111
|
+
- None (default) : The result will include all numeric columns.
|
|
112
|
+
exclude : list-like of dtypes or None (default), optional,
|
|
113
|
+
A black list of data types to omit from the result. Ignored
|
|
114
|
+
for ``Series``. Here are the options:
|
|
115
|
+
|
|
116
|
+
- A list-like of dtypes : Excludes the provided data types
|
|
117
|
+
from the result. To exclude numeric types submit
|
|
118
|
+
``numpy.number``. To exclude object columns submit the data
|
|
119
|
+
type ``numpy.object``. Strings can also be used in the style of
|
|
120
|
+
``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
|
|
121
|
+
- None (default) : The result will exclude nothing.
|
|
122
|
+
|
|
123
|
+
Returns
|
|
124
|
+
-------
|
|
125
|
+
Series or DataFrame
|
|
126
|
+
Summary statistics of the Series or Dataframe provided.
|
|
127
|
+
|
|
128
|
+
See Also
|
|
129
|
+
--------
|
|
130
|
+
DataFrame.count: Count number of non-NA/null observations.
|
|
131
|
+
DataFrame.max: Maximum of the values in the object.
|
|
132
|
+
DataFrame.min: Minimum of the values in the object.
|
|
133
|
+
DataFrame.mean: Mean of the values.
|
|
134
|
+
DataFrame.std: Standard deviation of the observations.
|
|
135
|
+
DataFrame.select_dtypes: Subset of a DataFrame including/excluding
|
|
136
|
+
columns based on their dtype.
|
|
137
|
+
|
|
138
|
+
Notes
|
|
139
|
+
-----
|
|
140
|
+
For numeric data, the result's index will include ``count``,
|
|
141
|
+
``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
|
|
142
|
+
upper percentiles. By default the lower percentile is ``25`` and the
|
|
143
|
+
upper percentile is ``75``. The ``50`` percentile is the
|
|
144
|
+
same as the median.
|
|
145
|
+
|
|
146
|
+
For object data (e.g. strings or timestamps), the result's index
|
|
147
|
+
will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
|
|
148
|
+
is the most common value. The ``freq`` is the most common value's
|
|
149
|
+
frequency. Timestamps also include the ``first`` and ``last`` items.
|
|
150
|
+
|
|
151
|
+
If multiple object values have the highest count, then the
|
|
152
|
+
``count`` and ``top`` results will be arbitrarily chosen from
|
|
153
|
+
among those with the highest count.
|
|
154
|
+
|
|
155
|
+
For mixed data types provided via a ``DataFrame``, the default is to
|
|
156
|
+
return only an analysis of numeric columns. If the dataframe consists
|
|
157
|
+
only of object data without any numeric columns, the default is to
|
|
158
|
+
return an analysis of object columns. If ``include='all'`` is provided
|
|
159
|
+
as an option, the result will include a union of attributes of each type.
|
|
160
|
+
|
|
161
|
+
The `include` and `exclude` parameters can be used to limit
|
|
162
|
+
which columns in a ``DataFrame`` are analyzed for the output.
|
|
163
|
+
The parameters are ignored when analyzing a ``Series``.
|
|
164
|
+
|
|
165
|
+
Examples
|
|
166
|
+
--------
|
|
167
|
+
Describing a numeric ``Series``.
|
|
168
|
+
|
|
169
|
+
>>> import maxframe.tensor as mt
|
|
170
|
+
>>> import maxframe.dataframe as md
|
|
171
|
+
>>> s = md.Series([1, 2, 3])
|
|
172
|
+
>>> s.describe().execute()
|
|
173
|
+
count 3.0
|
|
174
|
+
mean 2.0
|
|
175
|
+
std 1.0
|
|
176
|
+
min 1.0
|
|
177
|
+
25% 1.5
|
|
178
|
+
50% 2.0
|
|
179
|
+
75% 2.5
|
|
180
|
+
max 3.0
|
|
181
|
+
dtype: float64
|
|
182
|
+
|
|
183
|
+
Describing a ``DataFrame``. By default only numeric fields
|
|
184
|
+
are returned.
|
|
185
|
+
|
|
186
|
+
>>> df = md.DataFrame({'numeric': [1, 2, 3],
|
|
187
|
+
... 'object': ['a', 'b', 'c']
|
|
188
|
+
... })
|
|
189
|
+
>>> df.describe().execute()
|
|
190
|
+
numeric
|
|
191
|
+
count 3.0
|
|
192
|
+
mean 2.0
|
|
193
|
+
std 1.0
|
|
194
|
+
min 1.0
|
|
195
|
+
25% 1.5
|
|
196
|
+
50% 2.0
|
|
197
|
+
75% 2.5
|
|
198
|
+
max 3.0
|
|
199
|
+
|
|
200
|
+
Describing all columns of a ``DataFrame`` regardless of data type.
|
|
201
|
+
|
|
202
|
+
>>> df.describe(include='all').execute() # doctest: +SKIP
|
|
203
|
+
numeric object
|
|
204
|
+
count 3.0 3
|
|
205
|
+
unique NaN 3
|
|
206
|
+
top NaN a
|
|
207
|
+
freq NaN 1
|
|
208
|
+
mean 2.0 NaN
|
|
209
|
+
std 1.0 NaN
|
|
210
|
+
min 1.0 NaN
|
|
211
|
+
25% 1.5 NaN
|
|
212
|
+
50% 2.0 NaN
|
|
213
|
+
75% 2.5 NaN
|
|
214
|
+
max 3.0 NaN
|
|
215
|
+
|
|
216
|
+
Describing a column from a ``DataFrame`` by accessing it as
|
|
217
|
+
an attribute.
|
|
218
|
+
|
|
219
|
+
>>> df.numeric.describe().execute()
|
|
220
|
+
count 3.0
|
|
221
|
+
mean 2.0
|
|
222
|
+
std 1.0
|
|
223
|
+
min 1.0
|
|
224
|
+
25% 1.5
|
|
225
|
+
50% 2.0
|
|
226
|
+
75% 2.5
|
|
227
|
+
max 3.0
|
|
228
|
+
Name: numeric, dtype: float64
|
|
229
|
+
|
|
230
|
+
Including only numeric columns in a ``DataFrame`` description.
|
|
231
|
+
|
|
232
|
+
>>> df.describe(include=[mt.number]).execute()
|
|
233
|
+
numeric
|
|
234
|
+
count 3.0
|
|
235
|
+
mean 2.0
|
|
236
|
+
std 1.0
|
|
237
|
+
min 1.0
|
|
238
|
+
25% 1.5
|
|
239
|
+
50% 2.0
|
|
240
|
+
75% 2.5
|
|
241
|
+
max 3.0
|
|
242
|
+
|
|
243
|
+
Including only string columns in a ``DataFrame`` description.
|
|
244
|
+
|
|
245
|
+
>>> df.describe(include=[object]).execute() # doctest: +SKIP
|
|
246
|
+
object
|
|
247
|
+
count 3
|
|
248
|
+
unique 3
|
|
249
|
+
top a
|
|
250
|
+
freq 1
|
|
251
|
+
"""
|
|
252
|
+
# fixme add support for categorical columns once implemented
|
|
87
253
|
if percentiles is False:
|
|
88
254
|
percentiles = []
|
|
89
255
|
elif percentiles is None:
|
maxframe/dataframe/misc/drop.py
CHANGED
|
@@ -419,6 +419,37 @@ def series_drop(
|
|
|
419
419
|
)
|
|
420
420
|
|
|
421
421
|
|
|
422
|
+
def series_pop(series, item):
|
|
423
|
+
"""
|
|
424
|
+
Return the item and drop it from the series. Raise KeyError if not found.
|
|
425
|
+
|
|
426
|
+
Parameters
|
|
427
|
+
----------
|
|
428
|
+
item : label
|
|
429
|
+
Index of the element that needs to be removed.
|
|
430
|
+
|
|
431
|
+
Returns
|
|
432
|
+
-------
|
|
433
|
+
Value that is popped from series.
|
|
434
|
+
|
|
435
|
+
Examples
|
|
436
|
+
--------
|
|
437
|
+
>>> import maxframe.dataframe as md
|
|
438
|
+
>>> ser = md.Series([1,2,3])
|
|
439
|
+
|
|
440
|
+
>>> ser.pop(0).execute()
|
|
441
|
+
1
|
|
442
|
+
|
|
443
|
+
>>> ser.execute()
|
|
444
|
+
1 2
|
|
445
|
+
2 3
|
|
446
|
+
dtype: int64
|
|
447
|
+
"""
|
|
448
|
+
scalar = series.data[item]
|
|
449
|
+
series_drop(series, item, inplace=True)
|
|
450
|
+
return scalar
|
|
451
|
+
|
|
452
|
+
|
|
422
453
|
def index_drop(index, labels, errors="raise"):
|
|
423
454
|
"""
|
|
424
455
|
Make new Index with passed list of labels deleted.
|
|
@@ -19,10 +19,10 @@ from ... import opcodes
|
|
|
19
19
|
from ...serialization.serializables import BoolField
|
|
20
20
|
from ..operators import OutputType
|
|
21
21
|
from ..utils import gen_unknown_index_value, parse_index
|
|
22
|
-
from ._duplicate import
|
|
22
|
+
from ._duplicate import BaseDuplicateOp, validate_subset
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class DataFrameDropDuplicates(
|
|
25
|
+
class DataFrameDropDuplicates(BaseDuplicateOp):
|
|
26
26
|
_op_type_ = opcodes.DROP_DUPLICATES
|
|
27
27
|
|
|
28
28
|
ignore_index = BoolField("ignore_index", default=True)
|
|
@@ -16,10 +16,10 @@ import numpy as np
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import OutputType
|
|
19
|
-
from ._duplicate import
|
|
19
|
+
from ._duplicate import BaseDuplicateOp, validate_subset
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class DataFrameDuplicated(
|
|
22
|
+
class DataFrameDuplicated(BaseDuplicateOp):
|
|
23
23
|
_op_type_ = opcodes.DUPLICATED
|
|
24
24
|
|
|
25
25
|
def __init__(self, output_types=None, **kw):
|
|
@@ -25,12 +25,14 @@ from ...serialization.serializables import (
|
|
|
25
25
|
ListField,
|
|
26
26
|
StringField,
|
|
27
27
|
)
|
|
28
|
+
from ...utils import make_dtype, pd_release_version
|
|
28
29
|
from ..datasource.dataframe import from_pandas as from_pandas_df
|
|
29
30
|
from ..datasource.series import from_pandas as from_pandas_series
|
|
30
31
|
from ..initializer import Series as asseries
|
|
31
32
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
32
33
|
|
|
33
34
|
_encoding_dtype_kind = ["O", "S", "U"]
|
|
35
|
+
_ret_uint8 = pd_release_version < (2, 0, 0)
|
|
34
36
|
|
|
35
37
|
|
|
36
38
|
class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -181,7 +183,9 @@ def get_dummies(
|
|
|
181
183
|
elif isinstance(data, pd.DataFrame):
|
|
182
184
|
data = from_pandas_df(data)
|
|
183
185
|
|
|
184
|
-
dtype =
|
|
186
|
+
dtype = make_dtype(
|
|
187
|
+
dtype if dtype is not None else np.dtype(np.uint8 if _ret_uint8 else bool)
|
|
188
|
+
)
|
|
185
189
|
|
|
186
190
|
if prefix is not None:
|
|
187
191
|
if isinstance(prefix, list):
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField, StringField
|
|
17
|
+
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
18
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameInferDtypes(DataFrameOperator, DataFrameOperatorMixin):
|
|
22
|
+
_op_type_ = opcodes.DATAFRAME_INFER_DTYPES
|
|
23
|
+
|
|
24
|
+
infer_method = StringField("infer_method")
|
|
25
|
+
infer_kwargs = AnyField("infer_kwargs")
|
|
26
|
+
|
|
27
|
+
infer_stage = StringField("infer_stage", default=None)
|
|
28
|
+
|
|
29
|
+
def __init__(self, output_types=None, **kw):
|
|
30
|
+
super().__init__(_output_types=output_types, **kw)
|
|
31
|
+
|
|
32
|
+
def __call__(self, df):
|
|
33
|
+
if isinstance(df, DATAFRAME_TYPE):
|
|
34
|
+
return self.new_dataframe(
|
|
35
|
+
[df],
|
|
36
|
+
shape=df.shape,
|
|
37
|
+
dtypes=None,
|
|
38
|
+
index_value=df.index_value,
|
|
39
|
+
columns_value=df.columns_value,
|
|
40
|
+
)
|
|
41
|
+
else:
|
|
42
|
+
assert isinstance(df, SERIES_TYPE)
|
|
43
|
+
return self.new_series(
|
|
44
|
+
[df],
|
|
45
|
+
shape=df.shape,
|
|
46
|
+
dtype=None,
|
|
47
|
+
name=df.name,
|
|
48
|
+
index_value=df.index_value,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def convert_dtypes(
|
|
53
|
+
df_or_series,
|
|
54
|
+
infer_objects=True,
|
|
55
|
+
convert_string=True,
|
|
56
|
+
convert_integer=True,
|
|
57
|
+
convert_boolean=True,
|
|
58
|
+
convert_floating=True,
|
|
59
|
+
dtype_backend="numpy",
|
|
60
|
+
):
|
|
61
|
+
"""
|
|
62
|
+
Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
|
|
63
|
+
|
|
64
|
+
Parameters
|
|
65
|
+
----------
|
|
66
|
+
infer_objects : bool, default True
|
|
67
|
+
Whether object dtypes should be converted to the best possible types.
|
|
68
|
+
convert_string : bool, default True
|
|
69
|
+
Whether object dtypes should be converted to ``StringDtype()``.
|
|
70
|
+
convert_integer : bool, default True
|
|
71
|
+
Whether, if possible, conversion can be done to integer extension types.
|
|
72
|
+
convert_boolean : bool, default True
|
|
73
|
+
Whether object dtypes should be converted to ``BooleanDtype()``.
|
|
74
|
+
convert_floating : bool, default True
|
|
75
|
+
Whether, if possible, conversion can be done to floating extension types.
|
|
76
|
+
If `convert_integer` is also True, preference will be given to integer
|
|
77
|
+
dtypes if the floats can be faithfully cast to integers.
|
|
78
|
+
|
|
79
|
+
Returns
|
|
80
|
+
-------
|
|
81
|
+
Series or DataFrame
|
|
82
|
+
Copy of input object with new dtype.
|
|
83
|
+
|
|
84
|
+
See Also
|
|
85
|
+
--------
|
|
86
|
+
infer_objects : Infer dtypes of objects.
|
|
87
|
+
to_datetime : Convert argument to datetime.
|
|
88
|
+
to_timedelta : Convert argument to timedelta.
|
|
89
|
+
to_numeric : Convert argument to a numeric type.
|
|
90
|
+
|
|
91
|
+
Notes
|
|
92
|
+
-----
|
|
93
|
+
By default, ``convert_dtypes`` will attempt to convert a Series (or each
|
|
94
|
+
Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
|
|
95
|
+
``convert_string``, ``convert_integer``, ``convert_boolean`` and
|
|
96
|
+
``convert_floating``, it is possible to turn off individual conversions
|
|
97
|
+
to ``StringDtype``, the integer extension types, ``BooleanDtype``
|
|
98
|
+
or floating extension types, respectively.
|
|
99
|
+
|
|
100
|
+
For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
|
|
101
|
+
rules as during normal Series/DataFrame construction. Then, if possible,
|
|
102
|
+
convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
|
|
103
|
+
or floating extension type, otherwise leave as ``object``.
|
|
104
|
+
|
|
105
|
+
If the dtype is integer, convert to an appropriate integer extension type.
|
|
106
|
+
|
|
107
|
+
If the dtype is numeric, and consists of all integers, convert to an
|
|
108
|
+
appropriate integer extension type. Otherwise, convert to an
|
|
109
|
+
appropriate floating extension type.
|
|
110
|
+
|
|
111
|
+
.. versionchanged:: 1.2
|
|
112
|
+
Starting with pandas 1.2, this method also converts float columns
|
|
113
|
+
to the nullable floating extension type.
|
|
114
|
+
|
|
115
|
+
In the future, as new dtypes are added that support ``pd.NA``, the results
|
|
116
|
+
of this method will change to support those new dtypes.
|
|
117
|
+
|
|
118
|
+
Examples
|
|
119
|
+
--------
|
|
120
|
+
>>> import maxframe.tensor as mt
|
|
121
|
+
>>> import maxframe.dataframe as md
|
|
122
|
+
>>> df = md.DataFrame(
|
|
123
|
+
... {
|
|
124
|
+
... "a": md.Series([1, 2, 3], dtype=mt.dtype("int32")),
|
|
125
|
+
... "b": md.Series(["x", "y", "z"], dtype=mt.dtype("O")),
|
|
126
|
+
... "c": md.Series([True, False, mt.nan], dtype=mt.dtype("O")),
|
|
127
|
+
... "d": md.Series(["h", "i", mt.nan], dtype=mt.dtype("O")),
|
|
128
|
+
... "e": md.Series([10, mt.nan, 20], dtype=mt.dtype("float")),
|
|
129
|
+
... "f": md.Series([mt.nan, 100.5, 200], dtype=mt.dtype("float")),
|
|
130
|
+
... }
|
|
131
|
+
... )
|
|
132
|
+
|
|
133
|
+
Start with a DataFrame with default dtypes.
|
|
134
|
+
|
|
135
|
+
>>> df.execute()
|
|
136
|
+
a b c d e f
|
|
137
|
+
0 1 x True h 10.0 NaN
|
|
138
|
+
1 2 y False i NaN 100.5
|
|
139
|
+
2 3 z NaN NaN 20.0 200.0
|
|
140
|
+
|
|
141
|
+
>>> df.dtypes.execute()
|
|
142
|
+
a int32
|
|
143
|
+
b object
|
|
144
|
+
c object
|
|
145
|
+
d object
|
|
146
|
+
e float64
|
|
147
|
+
f float64
|
|
148
|
+
dtype: object
|
|
149
|
+
|
|
150
|
+
Convert the DataFrame to use best possible dtypes.
|
|
151
|
+
|
|
152
|
+
>>> dfn = df.convert_dtypes()
|
|
153
|
+
>>> dfn.execute()
|
|
154
|
+
a b c d e f
|
|
155
|
+
0 1 x True h 10 <NA>
|
|
156
|
+
1 2 y False i <NA> 100.5
|
|
157
|
+
2 3 z <NA> <NA> 20 200.0
|
|
158
|
+
|
|
159
|
+
>>> dfn.dtypes.execute()
|
|
160
|
+
a Int32
|
|
161
|
+
b string
|
|
162
|
+
c boolean
|
|
163
|
+
d string
|
|
164
|
+
e Int64
|
|
165
|
+
f Float64
|
|
166
|
+
dtype: object
|
|
167
|
+
|
|
168
|
+
Start with a Series of strings and missing data represented by ``np.nan``.
|
|
169
|
+
|
|
170
|
+
>>> s = md.Series(["a", "b", mt.nan])
|
|
171
|
+
>>> s.execute()
|
|
172
|
+
0 a
|
|
173
|
+
1 b
|
|
174
|
+
2 NaN
|
|
175
|
+
dtype: object
|
|
176
|
+
|
|
177
|
+
Obtain a Series with dtype ``StringDtype``.
|
|
178
|
+
|
|
179
|
+
>>> s.convert_dtypes().execute()
|
|
180
|
+
0 a
|
|
181
|
+
1 b
|
|
182
|
+
2 <NA>
|
|
183
|
+
dtype: string
|
|
184
|
+
"""
|
|
185
|
+
dtype_backend = "numpy" if dtype_backend == "numpy_nullable" else dtype_backend
|
|
186
|
+
op = DataFrameInferDtypes(
|
|
187
|
+
infer_method="convert_dtypes",
|
|
188
|
+
infer_kwargs=dict(
|
|
189
|
+
infer_objects=infer_objects,
|
|
190
|
+
convert_string=convert_string,
|
|
191
|
+
convert_integer=convert_integer,
|
|
192
|
+
convert_boolean=convert_boolean,
|
|
193
|
+
convert_floating=convert_floating,
|
|
194
|
+
dtype_backend=dtype_backend,
|
|
195
|
+
),
|
|
196
|
+
)
|
|
197
|
+
return op(df_or_series)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def infer_objects(df_or_series, copy=True):
|
|
201
|
+
"""
|
|
202
|
+
Attempt to infer better dtypes for object columns.
|
|
203
|
+
|
|
204
|
+
Attempts soft conversion of object-dtyped
|
|
205
|
+
columns, leaving non-object and unconvertible
|
|
206
|
+
columns unchanged. The inference rules are the
|
|
207
|
+
same as during normal Series/DataFrame construction.
|
|
208
|
+
|
|
209
|
+
Returns
|
|
210
|
+
-------
|
|
211
|
+
converted : same type as input object
|
|
212
|
+
|
|
213
|
+
See Also
|
|
214
|
+
--------
|
|
215
|
+
to_datetime : Convert argument to datetime.
|
|
216
|
+
to_timedelta : Convert argument to timedelta.
|
|
217
|
+
to_numeric : Convert argument to numeric type.
|
|
218
|
+
convert_dtypes : Convert argument to best possible dtype.
|
|
219
|
+
|
|
220
|
+
Examples
|
|
221
|
+
--------
|
|
222
|
+
>>> import maxframe.dataframe as md
|
|
223
|
+
>>> df = md.DataFrame({"A": ["a", 1, 2, 3]})
|
|
224
|
+
>>> df = df.iloc[1:]
|
|
225
|
+
>>> df.execute()
|
|
226
|
+
A
|
|
227
|
+
1 1
|
|
228
|
+
2 2
|
|
229
|
+
3 3
|
|
230
|
+
|
|
231
|
+
>>> df.dtypes.execute()
|
|
232
|
+
A object
|
|
233
|
+
dtype: object
|
|
234
|
+
|
|
235
|
+
>>> df.infer_objects().dtypes.execute()
|
|
236
|
+
A int64
|
|
237
|
+
dtype: object
|
|
238
|
+
"""
|
|
239
|
+
if (isinstance(df_or_series, SERIES_TYPE) and df_or_series.dtype != "O") or (
|
|
240
|
+
isinstance(df_or_series, DATAFRAME_TYPE)
|
|
241
|
+
and all(dt != "O" for dt in df_or_series.dtypes)
|
|
242
|
+
):
|
|
243
|
+
# no objects to cast
|
|
244
|
+
return df_or_series
|
|
245
|
+
|
|
246
|
+
_ = copy # in MaxFrame data are immutable, thus ignore the parameter
|
|
247
|
+
op = DataFrameInferDtypes(
|
|
248
|
+
infer_method="infer_objects",
|
|
249
|
+
infer_kwargs={},
|
|
250
|
+
)
|
|
251
|
+
return op(df_or_series)
|
maxframe/dataframe/misc/isin.py
CHANGED
|
@@ -133,7 +133,7 @@ def series_isin(elements, values):
|
|
|
133
133
|
5 False
|
|
134
134
|
Name: animal, dtype: bool
|
|
135
135
|
"""
|
|
136
|
-
if is_list_like(values):
|
|
136
|
+
if is_list_like(values) and not isinstance(values, ENTITY_TYPE):
|
|
137
137
|
values = list(values)
|
|
138
138
|
elif not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
|
|
139
139
|
raise TypeError(
|
|
@@ -207,7 +207,7 @@ def df_isin(df, values):
|
|
|
207
207
|
falcon True True
|
|
208
208
|
dog False False
|
|
209
209
|
"""
|
|
210
|
-
if is_list_like(values) and not isinstance(values, dict):
|
|
210
|
+
if is_list_like(values) and not isinstance(values, (dict, ENTITY_TYPE)):
|
|
211
211
|
values = list(values)
|
|
212
212
|
elif not isinstance(
|
|
213
213
|
values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)
|