maxframe 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -22,7 +22,7 @@ from ...core import EntityData
|
|
|
22
22
|
from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
|
|
23
23
|
from ..core import SERIES_TYPE
|
|
24
24
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
25
|
-
from ..utils import
|
|
25
|
+
from ..utils import build_df, parse_index
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -43,8 +43,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
43
43
|
|
|
44
44
|
def __call__(self, df_or_series):
|
|
45
45
|
if isinstance(df_or_series, SERIES_TYPE):
|
|
46
|
-
if not np.issubdtype(df_or_series.dtype, np.number):
|
|
47
|
-
raise NotImplementedError("non-numeric type is not supported for now")
|
|
48
46
|
test_series = pd.Series([], dtype=df_or_series.dtype).describe(
|
|
49
47
|
percentiles=self.percentiles,
|
|
50
48
|
include=self.include,
|
|
@@ -57,7 +55,7 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
57
55
|
index_value=parse_index(test_series.index, store_data=True),
|
|
58
56
|
)
|
|
59
57
|
else:
|
|
60
|
-
test_inp_df =
|
|
58
|
+
test_inp_df = build_df(df_or_series)
|
|
61
59
|
test_df = test_inp_df.describe(
|
|
62
60
|
percentiles=self.percentiles,
|
|
63
61
|
include=self.include,
|
|
@@ -69,11 +67,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
69
67
|
# MaxFrame DataFrame allows user to specify percentiles=False
|
|
70
68
|
# to skip computation about percentiles
|
|
71
69
|
test_df.drop(["50%"], axis=0, inplace=True)
|
|
72
|
-
for dtype in test_df.dtypes:
|
|
73
|
-
if not np.issubdtype(dtype, np.number):
|
|
74
|
-
raise NotImplementedError(
|
|
75
|
-
"non-numeric type is not supported for now"
|
|
76
|
-
)
|
|
77
70
|
return self.new_dataframe(
|
|
78
71
|
[df_or_series],
|
|
79
72
|
shape=test_df.shape,
|
|
@@ -84,6 +77,179 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
84
77
|
|
|
85
78
|
|
|
86
79
|
def describe(df_or_series, percentiles=None, include=None, exclude=None):
|
|
80
|
+
"""
|
|
81
|
+
Generate descriptive statistics.
|
|
82
|
+
|
|
83
|
+
Descriptive statistics include those that summarize the central
|
|
84
|
+
tendency, dispersion and shape of a
|
|
85
|
+
dataset's distribution, excluding ``NaN`` values.
|
|
86
|
+
|
|
87
|
+
Analyzes both numeric and object series, as well
|
|
88
|
+
as ``DataFrame`` column sets of mixed data types. The output
|
|
89
|
+
will vary depending on what is provided. Refer to the notes
|
|
90
|
+
below for more detail.
|
|
91
|
+
|
|
92
|
+
Parameters
|
|
93
|
+
----------
|
|
94
|
+
percentiles : list-like of numbers, optional
|
|
95
|
+
The percentiles to include in the output. All should
|
|
96
|
+
fall between 0 and 1. The default is
|
|
97
|
+
``[.25, .5, .75]``, which returns the 25th, 50th, and
|
|
98
|
+
75th percentiles.
|
|
99
|
+
include : 'all', list-like of dtypes or None (default), optional
|
|
100
|
+
A white list of data types to include in the result. Ignored
|
|
101
|
+
for ``Series``. Here are the options:
|
|
102
|
+
|
|
103
|
+
- 'all' : All columns of the input will be included in the output.
|
|
104
|
+
- A list-like of dtypes : Limits the results to the
|
|
105
|
+
provided data types.
|
|
106
|
+
To limit the result to numeric types submit
|
|
107
|
+
``numpy.number``. To limit it instead to object columns submit
|
|
108
|
+
the ``numpy.object`` data type. Strings
|
|
109
|
+
can also be used in the style of
|
|
110
|
+
``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
|
|
111
|
+
- None (default) : The result will include all numeric columns.
|
|
112
|
+
exclude : list-like of dtypes or None (default), optional
|
|
113
|
+
A black list of data types to omit from the result. Ignored
|
|
114
|
+
for ``Series``. Here are the options:
|
|
115
|
+
|
|
116
|
+
- A list-like of dtypes : Excludes the provided data types
|
|
117
|
+
from the result. To exclude numeric types submit
|
|
118
|
+
``numpy.number``. To exclude object columns submit the data
|
|
119
|
+
type ``numpy.object``. Strings can also be used in the style of
|
|
120
|
+
``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
|
|
121
|
+
- None (default) : The result will exclude nothing.
|
|
122
|
+
|
|
123
|
+
Returns
|
|
124
|
+
-------
|
|
125
|
+
Series or DataFrame
|
|
126
|
+
Summary statistics of the Series or DataFrame provided.
|
|
127
|
+
|
|
128
|
+
See Also
|
|
129
|
+
--------
|
|
130
|
+
DataFrame.count: Count number of non-NA/null observations.
|
|
131
|
+
DataFrame.max: Maximum of the values in the object.
|
|
132
|
+
DataFrame.min: Minimum of the values in the object.
|
|
133
|
+
DataFrame.mean: Mean of the values.
|
|
134
|
+
DataFrame.std: Standard deviation of the observations.
|
|
135
|
+
DataFrame.select_dtypes: Subset of a DataFrame including/excluding
|
|
136
|
+
columns based on their dtype.
|
|
137
|
+
|
|
138
|
+
Notes
|
|
139
|
+
-----
|
|
140
|
+
For numeric data, the result's index will include ``count``,
|
|
141
|
+
``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
|
|
142
|
+
upper percentiles. By default the lower percentile is ``25`` and the
|
|
143
|
+
upper percentile is ``75``. The ``50`` percentile is the
|
|
144
|
+
same as the median.
|
|
145
|
+
|
|
146
|
+
For object data (e.g. strings or timestamps), the result's index
|
|
147
|
+
will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
|
|
148
|
+
is the most common value. The ``freq`` is the most common value's
|
|
149
|
+
frequency. Timestamps also include the ``first`` and ``last`` items.
|
|
150
|
+
|
|
151
|
+
If multiple object values have the highest count, then the
|
|
152
|
+
``count`` and ``top`` results will be arbitrarily chosen from
|
|
153
|
+
among those with the highest count.
|
|
154
|
+
|
|
155
|
+
For mixed data types provided via a ``DataFrame``, the default is to
|
|
156
|
+
return only an analysis of numeric columns. If the dataframe consists
|
|
157
|
+
only of object data without any numeric columns, the default is to
|
|
158
|
+
return an analysis of object columns. If ``include='all'`` is provided
|
|
159
|
+
as an option, the result will include a union of attributes of each type.
|
|
160
|
+
|
|
161
|
+
The `include` and `exclude` parameters can be used to limit
|
|
162
|
+
which columns in a ``DataFrame`` are analyzed for the output.
|
|
163
|
+
The parameters are ignored when analyzing a ``Series``.
|
|
164
|
+
|
|
165
|
+
Examples
|
|
166
|
+
--------
|
|
167
|
+
Describing a numeric ``Series``.
|
|
168
|
+
|
|
169
|
+
>>> import maxframe.tensor as mt
|
|
170
|
+
>>> import maxframe.dataframe as md
|
|
171
|
+
>>> s = md.Series([1, 2, 3])
|
|
172
|
+
>>> s.describe().execute()
|
|
173
|
+
count 3.0
|
|
174
|
+
mean 2.0
|
|
175
|
+
std 1.0
|
|
176
|
+
min 1.0
|
|
177
|
+
25% 1.5
|
|
178
|
+
50% 2.0
|
|
179
|
+
75% 2.5
|
|
180
|
+
max 3.0
|
|
181
|
+
dtype: float64
|
|
182
|
+
|
|
183
|
+
Describing a ``DataFrame``. By default only numeric fields
|
|
184
|
+
are returned.
|
|
185
|
+
|
|
186
|
+
>>> df = md.DataFrame({'numeric': [1, 2, 3],
|
|
187
|
+
... 'object': ['a', 'b', 'c']
|
|
188
|
+
... })
|
|
189
|
+
>>> df.describe().execute()
|
|
190
|
+
numeric
|
|
191
|
+
count 3.0
|
|
192
|
+
mean 2.0
|
|
193
|
+
std 1.0
|
|
194
|
+
min 1.0
|
|
195
|
+
25% 1.5
|
|
196
|
+
50% 2.0
|
|
197
|
+
75% 2.5
|
|
198
|
+
max 3.0
|
|
199
|
+
|
|
200
|
+
Describing all columns of a ``DataFrame`` regardless of data type.
|
|
201
|
+
|
|
202
|
+
>>> df.describe(include='all').execute()  # doctest: +SKIP
|
|
203
|
+
numeric object
|
|
204
|
+
count 3.0 3
|
|
205
|
+
unique NaN 3
|
|
206
|
+
top NaN a
|
|
207
|
+
freq NaN 1
|
|
208
|
+
mean 2.0 NaN
|
|
209
|
+
std 1.0 NaN
|
|
210
|
+
min 1.0 NaN
|
|
211
|
+
25% 1.5 NaN
|
|
212
|
+
50% 2.0 NaN
|
|
213
|
+
75% 2.5 NaN
|
|
214
|
+
max 3.0 NaN
|
|
215
|
+
|
|
216
|
+
Describing a column from a ``DataFrame`` by accessing it as
|
|
217
|
+
an attribute.
|
|
218
|
+
|
|
219
|
+
>>> df.numeric.describe().execute()
|
|
220
|
+
count 3.0
|
|
221
|
+
mean 2.0
|
|
222
|
+
std 1.0
|
|
223
|
+
min 1.0
|
|
224
|
+
25% 1.5
|
|
225
|
+
50% 2.0
|
|
226
|
+
75% 2.5
|
|
227
|
+
max 3.0
|
|
228
|
+
Name: numeric, dtype: float64
|
|
229
|
+
|
|
230
|
+
Including only numeric columns in a ``DataFrame`` description.
|
|
231
|
+
|
|
232
|
+
>>> df.describe(include=[mt.number]).execute()
|
|
233
|
+
numeric
|
|
234
|
+
count 3.0
|
|
235
|
+
mean 2.0
|
|
236
|
+
std 1.0
|
|
237
|
+
min 1.0
|
|
238
|
+
25% 1.5
|
|
239
|
+
50% 2.0
|
|
240
|
+
75% 2.5
|
|
241
|
+
max 3.0
|
|
242
|
+
|
|
243
|
+
Including only string columns in a ``DataFrame`` description.
|
|
244
|
+
|
|
245
|
+
>>> df.describe(include=[object]).execute()  # doctest: +SKIP
|
|
246
|
+
object
|
|
247
|
+
count 3
|
|
248
|
+
unique 3
|
|
249
|
+
top a
|
|
250
|
+
freq 1
|
|
251
|
+
"""
|
|
252
|
+
# fixme add support for categorical columns once implemented
|
|
87
253
|
if percentiles is False:
|
|
88
254
|
percentiles = []
|
|
89
255
|
elif percentiles is None:
|
|
@@ -19,10 +19,10 @@ from ... import opcodes
|
|
|
19
19
|
from ...serialization.serializables import BoolField
|
|
20
20
|
from ..operators import OutputType
|
|
21
21
|
from ..utils import gen_unknown_index_value, parse_index
|
|
22
|
-
from ._duplicate import
|
|
22
|
+
from ._duplicate import BaseDuplicateOp, validate_subset
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class DataFrameDropDuplicates(
|
|
25
|
+
class DataFrameDropDuplicates(BaseDuplicateOp):
|
|
26
26
|
_op_type_ = opcodes.DROP_DUPLICATES
|
|
27
27
|
|
|
28
28
|
ignore_index = BoolField("ignore_index", default=True)
|
|
@@ -16,10 +16,10 @@ import numpy as np
|
|
|
16
16
|
|
|
17
17
|
from ... import opcodes
|
|
18
18
|
from ...core import OutputType
|
|
19
|
-
from ._duplicate import
|
|
19
|
+
from ._duplicate import BaseDuplicateOp, validate_subset
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class DataFrameDuplicated(
|
|
22
|
+
class DataFrameDuplicated(BaseDuplicateOp):
|
|
23
23
|
_op_type_ = opcodes.DUPLICATED
|
|
24
24
|
|
|
25
25
|
def __init__(self, output_types=None, **kw):
|
|
@@ -25,12 +25,14 @@ from ...serialization.serializables import (
|
|
|
25
25
|
ListField,
|
|
26
26
|
StringField,
|
|
27
27
|
)
|
|
28
|
+
from ...utils import make_dtype, pd_release_version
|
|
28
29
|
from ..datasource.dataframe import from_pandas as from_pandas_df
|
|
29
30
|
from ..datasource.series import from_pandas as from_pandas_series
|
|
30
31
|
from ..initializer import Series as asseries
|
|
31
32
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
32
33
|
|
|
33
34
|
_encoding_dtype_kind = ["O", "S", "U"]
|
|
35
|
+
_ret_uint8 = pd_release_version < (2, 0, 0)
|
|
34
36
|
|
|
35
37
|
|
|
36
38
|
class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
|
|
@@ -181,7 +183,9 @@ def get_dummies(
|
|
|
181
183
|
elif isinstance(data, pd.DataFrame):
|
|
182
184
|
data = from_pandas_df(data)
|
|
183
185
|
|
|
184
|
-
dtype =
|
|
186
|
+
dtype = make_dtype(
|
|
187
|
+
dtype if dtype is not None else np.dtype(np.uint8 if _ret_uint8 else bool)
|
|
188
|
+
)
|
|
185
189
|
|
|
186
190
|
if prefix is not None:
|
|
187
191
|
if isinstance(prefix, list):
|
maxframe/dataframe/misc/isin.py
CHANGED
|
@@ -133,7 +133,7 @@ def series_isin(elements, values):
|
|
|
133
133
|
5 False
|
|
134
134
|
Name: animal, dtype: bool
|
|
135
135
|
"""
|
|
136
|
-
if is_list_like(values):
|
|
136
|
+
if is_list_like(values) and not isinstance(values, ENTITY_TYPE):
|
|
137
137
|
values = list(values)
|
|
138
138
|
elif not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
|
|
139
139
|
raise TypeError(
|
|
@@ -207,7 +207,7 @@ def df_isin(df, values):
|
|
|
207
207
|
falcon True True
|
|
208
208
|
dog False False
|
|
209
209
|
"""
|
|
210
|
-
if is_list_like(values) and not isinstance(values, dict):
|
|
210
|
+
if is_list_like(values) and not isinstance(values, (dict, ENTITY_TYPE)):
|
|
211
211
|
values = list(values)
|
|
212
212
|
elif not isinstance(
|
|
213
213
|
values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)
|
maxframe/dataframe/misc/map.py
CHANGED
|
@@ -251,3 +251,97 @@ def index_map(
|
|
|
251
251
|
"""
|
|
252
252
|
op = DataFrameMap(arg=mapper, na_action=na_action, memory_scale=memory_scale)
|
|
253
253
|
return op(idx, dtype=dtype, skip_infer=skip_infer)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def df_map(
|
|
257
|
+
df, func, na_action=None, dtypes=None, dtype=None, skip_infer=False, **kwargs
|
|
258
|
+
):
|
|
259
|
+
"""
|
|
260
|
+
Apply a function to a DataFrame elementwise.
|
|
261
|
+
|
|
262
|
+
This method applies a function that accepts and returns a scalar
|
|
263
|
+
to every element of a DataFrame.
|
|
264
|
+
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
func : callable
|
|
268
|
+
Python function, returns a single value from a single value.
|
|
269
|
+
na_action : {None, 'ignore'}, default None
|
|
270
|
+
If 'ignore', propagate NaN values, without passing them to func.
|
|
271
|
+
dtypes : Series, default None
|
|
272
|
+
Specify dtypes of returned DataFrames.
|
|
273
|
+
dtype : np.dtype, default None
|
|
274
|
+
Specify dtypes of all columns of returned DataFrames, only
|
|
275
|
+
effective when dtypes is not specified.
|
|
276
|
+
skip_infer: bool, default False
|
|
277
|
+
Whether infer dtypes when dtypes or dtype is not specified.
|
|
278
|
+
**kwargs
|
|
279
|
+
Additional keyword arguments to pass as keywords arguments to
|
|
280
|
+
`func`.
|
|
281
|
+
|
|
282
|
+
Returns
|
|
283
|
+
-------
|
|
284
|
+
DataFrame
|
|
285
|
+
Transformed DataFrame.
|
|
286
|
+
|
|
287
|
+
See Also
|
|
288
|
+
--------
|
|
289
|
+
DataFrame.apply : Apply a function along input axis of DataFrame.
|
|
290
|
+
DataFrame.replace: Replace values given in `to_replace` with `value`.
|
|
291
|
+
Series.map : Apply a function elementwise on a Series.
|
|
292
|
+
|
|
293
|
+
Examples
|
|
294
|
+
--------
|
|
295
|
+
>>> import maxframe.dataframe as md
|
|
296
|
+
>>> df = md.DataFrame([[1, 2.12], [3.356, 4.567]])
|
|
297
|
+
>>> df.execute()
|
|
298
|
+
0 1
|
|
299
|
+
0 1.000 2.120
|
|
300
|
+
1 3.356 4.567
|
|
301
|
+
|
|
302
|
+
>>> df.map(lambda x: len(str(x))).execute()
|
|
303
|
+
0 1
|
|
304
|
+
0 3 4
|
|
305
|
+
1 5 5
|
|
306
|
+
|
|
307
|
+
Like Series.map, NA values can be ignored:
|
|
308
|
+
|
|
309
|
+
>>> df_copy = df.copy()
|
|
310
|
+
>>> df_copy.iloc[0, 0] = md.NA
|
|
311
|
+
>>> df_copy.map(lambda x: len(str(x)), na_action='ignore').execute()
|
|
312
|
+
0 1
|
|
313
|
+
0 NaN 4
|
|
314
|
+
1 5.0 5
|
|
315
|
+
|
|
316
|
+
It is also possible to use `map` with functions that are not
|
|
317
|
+
`lambda` functions:
|
|
318
|
+
|
|
319
|
+
>>> df.map(round, ndigits=1).execute()
|
|
320
|
+
0 1
|
|
321
|
+
0 1.0 2.1
|
|
322
|
+
1 3.4 4.6
|
|
323
|
+
|
|
324
|
+
Note that a vectorized version of `func` often exists, which will
|
|
325
|
+
be much faster. You could square each number elementwise.
|
|
326
|
+
|
|
327
|
+
>>> df.map(lambda x: x**2).execute()
|
|
328
|
+
0 1
|
|
329
|
+
0 1.000000 4.494400
|
|
330
|
+
1 11.262736 20.857489
|
|
331
|
+
|
|
332
|
+
But it's better to avoid map in that case.
|
|
333
|
+
|
|
334
|
+
>>> (df ** 2).execute()
|
|
335
|
+
0 1
|
|
336
|
+
0 1.000000 4.494400
|
|
337
|
+
1 11.262736 20.857489
|
|
338
|
+
"""
|
|
339
|
+
if dtypes is None and dtype is not None:
|
|
340
|
+
dtypes = pd.Series([dtype] * df.shape[1], index=df.dtypes.index)
|
|
341
|
+
|
|
342
|
+
def _wrapper(row):
|
|
343
|
+
return row.map(func, na_action=na_action, **kwargs)
|
|
344
|
+
|
|
345
|
+
return df.apply(
|
|
346
|
+
_wrapper, axis=1, dtypes=dtypes, skip_infer=skip_infer, elementwise=True
|
|
347
|
+
)
|
|
@@ -16,6 +16,8 @@ import numpy as np
|
|
|
16
16
|
import pandas as pd
|
|
17
17
|
import pytest
|
|
18
18
|
|
|
19
|
+
from maxframe import options
|
|
20
|
+
|
|
19
21
|
from .... import opcodes
|
|
20
22
|
from ....core import OutputType
|
|
21
23
|
from ....dataframe import DataFrame
|
|
@@ -124,6 +126,7 @@ def test_dataframe_apply():
|
|
|
124
126
|
dtypes=pd.Series([np.dtype(float)] * 3),
|
|
125
127
|
)
|
|
126
128
|
assert df2.ndim == 2
|
|
129
|
+
assert df2.op.expect_resources == options.function.default_running_options
|
|
127
130
|
|
|
128
131
|
|
|
129
132
|
def test_series_apply():
|
|
@@ -180,6 +183,8 @@ def test_series_apply():
|
|
|
180
183
|
pd.Series, output_type="dataframe", dtypes=dtypes, index=pd.RangeIndex(2)
|
|
181
184
|
)
|
|
182
185
|
assert r.ndim == 2
|
|
186
|
+
assert r.op.expect_resources == options.function.default_running_options
|
|
187
|
+
|
|
183
188
|
pd.testing.assert_series_equal(r.dtypes, dtypes)
|
|
184
189
|
assert r.shape == (2, 3)
|
|
185
190
|
|
|
@@ -305,6 +310,7 @@ def test_transform():
|
|
|
305
310
|
assert r.shape == series.shape
|
|
306
311
|
assert r.op._op_type_ == opcodes.TRANSFORM
|
|
307
312
|
assert r.op.output_types[0] == OutputType.series
|
|
313
|
+
assert r.op.expect_resources == options.function.default_running_options
|
|
308
314
|
|
|
309
315
|
|
|
310
316
|
def test_series_isin():
|
|
@@ -563,12 +569,17 @@ def test_apply():
|
|
|
563
569
|
)
|
|
564
570
|
assert apply_df.shape == (3, 2)
|
|
565
571
|
assert apply_df.op.expect_engine == "SPE"
|
|
566
|
-
assert apply_df.op.expect_resources == {
|
|
572
|
+
assert apply_df.op.expect_resources == {
|
|
573
|
+
"cpu": 4,
|
|
574
|
+
"memory": "40GB",
|
|
575
|
+
"gpu": 0,
|
|
576
|
+
"gu_quota": None,
|
|
577
|
+
}
|
|
567
578
|
|
|
568
579
|
|
|
569
580
|
def test_pivot_table():
|
|
570
581
|
from ...groupby.aggregation import DataFrameGroupByAgg
|
|
571
|
-
from ...
|
|
582
|
+
from ...reshape.pivot_table import DataFramePivotTable
|
|
572
583
|
|
|
573
584
|
raw = pd.DataFrame(
|
|
574
585
|
{
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
import pandas as pd
|
|
17
17
|
|
|
18
|
+
from ... import opcodes
|
|
18
19
|
from ...core import ENTITY_TYPE, OutputType
|
|
19
20
|
from ...serialization.serializables import StringField
|
|
20
21
|
from ...tensor import tensor as astensor
|
|
@@ -23,6 +24,8 @@ from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class DataFrameToNumeric(DataFrameOperator, DataFrameOperatorMixin):
|
|
27
|
+
_op_type_ = opcodes.TO_NUMERIC
|
|
28
|
+
|
|
26
29
|
errors = StringField("errors")
|
|
27
30
|
downcast = StringField("downcast")
|
|
28
31
|
|
|
@@ -38,8 +38,9 @@ from ..utils import (
|
|
|
38
38
|
_with_convert_dtype = pd_release_version < (1, 2, 0)
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
class
|
|
41
|
+
class DataFrameTransform(DataFrameOperator, DataFrameOperatorMixin):
|
|
42
42
|
_op_type_ = opcodes.TRANSFORM
|
|
43
|
+
_legacy_name = "TransformOperator"
|
|
43
44
|
|
|
44
45
|
func = AnyField("func", default=None)
|
|
45
46
|
axis = AnyField("axis", default=None)
|
|
@@ -141,13 +142,17 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
141
142
|
|
|
142
143
|
@classmethod
|
|
143
144
|
def estimate_size(
|
|
144
|
-
cls, ctx: MutableMapping[str, Union[int, float]], op: "
|
|
145
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameTransform"
|
|
145
146
|
) -> None:
|
|
146
147
|
if isinstance(op.func, MarkedFunction):
|
|
147
148
|
ctx[op.outputs[0].key] = float("inf")
|
|
148
149
|
super().estimate_size(ctx, op)
|
|
149
150
|
|
|
150
151
|
|
|
152
|
+
# keep for import compatibility
|
|
153
|
+
TransformOperator = DataFrameTransform
|
|
154
|
+
|
|
155
|
+
|
|
151
156
|
def get_packed_funcs(df, output_type, func, *args, **kwds) -> Any:
|
|
152
157
|
stub_df = _build_stub_pandas_obj(df, output_type)
|
|
153
158
|
n_args = copy_if_possible(args)
|
|
@@ -235,7 +240,7 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
|
|
|
235
240
|
"""
|
|
236
241
|
call_agg = kwargs.pop("_call_agg", False)
|
|
237
242
|
func = get_packed_funcs(df, OutputType.dataframe, func, *args, **kwargs)
|
|
238
|
-
op =
|
|
243
|
+
op = DataFrameTransform(
|
|
239
244
|
func=func,
|
|
240
245
|
axis=axis,
|
|
241
246
|
args=args,
|
|
@@ -327,13 +332,15 @@ def series_transform(
|
|
|
327
332
|
"""
|
|
328
333
|
call_agg = kwargs.pop("_call_agg", False)
|
|
329
334
|
func = get_packed_funcs(series, OutputType.series, func, *args, **kwargs)
|
|
330
|
-
op =
|
|
335
|
+
op = DataFrameTransform(
|
|
331
336
|
func=func,
|
|
332
337
|
axis=axis,
|
|
333
338
|
convert_dtype=convert_dtype,
|
|
334
339
|
args=args,
|
|
335
340
|
kwds=kwargs,
|
|
336
|
-
output_types=[OutputType.series]
|
|
341
|
+
output_types=[OutputType.series]
|
|
342
|
+
if not call_agg and not isinstance(func, list)
|
|
343
|
+
else None,
|
|
337
344
|
call_agg=call_agg,
|
|
338
345
|
)
|
|
339
346
|
return op(series, dtype=dtype, name=series.name, skip_infer=skip_infer)
|
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
15
17
|
from ... import opcodes
|
|
16
18
|
from ...core import OutputType
|
|
17
19
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -30,10 +32,20 @@ class DataFrameTranspose(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
30
32
|
new_shape = arg.shape[::-1]
|
|
31
33
|
columns_value = arg.index_value
|
|
32
34
|
index_value = parse_index(arg.dtypes.index)
|
|
35
|
+
|
|
36
|
+
if not arg.index_value.has_value:
|
|
37
|
+
dtypes = None
|
|
38
|
+
else:
|
|
39
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
40
|
+
|
|
41
|
+
dtype = find_common_type(list(arg.dtypes))
|
|
42
|
+
pd_index = arg.index_value.to_pandas()
|
|
43
|
+
dtypes = pd.Series([dtype] * len(pd_index), index=pd_index)
|
|
44
|
+
|
|
33
45
|
return self.new_dataframe(
|
|
34
46
|
[arg],
|
|
35
47
|
shape=new_shape,
|
|
36
|
-
dtypes=
|
|
48
|
+
dtypes=dtypes,
|
|
37
49
|
columns_value=columns_value,
|
|
38
50
|
index_value=index_value,
|
|
39
51
|
)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...udf import builtin_function
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@builtin_function
def _item_or_none(item):
    """Return the first element of ``item``, or None when ``item`` is empty."""
    # ``len`` is used rather than truthiness so the emptiness check stays
    # exactly what the original performed on the incoming container.
    return item[0] if len(item) > 0 else None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _valid_index(df_or_series, slc: slice):
    """
    Select index labels of the rows that are not entirely NA.

    Rows in which every value is NA are dropped (``how="all"``), the
    remaining index is sliced with ``slc``, and the sliced index is wrapped
    in a tensor to which ``_item_or_none`` is applied via ``apply_chunk``.

    Parameters
    ----------
    df_or_series
        Object providing ``dropna`` and ``index``.
    slc : slice
        Slice applied to the index left after dropping all-NA rows.
    """
    from ... import tensor as mt

    non_na = df_or_series.dropna(how="all")
    picked = non_na.index[slc]
    index_tensor = mt.array(picked)
    return index_tensor.mf.apply_chunk(_item_or_none, dtype=picked.dtype)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Docstring template; the ``pos`` placeholder is filled with a position word
# by the ``__doc__`` assignments that use this template.
_doc = """
Return index for %(pos)s non-NA value or None, if no non-NA value is found.

Returns
-------
type of index

Examples
--------
For Series:

>>> import maxframe.dataframe as md
>>> s = md.Series([None, 3, 4])
>>> s.first_valid_index().execute()
1
>>> s.last_valid_index().execute()
2

>>> s = md.Series([None, None])
>>> print(s.first_valid_index().execute())
None
>>> print(s.last_valid_index().execute())
None

If all elements in Series are NA/null, returns None.

>>> s = md.Series()
>>> print(s.first_valid_index().execute())
None
>>> print(s.last_valid_index().execute())
None

If Series is empty, returns None.

For DataFrame:

>>> df = md.DataFrame({'A': [None, None, 2], 'B': [None, 3, 4]})
>>> df.execute()
     A    B
0  NaN  NaN
1  NaN  3.0
2  2.0  4.0
>>> df.first_valid_index().execute()
1
>>> df.last_valid_index().execute()
2

>>> df = md.DataFrame({'A': [None, None, None], 'B': [None, None, None]})
>>> df.execute()
      A     B
0  None  None
1  None  None
2  None  None
>>> print(df.first_valid_index().execute())
None
>>> print(df.last_valid_index().execute())
None

If all elements in DataFrame are NA/null, returns None.

>>> df = md.DataFrame()
>>> df.execute()
Empty DataFrame
Columns: []
Index: []
>>> print(df.first_valid_index().execute())
None
>>> print(df.last_valid_index().execute())
None

If DataFrame is empty, returns None.
"""
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def first_valid_index(df_or_series):
    # The public docstring is assigned at module level from the shared
    # template, so only comments are used here.
    head = slice(None, 1)  # keeps at most the leading index label
    return _valid_index(df_or_series, head)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def last_valid_index(df_or_series):
    # The public docstring is assigned at module level from the shared
    # template, so only comments are used here.
    tail = slice(-1, None)  # keeps at most the trailing index label
    return _valid_index(df_or_series, tail)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Fill the shared docstring template with the position word for each function.
first_valid_index.__doc__ = _doc % {"pos": "first"}
last_valid_index.__doc__ = _doc % {"pos": "last"}
|