maxframe 2.0.0b2__cp311-cp311-win32.whl → 2.2.0__cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp311-win32.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp311-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp311-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import EntityData
|
|
22
|
+
from ...serialization.serializables import AnyField, KeyField
|
|
23
|
+
from ...tensor import tensor as astensor
|
|
24
|
+
from ...tensor.core import TENSOR_TYPE
|
|
25
|
+
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
26
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
27
|
+
from ..utils import parse_index
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DataFrameDot(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing ``lhs.dot(rhs)`` where ``lhs`` is a DataFrame or Series.

    Calling the operator derives the result's dimensionality, shape and dtype
    from a tensor-level ``dot`` of the two operands and then creates a scalar,
    Series or DataFrame output accordingly.
    """

    _op_type_ = opcodes.DOT

    # left-hand operand of the product
    lhs = KeyField("lhs")
    # right-hand operand of the product
    rhs = AnyField("rhs")

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @classmethod
    def _set_inputs(cls, op: "DataFrameDot", inputs: List[EntityData]):
        """Register ``inputs`` on ``op`` and rebind ``lhs``/``rhs`` to the first two."""
        super()._set_inputs(op, inputs)
        left, right = op._inputs[:2]
        op.lhs, op.rhs = left, right

    def __call__(self, lhs, rhs):
        """Create the output entity for ``lhs.dot(rhs)``.

        Parameters
        ----------
        lhs : DataFrame or Series
            Left operand; must provide ``to_tensor()``.
        rhs : DataFrame, Series or array-like
            Right operand; anything that is not a DataFrame / Series is
            converted with ``astensor``.

        Returns
        -------
        tensor, scalar, Series or DataFrame
            Chosen from the number of dimensions of the tensor-level product.
        """
        lhs = self._process_input(lhs)
        rhs = self._process_input(rhs)

        if isinstance(rhs, (DATAFRAME_TYPE, SERIES_TYPE)):
            rhs_tensor = rhs.to_tensor()
        else:
            rhs = astensor(rhs)
            rhs_tensor = rhs

        # tensor-level product, used to read ndim / shape / dtype of the result
        probe = lhs.to_tensor().dot(rhs_tensor)
        result_ndim = probe.ndim

        if result_ndim == 0:
            if isinstance(lhs, SERIES_TYPE) and isinstance(rhs, TENSOR_TYPE):
                # Series . tensor: hand back the tensor product itself
                return probe
            return self.new_scalar([lhs, rhs], dtype=probe.dtype)

        if result_ndim == 1:
            if lhs.ndim != 1:
                assert rhs.ndim == 1
                index_value = lhs.index_value
            elif hasattr(rhs, "columns_value"):
                index_value = rhs.columns_value
            else:
                # tensor operand: build a RangeIndex over its second axis,
                # using -1 as the length when that extent is NaN
                n_cols = rhs.shape[1]
                length = -1 if np.isnan(n_cols) else n_cols
                index_value = parse_index(pd.RangeIndex(length), store_data=True)
            return self.new_series(
                [lhs, rhs],
                shape=probe.shape,
                dtype=probe.dtype,
                index_value=index_value,
            )

        # remaining case: DataFrame output, every column has the product dtype
        column_dtypes = np.repeat(probe.dtype, probe.shape[1])
        if isinstance(rhs, TENSOR_TYPE):
            dtypes = pd.Series(column_dtypes, index=pd.RangeIndex(probe.shape[1]))
            columns_value = parse_index(dtypes.index, store_data=True)
        else:
            dtypes = pd.Series(column_dtypes, index=rhs.columns_value.to_pandas())
            columns_value = rhs.columns_value
        return self.new_dataframe(
            [lhs, rhs],
            shape=probe.shape,
            index_value=lhs.index_value,
            columns_value=columns_value,
            dtypes=dtypes,
        )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def dot(df_or_series, other):
    # Build a DataFrameDot operator with ``df_or_series`` on the left and
    # ``other`` on the right, then apply it to the same pair of operands.
    dot_op = DataFrameDot(lhs=df_or_series, rhs=other)
    result = dot_op(df_or_series, other)
    return result
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def rdot(df_or_series, other):
    # Reflected product: ``other`` becomes the left operand and
    # ``df_or_series`` the right operand of the DataFrameDot operator.
    reflected_op = DataFrameDot(lhs=other, rhs=df_or_series)
    result = reflected_op(other, df_or_series)
    return result
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
dot.__frame_doc__ = """
|
|
110
|
+
Compute the matrix multiplication between the DataFrame and other.
|
|
111
|
+
|
|
112
|
+
This method computes the matrix product between the DataFrame and the
|
|
113
|
+
values of another Series, DataFrame or a numpy array.
|
|
114
|
+
|
|
115
|
+
It can also be called using ``self @ other`` in Python >= 3.5.
|
|
116
|
+
|
|
117
|
+
Parameters
|
|
118
|
+
----------
|
|
119
|
+
other : Series, DataFrame or array-like
|
|
120
|
+
The other object to compute the matrix product with.
|
|
121
|
+
|
|
122
|
+
Returns
|
|
123
|
+
-------
|
|
124
|
+
Series or DataFrame
|
|
125
|
+
If other is a Series, return the matrix product between self and
|
|
126
|
+
other as a Series. If other is a DataFrame or a numpy.array, return
|
|
127
|
+
the matrix product of self and other in a DataFrame of a np.array.
|
|
128
|
+
|
|
129
|
+
See Also
|
|
130
|
+
--------
|
|
131
|
+
Series.dot: Similar method for Series.
|
|
132
|
+
|
|
133
|
+
Notes
|
|
134
|
+
-----
|
|
135
|
+
The dimensions of DataFrame and other must be compatible in order to
|
|
136
|
+
compute the matrix multiplication. In addition, the column names of
|
|
137
|
+
DataFrame and the index of other must contain the same values, as they
|
|
138
|
+
will be aligned prior to the multiplication.
|
|
139
|
+
|
|
140
|
+
The dot method for Series computes the inner product, instead of the
|
|
141
|
+
matrix product here.
|
|
142
|
+
|
|
143
|
+
Examples
|
|
144
|
+
--------
|
|
145
|
+
Here we multiply a DataFrame with a Series.
|
|
146
|
+
|
|
147
|
+
>>> import maxframe.tensor as mt
|
|
148
|
+
>>> import maxframe.dataframe as md
|
|
149
|
+
>>> df = md.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
|
|
150
|
+
>>> s = md.Series([1, 1, 2, 1])
|
|
151
|
+
>>> df.dot(s).execute()
|
|
152
|
+
0 -4
|
|
153
|
+
1 5
|
|
154
|
+
dtype: int64
|
|
155
|
+
|
|
156
|
+
Here we multiply a DataFrame with another DataFrame.
|
|
157
|
+
|
|
158
|
+
>>> other = md.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]])
|
|
159
|
+
>>> df.dot(other).execute()
|
|
160
|
+
0 1
|
|
161
|
+
0 1 4
|
|
162
|
+
1 2 2
|
|
163
|
+
|
|
164
|
+
Note that the dot method gives the same result as @
|
|
165
|
+
|
|
166
|
+
>>> (df @ other).execute()
|
|
167
|
+
0 1
|
|
168
|
+
0 1 4
|
|
169
|
+
1 2 2
|
|
170
|
+
|
|
171
|
+
The dot method works also if other is an np.array.
|
|
172
|
+
|
|
173
|
+
>>> arr = mt.array([[0, 1], [1, 2], [-1, -1], [2, 0]])
|
|
174
|
+
>>> df.dot(arr).execute()
|
|
175
|
+
0 1
|
|
176
|
+
0 1 4
|
|
177
|
+
1 2 2
|
|
178
|
+
|
|
179
|
+
Note how shuffling of the objects does not change the result.
|
|
180
|
+
|
|
181
|
+
>>> s2 = s.reindex([1, 0, 2, 3])
|
|
182
|
+
>>> df.dot(s2).execute()
|
|
183
|
+
0 -4
|
|
184
|
+
1 5
|
|
185
|
+
dtype: int64
|
|
186
|
+
"""
|
|
187
|
+
dot.__series_doc__ = """
|
|
188
|
+
Compute the dot product between the Series and the columns of other.
|
|
189
|
+
|
|
190
|
+
This method computes the dot product between the Series and another
|
|
191
|
+
one, or the Series and each column of a DataFrame, or the Series and
|
|
192
|
+
each column of an array.
|
|
193
|
+
|
|
194
|
+
It can also be called using `self @ other` in Python >= 3.5.
|
|
195
|
+
|
|
196
|
+
Parameters
|
|
197
|
+
----------
|
|
198
|
+
other : Series, DataFrame or array-like
|
|
199
|
+
The other object to compute the dot product with its columns.
|
|
200
|
+
|
|
201
|
+
Returns
|
|
202
|
+
-------
|
|
203
|
+
scalar, Series or numpy.ndarray
|
|
204
|
+
Return the dot product of the Series and other if other is a
|
|
205
|
+
Series, the Series of the dot product of Series and each column of
|
|
206
|
+
other if other is a DataFrame or a numpy.ndarray between the Series
|
|
207
|
+
and each column of the numpy array.
|
|
208
|
+
|
|
209
|
+
See Also
|
|
210
|
+
--------
|
|
211
|
+
DataFrame.dot: Compute the matrix product with the DataFrame.
|
|
212
|
+
Series.mul: Multiplication of series and other, element-wise.
|
|
213
|
+
|
|
214
|
+
Notes
|
|
215
|
+
-----
|
|
216
|
+
The Series and other have to share the same index if other is a Series
|
|
217
|
+
or a DataFrame.
|
|
218
|
+
|
|
219
|
+
Examples
|
|
220
|
+
--------
|
|
221
|
+
>>> import maxframe.tensor as mt
|
|
222
|
+
>>> import maxframe.dataframe as md
|
|
223
|
+
>>> s = md.Series([0, 1, 2, 3])
|
|
224
|
+
>>> other = md.Series([-1, 2, -3, 4])
|
|
225
|
+
>>> s.dot(other).execute()
|
|
226
|
+
8
|
|
227
|
+
>>> (s @ other).execute()
|
|
228
|
+
8
|
|
229
|
+
>>> df = md.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
|
|
230
|
+
>>> s.dot(df).execute()
|
|
231
|
+
0 24
|
|
232
|
+
1 14
|
|
233
|
+
dtype: int64
|
|
234
|
+
>>> arr = mt.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
|
|
235
|
+
>>> s.dot(arr).execute()
|
|
236
|
+
array([24, 14])
|
|
237
|
+
"""
|
|
@@ -18,9 +18,10 @@ from ...utils import classproperty
|
|
|
18
18
|
from .core import DataFrameUnaryUfunc
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class
|
|
21
|
+
class DataFrameRound(DataFrameUnaryUfunc):
|
|
22
22
|
_op_type_ = opcodes.AROUND
|
|
23
|
-
_func_name = "
|
|
23
|
+
_func_name = "round"
|
|
24
|
+
_legacy_name = "DataFrameAround" # since v2.3.0
|
|
24
25
|
|
|
25
26
|
decimals = Int32Field("decimals", default=None)
|
|
26
27
|
|
|
@@ -34,17 +35,20 @@ class DataFrameAround(DataFrameUnaryUfunc):
|
|
|
34
35
|
return TensorAround
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def
|
|
38
|
+
def round(df, decimals=0, *args, **kwargs):
|
|
38
39
|
if len(args) > 0:
|
|
39
40
|
raise TypeError(
|
|
40
41
|
f"round() takes 0 positional arguments but {len(args)} was given"
|
|
41
42
|
)
|
|
42
|
-
op =
|
|
43
|
+
op = DataFrameRound(decimals=decimals, **kwargs)
|
|
43
44
|
return op(df)
|
|
44
45
|
|
|
45
46
|
|
|
46
|
-
#
|
|
47
|
-
|
|
47
|
+
# keep for import compatibility
|
|
48
|
+
DataFrameAround = DataFrameRound
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
round.__frame_doc__ = """
|
|
48
52
|
Round a DataFrame to a variable number of decimal places.
|
|
49
53
|
|
|
50
54
|
Parameters
|
|
@@ -108,7 +112,7 @@ places as value
|
|
|
108
112
|
2 0.7 0.0
|
|
109
113
|
3 0.2 0.0
|
|
110
114
|
"""
|
|
111
|
-
|
|
115
|
+
round.__series_doc__ = """
|
|
112
116
|
Round each value in a Series to the given number of decimals.
|
|
113
117
|
|
|
114
118
|
Parameters
|
maxframe/dataframe/core.py
CHANGED
|
@@ -56,8 +56,11 @@ from ..utils import (
|
|
|
56
56
|
ceildiv,
|
|
57
57
|
estimate_pandas_size,
|
|
58
58
|
on_serialize_numpy_type,
|
|
59
|
+
pd_release_version,
|
|
60
|
+
prevent_called_from_pandas,
|
|
59
61
|
tokenize,
|
|
60
62
|
)
|
|
63
|
+
from .typing_ import DataFrameType, IndexType, SeriesType
|
|
61
64
|
from .utils import (
|
|
62
65
|
ReprSeries,
|
|
63
66
|
apply_if_callable,
|
|
@@ -66,6 +69,8 @@ from .utils import (
|
|
|
66
69
|
parse_index,
|
|
67
70
|
)
|
|
68
71
|
|
|
72
|
+
_df_with_iteritems = pd_release_version[:2] < (2, 0)
|
|
73
|
+
|
|
69
74
|
|
|
70
75
|
class IndexValue(Serializable):
|
|
71
76
|
"""
|
|
@@ -478,9 +483,17 @@ _lazy_chunk_meta_properties = (
|
|
|
478
483
|
)
|
|
479
484
|
|
|
480
485
|
|
|
486
|
+
def _calc_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
|
|
487
|
+
return [0] + np.cumsum(nsplit).tolist()
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def calc_cum_nsplits(
    nsplits: Tuple[Tuple[int, ...], ...]
) -> Tuple[List[int], ...]:
    """Apply ``_calc_cum_nsplit`` to every entry of ``nsplits``.

    Parameters
    ----------
    nsplits : tuple of tuple of int
        One sequence of split sizes per entry.

    Returns
    -------
    tuple of list of int
        A tuple with one list per entry of ``nsplits``, each produced by
        ``_calc_cum_nsplit``. The result is a tuple, not a list, which is
        what the return annotation now states.
    """
    return tuple(_calc_cum_nsplit(nsplit) for nsplit in nsplits)
|
|
492
|
+
|
|
493
|
+
|
|
481
494
|
@functools.lru_cache(maxsize=128)
|
|
482
495
|
def _get_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
|
|
483
|
-
return
|
|
496
|
+
return _calc_cum_nsplit(nsplit)
|
|
484
497
|
|
|
485
498
|
|
|
486
499
|
def _calc_axis_slice(nsplit: Tuple[int], index: int) -> slice:
|
|
@@ -684,6 +697,10 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
684
697
|
def names(self):
|
|
685
698
|
return getattr(self, "_names", None) or [self.name]
|
|
686
699
|
|
|
700
|
+
@property
def nlevels(self) -> int:
    """Number of entries in ``self.names``."""
    level_names = self.names
    return len(level_names)
|
|
703
|
+
|
|
687
704
|
@property
|
|
688
705
|
def index_value(self) -> IndexValue:
|
|
689
706
|
return self._index_value
|
|
@@ -818,6 +835,9 @@ class Index(HasShapeTileable, _ToPandasMixin):
|
|
|
818
835
|
def __len__(self):
|
|
819
836
|
return len(self._data)
|
|
820
837
|
|
|
838
|
+
def __class_getitem__(cls, item):
    # Subscripting the class is delegated to IndexType.from_getitem_args,
    # which receives the subscript argument unchanged.
    index_type = IndexType.from_getitem_args(item)
    return index_type
|
|
840
|
+
|
|
821
841
|
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
822
842
|
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
823
843
|
|
|
@@ -1049,12 +1069,6 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1049
1069
|
|
|
1050
1070
|
return from_series(self, dtype=dtype)
|
|
1051
1071
|
|
|
1052
|
-
@staticmethod
|
|
1053
|
-
def from_tensor(in_tensor, index=None, name=None):
|
|
1054
|
-
from .datasource.from_tensor import series_from_tensor
|
|
1055
|
-
|
|
1056
|
-
return series_from_tensor(in_tensor, index=index, name=name)
|
|
1057
|
-
|
|
1058
1072
|
|
|
1059
1073
|
class SeriesData(_BatchedFetcher, BaseSeriesData):
|
|
1060
1074
|
type_name = "Series"
|
|
@@ -1065,8 +1079,9 @@ class SeriesData(_BatchedFetcher, BaseSeriesData):
|
|
|
1065
1079
|
return tensor.astype(dtype=dtype, order=order, copy=False)
|
|
1066
1080
|
|
|
1067
1081
|
def iteritems(self, batch_size=10000, session=None):
|
|
1082
|
+
method_name = "iteritems" if _df_with_iteritems else "items"
|
|
1068
1083
|
for batch_data in self.iterbatch(batch_size=batch_size, session=session):
|
|
1069
|
-
yield from getattr(batch_data,
|
|
1084
|
+
yield from getattr(batch_data, method_name)()
|
|
1070
1085
|
|
|
1071
1086
|
items = iteritems
|
|
1072
1087
|
|
|
@@ -1082,12 +1097,39 @@ class SeriesData(_BatchedFetcher, BaseSeriesData):
|
|
|
1082
1097
|
name = name or self.name or 0
|
|
1083
1098
|
return dataframe_from_tensor(self, columns=[name])
|
|
1084
1099
|
|
|
1100
|
+
@property
def hasnans(self):
    """
    Return True if there are any NaNs.

    The value is built as ``self.isna().any()``, so it has to be executed
    to obtain a concrete result (see the example below).

    Returns
    -------
    bool

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3, None])
    >>> s.execute()
    0    1.0
    1    2.0
    2    3.0
    3    NaN
    dtype: float64
    >>> s.hasnans.execute()
    True
    """
    na_mask = self.isna()
    return na_mask.any()
|
|
1123
|
+
|
|
1085
1124
|
|
|
1086
1125
|
class Series(HasShapeTileable, _ToPandasMixin):
|
|
1087
1126
|
__slots__ = ("_cache",)
|
|
1088
1127
|
_allow_data_type_ = (SeriesData,)
|
|
1089
1128
|
type_name = "Series"
|
|
1090
1129
|
|
|
1130
|
+
def __class_getitem__(cls, item):
    """Handle ``Series[...]`` by delegating to ``SeriesType.from_getitem_args``."""
    series_type = SeriesType.from_getitem_args(item)
    return series_type
|
|
1132
|
+
|
|
1091
1133
|
def to_tensor(self, dtype=None):
|
|
1092
1134
|
return self._data.to_tensor(dtype=dtype)
|
|
1093
1135
|
|
|
@@ -1185,6 +1227,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1185
1227
|
else:
|
|
1186
1228
|
return super()._view()
|
|
1187
1229
|
|
|
1230
|
+
def __iter__(self):
    """Iterate over the second element of every pair yielded by ``items()``."""
    # Must run eagerly (not lazily inside a generator body): iteration
    # triggered by pandas is rejected here so that `__eq__` keeps working.
    prevent_called_from_pandas()
    pairs = self.items()
    return (pair[1] for pair in pairs)
|
|
1234
|
+
|
|
1188
1235
|
def __len__(self):
|
|
1189
1236
|
return len(self._data)
|
|
1190
1237
|
|
|
@@ -1297,98 +1344,6 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1297
1344
|
"""
|
|
1298
1345
|
return self._data.to_frame(name=name)
|
|
1299
1346
|
|
|
1300
|
-
def between(self, left, right, inclusive="both"):
|
|
1301
|
-
"""
|
|
1302
|
-
Return boolean Series equivalent to left <= series <= right.
|
|
1303
|
-
This function returns a boolean vector containing `True` wherever the
|
|
1304
|
-
corresponding Series element is between the boundary values `left` and
|
|
1305
|
-
`right`. NA values are treated as `False`.
|
|
1306
|
-
|
|
1307
|
-
Parameters
|
|
1308
|
-
----------
|
|
1309
|
-
left : scalar or list-like
|
|
1310
|
-
Left boundary.
|
|
1311
|
-
right : scalar or list-like
|
|
1312
|
-
Right boundary.
|
|
1313
|
-
inclusive : {"both", "neither", "left", "right"}
|
|
1314
|
-
Include boundaries. Whether to set each bound as closed or open.
|
|
1315
|
-
|
|
1316
|
-
Returns
|
|
1317
|
-
-------
|
|
1318
|
-
Series
|
|
1319
|
-
Series representing whether each element is between left and
|
|
1320
|
-
right (inclusive).
|
|
1321
|
-
|
|
1322
|
-
See Also
|
|
1323
|
-
--------
|
|
1324
|
-
Series.gt : Greater than of series and other.
|
|
1325
|
-
Series.lt : Less than of series and other.
|
|
1326
|
-
|
|
1327
|
-
Notes
|
|
1328
|
-
-----
|
|
1329
|
-
This function is equivalent to ``(left <= ser) & (ser <= right)``
|
|
1330
|
-
|
|
1331
|
-
Examples
|
|
1332
|
-
--------
|
|
1333
|
-
>>> import maxframe.dataframe as md
|
|
1334
|
-
>>> s = md.Series([2, 0, 4, 8, np.nan])
|
|
1335
|
-
|
|
1336
|
-
Boundary values are included by default:
|
|
1337
|
-
|
|
1338
|
-
>>> s.between(1, 4).execute()
|
|
1339
|
-
0 True
|
|
1340
|
-
1 False
|
|
1341
|
-
2 True
|
|
1342
|
-
3 False
|
|
1343
|
-
4 False
|
|
1344
|
-
dtype: bool
|
|
1345
|
-
|
|
1346
|
-
With `inclusive` set to ``"neither"`` boundary values are excluded:
|
|
1347
|
-
|
|
1348
|
-
>>> s.between(1, 4, inclusive="neither").execute()
|
|
1349
|
-
0 True
|
|
1350
|
-
1 False
|
|
1351
|
-
2 False
|
|
1352
|
-
3 False
|
|
1353
|
-
4 False
|
|
1354
|
-
dtype: bool
|
|
1355
|
-
|
|
1356
|
-
`left` and `right` can be any scalar value:
|
|
1357
|
-
|
|
1358
|
-
>>> s = md.Series(['Alice', 'Bob', 'Carol', 'Eve'])
|
|
1359
|
-
>>> s.between('Anna', 'Daniel').execute()
|
|
1360
|
-
0 False
|
|
1361
|
-
1 True
|
|
1362
|
-
2 True
|
|
1363
|
-
3 False
|
|
1364
|
-
dtype: bool
|
|
1365
|
-
"""
|
|
1366
|
-
if isinstance(inclusive, bool): # pragma: no cover
|
|
1367
|
-
# for pandas < 1.3.0
|
|
1368
|
-
if inclusive:
|
|
1369
|
-
inclusive = "both"
|
|
1370
|
-
else:
|
|
1371
|
-
inclusive = "neither"
|
|
1372
|
-
if inclusive == "both":
|
|
1373
|
-
lmask = self >= left
|
|
1374
|
-
rmask = self <= right
|
|
1375
|
-
elif inclusive == "left":
|
|
1376
|
-
lmask = self >= left
|
|
1377
|
-
rmask = self < right
|
|
1378
|
-
elif inclusive == "right":
|
|
1379
|
-
lmask = self > left
|
|
1380
|
-
rmask = self <= right
|
|
1381
|
-
elif inclusive == "neither":
|
|
1382
|
-
lmask = self > left
|
|
1383
|
-
rmask = self < right
|
|
1384
|
-
else:
|
|
1385
|
-
raise ValueError(
|
|
1386
|
-
"Inclusive has to be either string of 'both',"
|
|
1387
|
-
"'left', 'right', or 'neither'."
|
|
1388
|
-
)
|
|
1389
|
-
|
|
1390
|
-
return lmask & rmask
|
|
1391
|
-
|
|
1392
1347
|
# def median(
|
|
1393
1348
|
# self, axis=None, skipna=True, out=None, overwrite_input=False, keepdims=False
|
|
1394
1349
|
# ):
|
|
@@ -1589,18 +1544,6 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1589
1544
|
|
|
1590
1545
|
return from_dataframe(self, dtype=dtype)
|
|
1591
1546
|
|
|
1592
|
-
@staticmethod
|
|
1593
|
-
def from_tensor(in_tensor, index=None, columns=None):
|
|
1594
|
-
from .datasource.from_tensor import dataframe_from_tensor
|
|
1595
|
-
|
|
1596
|
-
return dataframe_from_tensor(in_tensor, index=index, columns=columns)
|
|
1597
|
-
|
|
1598
|
-
@staticmethod
|
|
1599
|
-
def from_records(records, **kw):
|
|
1600
|
-
from .datasource.from_records import from_records
|
|
1601
|
-
|
|
1602
|
-
return from_records(records, **kw)
|
|
1603
|
-
|
|
1604
1547
|
@property
|
|
1605
1548
|
def index(self):
|
|
1606
1549
|
from .datasource.index import from_tileable
|
|
@@ -1747,12 +1690,6 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1747
1690
|
def to_tensor(self):
|
|
1748
1691
|
return self._data.to_tensor()
|
|
1749
1692
|
|
|
1750
|
-
def from_tensor(self, in_tensor, index=None, columns=None):
|
|
1751
|
-
return self._data.from_tensor(in_tensor, index=index, columns=columns)
|
|
1752
|
-
|
|
1753
|
-
def from_records(self, records, **kw):
|
|
1754
|
-
return self._data.from_records(records, **kw)
|
|
1755
|
-
|
|
1756
1693
|
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
1757
1694
|
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
1758
1695
|
|
|
@@ -1772,6 +1709,14 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1772
1709
|
+ [k for k in self.dtypes.index if isinstance(k, str) and k.isidentifier()]
|
|
1773
1710
|
)
|
|
1774
1711
|
|
|
1712
|
+
def __iter__(self):
    """Iterate over the labels stored in ``self.dtypes.index``."""
    # Iteration triggered by pandas is rejected here so that `__eq__`
    # keeps working.
    prevent_called_from_pandas()
    labels = self.dtypes.index
    return iter(labels)
|
|
1716
|
+
|
|
1717
|
+
def __class_getitem__(cls, item):
    """Handle ``DataFrame[...]`` by delegating to ``DataFrameType.from_getitem_args``."""
    frame_type = DataFrameType.from_getitem_args(item)
    return frame_type
|
|
1719
|
+
|
|
1775
1720
|
@property
|
|
1776
1721
|
def T(self):
|
|
1777
1722
|
return self.transpose()
|
|
@@ -13,3 +13,21 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from .core import PandasDataSourceOperator
|
|
16
|
+
from .from_dict import dataframe_from_dict
|
|
17
|
+
from .from_records import from_records
|
|
18
|
+
from .from_tensor import dataframe_from_tensor, series_from_tensor
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _install():
    """Attach the ``from_*`` constructors to the DataFrame and Series types.

    Every type in ``DATAFRAME_TYPE`` receives ``from_dict``, ``from_records``
    and ``from_tensor``; every type in ``SERIES_TYPE`` receives
    ``from_tensor``. All of them are installed as static methods.
    """
    # Imported lazily inside the function rather than at module import time.
    from ..core import DATAFRAME_TYPE, SERIES_TYPE

    dataframe_constructors = (
        ("from_dict", dataframe_from_dict),
        ("from_records", from_records),
        ("from_tensor", dataframe_from_tensor),
    )
    for frame_cls in DATAFRAME_TYPE:
        for attr_name, constructor in dataframe_constructors:
            setattr(frame_cls, attr_name, staticmethod(constructor))
    for series_cls in SERIES_TYPE:
        series_cls.from_tensor = staticmethod(series_from_tensor)


# Run the installation once at import time, then drop the helper from the
# module namespace.
_install()
del _install
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import ENTITY_TYPE
|
|
16
|
+
from ...utils import find_objects, no_default
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def dataframe_from_dict(data, orient="columns", dtype=None, columns=None):
    """
    Construct DataFrame from dict of array-like or dicts.

    Creates DataFrame object from dictionary by columns or by index
    allowing dtype specification.

    Parameters
    ----------
    data : dict
        Of the form {field : array-like} or {field : dict}.
    orient : {'columns', 'index', 'tight'}, default 'columns'
        The "orientation" of the data. If the keys of the passed dict
        should be the columns of the resulting DataFrame, pass 'columns'
        (default). Otherwise if the keys should be rows, pass 'index'.
        If 'tight', assume a dict with keys ['index', 'columns', 'data',
        'index_names', 'column_names'].

    dtype : dtype, default None
        Data type to force after DataFrame construction, otherwise infer.
    columns : list, default None
        Column labels to use when ``orient='index'``. Raises a ValueError
        if used with ``orient='columns'`` or ``orient='tight'``.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.from_records : DataFrame from structured ndarray, sequence
        of tuples or dicts, or DataFrame.
    DataFrame : DataFrame object creation using constructor.
    DataFrame.to_dict : Convert the DataFrame to a dictionary.

    Examples
    --------
    By default the keys of the dict become the DataFrame columns:

    >>> import maxframe.dataframe as md
    >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']}
    >>> md.DataFrame.from_dict(data).execute()
       col_1 col_2
    0      3     a
    1      2     b
    2      1     c
    3      0     d

    Specify ``orient='index'`` to create the DataFrame using dictionary
    keys as rows:

    >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']}
    >>> md.DataFrame.from_dict(data, orient='index').execute()
           0  1  2  3
    row_1  3  2  1  0
    row_2  a  b  c  d

    When using the 'index' orientation, the column names can be
    specified manually:

    >>> md.DataFrame.from_dict(data, orient='index',
    ...                        columns=['A', 'B', 'C', 'D']).execute()
           A  B  C  D
    row_1  3  2  1  0
    row_2  a  b  c  d

    Specify ``orient='tight'`` to create the DataFrame using a 'tight'
    format:

    >>> data = {'index': [('a', 'b'), ('a', 'c')],
    ...         'columns': [('x', 1), ('y', 2)],
    ...         'data': [[1, 3], [2, 4]],
    ...         'index_names': ['n1', 'n2'],
    ...         'column_names': ['z1', 'z2']}
    >>> md.DataFrame.from_dict(data, orient='tight').execute()
    z1     x  y
    z2     1  2
    n1 n2
    a  b   1  3
       c   2  4
    """
    import pandas as pd

    from ..initializer import DataFrame as DataFrameInit
    from .from_tensor import dataframe_from_1d_tileables

    if orient == "tight":
        # init directly from the explicit index / columns / data entries
        init_kw = {
            "index": data.get("index"),
            "columns": data.get("columns"),
        }
        df = DataFrameInit(data["data"], **init_kw)
        # axis names are optional; `no_default` is passed for absent ones
        rename_kw = {
            "index": data.get("index_names", no_default),
            "columns": data.get("column_names", no_default),
        }
        res = df.rename_axis(**rename_kw)
    elif find_objects(data, ENTITY_TYPE):
        # at least one value is a MaxFrame entity: assemble from 1-d tileables
        axis = validate_axis(orient)
        res = dataframe_from_1d_tileables(data, columns=columns, axis=axis)
    elif orient == "index":
        # Plain Python data with keys as rows. Passing `data` straight to the
        # initializer would treat the keys as columns and drop `columns`, so
        # let pandas apply the orientation first.
        # NOTE(review): assumes the initializer accepts a pandas DataFrame --
        # confirm against `..initializer.DataFrame`.
        local_df = pd.DataFrame.from_dict(data, orient="index", columns=columns)
        res = DataFrameInit(local_df)
    else:
        # plain Python data, keys are columns
        res = DataFrameInit(data)

    if dtype is not None:
        res = res.astype(dtype)
    return res
|