maxframe 2.0.0b2__cp38-cp38-win32.whl → 2.2.0__cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ..utils import validate_axis
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def truncate(df, before=None, after=None, axis=0, copy=None):
    """
    Truncate a Series or DataFrame before and after some index value.

    This is a useful shorthand for boolean indexing based on index
    values above or below certain thresholds.

    Parameters
    ----------
    before : date, str, int
        Truncate all rows before this index value.
    after : date, str, int
        Truncate all rows after this index value.
    axis : {0 or 'index', 1 or 'columns'}, optional
        Axis to truncate. Truncates the index (rows) by default.
        For `Series` only the index can be truncated.
    copy : bool, optional
        This parameter is only kept for compatibility with pandas;
        its value is ignored.

    Returns
    -------
    type of caller
        The truncated Series or DataFrame.

    See Also
    --------
    DataFrame.loc : Select a subset of a DataFrame by label.
    DataFrame.iloc : Select a subset of a DataFrame by position.

    Notes
    -----
    If the index being truncated contains only datetime values,
    `before` and `after` may be specified as strings instead of
    Timestamps.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
    ...                    'B': ['f', 'g', 'h', 'i', 'j'],
    ...                    'C': ['k', 'l', 'm', 'n', 'o']},
    ...                   index=[1, 2, 3, 4, 5])
    >>> df.execute()
       A  B  C
    1  a  f  k
    2  b  g  l
    3  c  h  m
    4  d  i  n
    5  e  j  o

    >>> df.truncate(before=2, after=4).execute()
       A  B  C
    2  b  g  l
    3  c  h  m
    4  d  i  n

    The columns of a DataFrame can be truncated.

    >>> df.truncate(before="A", after="B", axis="columns").execute()
       A  B
    1  a  f
    2  b  g
    3  c  h
    4  d  i
    5  e  j

    For Series, only rows can be truncated.

    >>> df['A'].truncate(before=2, after=4).execute()
    2    b
    3    c
    4    d
    Name: A, dtype: object

    The index values in ``truncate`` can be datetimes or string
    dates.

    >>> dates = md.date_range('2016-01-01', '2016-02-01', freq='s')
    >>> df = md.DataFrame(index=dates, data={'A': 1})
    >>> df.tail().execute()
                         A
    2016-01-31 23:59:56  1
    2016-01-31 23:59:57  1
    2016-01-31 23:59:58  1
    2016-01-31 23:59:59  1
    2016-02-01 00:00:00  1

    >>> df.truncate(before=md.Timestamp('2016-01-05'),
    ...             after=md.Timestamp('2016-01-10')).tail().execute()
                         A
    2016-01-09 23:59:56  1
    2016-01-09 23:59:57  1
    2016-01-09 23:59:58  1
    2016-01-09 23:59:59  1
    2016-01-10 00:00:00  1

    Because the index is a DatetimeIndex containing only dates, we can
    specify `before` and `after` as strings. They will be coerced to
    Timestamps before truncation.

    >>> df.truncate('2016-01-05', '2016-01-10').tail().execute()
                         A
    2016-01-09 23:59:56  1
    2016-01-09 23:59:57  1
    2016-01-09 23:59:58  1
    2016-01-09 23:59:59  1
    2016-01-10 00:00:00  1

    Note that ``truncate`` assumes a 0 value for any unspecified time
    component (midnight). This differs from partial string slicing, which
    returns any partially matching dates.

    >>> df.loc['2016-01-05':'2016-01-10', :].tail().execute()
                         A
    2016-01-10 23:59:55  1
    2016-01-10 23:59:56  1
    2016-01-10 23:59:57  1
    2016-01-10 23:59:58  1
    2016-01-10 23:59:59  1
    """
    _ = copy  # accepted for pandas signature compatibility only
    axis = validate_axis(axis, df)
    bounds = slice(before, after)
    if df.ndim == 1:
        # A one-dimensional object has a single axis, so hand ``.loc`` one
        # indexer only; a ``(rows, columns)`` pair is one indexer too many.
        return df.loc[bounds]
    if axis == 0:
        return df.loc[bounds, :]
    return df.loc[:, bounds]
|
|
@@ -79,8 +79,6 @@ class DataFrameWhere(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
79
79
|
other=mock_other,
|
|
80
80
|
axis=self.axis,
|
|
81
81
|
level=self.level,
|
|
82
|
-
errors=self.errors,
|
|
83
|
-
try_cast=self.try_cast,
|
|
84
82
|
)
|
|
85
83
|
|
|
86
84
|
inputs = filter_inputs([df_or_series, self.cond, self.other])
|
|
@@ -135,15 +133,6 @@ axis : int, default None
|
|
|
135
133
|
Alignment axis if needed.
|
|
136
134
|
level : int, default None
|
|
137
135
|
Alignment level if needed.
|
|
138
|
-
errors : str, {{'raise', 'ignore'}}, default 'raise'
|
|
139
|
-
Note that currently this parameter won't affect
|
|
140
|
-
the results and will always coerce to a suitable dtype.
|
|
141
|
-
|
|
142
|
-
- 'raise' : allow exceptions to be raised.
|
|
143
|
-
- 'ignore' : suppress exceptions. On error return original object.
|
|
144
|
-
|
|
145
|
-
try_cast : bool, default False
|
|
146
|
-
Try to cast the result back to the input type (if possible).
|
|
147
136
|
|
|
148
137
|
Returns
|
|
149
138
|
-------
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from pandas.api.types import is_list_like
|
|
16
|
+
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def xs(df_or_series, key, axis=0, level=None, drop_level=True):
    """
    Return cross-section from the Series/DataFrame.

    This method takes a `key` argument to select data at a particular
    level of a MultiIndex.

    Parameters
    ----------
    key : label or tuple of label
        Label contained in the index, or partially in a MultiIndex.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis to retrieve cross-section on.
    level : object, defaults to first n levels (n=1 or len(key))
        In case of a key partially contained in a MultiIndex, indicate
        which levels are used. Levels can be referred by label or position.
    drop_level : bool, default True
        If False, returns object with same levels as self.

    Returns
    -------
    Series or DataFrame
        Cross-section from the original Series or DataFrame
        corresponding to the selected index levels.

    Raises
    ------
    ValueError
        If `key` has more components than there are levels to match.
    KeyError
        If a level label is not one of the names of the selected axis.
    IndexError
        If a level position is out of range.

    See Also
    --------
    DataFrame.loc : Access a group of rows and columns
        by label(s) or a boolean array.
    DataFrame.iloc : Purely integer-location based indexing
        for selection by position.

    Notes
    -----
    `xs` can not be used to set values.

    MultiIndex Slicers is a generic way to get/set values on
    any level or levels.
    It is a superset of `xs` functionality, see
    :ref:`MultiIndex Slicers <advanced.mi_slicers>`.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> d = {'num_legs': [4, 4, 2, 2],
    ...      'num_wings': [0, 0, 2, 2],
    ...      'class': ['mammal', 'mammal', 'mammal', 'bird'],
    ...      'animal': ['cat', 'dog', 'bat', 'penguin'],
    ...      'locomotion': ['walks', 'walks', 'flies', 'walks']}
    >>> df = md.DataFrame(data=d)
    >>> df = df.set_index(['class', 'animal', 'locomotion'])
    >>> df.execute()
                               num_legs  num_wings
    class  animal  locomotion
    mammal cat     walks              4          0
           dog     walks              4          0
           bat     flies              2          2
    bird   penguin walks              2          2

    Get values at specified index

    >>> df.xs('mammal').execute()
                       num_legs  num_wings
    animal locomotion
    cat    walks              4          0
    dog    walks              4          0
    bat    flies              2          2

    Get values at several indexes

    >>> df.xs(('mammal', 'dog')).execute()
                num_legs  num_wings
    locomotion
    walks              4          0

    Get values at specified index and level

    >>> df.xs('cat', level=1).execute()
                       num_legs  num_wings
    class  locomotion
    mammal walks              4          0

    Get values at several indexes and levels

    >>> df.xs(('bird', 'walks'),
    ...       level=[0, 'locomotion']).execute()
             num_legs  num_wings
    animal
    penguin         2          2

    Get values at specified column and axis

    >>> df.xs('num_wings', axis=1).execute()
    class   animal   locomotion
    mammal  cat      walks         0
            dog      walks         0
            bat      flies         2
    bird    penguin  walks         2
    Name: num_wings, dtype: int64
    """
    axis = validate_axis(axis, df_or_series)
    target_index = df_or_series.axes[axis]
    nlevels = target_index.nlevels

    if level is None:
        levels = list(range(nlevels))
    elif is_list_like(level):
        levels = list(level)
    else:
        levels = [level]

    keys = tuple(key) if is_list_like(key) else (key,)
    if len(keys) > len(levels):
        # zip() below would silently drop the surplus key components and
        # return a broader selection than the caller asked for.
        raise ValueError(
            f"Key of length {len(keys)} exceeds the number of "
            f"levels to match ({len(levels)})"
        )

    # Indexing a range accepts any integer-like object, maps negative
    # positions to their non-negative equivalent and rejects out-of-range
    # values with IndexError.
    valid_positions = range(nlevels)
    selectors = [slice(None)] * nlevels
    matched = set()
    for component, level_ref in zip(keys, levels):
        try:
            pos = valid_positions[level_ref]
        except TypeError:
            # Not a position: resolve the label against the axis level names.
            try:
                pos = list(target_index.names).index(level_ref)
            except ValueError:
                raise KeyError(f"Level {level_ref} not found") from None
        selectors[pos] = component
        matched.add(pos)
    unmatched = set(valid_positions) - matched

    # A multi-level selection is passed to ``.loc`` as one tuple with an
    # entry per level; a single-level axis keeps the one-element list form.
    indexer = tuple(selectors) if len(selectors) > 1 else selectors

    res = df_or_series.loc(axis=axis)[indexer]
    if not drop_level:
        return res

    if unmatched:
        # Some levels remain: remove only the ones that were matched.
        return res.droplevel(sorted(matched), axis=axis)

    # Every level was matched, so reduce to the first entry along ``axis``.
    if res.ndim > 1:
        return res.iloc[0, :] if axis == 0 else res.iloc[:, 0]
    return res.iloc[0]
|
|
@@ -12,7 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .append import
|
|
15
|
+
from .append import append
|
|
16
|
+
from .combine_first import df_combine_first, series_combine_first
|
|
17
|
+
from .compare import DataFrameCompare, df_compare, series_compare
|
|
16
18
|
from .concat import DataFrameConcat, concat
|
|
17
19
|
from .merge import (
|
|
18
20
|
DataFrameMerge,
|
|
@@ -22,14 +24,23 @@ from .merge import (
|
|
|
22
24
|
join,
|
|
23
25
|
merge,
|
|
24
26
|
)
|
|
27
|
+
from .update import DataFrameUpdate, df_update, series_update
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
def _install():
|
|
28
31
|
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
29
32
|
|
|
30
33
|
for cls in DATAFRAME_TYPE:
|
|
34
|
+
setattr(cls, "combine_first", df_combine_first)
|
|
35
|
+
setattr(cls, "compare", df_compare)
|
|
31
36
|
setattr(cls, "join", join)
|
|
32
37
|
setattr(cls, "merge", merge)
|
|
38
|
+
setattr(cls, "update", df_update)
|
|
39
|
+
|
|
40
|
+
for cls in SERIES_TYPE:
|
|
41
|
+
setattr(cls, "combine_first", series_combine_first)
|
|
42
|
+
setattr(cls, "compare", series_compare)
|
|
43
|
+
setattr(cls, "update", series_update)
|
|
33
44
|
|
|
34
45
|
for cls in DATAFRAME_TYPE + SERIES_TYPE:
|
|
35
46
|
setattr(cls, "append", append)
|
|
@@ -14,108 +14,107 @@
|
|
|
14
14
|
|
|
15
15
|
import pandas as pd
|
|
16
16
|
|
|
17
|
-
from ... import opcodes
|
|
18
|
-
from ...core import OutputType
|
|
19
|
-
from ...serialization.serializables import BoolField
|
|
20
17
|
from ..datasource.dataframe import from_pandas
|
|
21
|
-
from ..operators import (
|
|
22
|
-
DATAFRAME_TYPE,
|
|
23
|
-
SERIES_TYPE,
|
|
24
|
-
DataFrameOperator,
|
|
25
|
-
DataFrameOperatorMixin,
|
|
26
|
-
)
|
|
27
|
-
from ..utils import parse_index
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class DataFrameAppend(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator describing the row-wise append of one or more objects to a
    DataFrame or a Series.
    """

    _op_type_ = opcodes.APPEND

    # Serializable flags carrying the keyword arguments given to ``append``.
    ignore_index = BoolField("ignore_index", default=False)
    verify_integrity = BoolField("verify_integrity", default=False)
    sort = BoolField("sort", default=False)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _call_dataframe(self, df, other):
        """
        Build the output DataFrame for ``df`` followed by ``other``.

        ``other`` is either a single DataFrame or a list of DataFrames;
        anything else raises ``ValueError``. The row count is the sum of all
        inputs' row counts, while dtypes and columns are taken from ``df``.
        """
        if isinstance(other, DATAFRAME_TYPE):
            total_rows = df.shape[0] + other.shape[0]
            operands = [df, other]
            if self.ignore_index:
                index_value = parse_index(pd.RangeIndex(total_rows))
            else:
                merged_index = df.index_value.to_pandas().append(
                    other.index_value.to_pandas()
                )
                index_value = parse_index(merged_index)
        elif isinstance(other, list):
            total_rows = df.shape[0]
            merged_index = df.index_value.to_pandas()
            for piece in other:
                if not isinstance(piece, DATAFRAME_TYPE):  # pragma: no cover
                    raise ValueError(f"Invalid type {type(piece)} to append")
                total_rows += piece.shape[0]
                merged_index = merged_index.append(piece.index_value.to_pandas())
            if self.ignore_index:  # pragma: no cover
                index_value = parse_index(pd.RangeIndex(total_rows))
            else:
                index_value = parse_index(merged_index)
            operands = [df] + other
        else:  # pragma: no cover
            raise ValueError(f"Invalid type {type(other)} to append")

        return self.new_dataframe(
            operands,
            shape=(total_rows, df.shape[1]),
            dtypes=df.dtypes,
            index_value=index_value,
            columns_value=df.columns_value,
        )

    def _call_series(self, df, other):
        """
        Build the output Series for ``df`` followed by ``other``.

        ``other`` is either a single Series or a list of Series; anything
        else raises ``ValueError``. The dtype and name are taken from ``df``.
        """
        if isinstance(other, SERIES_TYPE):
            total_rows = df.shape[0] + other.shape[0]
            operands = [df, other]
            if self.ignore_index:
                index_value = parse_index(pd.RangeIndex(total_rows))
            else:
                merged_index = df.index_value.to_pandas().append(
                    other.index_value.to_pandas()
                )
                index_value = parse_index(merged_index)
        elif isinstance(other, list):
            total_rows = df.shape[0]
            merged_index = df.index_value.to_pandas()
            for piece in other:
                if not isinstance(piece, SERIES_TYPE):  # pragma: no cover
                    raise ValueError(f"Invalid type {type(piece)} to append")
                total_rows += piece.shape[0]
                merged_index = merged_index.append(piece.index_value.to_pandas())
            if self.ignore_index:  # pragma: no cover
                index_value = parse_index(pd.RangeIndex(total_rows))
            else:
                index_value = parse_index(merged_index)
            operands = [df] + other
        else:  # pragma: no cover
            raise ValueError(f"Invalid type {type(other)} to append")

        return self.new_series(
            operands,
            shape=(total_rows,),
            dtype=df.dtype,
            index_value=index_value,
            name=df.name,
        )

    def __call__(self, df, other):
        """Dispatch on the type of ``df`` and record the matching output type."""
        if isinstance(df, DATAFRAME_TYPE):
            self.output_types = [OutputType.dataframe]
            return self._call_dataframe(df, other)
        self.output_types = [OutputType.series]
        return self._call_series(df, other)
|
|
111
18
|
|
|
112
19
|
|
|
113
20
|
def append(df, other, ignore_index=False, verify_integrity=False, sort=False):
    """
    Append rows of `other` to the end of caller, returning a new object.

    Columns in `other` that are not in the caller are added as new columns.

    Parameters
    ----------
    other : DataFrame or Series/dict-like object, or list/tuple of these
        The data to append. A dict is treated as a single row.
    ignore_index : bool, default False
        If True, the resulting axis will be labeled 0, 1, …, n - 1.
    verify_integrity : bool, default False
        If True, raise ValueError on creating index with duplicates.
    sort : bool, default False
        Sort columns if the columns of `self` and `other` are not aligned.

    Returns
    -------
    DataFrame
        A new DataFrame consisting of the rows of caller and the rows of `other`.

    See Also
    --------
    concat : General function to concatenate DataFrame or Series objects.

    Notes
    -----
    If a list of dict/series is passed and the keys are all contained in
    the DataFrame's index, the order of the columns in the resulting
    DataFrame will be unchanged.

    Iteratively appending rows to a DataFrame can be more computationally
    intensive than a single concatenate. A better solution is to append
    those rows to a list and then concatenate the list with the original
    DataFrame all at once.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y'])
    >>> df.execute()
       A  B
    x  1  2
    y  3  4
    >>> df2 = md.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y'])
    >>> df.append(df2).execute()
       A  B
    x  1  2
    y  3  4
    x  5  6
    y  7  8

    With `ignore_index` set to True:

    >>> df.append(df2, ignore_index=True).execute()
       A  B
    0  1  2
    1  3  4
    2  5  6
    3  7  8

    The following, while not recommended methods for generating DataFrames,
    show two ways to generate a DataFrame from multiple data sources.

    Less efficient:

    >>> df = md.DataFrame(columns=['A'])
    >>> for i in range(5):
    ...     df = df.append({'A': i}, ignore_index=True)
    >>> df.execute()
       A
    0  0
    1  1
    2  2
    3  3
    4  4

    More efficient:

    >>> md.concat([md.DataFrame([i], columns=['A']) for i in range(5)],
    ...           ignore_index=True).execute()
       A
    0  0
    1  1
    2  2
    3  3
    4  4
    """
    from .concat import concat

    if isinstance(other, dict):
        # A plain dict describes one row: wrap every value in a one-element
        # list so it becomes a single-row DataFrame.
        other = from_pandas(pd.DataFrame({k: [v] for k, v in other.items()}))

    # Accept tuples as well as lists of objects; previously a tuple was
    # wrapped as one element and handed to concat as-is.
    if isinstance(other, (list, tuple)):
        pieces = list(other)
    else:
        pieces = [other]

    return concat(
        [df] + pieces,
        ignore_index=ignore_index,
        verify_integrity=verify_integrity,
        sort=sort,
    )
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def df_combine_first(df, other):
    """
    Fill null elements of a DataFrame with values from `other`.

    Values already present in the caller are kept; only null positions are
    filled from the same location in `other`. The row and column indexes of
    the result are the union of those of the two DataFrames, so calling
    ``first.combine_first(second)`` prefers ``first.loc[index, col]``
    whenever both it and ``second.loc[index, col]`` are non-missing.

    Parameters
    ----------
    other : DataFrame
        Provided DataFrame to use to fill null values.

    Returns
    -------
    DataFrame
        The result of combining the provided DataFrame with the other object.

    See Also
    --------
    DataFrame.combine : Perform series-wise operation on two DataFrames
        using a given function.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df1 = md.DataFrame({'A': [None, 0], 'B': [None, 4]})
    >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> df1.combine_first(df2).execute()
         A    B
    0  1.0  3.0
    1  0.0  4.0

    Null values still persist if the location of that null value
    does not exist in `other`

    >>> df1 = md.DataFrame({'A': [None, 0], 'B': [4, None]})
    >>> df2 = md.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2])
    >>> df1.combine_first(df2).execute()
         A    B    C
    0  NaN  4.0  NaN
    1  0.0  3.0  1.0
    2  NaN  3.0  1.0
    """
    # Work on a copy so the caller's object is left untouched by update().
    combined = df.copy()
    # Outer join with overwrite disabled: existing values are kept and only
    # the gaps are filled from `other`.
    combined.update(other, join="outer", overwrite=False)
    return combined
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def series_combine_first(series, other):
    """
    Fill null elements of a Series with values from `other`.

    Null values in the caller are replaced by the non-null values found at
    the same location in `other`. The index of the result is the union of
    the two indexes.

    Parameters
    ----------
    other : Series
        The value(s) to be used for filling null values.

    Returns
    -------
    Series
        The result of combining the provided Series with the other object.

    See Also
    --------
    Series.combine : Perform element-wise operation on two Series
        using a given function.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s1 = md.Series([1, mt.nan])
    >>> s2 = md.Series([3, 4, 5])
    >>> s1.combine_first(s2).execute()
    0    1.0
    1    4.0
    2    5.0
    dtype: float64

    Null values still persist if the location of that null value
    does not exist in `other`

    >>> s1 = md.Series({'falcon': mt.nan, 'eagle': 160.0})
    >>> s2 = md.Series({'eagle': 200.0, 'duck': 30.0})
    >>> s1.combine_first(s2).execute()
    duck       30.0
    eagle     160.0
    falcon      NaN
    dtype: float64
    """
    combined = series.copy()
    combined.update(other)
    # Series.update takes no join/overwrite arguments, so set them directly
    # on the operator that update() attached to the result.
    update_op = combined.op
    update_op.join = "outer"
    update_op.overwrite = False
    return combined
|