maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Tuple, Union
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from maxframe.utils import pd_release_version
|
|
21
|
+
|
|
22
|
+
from ... import opcodes
|
|
23
|
+
from ...core import OutputType
|
|
24
|
+
from ...serialization.serializables import AnyField, BoolField, TupleField
|
|
25
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
26
|
+
from ..utils import build_df, build_series, parse_index, validate_axis
|
|
27
|
+
|
|
28
|
+
# True when the installed pandas (per ``pd_release_version``) is at least
# 1.5.0. ``_compare_with_result_names`` consults this flag to decide whether
# it has to handle ``result_names`` itself instead of relying on pandas.
_compare_has_result_names = pd_release_version >= (1, 5, 0)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _compare_with_result_names(left, right, *args, **kwargs):
    """
    Call ``left.compare(right, ...)`` and honor ``result_names`` even when
    the installed pandas does not accept that keyword.

    Parameters
    ----------
    left, right
        Objects to compare; ``left.compare`` performs the comparison.
    *args, **kwargs
        Forwarded to ``left.compare``. ``result_names`` is intercepted here;
        ``None`` (or a missing key) means "keep the default labels".

    Returns
    -------
    The object returned by ``left.compare``, with the ``'self'`` / ``'other'``
    labels on the aligned axis replaced by ``result_names`` when requested.
    """
    result_names = kwargs.pop("result_names", None)

    if _compare_has_result_names:
        # Only forward the argument when the caller actually supplied one,
        # so that None falls back to the pandas default labels.
        if result_names is not None:
            kwargs["result_names"] = result_names
        return left.compare(right, *args, **kwargs)

    # Older pandas: ``compare`` has no ``result_names`` keyword, so it must
    # never be forwarded. Relabel the result by hand instead.
    res = left.compare(right, *args, **kwargs)
    if not result_names:
        return res

    axis = kwargs.get("align_axis", 1)
    if isinstance(axis, str):
        # ``res.axes`` is a plain list and needs an integer position.
        axis = 0 if axis in ("index", "rows") else 1

    old_axis = res.axes[axis]
    if len(old_axis) == 0:
        # Nothing to relabel.
        return res

    name_map = dict(zip(["self", "other"], result_names))
    if isinstance(old_axis, pd.MultiIndex):
        # The 'self' / 'other' labels live in the innermost LEVEL of the
        # aligned axis; remap that level and keep the outer levels as is.
        level_values = [
            old_axis.get_level_values(i) for i in range(old_axis.nlevels)
        ]
        level_values[-1] = level_values[-1].map(name_map)
        new_axis = pd.MultiIndex.from_arrays(level_values, names=old_axis.names)
    else:
        # Flat axis that only holds the 'self' / 'other' labels.
        new_axis = old_axis.map(name_map)

    # ``res.axes`` returns a fresh list, so assigning into it would not
    # change ``res``; build a relabeled object instead.
    return res.set_axis(new_axis, axis=axis)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class DataFrameCompare(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator that compares two DataFrames (or two Series) and produces an
    object describing their differences.

    The stored fields mirror the arguments of pandas ``compare``.
    """

    _op_type_ = opcodes.DATAFRAME_COMPARE

    # Axis the comparison is aligned on.
    align_axis = AnyField("align_axis", default=None)
    # Whether all rows and columns are kept in the result.
    keep_shape = BoolField("keep_shape", default=None)
    # Whether equal values are kept in the result.
    keep_equal = BoolField("keep_equal", default=None)
    # Labels used in place of 'self' and 'other' in the result.
    result_names = TupleField("result_names", default=None)

    def __init__(self, output_types=None, **kwargs):
        super().__init__(_output_types=output_types, **kwargs)

    def __call__(self, df_or_series, other):
        """
        Infer the metadata of the comparison result and build the output.

        Parameters
        ----------
        df_or_series
            Left operand; its ``ndim`` selects Series or DataFrame handling.
        other
            Right operand, expected to be of the same kind as the left one.

        Returns
        -------
        The output created through ``new_dataframe`` when the comparison
        result is two-dimensional, otherwise through ``new_series``.
        """
        index_tokenize_objects = [
            df_or_series,
            other,
            self.align_axis,
            self.keep_shape,
            self.keep_equal,
            self.result_names,
        ]

        # Build empty objects for validation and output inference
        if df_or_series.ndim == 1:
            empty_left = build_series(df_or_series)
            empty_right = build_series(other)
        else:
            empty_left = build_df(df_or_series)
            empty_right = build_df(other)

        # Validate arguments by calling pandas compare
        compared = _compare_with_result_names(
            empty_left,
            empty_right,
            align_axis=self.align_axis,
            keep_shape=True,  # keep dims
            keep_equal=self.keep_equal,
            result_names=self.result_names,
        )

        self._output_types = [
            OutputType.dataframe if compared.ndim == 2 else OutputType.series
        ]

        if compared.ndim == 1:
            # A Series result has no column axis. Handle it before the
            # keep_shape branch below, which reads ``compared.columns`` and
            # would fail on a Series.
            if self.keep_shape:
                index_value = parse_index(compared.index, *index_tokenize_objects)
            else:
                index_value = parse_index(compared.index, store_data=True)
            return self.new_series(
                [df_or_series, other],
                shape=(np.nan,),
                dtype=compared.dtype,
                index_value=index_value,
                name=compared.name,
            )

        index_value = columns_value = dtypes = None
        shape = [np.nan, np.nan]
        if self.keep_shape or (df_or_series.ndim == 1 and self.align_axis == 1):
            # In these cases the columns inferred from the empty inputs are
            # used as the columns of the output.
            index_value = parse_index(compared.index, *index_tokenize_objects)
            columns_value = parse_index(compared.columns, store_data=True)
            dtypes = compared.dtypes
            shape[1] = len(dtypes)

        return self.new_dataframe(
            [df_or_series, other],
            shape=tuple(shape),
            dtypes=dtypes,
            index_value=index_value,
            columns_value=columns_value,
        )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _compare(
    df_or_series,
    other,
    align_axis: Union[int, str] = 1,
    keep_shape: bool = False,
    keep_equal: bool = False,
    result_names: Tuple[str, str] = None,
):
    """
    Shared implementation behind the DataFrame and Series ``compare`` entry
    points.

    Passes ``align_axis`` through ``validate_axis``, creates a
    ``DataFrameCompare`` operator from the options and applies it to the
    two operands.
    """
    op_options = dict(
        align_axis=validate_axis(align_axis),
        keep_shape=keep_shape,
        keep_equal=keep_equal,
        result_names=result_names,
    )
    compare_op = DataFrameCompare(**op_options)
    return compare_op(df_or_series, other)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def df_compare(
    df,
    other,
    align_axis: Union[int, str] = 1,
    keep_shape: bool = False,
    keep_equal: bool = False,
    result_names: Tuple[str, str] = ("self", "other"),
):
    """
    Compare to another DataFrame and show the differences.

    Parameters
    ----------
    other : DataFrame
        Object to compare with.

    align_axis : {0 or 'index', 1 or 'columns'}, default 1
        Axis on which the comparison is aligned.

        * 0, or 'index' : differences are stacked vertically, with rows
          drawn alternately from self and other.
        * 1, or 'columns' : differences are aligned horizontally, with
          columns drawn alternately from self and other.

    keep_shape : bool, default False
        If true, all rows and columns are kept.
        Otherwise, only the ones with different values are kept.

    keep_equal : bool, default False
        If true, the result keeps values that are equal.
        Otherwise, equal values are shown as NaNs.

    result_names : tuple, default ('self', 'other')
        Names given to the two DataFrames in the comparison.

    Returns
    -------
    DataFrame
        DataFrame that shows the differences stacked side by side.

        The resulting index will be a MultiIndex with 'self' and 'other'
        stacked alternately at the inner level.

    Raises
    ------
    ValueError
        When the two DataFrames don't have identical labels or shape.

    See Also
    --------
    Series.compare : Compare with another Series and show differences.
    DataFrame.equals : Test whether two objects contain the same elements.

    Notes
    -----
    Matching NaNs will not appear as a difference.

    Can only compare identically-labeled
    (i.e. same shape, identical row and column labels) DataFrames

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame(
    ...     {
    ...         "col1": ["a", "a", "b", "b", "a"],
    ...         "col2": [1.0, 2.0, 3.0, mt.nan, 5.0],
    ...         "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
    ...     },
    ...     columns=["col1", "col2", "col3"],
    ... )
    >>> df.execute()
      col1  col2  col3
    0    a   1.0   1.0
    1    a   2.0   2.0
    2    b   3.0   3.0
    3    b   NaN   4.0
    4    a   5.0   5.0

    >>> df2 = df.copy()
    >>> df2.loc[0, 'col1'] = 'c'
    >>> df2.loc[2, 'col3'] = 4.0
    >>> df2.execute()
      col1  col2  col3
    0    c   1.0   1.0
    1    a   2.0   2.0
    2    b   3.0   4.0
    3    b   NaN   4.0
    4    a   5.0   5.0

    Align the differences on columns

    >>> df.compare(df2).execute()
      col1       col3
      self other self other
    0    a     c  NaN   NaN
    2  NaN   NaN  3.0   4.0

    Stack the differences on rows

    >>> df.compare(df2, align_axis=0).execute()
            col1  col3
    0 self     a   NaN
      other    c   NaN
    2 self   NaN   3.0
      other  NaN   4.0

    Keep the equal values

    >>> df.compare(df2, keep_equal=True).execute()
      col1       col3
      self other self other
    0    a     c  1.0   1.0
    2    b     b  3.0   4.0

    Keep all original rows and columns

    >>> df.compare(df2, keep_shape=True).execute()
      col1       col2       col3
      self other self other self other
    0    a     c  NaN   NaN  NaN   NaN
    1  NaN   NaN  NaN   NaN  NaN   NaN
    2  NaN   NaN  NaN   NaN  3.0   4.0
    3  NaN   NaN  NaN   NaN  NaN   NaN
    4  NaN   NaN  NaN   NaN  NaN   NaN

    Keep all original rows and columns and also all original values

    >>> df.compare(df2, keep_shape=True, keep_equal=True).execute()
      col1       col2       col3
      self other self other self other
    0    a     c  1.0   1.0  1.0   1.0
    1    a     a  2.0   2.0  2.0   2.0
    2    b     b  3.0   3.0  3.0   4.0
    3    b     b  NaN   NaN  4.0   4.0
    4    a     a  5.0   5.0  5.0   5.0
    """
    compare_options = dict(
        align_axis=align_axis,
        keep_shape=keep_shape,
        keep_equal=keep_equal,
        result_names=result_names,
    )
    return _compare(df, other, **compare_options)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def series_compare(
|
|
286
|
+
series,
|
|
287
|
+
other,
|
|
288
|
+
align_axis: Union[int, str] = 1,
|
|
289
|
+
keep_shape: bool = False,
|
|
290
|
+
keep_equal: bool = False,
|
|
291
|
+
result_names: Tuple[str, str] = ("self", "other"),
|
|
292
|
+
):
|
|
293
|
+
"""
|
|
294
|
+
Compare to another Series and show the differences.
|
|
295
|
+
|
|
296
|
+
Parameters
|
|
297
|
+
----------
|
|
298
|
+
other : Series
|
|
299
|
+
Object to compare with.
|
|
300
|
+
|
|
301
|
+
align_axis : {0 or 'index', 1 or 'columns'}, default 1
|
|
302
|
+
Determine which axis to align the comparison on.
|
|
303
|
+
|
|
304
|
+
* 0, or 'index' : Resulting differences are stacked vertically
|
|
305
|
+
with rows drawn alternately from self and other.
|
|
306
|
+
* 1, or 'columns' : Resulting differences are aligned horizontally
|
|
307
|
+
with columns drawn alternately from self and other.
|
|
308
|
+
|
|
309
|
+
keep_shape : bool, default False
|
|
310
|
+
If true, all rows and columns are kept.
|
|
311
|
+
Otherwise, only the ones with different values are kept.
|
|
312
|
+
|
|
313
|
+
keep_equal : bool, default False
|
|
314
|
+
If true, the result keeps values that are equal.
|
|
315
|
+
Otherwise, equal values are shown as NaNs.
|
|
316
|
+
|
|
317
|
+
result_names : tuple, default ('self', 'other')
|
|
318
|
+
Set the names used for the two compared Series in the result.
|
|
319
|
+
|
|
320
|
+
Returns
|
|
321
|
+
-------
|
|
322
|
+
Series or DataFrame
|
|
323
|
+
If align_axis is 0 or 'index' the result will be a Series.
|
|
324
|
+
The resulting index will be a MultiIndex with 'self' and 'other'
|
|
325
|
+
stacked alternately at the inner level.
|
|
326
|
+
|
|
327
|
+
If align_axis is 1 or 'columns' the result will be a DataFrame.
|
|
328
|
+
It will have two columns namely 'self' and 'other'.
|
|
329
|
+
|
|
330
|
+
See Also
|
|
331
|
+
--------
|
|
332
|
+
DataFrame.compare : Compare with another DataFrame and show differences.
|
|
333
|
+
|
|
334
|
+
Notes
|
|
335
|
+
-----
|
|
336
|
+
Matching NaNs will not appear as a difference.
|
|
337
|
+
|
|
338
|
+
Examples
|
|
339
|
+
--------
|
|
340
|
+
>>> import maxframe.dataframe as md
|
|
341
|
+
>>> s1 = md.Series(["a", "b", "c", "d", "e"])
|
|
342
|
+
>>> s2 = md.Series(["a", "a", "c", "b", "e"])
|
|
343
|
+
|
|
344
|
+
Align the differences on columns
|
|
345
|
+
|
|
346
|
+
>>> s1.compare(s2).execute()
|
|
347
|
+
self other
|
|
348
|
+
1 b a
|
|
349
|
+
3 d b
|
|
350
|
+
|
|
351
|
+
Stack the differences on indices
|
|
352
|
+
|
|
353
|
+
>>> s1.compare(s2, align_axis=0).execute()
|
|
354
|
+
1 self b
|
|
355
|
+
other a
|
|
356
|
+
3 self d
|
|
357
|
+
other b
|
|
358
|
+
dtype: object
|
|
359
|
+
|
|
360
|
+
Keep all original rows
|
|
361
|
+
|
|
362
|
+
>>> s1.compare(s2, keep_shape=True).execute()
|
|
363
|
+
self other
|
|
364
|
+
0 NaN NaN
|
|
365
|
+
1 b a
|
|
366
|
+
2 NaN NaN
|
|
367
|
+
3 d b
|
|
368
|
+
4 NaN NaN
|
|
369
|
+
|
|
370
|
+
Keep all original rows and also all original values
|
|
371
|
+
|
|
372
|
+
>>> s1.compare(s2, keep_shape=True, keep_equal=True).execute()
|
|
373
|
+
self other
|
|
374
|
+
0 a a
|
|
375
|
+
1 b a
|
|
376
|
+
2 c c
|
|
377
|
+
3 d b
|
|
378
|
+
4 e e
|
|
379
|
+
"""
|
|
380
|
+
return _compare(
|
|
381
|
+
series,
|
|
382
|
+
other,
|
|
383
|
+
align_axis=align_axis,
|
|
384
|
+
keep_shape=keep_shape,
|
|
385
|
+
keep_equal=keep_equal,
|
|
386
|
+
result_names=result_names,
|
|
387
|
+
)
|
|
@@ -288,6 +288,189 @@ def concat(
|
|
|
288
288
|
sort=False,
|
|
289
289
|
copy=True,
|
|
290
290
|
):
|
|
291
|
+
"""
|
|
292
|
+
Concatenate dataframe objects along a particular axis with optional set logic
|
|
293
|
+
along the other axes.
|
|
294
|
+
|
|
295
|
+
Can also add a layer of hierarchical indexing on the concatenation axis,
|
|
296
|
+
which may be useful if the labels are the same (or overlapping) on
|
|
297
|
+
the passed axis number.
|
|
298
|
+
|
|
299
|
+
Parameters
|
|
300
|
+
----------
|
|
301
|
+
objs : a sequence or mapping of Series or DataFrame objects
|
|
302
|
+
If a mapping is passed, the sorted keys will be used as the `keys`
|
|
303
|
+
argument, unless it is passed, in which case the values will be
|
|
304
|
+
selected (see below). Any None objects will be dropped silently unless
|
|
305
|
+
they are all None in which case a ValueError will be raised.
|
|
306
|
+
axis : {0/'index', 1/'columns'}, default 0
|
|
307
|
+
The axis to concatenate along.
|
|
308
|
+
join : {'inner', 'outer'}, default 'outer'
|
|
309
|
+
How to handle indexes on other axis (or axes).
|
|
310
|
+
ignore_index : bool, default False
|
|
311
|
+
If True, do not use the index values along the concatenation axis. The
|
|
312
|
+
resulting axis will be labeled 0, ..., n - 1. This is useful if you are
|
|
313
|
+
concatenating objects where the concatenation axis does not have
|
|
314
|
+
meaningful indexing information. Note the index values on the other
|
|
315
|
+
axes are still respected in the join.
|
|
316
|
+
keys : sequence, default None
|
|
317
|
+
If multiple levels passed, should contain tuples. Construct
|
|
318
|
+
hierarchical index using the passed keys as the outermost level.
|
|
319
|
+
levels : list of sequences, default None
|
|
320
|
+
Specific levels (unique values) to use for constructing a
|
|
321
|
+
MultiIndex. Otherwise they will be inferred from the keys.
|
|
322
|
+
names : list, default None
|
|
323
|
+
Names for the levels in the resulting hierarchical index.
|
|
324
|
+
verify_integrity : bool, default False
|
|
325
|
+
Check whether the new concatenated axis contains duplicates. This can
|
|
326
|
+
be very expensive relative to the actual data concatenation.
|
|
327
|
+
sort : bool, default False
|
|
328
|
+
Sort non-concatenation axis if it is not already aligned when `join`
|
|
329
|
+
is 'outer'.
|
|
330
|
+
This has no effect when ``join='inner'``, which already preserves
|
|
331
|
+
the order of the non-concatenation axis.
|
|
332
|
+
copy : bool, default True
|
|
333
|
+
If False, do not copy data unnecessarily.
|
|
334
|
+
|
|
335
|
+
Returns
|
|
336
|
+
-------
|
|
337
|
+
object, type of objs
|
|
338
|
+
When concatenating all ``Series`` along the index (axis=0), a
|
|
339
|
+
``Series`` is returned. When ``objs`` contains at least one
|
|
340
|
+
``DataFrame``, a ``DataFrame`` is returned. When concatenating along
|
|
341
|
+
the columns (axis=1), a ``DataFrame`` is returned.
|
|
342
|
+
|
|
343
|
+
See Also
|
|
344
|
+
--------
|
|
345
|
+
Series.append : Concatenate Series.
|
|
346
|
+
DataFrame.append : Concatenate DataFrames.
|
|
347
|
+
DataFrame.join : Join DataFrames using indexes.
|
|
348
|
+
DataFrame.merge : Merge DataFrames by indexes or columns.
|
|
349
|
+
|
|
350
|
+
Notes
|
|
351
|
+
-----
|
|
352
|
+
The keys, levels, and names arguments are all optional.
|
|
353
|
+
|
|
354
|
+
A walkthrough of how this method fits in with other tools for combining
|
|
355
|
+
pandas objects can be found `here
|
|
356
|
+
<https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.
|
|
357
|
+
|
|
358
|
+
Examples
|
|
359
|
+
--------
|
|
360
|
+
Combine two ``Series``.
|
|
361
|
+
|
|
362
|
+
>>> import maxframe.dataframe as md
|
|
363
|
+
>>> s1 = md.Series(['a', 'b'])
|
|
364
|
+
>>> s2 = md.Series(['c', 'd'])
|
|
365
|
+
>>> md.concat([s1, s2]).execute()
|
|
366
|
+
0 a
|
|
367
|
+
1 b
|
|
368
|
+
0 c
|
|
369
|
+
1 d
|
|
370
|
+
dtype: object
|
|
371
|
+
|
|
372
|
+
Clear the existing index and reset it in the result
|
|
373
|
+
by setting the ``ignore_index`` option to ``True``.
|
|
374
|
+
|
|
375
|
+
>>> md.concat([s1, s2], ignore_index=True).execute()
|
|
376
|
+
0 a
|
|
377
|
+
1 b
|
|
378
|
+
2 c
|
|
379
|
+
3 d
|
|
380
|
+
dtype: object
|
|
381
|
+
|
|
382
|
+
Add a hierarchical index at the outermost level of
|
|
383
|
+
the data with the ``keys`` option.
|
|
384
|
+
|
|
385
|
+
>>> md.concat([s1, s2], keys=['s1', 's2']).execute()
|
|
386
|
+
s1 0 a
|
|
387
|
+
1 b
|
|
388
|
+
s2 0 c
|
|
389
|
+
1 d
|
|
390
|
+
dtype: object
|
|
391
|
+
|
|
392
|
+
Label the index keys you create with the ``names`` option.
|
|
393
|
+
|
|
394
|
+
>>> md.concat([s1, s2], keys=['s1', 's2'],
|
|
395
|
+
... names=['Series name', 'Row ID']).execute()
|
|
396
|
+
Series name Row ID
|
|
397
|
+
s1 0 a
|
|
398
|
+
1 b
|
|
399
|
+
s2 0 c
|
|
400
|
+
1 d
|
|
401
|
+
dtype: object
|
|
402
|
+
|
|
403
|
+
Combine two ``DataFrame`` objects with identical columns.
|
|
404
|
+
|
|
405
|
+
>>> df1 = md.DataFrame([['a', 1], ['b', 2]],
|
|
406
|
+
... columns=['letter', 'number'])
|
|
407
|
+
>>> df1.execute()
|
|
408
|
+
letter number
|
|
409
|
+
0 a 1
|
|
410
|
+
1 b 2
|
|
411
|
+
>>> df2 = md.DataFrame([['c', 3], ['d', 4]],
|
|
412
|
+
... columns=['letter', 'number'])
|
|
413
|
+
>>> df2.execute()
|
|
414
|
+
letter number
|
|
415
|
+
0 c 3
|
|
416
|
+
1 d 4
|
|
417
|
+
>>> md.concat([df1, df2]).execute()
|
|
418
|
+
letter number
|
|
419
|
+
0 a 1
|
|
420
|
+
1 b 2
|
|
421
|
+
0 c 3
|
|
422
|
+
1 d 4
|
|
423
|
+
|
|
424
|
+
Combine ``DataFrame`` objects with overlapping columns
|
|
425
|
+
and return everything. Columns outside the intersection will
|
|
426
|
+
be filled with ``NaN`` values.
|
|
427
|
+
|
|
428
|
+
>>> df3 = md.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
|
|
429
|
+
... columns=['letter', 'number', 'animal'])
|
|
430
|
+
>>> df3.execute()
|
|
431
|
+
letter number animal
|
|
432
|
+
0 c 3 cat
|
|
433
|
+
1 d 4 dog
|
|
434
|
+
>>> md.concat([df1, df3], sort=False).execute()
|
|
435
|
+
letter number animal
|
|
436
|
+
0 a 1 NaN
|
|
437
|
+
1 b 2 NaN
|
|
438
|
+
0 c 3 cat
|
|
439
|
+
1 d 4 dog
|
|
440
|
+
|
|
441
|
+
Combine ``DataFrame`` objects with overlapping columns
|
|
442
|
+
and return only those that are shared by passing ``inner`` to
|
|
443
|
+
the ``join`` keyword argument.
|
|
444
|
+
|
|
445
|
+
>>> md.concat([df1, df3], join="inner").execute()
|
|
446
|
+
letter number
|
|
447
|
+
0 a 1
|
|
448
|
+
1 b 2
|
|
449
|
+
0 c 3
|
|
450
|
+
1 d 4
|
|
451
|
+
|
|
452
|
+
Combine ``DataFrame`` objects horizontally along the x axis by
|
|
453
|
+
passing in ``axis=1``.
|
|
454
|
+
|
|
455
|
+
>>> df4 = md.DataFrame([['bird', 'polly'], ['monkey', 'george']],
|
|
456
|
+
... columns=['animal', 'name'])
|
|
457
|
+
>>> md.concat([df1, df4], axis=1).execute()
|
|
458
|
+
letter number animal name
|
|
459
|
+
0 a 1 bird polly
|
|
460
|
+
1 b 2 monkey george
|
|
461
|
+
|
|
462
|
+
Prevent the result from including duplicate index values with the
|
|
463
|
+
``verify_integrity`` option.
|
|
464
|
+
|
|
465
|
+
>>> df5 = md.DataFrame([1], index=['a'])
|
|
466
|
+
>>> df5.execute()
|
|
467
|
+
0
|
|
468
|
+
a 1
|
|
469
|
+
>>> df6 = md.DataFrame([2], index=['a'])
|
|
470
|
+
>>> df6.execute()
|
|
471
|
+
0
|
|
472
|
+
a 2
|
|
473
|
+
"""
|
|
291
474
|
if not isinstance(objs, (list, tuple)): # pragma: no cover
|
|
292
475
|
raise TypeError(
|
|
293
476
|
"first argument must be an iterable of dataframe or series objects"
|