maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -14,108 +14,107 @@
|
|
|
14
14
|
|
|
15
15
|
import pandas as pd
|
|
16
16
|
|
|
17
|
-
from ... import opcodes
|
|
18
|
-
from ...core import OutputType
|
|
19
|
-
from ...serialization.serializables import BoolField
|
|
20
17
|
from ..datasource.dataframe import from_pandas
|
|
21
|
-
from ..operators import (
|
|
22
|
-
DATAFRAME_TYPE,
|
|
23
|
-
SERIES_TYPE,
|
|
24
|
-
DataFrameOperator,
|
|
25
|
-
DataFrameOperatorMixin,
|
|
26
|
-
)
|
|
27
|
-
from ..utils import parse_index
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class DataFrameAppend(DataFrameOperator, DataFrameOperatorMixin):
|
|
31
|
-
_op_type_ = opcodes.APPEND
|
|
32
|
-
|
|
33
|
-
ignore_index = BoolField("ignore_index", default=False)
|
|
34
|
-
verify_integrity = BoolField("verify_integrity", default=False)
|
|
35
|
-
sort = BoolField("sort", default=False)
|
|
36
|
-
|
|
37
|
-
def __init__(self, output_types=None, **kw):
|
|
38
|
-
super().__init__(_output_types=output_types, **kw)
|
|
39
|
-
|
|
40
|
-
def _call_dataframe(self, df, other):
|
|
41
|
-
if isinstance(other, DATAFRAME_TYPE):
|
|
42
|
-
shape = (df.shape[0] + other.shape[0], df.shape[1])
|
|
43
|
-
inputs = [df, other]
|
|
44
|
-
if self.ignore_index:
|
|
45
|
-
index_value = parse_index(pd.RangeIndex(shape[0]))
|
|
46
|
-
else:
|
|
47
|
-
index_value = parse_index(
|
|
48
|
-
df.index_value.to_pandas().append(other.index_value.to_pandas())
|
|
49
|
-
)
|
|
50
|
-
elif isinstance(other, list):
|
|
51
|
-
row_length = df.shape[0]
|
|
52
|
-
index = df.index_value.to_pandas()
|
|
53
|
-
for item in other:
|
|
54
|
-
if not isinstance(item, DATAFRAME_TYPE): # pragma: no cover
|
|
55
|
-
raise ValueError(f"Invalid type {type(item)} to append")
|
|
56
|
-
row_length += item.shape[0]
|
|
57
|
-
index = index.append(item.index_value.to_pandas())
|
|
58
|
-
shape = (row_length, df.shape[1])
|
|
59
|
-
if self.ignore_index: # pragma: no cover
|
|
60
|
-
index_value = parse_index(pd.RangeIndex(shape[0]))
|
|
61
|
-
else:
|
|
62
|
-
index_value = parse_index(index)
|
|
63
|
-
inputs = [df] + other
|
|
64
|
-
else: # pragma: no cover
|
|
65
|
-
raise ValueError(f"Invalid type {type(other)} to append")
|
|
66
|
-
return self.new_dataframe(
|
|
67
|
-
inputs,
|
|
68
|
-
shape=shape,
|
|
69
|
-
dtypes=df.dtypes,
|
|
70
|
-
index_value=index_value,
|
|
71
|
-
columns_value=df.columns_value,
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
def _call_series(self, df, other):
|
|
75
|
-
if isinstance(other, SERIES_TYPE):
|
|
76
|
-
shape = (df.shape[0] + other.shape[0],)
|
|
77
|
-
inputs = [df, other]
|
|
78
|
-
if self.ignore_index:
|
|
79
|
-
index_value = parse_index(pd.RangeIndex(shape[0]))
|
|
80
|
-
else:
|
|
81
|
-
index_value = parse_index(
|
|
82
|
-
df.index_value.to_pandas().append(other.index_value.to_pandas())
|
|
83
|
-
)
|
|
84
|
-
elif isinstance(other, list):
|
|
85
|
-
row_length = df.shape[0]
|
|
86
|
-
index = df.index_value.to_pandas()
|
|
87
|
-
for item in other:
|
|
88
|
-
if not isinstance(item, SERIES_TYPE): # pragma: no cover
|
|
89
|
-
raise ValueError(f"Invalid type {type(item)} to append")
|
|
90
|
-
row_length += item.shape[0]
|
|
91
|
-
index = index.append(item.index_value.to_pandas())
|
|
92
|
-
shape = (row_length,)
|
|
93
|
-
if self.ignore_index: # pragma: no cover
|
|
94
|
-
index_value = parse_index(pd.RangeIndex(shape[0]))
|
|
95
|
-
else:
|
|
96
|
-
index_value = parse_index(index)
|
|
97
|
-
inputs = [df] + other
|
|
98
|
-
else: # pragma: no cover
|
|
99
|
-
raise ValueError(f"Invalid type {type(other)} to append")
|
|
100
|
-
return self.new_series(
|
|
101
|
-
inputs, shape=shape, dtype=df.dtype, index_value=index_value, name=df.name
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
def __call__(self, df, other):
|
|
105
|
-
if isinstance(df, DATAFRAME_TYPE):
|
|
106
|
-
self.output_types = [OutputType.dataframe]
|
|
107
|
-
return self._call_dataframe(df, other)
|
|
108
|
-
else:
|
|
109
|
-
self.output_types = [OutputType.series]
|
|
110
|
-
return self._call_series(df, other)
|
|
111
18
|
|
|
112
19
|
|
|
113
20
|
def append(df, other, ignore_index=False, verify_integrity=False, sort=False):
|
|
114
|
-
|
|
115
|
-
|
|
21
|
+
"""
|
|
22
|
+
Append rows of `other` to the end of caller, returning a new object.
|
|
23
|
+
|
|
24
|
+
Columns in `other` that are not in the caller are added as new columns.
|
|
25
|
+
|
|
26
|
+
Parameters
|
|
27
|
+
----------
|
|
28
|
+
other : DataFrame or Series/dict-like object, or list of these
|
|
29
|
+
The data to append.
|
|
30
|
+
ignore_index : bool, default False
|
|
31
|
+
If True, the resulting axis will be labeled 0, 1, …, n - 1.
|
|
32
|
+
verify_integrity : bool, default False
|
|
33
|
+
If True, raise ValueError on creating index with duplicates.
|
|
34
|
+
sort : bool, default False
|
|
35
|
+
Sort columns if the columns of `self` and `other` are not aligned.
|
|
36
|
+
|
|
37
|
+
Returns
|
|
38
|
+
-------
|
|
39
|
+
DataFrame
|
|
40
|
+
A new DataFrame consisting of the rows of caller and the rows of `other`.
|
|
41
|
+
|
|
42
|
+
See Also
|
|
43
|
+
--------
|
|
44
|
+
concat : General function to concatenate DataFrame or Series objects.
|
|
45
|
+
|
|
46
|
+
Notes
|
|
47
|
+
-----
|
|
48
|
+
If a list of dict/series is passed and the keys are all contained in
|
|
49
|
+
the DataFrame's index, the order of the columns in the resulting
|
|
50
|
+
DataFrame will be unchanged.
|
|
51
|
+
|
|
52
|
+
Iteratively appending rows to a DataFrame can be more computationally
|
|
53
|
+
intensive than a single concatenate. A better solution is to append
|
|
54
|
+
those rows to a list and then concatenate the list with the original
|
|
55
|
+
DataFrame all at once.
|
|
56
|
+
|
|
57
|
+
Examples
|
|
58
|
+
--------
|
|
59
|
+
>>> import maxframe.dataframe as md
|
|
60
|
+
>>> df = md.DataFrame([[1, 2], [3, 4]], columns=list('AB'), index=['x', 'y'])
|
|
61
|
+
>>> df.execute()
|
|
62
|
+
A B
|
|
63
|
+
x 1 2
|
|
64
|
+
y 3 4
|
|
65
|
+
>>> df2 = md.DataFrame([[5, 6], [7, 8]], columns=list('AB'), index=['x', 'y'])
|
|
66
|
+
>>> df.append(df2).execute()
|
|
67
|
+
A B
|
|
68
|
+
x 1 2
|
|
69
|
+
y 3 4
|
|
70
|
+
x 5 6
|
|
71
|
+
y 7 8
|
|
72
|
+
|
|
73
|
+
With `ignore_index` set to True:
|
|
74
|
+
|
|
75
|
+
>>> df.append(df2, ignore_index=True).execute()
|
|
76
|
+
A B
|
|
77
|
+
0 1 2
|
|
78
|
+
1 3 4
|
|
79
|
+
2 5 6
|
|
80
|
+
3 7 8
|
|
81
|
+
|
|
82
|
+
The following, while not recommended methods for generating DataFrames,
|
|
83
|
+
show two ways to generate a DataFrame from multiple data sources.
|
|
84
|
+
|
|
85
|
+
Less efficient:
|
|
86
|
+
|
|
87
|
+
>>> df = md.DataFrame(columns=['A'])
|
|
88
|
+
>>> for i in range(5):
|
|
89
|
+
... df = df.append({'A': i}, ignore_index=True)
|
|
90
|
+
>>> df.execute()
|
|
91
|
+
A
|
|
92
|
+
0 0
|
|
93
|
+
1 1
|
|
94
|
+
2 2
|
|
95
|
+
3 3
|
|
96
|
+
4 4
|
|
97
|
+
|
|
98
|
+
More efficient:
|
|
99
|
+
|
|
100
|
+
>>> md.concat([md.DataFrame([i], columns=['A']) for i in range(5)],
|
|
101
|
+
... ignore_index=True).execute()
|
|
102
|
+
A
|
|
103
|
+
0 0
|
|
104
|
+
1 1
|
|
105
|
+
2 2
|
|
106
|
+
3 3
|
|
107
|
+
4 4
|
|
108
|
+
"""
|
|
109
|
+
from .concat import concat
|
|
110
|
+
|
|
116
111
|
if isinstance(other, dict):
|
|
117
112
|
other = from_pandas(pd.DataFrame(dict((k, [v]) for k, v in other.items())))
|
|
118
|
-
|
|
119
|
-
|
|
113
|
+
if not isinstance(other, list):
|
|
114
|
+
other = [other]
|
|
115
|
+
return concat(
|
|
116
|
+
[df] + other,
|
|
117
|
+
ignore_index=ignore_index,
|
|
118
|
+
verify_integrity=verify_integrity,
|
|
119
|
+
sort=sort,
|
|
120
120
|
)
|
|
121
|
-
return op(df, other)
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ... import opcodes
|
|
16
|
+
from ...serialization.serializables import AnyField, BoolField, FunctionField
|
|
17
|
+
from ...udf import BuiltinFunction
|
|
18
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataFrameCombine(DataFrameOperator, DataFrameOperatorMixin):
|
|
22
|
+
_op_type_ = opcodes.DATAFRAME_COMBINE
|
|
23
|
+
|
|
24
|
+
func = FunctionField("func")
|
|
25
|
+
fill_value = AnyField("fill_value")
|
|
26
|
+
overwrite = BoolField("overwrite")
|
|
27
|
+
|
|
28
|
+
def has_custom_code(self) -> bool:
|
|
29
|
+
return not isinstance(self.func, BuiltinFunction)
|
|
30
|
+
|
|
31
|
+
def __call__(self, obj1, obj2):
|
|
32
|
+
from ..indexing.align import align
|
|
33
|
+
|
|
34
|
+
assert obj1.ndim == 1 and obj2.ndim == 1
|
|
35
|
+
obj1, obj2 = align(obj1, obj2)
|
|
36
|
+
# Create the output series based on the result series
|
|
37
|
+
return self.new_series(
|
|
38
|
+
[obj1, obj2],
|
|
39
|
+
shape=obj1.shape,
|
|
40
|
+
dtype=obj1.dtype,
|
|
41
|
+
index_value=obj1.index_value,
|
|
42
|
+
name=obj1.name,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def df_combine(df, other, func, fill_value=None, overwrite=True):
    """
    Perform column-wise combine with another DataFrame.

    Combines a DataFrame with `other` DataFrame using `func`
    to element-wise combine columns. The row and column indexes of the
    resulting DataFrame will be the union of the two.

    Parameters
    ----------
    other : DataFrame
        The DataFrame to merge column-wise.
    func : function
        Function that takes two series as inputs and return a Series or a
        scalar. Used to merge the two dataframes column by columns.
    fill_value : scalar value, default None
        The value to fill NaNs with prior to passing any column to the
        merge func.
    overwrite : bool, default True
        If True, columns in `self` that do not exist in `other` will be
        overwritten with NaNs.

    Returns
    -------
    DataFrame
        Combination of the provided DataFrames.

    See Also
    --------
    DataFrame.combine_first : Combine two DataFrame objects and default to
        non-null values in frame calling the method.

    Examples
    --------
    Combine using a simple function that chooses the smaller column.

    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df1 = md.DataFrame({'A': [0, 0], 'B': [4, 4]})
    >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
    >>> df1.combine(df2, take_smaller).execute()
       A  B
    0  0  3
    1  0  3

    Example using a true element-wise combine function.

    >>> df1 = md.DataFrame({'A': [5, 0], 'B': [2, 4]})
    >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> df1.combine(df2, mt.minimum).execute()
       A  B
    0  1  2
    1  0  3

    Using `fill_value` fills Nones prior to passing the column to the
    merge function.

    >>> df1 = md.DataFrame({'A': [0, 0], 'B': [None, 4]})
    >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> df1.combine(df2, take_smaller, fill_value=-5).execute()
       A    B
    0  0 -5.0
    1  0  4.0

    However, if the same element in both dataframes is None, that None
    is preserved

    >>> df1 = md.DataFrame({'A': [0, 0], 'B': [None, 4]})
    >>> df2 = md.DataFrame({'A': [1, 1], 'B': [None, 3]})
    >>> df1.combine(df2, take_smaller, fill_value=-5).execute()
       A    B
    0  0 -5.0
    1  0  3.0

    Example that demonstrates the use of `overwrite` and behavior when
    the axes differ between the dataframes.

    >>> df1 = md.DataFrame({'A': [0, 0], 'B': [4, 4]})
    >>> df2 = md.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2])
    >>> df1.combine(df2, take_smaller).execute()
         A    B     C
    0  NaN  NaN   NaN
    1  NaN  3.0 -10.0
    2  NaN  3.0   1.0

    >>> df1.combine(df2, take_smaller, overwrite=False).execute()
         A    B     C
    0  0.0  NaN   NaN
    1  0.0  3.0 -10.0
    2  NaN  3.0   1.0

    Demonstrating the preference of the passed in dataframe.

    >>> df2 = md.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2])
    >>> df2.combine(df1, take_smaller).execute()
         A    B   C
    0  0.0  NaN NaN
    1  0.0  3.0 NaN
    2  NaN  3.0 NaN

    >>> df2.combine(df1, take_smaller, overwrite=False).execute()
         A    B   C
    0  0.0  NaN NaN
    1  0.0  3.0 1.0
    2  NaN  3.0 1.0
    """
    # todo merge series logic into whole dataframe to reduce latency
    from ..indexing.align import align
    from .concat import concat

    # Record which columns each input owned *before* alignment; the aligned
    # frames are then iterated over a single shared column list.
    src_df_cols = set(df.dtypes.index)
    src_other_cols = set(other.dtypes.index)

    df, other = align(df, other)
    col_data = []
    for c in df.dtypes.index:
        if c in src_df_cols and c in src_other_cols:
            # column present on both sides: delegate to the user function
            col_data.append(func(df[c], other[c]))
        elif c in src_df_cols and not overwrite:
            # Column exists only in the calling frame and overwrite is off:
            # keep the caller's values instead of replacing them with the
            # column taken from the aligned `other`.
            col_data.append(df[c])
        else:
            # Either the column exists only in `other`, or it exists only in
            # the calling frame and overwrite is on; in both cases take the
            # column from the aligned `other`.
            col_data.append(other[c])
    res = concat(col_data, axis=1)
    # NOTE(review): the docstring says `fill_value` is applied before `func`
    # is called, but here it is applied to the combined result -- confirm
    # which behaviour is intended.
    if fill_value is not None:
        res = res.fillna(fill_value)
    return res
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def series_combine(series, other, func, fill_value=None):
    """
    Combine the Series with a Series or scalar according to `func`.

    The Series and `other` are combined by applying `func` to perform an
    elementwise selection for the resulting Series. When a value is
    missing at some index from one of the two objects being combined,
    `fill_value` is assumed.

    Parameters
    ----------
    other : Series or scalar
        The value(s) to be combined with the `Series`.
    func : function
        Function that takes two scalars as inputs and returns an element.
    fill_value : scalar, optional
        The value to assume when an index is missing from
        one Series or the other. The default specifies to use the
        appropriate NaN value for the underlying dtype of the Series.

    Returns
    -------
    Series
        The result of combining the Series with the other object.

    See Also
    --------
    Series.combine_first : Combine Series values, choosing the calling
        Series' values first.

    Examples
    --------
    Consider 2 Datasets ``s1`` and ``s2`` containing
    highest clocked speeds of different birds.

    >>> import maxframe.dataframe as md
    >>> s1 = md.Series({'falcon': 330.0, 'eagle': 160.0})
    >>> s1.execute()
    falcon    330.0
    eagle     160.0
    dtype: float64
    >>> s2 = md.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
    >>> s2.execute()
    falcon    345.0
    eagle     200.0
    duck       30.0
    dtype: float64

    Now, to combine the two datasets and view the highest speeds
    of the birds across the two datasets

    >>> s1.combine(s2, max).execute()
    duck        NaN
    eagle     200.0
    falcon    345.0
    dtype: float64

    In the previous example, the resulting value for duck is missing,
    because the maximum of a NaN and a float is a NaN.
    So, in the example, we set ``fill_value=0``,
    so the maximum value returned will be the value from some dataset.

    >>> s1.combine(s2, max, fill_value=0).execute()
    duck       30.0
    eagle     200.0
    falcon    345.0
    dtype: float64
    """
    # The series variant always builds the operator with overwrite enabled.
    combine_op = DataFrameCombine(
        func=func,
        fill_value=fill_value,
        overwrite=True,
    )
    return combine_op(series, other)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def df_combine_first(df, other):
    """
    Update null elements with value in the same location in `other`.

    Two DataFrame objects are combined by filling null values of the
    calling DataFrame with non-null values from the other DataFrame. The
    row and column indexes of the resulting DataFrame will be the union of
    the two. When calling first.combine_first(second), the result holds
    the values of the 'first' dataframe and overrides those of the second
    one wherever both first.loc[index, col] and second.loc[index, col] are
    not missing values.

    Parameters
    ----------
    other : DataFrame
        Provided DataFrame to use to fill null values.

    Returns
    -------
    DataFrame
        The result of combining the provided DataFrame with the other object.

    See Also
    --------
    DataFrame.combine : Perform series-wise operation on two DataFrames
        using a given function.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df1 = md.DataFrame({'A': [None, 0], 'B': [None, 4]})
    >>> df2 = md.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> df1.combine_first(df2).execute()
         A    B
    0  1.0  3.0
    1  0.0  4.0

    Null values still persist if the location of that null value
    does not exist in `other`

    >>> df1 = md.DataFrame({'A': [None, 0], 'B': [4, None]})
    >>> df2 = md.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2])
    >>> df1.combine_first(df2).execute()
         A    B    C
    0  NaN  4.0  NaN
    1  0.0  3.0  1.0
    2  NaN  3.0  1.0
    """
    # Work on a copy so the caller's object is not the one being updated.
    combined = df.copy()
    update_options = {"join": "outer", "overwrite": False}
    combined.update(other, **update_options)
    return combined
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def series_combine_first(series, other):
    """
    Update null elements with value in the same location in 'other'.

    Two Series objects are combined by filling null values of the calling
    Series with non-null values from the other Series. Result index will
    be the union of the two indexes.

    Parameters
    ----------
    other : Series
        The value(s) to be used for filling null values.

    Returns
    -------
    Series
        The result of combining the provided Series with the other object.

    See Also
    --------
    Series.combine : Perform element-wise operation on two Series
        using a given function.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s1 = md.Series([1, mt.nan])
    >>> s2 = md.Series([3, 4, 5])
    >>> s1.combine_first(s2).execute()
    0    1.0
    1    4.0
    2    5.0
    dtype: float64

    Null values still persist if the location of that null value
    does not exist in `other`

    >>> s1 = md.Series({'falcon': mt.nan, 'eagle': 160.0})
    >>> s2 = md.Series({'eagle': 200.0, 'duck': 30.0})
    >>> s1.combine_first(s2).execute()
    duck       30.0
    eagle     160.0
    falcon      NaN
    dtype: float64
    """
    combined = series.copy()
    combined.update(other)
    # Series.update accepts no extra arguments, so the join mode and the
    # overwrite flag are written directly onto the operator it produced.
    update_op = combined.op
    update_op.join = "outer"
    update_op.overwrite = False
    return combined
|