maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -12,17 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import List
|
|
16
|
-
|
|
17
15
|
from ... import opcodes
|
|
18
|
-
from ...core import EntityData
|
|
19
16
|
from ...serialization.serializables import (
|
|
20
17
|
AnyField,
|
|
21
18
|
BoolField,
|
|
22
19
|
DictField,
|
|
23
20
|
Int32Field,
|
|
24
21
|
Int64Field,
|
|
25
|
-
KeyField,
|
|
26
22
|
ListField,
|
|
27
23
|
StringField,
|
|
28
24
|
)
|
|
@@ -33,27 +29,26 @@ from .core import DataFrameDataStore
|
|
|
33
29
|
class DataFrameToCSV(DataFrameDataStore):
|
|
34
30
|
_op_type_ = opcodes.TO_CSV
|
|
35
31
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
storage_options = DictField("storage_options")
|
|
32
|
+
path = AnyField("path", default=None)
|
|
33
|
+
sep = StringField("sep", default=None)
|
|
34
|
+
na_rep = StringField("na_rep", default=None)
|
|
35
|
+
float_format = StringField("float_format", default=None)
|
|
36
|
+
columns = ListField("columns", default=None)
|
|
37
|
+
header = AnyField("header", default=None)
|
|
38
|
+
index = BoolField("index", default=None)
|
|
39
|
+
index_label = AnyField("index_label", default=None)
|
|
40
|
+
mode = StringField("mode", default=None)
|
|
41
|
+
encoding = StringField("encoding", default=None)
|
|
42
|
+
compression = AnyField("compression", default=None)
|
|
43
|
+
quoting = Int32Field("quoting", default=None)
|
|
44
|
+
quotechar = StringField("quotechar", default=None)
|
|
45
|
+
line_terminator = StringField("line_terminator", default=None)
|
|
46
|
+
chunksize = Int64Field("chunksize", default=None)
|
|
47
|
+
date_format = StringField("date_format", default=None)
|
|
48
|
+
doublequote = BoolField("doublequote", default=None)
|
|
49
|
+
escapechar = StringField("escapechar", default=None)
|
|
50
|
+
decimal = StringField("decimal", default=None)
|
|
51
|
+
storage_options = DictField("storage_options", default=None)
|
|
57
52
|
|
|
58
53
|
def __init__(self, output_types=None, **kw):
|
|
59
54
|
super().__init__(_output_types=output_types, **kw)
|
|
@@ -63,19 +58,6 @@ class DataFrameToCSV(DataFrameDataStore):
|
|
|
63
58
|
# if wildcard in path, write csv into multiple files
|
|
64
59
|
return "*" not in self.path
|
|
65
60
|
|
|
66
|
-
@property
|
|
67
|
-
def output_stat(self):
|
|
68
|
-
return self.output_stat
|
|
69
|
-
|
|
70
|
-
@property
|
|
71
|
-
def output_limit(self):
|
|
72
|
-
return 1 if not self.output_stat else 2
|
|
73
|
-
|
|
74
|
-
@classmethod
|
|
75
|
-
def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
|
|
76
|
-
super()._set_inputs(op, inputs)
|
|
77
|
-
op._input = op._inputs[0]
|
|
78
|
-
|
|
79
61
|
def __call__(self, df):
|
|
80
62
|
index_value = parse_index(df.index_value.to_pandas()[:0], df)
|
|
81
63
|
if df.ndim == 2:
|
|
@@ -110,13 +92,14 @@ def to_csv(
|
|
|
110
92
|
compression="infer",
|
|
111
93
|
quoting=None,
|
|
112
94
|
quotechar='"',
|
|
113
|
-
|
|
95
|
+
lineterminator=None,
|
|
114
96
|
chunksize=None,
|
|
115
97
|
date_format=None,
|
|
116
98
|
doublequote=True,
|
|
117
99
|
escapechar=None,
|
|
118
100
|
decimal=".",
|
|
119
101
|
storage_options=None,
|
|
102
|
+
**kw,
|
|
120
103
|
):
|
|
121
104
|
r"""
|
|
122
105
|
Write object to a comma-separated values (csv) file.
|
|
@@ -169,7 +152,7 @@ def to_csv(
|
|
|
169
152
|
will treat them as non-numeric.
|
|
170
153
|
quotechar : str, default '\"'
|
|
171
154
|
String of length 1. Character used to quote fields.
|
|
172
|
-
|
|
155
|
+
lineterminator : str, optional
|
|
173
156
|
The newline character or character sequence to use in the output
|
|
174
157
|
file. Defaults to `os.linesep`, which depends on the OS in which
|
|
175
158
|
this method is called ('\n' for linux, '\r\n' for Windows, i.e.).
|
|
@@ -203,6 +186,11 @@ def to_csv(
|
|
|
203
186
|
... 'weapon': ['sai', 'bo staff']})
|
|
204
187
|
>>> df.to_csv('out.csv', index=False).execute()
|
|
205
188
|
"""
|
|
189
|
+
lineterminator = lineterminator or kw.pop("line_terminator", None)
|
|
190
|
+
if kw:
|
|
191
|
+
raise TypeError(
|
|
192
|
+
f"to_csv() got an unexpected keyword argument '{next(iter(kw))}'"
|
|
193
|
+
)
|
|
206
194
|
|
|
207
195
|
if mode != "w": # pragma: no cover
|
|
208
196
|
raise NotImplementedError("only support to_csv with mode 'w' for now")
|
|
@@ -220,7 +208,7 @@ def to_csv(
|
|
|
220
208
|
compression=compression,
|
|
221
209
|
quoting=quoting,
|
|
222
210
|
quotechar=quotechar,
|
|
223
|
-
line_terminator=line_terminator,
|
|
211
|
+
line_terminator=lineterminator,
|
|
224
212
|
chunksize=chunksize,
|
|
225
213
|
date_format=date_format,
|
|
226
214
|
doublequote=doublequote,
|
|
@@ -56,10 +56,17 @@ class DataFrameToODPSTable(DataFrameDataStore):
|
|
|
56
56
|
index_label = ListField("index_label", FieldTypes.string, default=None)
|
|
57
57
|
lifecycle = Int64Field("lifecycle", default=None)
|
|
58
58
|
table_properties = DictField("table_properties", default=None)
|
|
59
|
+
primary_key = ListField("primary_key", FieldTypes.string, default=None)
|
|
60
|
+
use_generated_table_meta = BoolField("use_generated_table_meta", default=False)
|
|
59
61
|
|
|
60
62
|
def __init__(self, **kw):
|
|
61
63
|
super().__init__(_output_types=[OutputType.dataframe], **kw)
|
|
62
64
|
|
|
65
|
+
def check_inputs(self, inputs: List[TileableType]):
|
|
66
|
+
if self.use_generated_table_meta:
|
|
67
|
+
return None
|
|
68
|
+
return super().check_inputs(inputs)
|
|
69
|
+
|
|
63
70
|
def __call__(self, x):
|
|
64
71
|
shape = (0,) * len(x.shape)
|
|
65
72
|
index_value = parse_index(x.index_value.to_pandas()[:0], x.key, "index")
|
|
@@ -100,11 +107,12 @@ def to_odps_table(
|
|
|
100
107
|
partition: Optional[str] = None,
|
|
101
108
|
partition_col: Union[None, str, List[str]] = None,
|
|
102
109
|
overwrite: bool = False,
|
|
103
|
-
unknown_as_string: Optional[bool] =
|
|
110
|
+
unknown_as_string: Optional[bool] = True,
|
|
104
111
|
index: bool = True,
|
|
105
112
|
index_label: Union[None, str, List[str]] = None,
|
|
106
113
|
lifecycle: Optional[int] = None,
|
|
107
114
|
table_properties: Optional[dict] = None,
|
|
115
|
+
primary_key: Union[None, str, List[str]] = None,
|
|
108
116
|
):
|
|
109
117
|
"""
|
|
110
118
|
Write DataFrame object into a MaxCompute (ODPS) table.
|
|
@@ -145,6 +153,10 @@ def to_odps_table(
|
|
|
145
153
|
Specify lifecycle of the output table.
|
|
146
154
|
table_properties: Optional[dict]
|
|
147
155
|
Specify properties of the output table.
|
|
156
|
+
primary_key: Union[None, str, List[str]]
|
|
157
|
+
If provided and target table does not exist, target table
|
|
158
|
+
will be a delta table with columns specified in this argument
|
|
159
|
+
as primary key.
|
|
148
160
|
|
|
149
161
|
Returns
|
|
150
162
|
-------
|
|
@@ -201,12 +213,14 @@ def to_odps_table(
|
|
|
201
213
|
index_table_intersect = index_cols & table_cols
|
|
202
214
|
if index_table_intersect:
|
|
203
215
|
raise ValueError(
|
|
204
|
-
f"Index column(s) {index_table_intersect} conflict with column(s) of the input dataframe."
|
|
216
|
+
f"Index column(s) {index_table_intersect} conflict with "
|
|
217
|
+
f"column(s) of the input dataframe."
|
|
205
218
|
)
|
|
206
219
|
index_partition_intersect = index_cols & partition_col_set
|
|
207
220
|
if index_partition_intersect:
|
|
208
221
|
raise ValueError(
|
|
209
|
-
f"Index column(s) {index_partition_intersect} conflict with partition column(s)."
|
|
222
|
+
f"Index column(s) {index_partition_intersect} conflict "
|
|
223
|
+
f"with partition column(s)."
|
|
210
224
|
)
|
|
211
225
|
|
|
212
226
|
if partition_col:
|
|
@@ -217,6 +231,23 @@ def to_odps_table(
|
|
|
217
231
|
" is not the data column(s) of the input dataframe."
|
|
218
232
|
)
|
|
219
233
|
|
|
234
|
+
table_properties = table_properties or {}
|
|
235
|
+
if primary_key is not None:
|
|
236
|
+
table_properties["transactional"] = "true"
|
|
237
|
+
if odps_entry.exist_table(table):
|
|
238
|
+
table_obj = odps_entry.get_table(table)
|
|
239
|
+
if table_obj.is_transactional:
|
|
240
|
+
table_properties = table_properties or {}
|
|
241
|
+
table_properties["transactional"] = "true"
|
|
242
|
+
primary_key = primary_key or table_obj.primary_key or ()
|
|
243
|
+
if set(primary_key) != set(table_obj.primary_key or ()):
|
|
244
|
+
raise ValueError(
|
|
245
|
+
f"Primary keys between existing table {table} and "
|
|
246
|
+
f"provided arguments are not same."
|
|
247
|
+
)
|
|
248
|
+
if primary_key and not isinstance(primary_key, (list, tuple)):
|
|
249
|
+
primary_key = [primary_key]
|
|
250
|
+
|
|
220
251
|
op = DataFrameToODPSTable(
|
|
221
252
|
dtypes=df.dtypes,
|
|
222
253
|
table_name=table,
|
|
@@ -227,6 +258,7 @@ def to_odps_table(
|
|
|
227
258
|
index=index,
|
|
228
259
|
index_label=index_label,
|
|
229
260
|
lifecycle=lifecycle or options.session.table_lifecycle,
|
|
230
|
-
table_properties=table_properties,
|
|
261
|
+
table_properties=table_properties or None,
|
|
262
|
+
primary_key=primary_key or None,
|
|
231
263
|
)
|
|
232
264
|
return op(df)
|
|
@@ -24,20 +24,36 @@ from .apply_chunk import (
|
|
|
24
24
|
df_apply_chunk,
|
|
25
25
|
series_apply_chunk,
|
|
26
26
|
)
|
|
27
|
+
from .cartesian_chunk import cartesian_chunk
|
|
28
|
+
from .collect_kv import collect_kv
|
|
29
|
+
from .extract_kv import extract_kv
|
|
27
30
|
from .flatjson import series_flatjson
|
|
28
31
|
from .flatmap import df_flatmap, series_flatmap
|
|
32
|
+
from .map_reduce import map_reduce
|
|
33
|
+
from .rebalance import DataFrameRebalance, rebalance
|
|
29
34
|
from .reshuffle import DataFrameReshuffle, df_reshuffle
|
|
30
35
|
|
|
31
36
|
|
|
32
37
|
def _install():
|
|
33
38
|
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
34
39
|
|
|
35
|
-
DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
|
|
36
|
-
DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
|
|
37
40
|
DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
|
|
38
|
-
|
|
39
|
-
|
|
41
|
+
DataFrameMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
|
|
42
|
+
DataFrameMaxFrameAccessor._register("collect_kv", collect_kv)
|
|
43
|
+
DataFrameMaxFrameAccessor._register("extract_kv", extract_kv)
|
|
44
|
+
DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
|
|
45
|
+
DataFrameMaxFrameAccessor._register("map_reduce", map_reduce)
|
|
46
|
+
DataFrameMaxFrameAccessor._register("rebalance", rebalance)
|
|
47
|
+
DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
|
|
48
|
+
|
|
40
49
|
SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
|
|
50
|
+
SeriesMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
|
|
51
|
+
SeriesMaxFrameAccessor._register("extract_kv", extract_kv)
|
|
52
|
+
SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
|
|
53
|
+
SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
|
|
54
|
+
SeriesMaxFrameAccessor._register("rebalance", rebalance)
|
|
55
|
+
|
|
56
|
+
IndexMaxFrameAccessor._register("rebalance", rebalance)
|
|
41
57
|
|
|
42
58
|
if DataFrameMaxFrameAccessor._api_count:
|
|
43
59
|
for t in DATAFRAME_TYPE:
|
|
@@ -26,9 +26,10 @@ from ...serialization.serializables import (
|
|
|
26
26
|
Int32Field,
|
|
27
27
|
TupleField,
|
|
28
28
|
)
|
|
29
|
+
from ...typing_ import TileableType
|
|
29
30
|
from ...udf import BuiltinFunction, MarkedFunction
|
|
30
31
|
from ...utils import copy_if_possible, make_dtype, make_dtypes
|
|
31
|
-
from ..core import DATAFRAME_TYPE, DataFrame, IndexValue, Series
|
|
32
|
+
from ..core import DATAFRAME_TYPE, INDEX_TYPE, DataFrame, IndexValue, Series
|
|
32
33
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
33
34
|
from ..utils import (
|
|
34
35
|
InferredDataFrameMeta,
|
|
@@ -43,7 +44,7 @@ from ..utils import (
|
|
|
43
44
|
|
|
44
45
|
class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
45
46
|
_op_type_ = opcodes.APPLY_CHUNK
|
|
46
|
-
_legacy_name = "DataFrameApplyChunkOperator"
|
|
47
|
+
_legacy_name = "DataFrameApplyChunkOperator" # since v2.0.0
|
|
47
48
|
|
|
48
49
|
func = FunctionField("func")
|
|
49
50
|
batch_rows = Int32Field("batch_rows", default=None)
|
|
@@ -60,16 +61,26 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
60
61
|
def has_custom_code(self) -> bool:
    """Report whether ``self.func`` is something other than a ``BuiltinFunction``."""
    wraps_builtin = isinstance(self.func, BuiltinFunction)
    return not wraps_builtin
|
|
62
63
|
|
|
64
|
+
def check_inputs(self, inputs: List[TileableType]):
    """Accept every input without performing any validation.

    apply_chunk is allowed to be called on non-deterministic tileables, so
    this method is intentionally a no-op.
    NOTE(review): presumably this overrides a stricter check defined on a
    base class; confirm against the operator base implementation.
    """
    return None
|
|
67
|
+
|
|
63
68
|
def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
|
|
64
69
|
# return dataframe
|
|
65
70
|
if self.output_types[0] == OutputType.dataframe:
|
|
66
71
|
dtypes = make_dtypes(dtypes)
|
|
72
|
+
if dtypes is not None:
|
|
73
|
+
shape = df.shape if element_wise else (np.nan, len(dtypes))
|
|
74
|
+
cols_value = parse_index(dtypes.index, store_data=True)
|
|
75
|
+
else:
|
|
76
|
+
shape = (np.nan, np.nan)
|
|
77
|
+
cols_value = None
|
|
67
78
|
# apply_chunk will generate a new range index for results
|
|
68
79
|
return self.new_dataframe(
|
|
69
80
|
[df],
|
|
70
|
-
shape=
|
|
81
|
+
shape=shape,
|
|
71
82
|
index_value=index_value,
|
|
72
|
-
columns_value=
|
|
83
|
+
columns_value=cols_value,
|
|
73
84
|
dtypes=dtypes,
|
|
74
85
|
)
|
|
75
86
|
|
|
@@ -106,11 +117,17 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
106
117
|
name: Any = None,
|
|
107
118
|
output_type=None,
|
|
108
119
|
index=None,
|
|
120
|
+
skip_infer=False,
|
|
109
121
|
):
|
|
110
122
|
args = self.args or ()
|
|
111
123
|
kwargs = self.kwargs or {}
|
|
112
124
|
# if not dtypes and not skip_infer:
|
|
113
|
-
|
|
125
|
+
try:
|
|
126
|
+
packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
|
|
127
|
+
except:
|
|
128
|
+
if not skip_infer:
|
|
129
|
+
raise
|
|
130
|
+
packed_func = self.func
|
|
114
131
|
|
|
115
132
|
# if skip_infer, directly build a frame
|
|
116
133
|
if self.output_types and self.output_types[0] == OutputType.df_or_series:
|
|
@@ -125,13 +142,15 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
125
142
|
dtype=dtype,
|
|
126
143
|
name=name,
|
|
127
144
|
index=index,
|
|
145
|
+
skip_infer=skip_infer,
|
|
128
146
|
)
|
|
129
147
|
|
|
130
148
|
if inferred_meta.index_value is None:
|
|
131
149
|
inferred_meta.index_value = parse_index(
|
|
132
150
|
None, (df_or_series.key, df_or_series.index_value.key, self.func)
|
|
133
151
|
)
|
|
134
|
-
|
|
152
|
+
if not skip_infer:
|
|
153
|
+
inferred_meta.check_absence("output_type", "dtypes", "dtype")
|
|
135
154
|
|
|
136
155
|
if isinstance(df_or_series, DATAFRAME_TYPE):
|
|
137
156
|
return self._call_dataframe(
|
|
@@ -163,6 +182,7 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
163
182
|
name: Any = None,
|
|
164
183
|
index: Union[pd.Index, IndexValue] = None,
|
|
165
184
|
elementwise: bool = None,
|
|
185
|
+
skip_infer: bool = False,
|
|
166
186
|
**kwargs,
|
|
167
187
|
) -> InferredDataFrameMeta:
|
|
168
188
|
inferred_meta = infer_dataframe_return_value(
|
|
@@ -174,7 +194,10 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
174
194
|
name=name,
|
|
175
195
|
index=index,
|
|
176
196
|
elementwise=elementwise,
|
|
197
|
+
skip_infer=skip_infer,
|
|
177
198
|
)
|
|
199
|
+
if skip_infer:
|
|
200
|
+
return inferred_meta
|
|
178
201
|
|
|
179
202
|
# merge specified and inferred index, dtypes, output_type
|
|
180
203
|
# elementwise used to decide shape
|
|
@@ -186,6 +209,8 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
186
209
|
if self.output_types:
|
|
187
210
|
inferred_meta.output_type = self.output_types[0]
|
|
188
211
|
inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
|
|
212
|
+
if isinstance(index, INDEX_TYPE):
|
|
213
|
+
index = index.index_value
|
|
189
214
|
if index is not None:
|
|
190
215
|
inferred_meta.index_value = (
|
|
191
216
|
parse_index(index)
|
|
@@ -458,6 +483,7 @@ def df_apply_chunk(
|
|
|
458
483
|
name=name,
|
|
459
484
|
index=index,
|
|
460
485
|
output_type=output_type,
|
|
486
|
+
skip_infer=skip_infer,
|
|
461
487
|
)
|
|
462
488
|
|
|
463
489
|
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
from ... import opcodes
|
|
21
|
+
from ...core import EntityData, OutputType
|
|
22
|
+
from ...serialization.serializables import (
|
|
23
|
+
DictField,
|
|
24
|
+
FunctionField,
|
|
25
|
+
KeyField,
|
|
26
|
+
TupleField,
|
|
27
|
+
)
|
|
28
|
+
from ...udf import BuiltinFunction
|
|
29
|
+
from ...utils import quiet_stdio
|
|
30
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
31
|
+
from ..utils import (
|
|
32
|
+
build_df,
|
|
33
|
+
build_empty_df,
|
|
34
|
+
build_series,
|
|
35
|
+
parse_index,
|
|
36
|
+
validate_output_types,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DataFrameCartesianChunk(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that calls a user function with a ``left`` and a ``right`` input.

    When the operator is called, result metadata is inferred by running
    ``func`` once on small mock objects built from both inputs. If that trial
    run raises, the declared output type (plus ``dtypes`` for dataframes) is
    used to build an empty placeholder result instead.
    """

    _op_type_ = opcodes.CARTESIAN_CHUNK

    # the two inputs; re-bound from ``op.inputs`` inside ``_set_inputs``
    left = KeyField("left")
    right = KeyField("right")
    # user function and the extra positional / keyword arguments handed to it
    func = FunctionField("func")
    args = TupleField("args")
    kwargs = DictField("kwargs")

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)
        # fall back to a memory scale of 2.0 when none was supplied
        if self.memory_scale is None:
            self.memory_scale = 2.0

    @classmethod
    def _set_inputs(cls, op: "DataFrameCartesianChunk", inputs: List[EntityData]):
        """Bind the first two entries of ``op.inputs`` to ``left`` and ``right``."""
        super()._set_inputs(op, inputs)
        first, second = op.inputs[:2]
        op.left = first
        op.right = second

    @staticmethod
    def _build_test_obj(obj):
        """Build a size-2 mock dataframe (2-d input) or series (otherwise)."""
        if obj.ndim == 2:
            return build_df(obj, size=2)
        return build_series(obj, size=2, name=obj.name)

    def has_custom_code(self) -> bool:
        """Report whether ``self.func`` is something other than a ``BuiltinFunction``."""
        wraps_builtin = isinstance(self.func, BuiltinFunction)
        return not wraps_builtin

    def _infer_index_value(self, index, obj, left, right):
        """Parse ``index`` (or ``obj.index`` when ``index`` is None) into an index value."""
        if index is None:
            index = obj.index
        return parse_index(index, left, right, self.func, self.args, self.kwargs)

    def __call__(self, left, right, index=None, dtypes=None):
        """Create the output tileable for ``left`` and ``right``.

        Parameters
        ----------
        left, right
            Inputs of the operator.
        index
            Index to use for the result; the index of the inferred object is
            used when omitted.
        dtypes
            Dtypes of a dataframe result; also used to build an empty
            placeholder when the trial run of ``func`` fails.

        Raises
        ------
        TypeError
            If the trial run fails and the output type is neither ``series``
            nor ``dataframe`` with ``dtypes`` given.
        """
        mock_left = self._build_test_obj(left)
        mock_right = self._build_test_obj(right)
        output_type = self._output_types[0] if self._output_types else None
        inputs = [left, right]

        # nothing can be inferred for df_or_series outputs
        if output_type == OutputType.df_or_series:
            return self.new_df_or_series(inputs)

        # try run to infer meta
        try:
            with np.errstate(all="ignore"), quiet_stdio():
                obj = self.func(mock_left, mock_right, *self.args, **self.kwargs)
        except:  # noqa: E722  # nosec  # pylint: disable=bare-except
            if output_type == OutputType.series:
                obj = pd.Series([], dtype=np.dtype(object))
            elif output_type == OutputType.dataframe and dtypes is not None:
                obj = build_empty_df(dtypes)
            else:
                raise TypeError(
                    "Cannot determine `output_type`, "
                    "you have to specify it as `dataframe` or `series`, "
                    "for dataframe, `dtypes` is required as well "
                    "if output_type='dataframe'"
                )

        returns_series = (
            getattr(obj, "ndim", 0) == 1 or output_type == OutputType.series
        )
        if returns_series:
            # "shape" is removed from kwargs before they take part in the
            # index value computation below
            shape = self.kwargs.pop("shape", (np.nan,))
            index_value = self._infer_index_value(index, obj, left, right)
            return self.new_series(
                inputs,
                dtype=obj.dtype,
                shape=shape,
                index_value=index_value,
                name=obj.name,
            )

        # dataframe
        if dtypes is None:
            dtypes = obj.dtypes
        shape = (np.nan, len(dtypes))
        columns_value = parse_index(dtypes.index, store_data=True)
        index_value = self._infer_index_value(index, obj, left, right)
        return self.new_dataframe(
            inputs,
            shape=shape,
            dtypes=dtypes,
            index_value=index_value,
            columns_value=columns_value,
        )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
    """Build and call a ``DataFrameCartesianChunk`` operator on two inputs.

    Parameters
    ----------
    left, right
        The two inputs handed to the operator.
    func
        Function stored on the operator; it receives the two inputs followed
        by ``args`` and the remaining ``kwargs``.
    skip_infer : bool, default False
        When no output type is given and this flag is set, the output type
        is declared as ``OutputType.df_or_series``.
    args : tuple
        Extra positional arguments for ``func``.
    **kwargs
        ``output_type``, ``output_types``, ``object_type``, ``index``,
        ``dtypes`` and ``memory_scale`` are consumed here; everything else
        is stored on the operator as keyword arguments for ``func``.

    Returns
    -------
    The result of calling the operator with ``left`` and ``right``.
    """
    output_types = validate_output_types(
        output_type=kwargs.pop("output_type", None),
        output_types=kwargs.pop("output_types", None),
        object_type=kwargs.pop("object_type", None),
    )
    # only the first validated output type is kept
    primary_type = output_types[0] if output_types else None
    if primary_type:
        output_types = [primary_type]
    elif skip_infer:
        output_types = [OutputType.df_or_series]

    # options consumed by the operator call rather than by ``func``
    call_options = {key: kwargs.pop(key, None) for key in ("index", "dtypes")}
    memory_scale = kwargs.pop("memory_scale", None)

    op = DataFrameCartesianChunk(
        left=left,
        right=right,
        func=func,
        args=args,
        kwargs=kwargs,
        output_types=output_types,
        memory_scale=memory_scale,
    )
    return op(left, right, **call_options)
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from ... import opcodes
|
|
19
|
+
from ...serialization.serializables import AnyField, StringField
|
|
20
|
+
from ...utils import no_default
|
|
21
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
22
|
+
from ..utils import make_column_list
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DataFrameCollectKv(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that replaces selected columns with one key-value column.

    Only the output metadata is derived here: the selected columns are
    removed from the input dtypes and an object-typed column named
    ``kv_col`` is appended.
    """

    _op_type_ = opcodes.COLLECT_KV

    # columns to merge; None selects every column of the input
    columns = AnyField("columns", default=None)
    # delimiter between a key and its value
    kv_delim = StringField("kv_delim", default=None)
    # delimiter between key-value pairs
    item_delim = StringField("item_delim", default=None)
    # name of the generated key-value column
    kv_col = StringField("kv_col", default=None)

    def __call__(self, df):
        """Create the output dataframe for ``df``.

        Parameters
        ----------
        df
            Input dataframe whose ``dtypes``, ``shape`` and ``index_value``
            are used to derive the output metadata.

        Returns
        -------
        A dataframe with the same row count and index value as ``df``, the
        selected columns removed and the ``kv_col`` column appended.
        """
        # imported locally because this module's top-level imports do not
        # bring ``parse_index`` into scope
        from ..utils import parse_index

        if self.columns is None:
            cols = list(df.dtypes.index)
        elif isinstance(self.columns, list):
            cols = self.columns
        else:
            cols = [self.columns]

        # columns missing from the input are ignored when dropping
        remaining = df.dtypes.drop(cols, errors="ignore")
        kv_dtype = pd.Series([np.dtype("object")], index=[self.kv_col])
        new_dtypes = pd.concat([remaining, kv_dtype])

        return self.new_dataframe(
            [df],
            shape=(df.shape[0], len(new_dtypes)),
            dtypes=new_dtypes,
            index_value=df.index_value,
            # wrap the column index the same way the sibling operators do
            # instead of handing over a raw pandas Index
            columns_value=parse_index(new_dtypes.index, store_data=True),
        )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def collect_kv(
    data,
    columns=None,
    kv_delim="=",
    item_delim=",",
    kv_col="kv_col",
):
    """
    Merge values in specified columns into a key-value represented column.

    Parameters
    ----------
    columns : list, default None
        The columns to be merged.
    kv_delim : str, default '='
        Delimiter between key and value.
    item_delim : str, default ','
        Delimiter between key-value pairs.
    kv_col : str, default 'kv_col'
        Name of the new key-value column.

    Returns
    -------
    DataFrame
        converted data frame

    Raises
    ------
    ValueError
        If any of the specified columns does not exist in the input.

    See Also
    --------
    DataFrame.mf.extract_kv

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md

    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
    ...                    "k1": [1.0, np.nan, 7.1, np.nan, np.nan],
    ...                    "k2": [3.0, 3.0, np.nan, 1.2, 1.0],
    ...                    "k3": [np.nan, 5.1, np.nan, 1.5, np.nan],
    ...                    "k5": [10.0, np.nan, np.nan, np.nan, np.nan],
    ...                    "k7": [np.nan, np.nan, 8.2, np.nan, np.nan],
    ...                    "k9": [np.nan, np.nan, np.nan, np.nan, 1.1]})
    >>> df.execute()
        name   k1   k2   k3    k5   k7   k9
    0  name1  1.0  3.0  NaN  10.0  NaN  NaN
    1  name2  NaN  3.0  5.1   NaN  NaN  NaN
    2  name3  7.1  NaN  NaN   NaN  8.2  NaN
    3  name4  NaN  1.2  1.5   NaN  NaN  NaN
    4  name5  NaN  1.0  NaN   NaN  NaN  1.1

    The columns to be merged are specified by ``columns``.
    ``kv_delim`` separates a key from its value; the default is '='.
    ``item_delim`` separates the key-value pairs; the default is ','.
    The name of the new column is given by ``kv_col``; the default is 'kv_col'.

    >>> df.mf.collect_kv(columns=['k1', 'k2', 'k3', 'k5', 'k7', 'k9']).execute()
        name                 kv_col
    0  name1  k1=1.0,k2=3.0,k5=10.0
    1  name2          k2=3.0,k3=5.1
    2  name3          k1=7.1,k7=8.2
    3  name4          k2=1.2,k3=1.5
    4  name5          k2=1.0,k9=1.1
    """
    columns_list = make_column_list(columns, data.dtypes) or []
    # reject the first requested column that the input does not have
    for col in columns_list:
        if col not in data.dtypes.index:
            raise ValueError(f"Column {col} specified is not a valid column.")

    op = DataFrameCollectKv(
        columns=columns,
        kv_delim=kv_delim,
        item_delim=item_delim,
        kv_col=kv_col,
    )
    return op(data)
|