maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import os
|
|
15
16
|
import time
|
|
16
17
|
from io import BytesIO
|
|
17
18
|
|
|
@@ -21,7 +22,7 @@ import pytest
|
|
|
21
22
|
from .. import oss
|
|
22
23
|
from .._oss_lib import glob as og
|
|
23
24
|
from .._oss_lib.common import OSSFileEntry
|
|
24
|
-
from ..oss import build_oss_path
|
|
25
|
+
from ..oss import HostEnforceType, _rewrite_internal_endpoint, build_oss_path
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
class OSSObjInfo:
|
|
@@ -54,19 +55,25 @@ class MockObject:
|
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
class SideEffectBucket:
|
|
58
|
+
cached_ctx = {}
|
|
59
|
+
|
|
57
60
|
def __init__(self, *_, **__):
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
61
|
+
cur_test = os.environ["PYTEST_CURRENT_TEST"]
|
|
62
|
+
if cur_test in self.cached_ctx:
|
|
63
|
+
self.obj_dict = self.cached_ctx[cur_test]
|
|
64
|
+
else:
|
|
65
|
+
self.obj_dict = self.cached_ctx[cur_test] = {
|
|
66
|
+
"file.csv": "id1,id2,id3\n1,2,3\n",
|
|
67
|
+
"dir/": "",
|
|
68
|
+
"dir/file1.csv": "2",
|
|
69
|
+
"dir/file2.csv": "3",
|
|
70
|
+
"dir/subdir/": "",
|
|
71
|
+
"dir/subdir/file3.csv": "s4",
|
|
72
|
+
"dir/subdir/file4.csv": "s5",
|
|
73
|
+
"dir2/": "",
|
|
74
|
+
"dir2/file6.csv": "6",
|
|
75
|
+
"dir2/file7.csv": "7",
|
|
76
|
+
}
|
|
70
77
|
|
|
71
78
|
def get_object_meta(self, key):
|
|
72
79
|
return ObjectMeta(key, self.obj_dict)
|
|
@@ -77,6 +84,17 @@ class SideEffectBucket:
|
|
|
77
84
|
def get_object(self, key, byte_range):
|
|
78
85
|
return MockObject(self.obj_dict, key, byte_range)
|
|
79
86
|
|
|
87
|
+
def copy_object(self, bucket, src_key, dst_key):
|
|
88
|
+
self.obj_dict[dst_key] = self.obj_dict[src_key]
|
|
89
|
+
|
|
90
|
+
def delete_object(self, key):
|
|
91
|
+
from oss2.exceptions import NoSuchKey
|
|
92
|
+
|
|
93
|
+
try:
|
|
94
|
+
del self.obj_dict[key]
|
|
95
|
+
except KeyError:
|
|
96
|
+
raise NoSuchKey(404, {}, key, {})
|
|
97
|
+
|
|
80
98
|
|
|
81
99
|
class SideEffectObjIter:
|
|
82
100
|
def __init__(self, *args, **kwargs):
|
|
@@ -96,28 +114,29 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
|
|
|
96
114
|
access_key_secret = "your_access_key_secret"
|
|
97
115
|
end_point = "your_endpoint"
|
|
98
116
|
|
|
99
|
-
file_path = f"oss://bucket/file.csv"
|
|
100
|
-
|
|
101
|
-
|
|
117
|
+
file_path = f"oss://your_endpoint/bucket/file.csv"
|
|
118
|
+
new_file_path = f"oss://your_endpoint/bucket/file1.csv"
|
|
119
|
+
dir_path = f"oss://your_endpoint/bucket/dir/"
|
|
120
|
+
dir_path_content_magic = f"oss://your_endpoint/bucket/dir*/"
|
|
102
121
|
other_scheme_path = f"scheme://netloc/path"
|
|
103
|
-
not_exist_file_path = f"oss://bucket/not_exist.csv"
|
|
122
|
+
not_exist_file_path = f"oss://your_endpoint/bucket/not_exist.csv"
|
|
104
123
|
|
|
105
124
|
fake_file_path = build_oss_path(
|
|
106
|
-
file_path, access_key_id, access_key_secret
|
|
125
|
+
file_path, end_point, access_key_id, access_key_secret
|
|
126
|
+
)
|
|
127
|
+
fake_new_file_path = build_oss_path(
|
|
128
|
+
new_file_path, end_point, access_key_id, access_key_secret
|
|
107
129
|
)
|
|
108
130
|
fake_dir_path = build_oss_path(
|
|
109
|
-
dir_path, access_key_id, access_key_secret
|
|
131
|
+
dir_path, end_point, access_key_id, access_key_secret
|
|
110
132
|
)
|
|
111
133
|
fake_dir_path_contains_magic = build_oss_path(
|
|
112
|
-
dir_path_content_magic, access_key_id, access_key_secret
|
|
113
|
-
)
|
|
114
|
-
fake_other_scheme_path = build_oss_path(
|
|
115
|
-
other_scheme_path, access_key_id, access_key_secret, end_point
|
|
134
|
+
dir_path_content_magic, end_point, access_key_id, access_key_secret
|
|
116
135
|
)
|
|
117
136
|
fake_not_exist_file_path = build_oss_path(
|
|
118
|
-
not_exist_file_path, access_key_id, access_key_secret
|
|
137
|
+
not_exist_file_path, end_point, access_key_id, access_key_secret
|
|
119
138
|
)
|
|
120
|
-
fs = oss.OSSFileSystem
|
|
139
|
+
fs = oss.OSSFileSystem()
|
|
121
140
|
|
|
122
141
|
# Test OSSFileSystem.
|
|
123
142
|
assert len(fs.ls(fake_dir_path)) == 4
|
|
@@ -131,21 +150,15 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
|
|
|
131
150
|
assert fs.stat(fake_dir_path)["type"] == "directory"
|
|
132
151
|
assert fs.glob(fake_dir_path) == [fake_dir_path]
|
|
133
152
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
assert (
|
|
138
|
-
msg1 == f"Except scheme oss, but got scheme: "
|
|
139
|
-
f"scheme in path: {fake_other_scheme_path}"
|
|
140
|
-
)
|
|
153
|
+
msg1 = f"Except scheme oss, but got scheme: scheme in path: {other_scheme_path}"
|
|
154
|
+
with pytest.raises(ValueError, match=msg1):
|
|
155
|
+
fs.exists(other_scheme_path)
|
|
141
156
|
|
|
142
|
-
with pytest.raises(
|
|
157
|
+
with pytest.raises(ValueError, match="No credentials provided"):
|
|
143
158
|
fs.exists(file_path)
|
|
144
|
-
msg2 = e.value.args[0]
|
|
145
|
-
assert msg2 == "Please use build_oss_path to add OSS info"
|
|
146
159
|
|
|
147
160
|
with pytest.raises(OSError):
|
|
148
|
-
|
|
161
|
+
fs.ls(fake_file_path)
|
|
149
162
|
|
|
150
163
|
assert len(fs.glob(fake_file_path)) == 1
|
|
151
164
|
assert len(fs.glob(fake_dir_path + "*", recursive=True)) == 4
|
|
@@ -180,3 +193,28 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
|
|
|
180
193
|
|
|
181
194
|
fe = OSSFileEntry(fake_file_path)
|
|
182
195
|
assert fe.path == fake_file_path
|
|
196
|
+
|
|
197
|
+
fs.rename(fake_file_path, fake_new_file_path)
|
|
198
|
+
assert not fs.exists(fake_file_path)
|
|
199
|
+
assert fs.exists(fake_new_file_path)
|
|
200
|
+
|
|
201
|
+
with pytest.raises(FileNotFoundError):
|
|
202
|
+
fs.delete(fake_not_exist_file_path)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def test_host_rewrite():
|
|
206
|
+
assert "cn-shanghai.oss.service.com" == _rewrite_internal_endpoint(
|
|
207
|
+
"cn-shanghai.oss.service.com", HostEnforceType.force_external
|
|
208
|
+
)
|
|
209
|
+
assert "cn-shanghai.oss.service.com" == _rewrite_internal_endpoint(
|
|
210
|
+
"cn-shanghai-internal.oss.service.com", HostEnforceType.force_external
|
|
211
|
+
)
|
|
212
|
+
assert "cn-shanghai-internal.oss.service.com" == _rewrite_internal_endpoint(
|
|
213
|
+
"cn-shanghai.oss.service.com", HostEnforceType.force_internal
|
|
214
|
+
)
|
|
215
|
+
assert "cn-shanghai-internal.oss.service.com" == _rewrite_internal_endpoint(
|
|
216
|
+
"cn-shanghai-internal.oss.service.com", HostEnforceType.force_internal
|
|
217
|
+
)
|
|
218
|
+
assert "1.2.3.4" == _rewrite_internal_endpoint(
|
|
219
|
+
"1.2.3.4", HostEnforceType.force_internal
|
|
220
|
+
)
|
maxframe/lib/mmh3.cp37-win32.pyd
CHANGED
|
Binary file
|
maxframe/lib/wrapped_pickle.py
CHANGED
|
@@ -75,6 +75,16 @@ class Unpickler(pickle_mod.Unpickler):
|
|
|
75
75
|
raise ValueError("Unpickle is forbidden here")
|
|
76
76
|
return super().load()
|
|
77
77
|
|
|
78
|
+
def find_class(self, module, name):
|
|
79
|
+
try:
|
|
80
|
+
return super().find_class(module, name)
|
|
81
|
+
except ImportError:
|
|
82
|
+
# workaround for pickle incompatibility since numpy>=2.0
|
|
83
|
+
if not module.startswith("numpy._core"):
|
|
84
|
+
raise
|
|
85
|
+
module = module.replace("numpy._core", "numpy.core")
|
|
86
|
+
return super().find_class(module, name)
|
|
87
|
+
|
|
78
88
|
|
|
79
89
|
@functools.wraps(pickle_mod.load)
|
|
80
90
|
def load(file, **kwargs):
|
maxframe/opcodes.py
CHANGED
|
@@ -271,6 +271,9 @@ SEM = 352
|
|
|
271
271
|
STR_CONCAT = 353
|
|
272
272
|
MAD = 354
|
|
273
273
|
MEDIAN = 355
|
|
274
|
+
IDXMAX = 357
|
|
275
|
+
IDXMIN = 358
|
|
276
|
+
MODE = 359
|
|
274
277
|
|
|
275
278
|
# tensor operator
|
|
276
279
|
RESHAPE = 401
|
|
@@ -389,6 +392,15 @@ ALIGN = 741
|
|
|
389
392
|
CASE_WHEN = 742
|
|
390
393
|
PIVOT = 743
|
|
391
394
|
PIVOT_TABLE = 744
|
|
395
|
+
TO_NUMERIC = 745
|
|
396
|
+
DATAFRAME_FILTER = 746
|
|
397
|
+
REORDER_LEVELS = 747
|
|
398
|
+
DATAFRAME_COMPARE = 748
|
|
399
|
+
DROPLEVEL = 749
|
|
400
|
+
DATAFRAME_UPDATE = 750
|
|
401
|
+
DATAFRAME_COMBINE = 751
|
|
402
|
+
DATAFRAME_INFER_DTYPES = 752
|
|
403
|
+
BETWEEN_TIME = 753
|
|
392
404
|
|
|
393
405
|
FUSE = 801
|
|
394
406
|
|
|
@@ -400,6 +412,9 @@ MANAGED_MULTI_MODAL_GENERATION = 813
|
|
|
400
412
|
LLM_TEXT_SUMMARIZE_TASK = 814
|
|
401
413
|
LLM_TEXT_TRANSLATE_TASK = 815
|
|
402
414
|
LLM_TEXT_CLASSIFY_TASK = 816
|
|
415
|
+
LLM_TEXT_EXTRACT_TASK = 817
|
|
416
|
+
LLM_TEXT_EMBEDDING_TASK = 818
|
|
417
|
+
OPENAI_COMPATIBLE_TEXT_GENERATION = 819
|
|
403
418
|
|
|
404
419
|
# table like input for tensor
|
|
405
420
|
TABLE_COO = 1003
|
|
@@ -417,6 +432,7 @@ DATAFRAME_ILOC_GETITEM = 2021
|
|
|
417
432
|
DATAFRAME_ILOC_SETITEM = 2022
|
|
418
433
|
DATAFRAME_LOC_GETITEM = 2023
|
|
419
434
|
DATAFRAME_LOC_SETITEM = 2024
|
|
435
|
+
GET_LEVEL_VALUES = 2025
|
|
420
436
|
|
|
421
437
|
# merge
|
|
422
438
|
DATAFRAME_MERGE = 2010
|
|
@@ -426,7 +442,7 @@ DATAFRAME_SHUFFLE_MERGE_ALIGN = 2011
|
|
|
426
442
|
DATAFRAME_BLOOM_FILTER = 2014
|
|
427
443
|
|
|
428
444
|
# append
|
|
429
|
-
APPEND = 2015
|
|
445
|
+
APPEND = 2015 # deprecated since v2.2
|
|
430
446
|
|
|
431
447
|
# reset index
|
|
432
448
|
RESET_INDEX = 2028
|
|
@@ -439,15 +455,14 @@ GROUPBY_AGG = 2033
|
|
|
439
455
|
GROUPBY_CONCAT = 2034
|
|
440
456
|
GROUPBY_HEAD = 2035
|
|
441
457
|
GROUPBY_SAMPLE_ILOC = 2036
|
|
442
|
-
GROUPBY_SORT_REGULAR_SAMPLE = 2037
|
|
443
|
-
GROUPBY_SORT_PIVOT = 2038
|
|
444
458
|
GROUPBY_SORT_SHUFFLE = 2039
|
|
445
459
|
|
|
446
460
|
# parallel sorting by regular sampling
|
|
447
|
-
|
|
461
|
+
PSRS_SORT_REGULAR_SAMPLE = 2040
|
|
448
462
|
PSRS_CONCAT_PIVOT = 2041
|
|
449
463
|
PSRS_SHUFFLE = 2042
|
|
450
464
|
PSRS_ALIGN = 2043
|
|
465
|
+
PSRS_RANK_SHUFFLE = 2044
|
|
451
466
|
# partition
|
|
452
467
|
CALC_PARTITIONS_INFO = 2046
|
|
453
468
|
PARTITION_MERGED = 2047
|
|
@@ -455,6 +470,7 @@ PARTITION_MERGED = 2047
|
|
|
455
470
|
# dataframe sort
|
|
456
471
|
SORT_VALUES = 2050
|
|
457
472
|
SORT_INDEX = 2051
|
|
473
|
+
RANK = 2052
|
|
458
474
|
|
|
459
475
|
# window
|
|
460
476
|
ROLLING_AGG = 2060
|
|
@@ -475,6 +491,8 @@ TO_ODPS_TABLE = 20112
|
|
|
475
491
|
READ_ODPS_VOLUME = 20113
|
|
476
492
|
TO_ODPS_VOLUME = 20114
|
|
477
493
|
READ_ODPS_QUERY = 20115
|
|
494
|
+
READ_ODPS_MODEL = 20116 # reserved
|
|
495
|
+
TO_ODPS_MODEL = 20117
|
|
478
496
|
|
|
479
497
|
TO_CSV_STAT = 2102
|
|
480
498
|
|
|
@@ -506,9 +524,10 @@ FAISS_TRAIN_SAMPLED_INDEX = 2235
|
|
|
506
524
|
FAISS_QUERY = 2236
|
|
507
525
|
PROXIMA_SIMPLE_BUILDER = 2238
|
|
508
526
|
PROXIMA_SIMPLE_SEARCHER = 2239
|
|
509
|
-
|
|
527
|
+
K_NEIGHBORS_GRAPH = 2237
|
|
510
528
|
|
|
511
529
|
# cluster
|
|
530
|
+
# k-means related
|
|
512
531
|
KMEANS_PLUS_PLUS_INIT = 2250
|
|
513
532
|
KMEANS_SCALABLE_PLUS_PLUS_INIT = 2251
|
|
514
533
|
KMEANS_ELKAN_INIT_BOUNDS = 2252
|
|
@@ -517,7 +536,12 @@ KMEANS_ELKAN_POSTPROCESS = 2254
|
|
|
517
536
|
KMEANS_LLOYD_UPDATE = 2255
|
|
518
537
|
KMEANS_LLOYD_POSTPROCESS = 2256
|
|
519
538
|
KMEANS_INERTIA = 2257
|
|
520
|
-
|
|
539
|
+
KMEANS_RELOCATE_EMPTY_CLUSTERS = 2258
|
|
540
|
+
KMEANS_FIT = 2259
|
|
541
|
+
KMEANS_PREDICT = 2260
|
|
542
|
+
|
|
543
|
+
# linear models
|
|
544
|
+
LOGISTIC_REGRESSION_FIT = 2270
|
|
521
545
|
|
|
522
546
|
# XGBoost
|
|
523
547
|
XGBOOST_TRAIN = 3001
|
|
@@ -580,6 +604,10 @@ RUN_SCRIPT = 5002
|
|
|
580
604
|
# learn metrics
|
|
581
605
|
MULTILABEL_CONFUSION_MATRIX = 5201
|
|
582
606
|
PRECISION_RECALL_F_SCORE_SUPPORT = 5202
|
|
607
|
+
AUC = 5203
|
|
608
|
+
ROC_CURVE = 5204
|
|
609
|
+
ROC_AUC_SCORE = 5205
|
|
610
|
+
LOG_LOSS = 5206
|
|
583
611
|
|
|
584
612
|
CHOLESKY_FUSE = 999988
|
|
585
613
|
|
|
@@ -588,23 +616,21 @@ DATAFRAME_RESHUFFLE = 10001
|
|
|
588
616
|
FLATMAP = 10002
|
|
589
617
|
FLATJSON = 10003
|
|
590
618
|
APPLY_CHUNK = 10004
|
|
619
|
+
EXTRACT_KV = 10013
|
|
620
|
+
COLLECT_KV = 10014
|
|
621
|
+
|
|
622
|
+
SERIES_DICT_METHOD = 10011
|
|
623
|
+
SERIES_LIST_METHOD = 10012
|
|
624
|
+
SERIES_STRUCT_METHOD = 10015
|
|
591
625
|
|
|
626
|
+
# placeholders for compatibility, DO NOT REMOVE
|
|
592
627
|
SERIES_DICT_GETITEM = 10005
|
|
593
628
|
SERIES_DICT_SETITEM = 10006
|
|
594
629
|
SERIES_DICT_LENGTH = 10007
|
|
595
630
|
SERIES_DICT_REMOVE = 10008
|
|
596
631
|
SERIES_DICT_CONTAINS = 10009
|
|
597
|
-
SERIES_DICT_FLATTEN = 10010
|
|
598
|
-
|
|
599
632
|
SERIES_LIST_GETITEM = 10020
|
|
600
|
-
SERIES_LIST_SETITEM = 10021
|
|
601
|
-
SERIES_LIST_CONTAINS = 10022
|
|
602
633
|
SERIES_LIST_LENGTH = 10023
|
|
603
|
-
SERIES_LIST_INSERT = 10024
|
|
604
|
-
SERIES_LIST_EXTEND = 10025
|
|
605
|
-
SERIES_LIST_POP = 10026
|
|
606
|
-
SERIES_LIST_SORT = 10027
|
|
607
|
-
SERIES_LIST_FLATTEN = 10028
|
|
608
634
|
|
|
609
635
|
# MaxFrame internal operators
|
|
610
636
|
DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
|
maxframe/protocol.py
CHANGED
|
@@ -39,6 +39,7 @@ from .serialization.serializables import (
|
|
|
39
39
|
SeriesField,
|
|
40
40
|
StringField,
|
|
41
41
|
)
|
|
42
|
+
from .utils import combine_error_message_and_traceback
|
|
42
43
|
|
|
43
44
|
pickling_support.install()
|
|
44
45
|
|
|
@@ -244,6 +245,9 @@ class ErrorInfo(JsonSerializable):
|
|
|
244
245
|
"raw_error_source", ErrorSource, FieldTypes.int8, default=None
|
|
245
246
|
)
|
|
246
247
|
raw_error_data: Optional[Exception] = AnyField("raw_error_data", default=None)
|
|
248
|
+
displayed_error_message: Optional[str] = StringField(
|
|
249
|
+
"displayed_error_message", default=None
|
|
250
|
+
)
|
|
247
251
|
|
|
248
252
|
@classmethod
|
|
249
253
|
def from_exception(cls, exc: Exception):
|
|
@@ -282,6 +286,7 @@ class ErrorInfo(JsonSerializable):
|
|
|
282
286
|
"error_messages": self.error_messages,
|
|
283
287
|
"error_tracebacks": self.error_tracebacks,
|
|
284
288
|
"raw_error_source": self.raw_error_source.value,
|
|
289
|
+
"displayed_error_message": self.displayed_error_message,
|
|
285
290
|
}
|
|
286
291
|
err_data_bufs = None
|
|
287
292
|
if isinstance(self.raw_error_data, (PickleContainer, RemoteException)):
|
|
@@ -299,6 +304,13 @@ class ErrorInfo(JsonSerializable):
|
|
|
299
304
|
]
|
|
300
305
|
return ret
|
|
301
306
|
|
|
307
|
+
def get_displayed_error_message(self) -> str:
|
|
308
|
+
if self.displayed_error_message is not None:
|
|
309
|
+
return self.displayed_error_message
|
|
310
|
+
return combine_error_message_and_traceback(
|
|
311
|
+
self.error_messages, self.error_tracebacks
|
|
312
|
+
)
|
|
313
|
+
|
|
302
314
|
|
|
303
315
|
class DagInfo(JsonSerializable):
|
|
304
316
|
session_id: str = StringField("session_id", default=None)
|
maxframe/remote/core.py
CHANGED
|
@@ -27,6 +27,7 @@ from ..serialization.serializables import (
|
|
|
27
27
|
ListField,
|
|
28
28
|
)
|
|
29
29
|
from ..tensor.core import TENSOR_TYPE
|
|
30
|
+
from ..typing_ import TileableType
|
|
30
31
|
from ..udf import BuiltinFunction
|
|
31
32
|
from ..utils import find_objects, replace_objects
|
|
32
33
|
|
|
@@ -59,6 +60,9 @@ class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
|
|
|
59
60
|
def has_custom_code(self) -> bool:
|
|
60
61
|
return not isinstance(self.function, BuiltinFunction)
|
|
61
62
|
|
|
63
|
+
def check_inputs(self, inputs: List[TileableType]):
|
|
64
|
+
return
|
|
65
|
+
|
|
62
66
|
@classmethod
|
|
63
67
|
def _set_inputs(cls, op: "RemoteFunction", inputs: List[EntityData]):
|
|
64
68
|
raw_inputs = getattr(op, "_inputs", None)
|
|
@@ -12,7 +12,16 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from . import
|
|
15
|
+
from . import (
|
|
16
|
+
arrow,
|
|
17
|
+
blob,
|
|
18
|
+
exception,
|
|
19
|
+
maxframe_objects,
|
|
20
|
+
numpy,
|
|
21
|
+
pandas,
|
|
22
|
+
scipy,
|
|
23
|
+
serializables,
|
|
24
|
+
)
|
|
16
25
|
from .core import (
|
|
17
26
|
PickleContainer,
|
|
18
27
|
PickleHookOptions,
|
|
@@ -27,4 +36,4 @@ from .core import (
|
|
|
27
36
|
)
|
|
28
37
|
from .exception import RemoteException
|
|
29
38
|
|
|
30
|
-
del arrow,
|
|
39
|
+
del arrow, blob, exception, maxframe_objects, numpy, pandas, scipy
|
maxframe/serialization/arrow.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Dict, List, Union
|
|
16
16
|
|
|
17
|
-
from ..utils import arrow_type_from_str
|
|
17
|
+
from ..utils import arrow_type_from_str, extract_class_name
|
|
18
18
|
from .core import Serializer, buffered
|
|
19
19
|
|
|
20
20
|
try:
|
|
@@ -30,30 +30,54 @@ except ImportError: # pragma: no cover
|
|
|
30
30
|
|
|
31
31
|
_TYPE_CHAR_ARROW_ARRAY = "A"
|
|
32
32
|
_TYPE_CHAR_ARROW_CHUNKED_ARRAY = "C"
|
|
33
|
+
_TYPE_CHAR_ARROW_REDUCED = "R"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ArrowDataTypeSerializer(Serializer):
|
|
37
|
+
def serial(self, obj: pa.DataType, context):
|
|
38
|
+
return [str(obj)], [], True
|
|
39
|
+
|
|
40
|
+
def deserial(self, serialized, context, subs):
|
|
41
|
+
return arrow_type_from_str(serialized[0])
|
|
33
42
|
|
|
34
43
|
|
|
35
44
|
class ArrowArraySerializer(Serializer):
|
|
36
45
|
@buffered
|
|
37
46
|
def serial(self, obj: PA_ARRAY_TYPES, context: Dict):
|
|
38
|
-
|
|
39
|
-
if isinstance(obj, pa.Array):
|
|
40
|
-
array_type = _TYPE_CHAR_ARROW_ARRAY
|
|
41
|
-
buffers = obj.buffers()
|
|
42
|
-
sizes = len(obj)
|
|
43
|
-
elif isinstance(obj, pa.ChunkedArray):
|
|
44
|
-
array_type = _TYPE_CHAR_ARROW_CHUNKED_ARRAY
|
|
45
|
-
buffers = [c.buffers() for c in obj.chunks]
|
|
46
|
-
sizes = [len(c) for c in obj.chunks]
|
|
47
|
-
else: # pragma: no cover
|
|
47
|
+
if not isinstance(obj, (pa.Array, pa.ChunkedArray)):
|
|
48
48
|
raise NotImplementedError(f"Array type {type(obj)} not supported")
|
|
49
|
-
|
|
49
|
+
|
|
50
|
+
if obj.type.num_fields == 0:
|
|
51
|
+
# use legacy serialization in case arrow changes deserializer method
|
|
52
|
+
data_type = str(obj.type)
|
|
53
|
+
if isinstance(obj, pa.Array):
|
|
54
|
+
array_type = _TYPE_CHAR_ARROW_ARRAY
|
|
55
|
+
buffers = obj.buffers()
|
|
56
|
+
sizes = len(obj)
|
|
57
|
+
else: # ChunkedArray
|
|
58
|
+
array_type = _TYPE_CHAR_ARROW_CHUNKED_ARRAY
|
|
59
|
+
buffers = [c.buffers() for c in obj.chunks]
|
|
60
|
+
sizes = [len(c) for c in obj.chunks]
|
|
61
|
+
return [array_type, data_type, sizes], buffers, False
|
|
62
|
+
|
|
63
|
+
meth, extracted = obj.__reduce__()
|
|
64
|
+
meth_name = extract_class_name(meth)
|
|
65
|
+
return [_TYPE_CHAR_ARROW_REDUCED, meth_name, None], list(extracted), False
|
|
50
66
|
|
|
51
67
|
def deserial(self, serialized: List, context: Dict, subs: List):
|
|
52
68
|
array_type, data_type_str, sizes = serialized[:3]
|
|
53
|
-
|
|
69
|
+
if array_type == _TYPE_CHAR_ARROW_REDUCED:
|
|
70
|
+
if data_type_str == "pyarrow.lib#chunked_array":
|
|
71
|
+
return pa.chunked_array(*subs)
|
|
72
|
+
elif data_type_str == "pyarrow.lib#_restore_array":
|
|
73
|
+
return pa.lib._restore_array(*subs)
|
|
74
|
+
else:
|
|
75
|
+
raise NotImplementedError(f"Unknown array type: {array_type}")
|
|
54
76
|
if array_type == _TYPE_CHAR_ARROW_ARRAY:
|
|
77
|
+
data_type = arrow_type_from_str(data_type_str)
|
|
55
78
|
return pa.Array.from_buffers(data_type, sizes, subs)
|
|
56
79
|
elif array_type == _TYPE_CHAR_ARROW_CHUNKED_ARRAY:
|
|
80
|
+
data_type = arrow_type_from_str(data_type_str)
|
|
57
81
|
chunks = [
|
|
58
82
|
pa.Array.from_buffers(data_type, size, bufs)
|
|
59
83
|
for size, bufs in zip(sizes, subs)
|
|
@@ -89,6 +113,7 @@ class ArrowBatchSerializer(Serializer):
|
|
|
89
113
|
|
|
90
114
|
|
|
91
115
|
if pa is not None: # pragma: no branch
|
|
116
|
+
ArrowDataTypeSerializer.register(pa.DataType)
|
|
92
117
|
ArrowArraySerializer.register(pa.Array)
|
|
93
118
|
ArrowArraySerializer.register(pa.ChunkedArray)
|
|
94
119
|
ArrowBatchSerializer.register(pa.Table)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Dict
|
|
16
|
+
|
|
17
|
+
from ..lib.dtypes_extension.blob import AbstractExternalBlob, SolidBlob
|
|
18
|
+
from .core import Serializer
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ExternalBlobSerializer(Serializer):
|
|
22
|
+
def serial(self, obj: AbstractExternalBlob, context: Dict):
|
|
23
|
+
_, vals = obj.__reduce__()
|
|
24
|
+
return [type(obj).__name__], list(vals), False
|
|
25
|
+
|
|
26
|
+
def deserial(self, serialized, context, subs):
|
|
27
|
+
cls_name = serialized[0]
|
|
28
|
+
cls = AbstractExternalBlob.get_cls_by_name(cls_name)
|
|
29
|
+
return cls(*subs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
ExternalBlobSerializer.register(SolidBlob)
|
|
Binary file
|
maxframe/serialization/core.pyx
CHANGED
|
@@ -52,6 +52,11 @@ try:
|
|
|
52
52
|
except (ImportError, AttributeError):
|
|
53
53
|
pass
|
|
54
54
|
|
|
55
|
+
try:
|
|
56
|
+
import pyarrow as pa
|
|
57
|
+
except ImportError:
|
|
58
|
+
pa = None
|
|
59
|
+
|
|
55
60
|
try:
|
|
56
61
|
import pytz
|
|
57
62
|
from pytz import BaseTzInfo as PyTZ_BaseTzInfo
|
|
@@ -95,6 +100,8 @@ cdef:
|
|
|
95
100
|
int SLICE_SERIALIZER = 13
|
|
96
101
|
int REGEX_SERIALIZER = 14
|
|
97
102
|
int NO_DEFAULT_SERIALIZER = 15
|
|
103
|
+
int ARROW_BUFFER_SERIALIZER = 16
|
|
104
|
+
int RANGE_SERIALIZER = 17
|
|
98
105
|
int PLACEHOLDER_SERIALIZER = 4096
|
|
99
106
|
|
|
100
107
|
|
|
@@ -874,12 +881,28 @@ cdef class SliceSerializer(Serializer):
|
|
|
874
881
|
serializer_id = SLICE_SERIALIZER
|
|
875
882
|
|
|
876
883
|
cpdef serial(self, object obj: slice, dict context):
|
|
877
|
-
|
|
884
|
+
cdef list elems = [obj.start, obj.stop, obj.step]
|
|
885
|
+
for x in elems:
|
|
886
|
+
if x is not None and not isinstance(x, int):
|
|
887
|
+
return [], elems, False
|
|
888
|
+
return elems, [], True
|
|
878
889
|
|
|
879
890
|
cpdef deserial(self, list serialized, dict context, list subs):
|
|
891
|
+
if len(serialized) == 0:
|
|
892
|
+
return slice(subs[0], subs[1], subs[2])
|
|
880
893
|
return slice(*serialized[:3])
|
|
881
894
|
|
|
882
895
|
|
|
896
|
+
cdef class RangeSerializer(Serializer):
|
|
897
|
+
serializer_id = RANGE_SERIALIZER
|
|
898
|
+
|
|
899
|
+
cpdef serial(self, object obj: range, dict context):
|
|
900
|
+
return [obj.start, obj.stop, obj.step], [], True
|
|
901
|
+
|
|
902
|
+
cpdef deserial(self, list serialized, dict context, list subs):
|
|
903
|
+
return range(*serialized[:3])
|
|
904
|
+
|
|
905
|
+
|
|
883
906
|
cdef class RegexSerializer(Serializer):
|
|
884
907
|
serializer_id = REGEX_SERIALIZER
|
|
885
908
|
|
|
@@ -906,6 +929,18 @@ cdef class NoDefaultSerializer(Serializer):
|
|
|
906
929
|
return no_default
|
|
907
930
|
|
|
908
931
|
|
|
932
|
+
cdef class ArrowBufferSerializer(Serializer):
|
|
933
|
+
serializer_id = ARROW_BUFFER_SERIALIZER
|
|
934
|
+
|
|
935
|
+
cpdef serial(self, object obj, dict context):
|
|
936
|
+
return [], [obj], True
|
|
937
|
+
|
|
938
|
+
cpdef deserial(self, list obj, dict context, list subs):
|
|
939
|
+
if not isinstance(subs[0], pa.Buffer):
|
|
940
|
+
return pa.py_buffer(subs[0])
|
|
941
|
+
return subs[0]
|
|
942
|
+
|
|
943
|
+
|
|
909
944
|
cdef class Placeholder:
|
|
910
945
|
"""
|
|
911
946
|
Placeholder object to reduce duplicated serialization
|
|
@@ -959,8 +994,11 @@ DtypeSerializer.register(np.dtype)
|
|
|
959
994
|
DtypeSerializer.register(ExtensionDtype)
|
|
960
995
|
ComplexSerializer.register(complex)
|
|
961
996
|
SliceSerializer.register(slice)
|
|
997
|
+
RangeSerializer.register(range)
|
|
962
998
|
RegexSerializer.register(re.Pattern)
|
|
963
999
|
NoDefaultSerializer.register(NoDefault)
|
|
1000
|
+
if pa is not None:
|
|
1001
|
+
ArrowBufferSerializer.register(pa.Buffer)
|
|
964
1002
|
PlaceholderSerializer.register(Placeholder)
|
|
965
1003
|
|
|
966
1004
|
|
|
@@ -18,6 +18,7 @@ from typing import Dict, List
|
|
|
18
18
|
|
|
19
19
|
from ..errors import MaxFrameError
|
|
20
20
|
from ..lib import wrapped_pickle as pickle
|
|
21
|
+
from ..utils import combine_error_message_and_traceback
|
|
21
22
|
from .core import Serializer, buffered, pickle_buffers, unpickle_buffers
|
|
22
23
|
|
|
23
24
|
logger = logging.getLogger(__name__)
|
|
@@ -53,10 +54,7 @@ class RemoteException(MaxFrameError):
|
|
|
53
54
|
return unpickle_buffers(self.buffers) if self.buffers else self
|
|
54
55
|
|
|
55
56
|
def __str__(self):
|
|
56
|
-
|
|
57
|
-
for msg, tb in zip(self.messages, self.tracebacks):
|
|
58
|
-
tbs.append("".join([msg + "\n"] + tb))
|
|
59
|
-
return "\nCaused by:\n".join(tbs)
|
|
57
|
+
return combine_error_message_and_traceback(self.messages, self.tracebacks)
|
|
60
58
|
|
|
61
59
|
|
|
62
60
|
class ExceptionSerializer(Serializer):
|
maxframe/serialization/numpy.py
CHANGED
|
@@ -95,5 +95,16 @@ class NDArraySerializer(Serializer):
|
|
|
95
95
|
return val
|
|
96
96
|
|
|
97
97
|
|
|
98
|
+
class RandomStateSerializer(Serializer):
|
|
99
|
+
def serial(self, obj: np.random.RandomState, context: Dict):
|
|
100
|
+
return [], [obj.get_state()], False
|
|
101
|
+
|
|
102
|
+
def deserial(self, serialized, context: Dict, subs: List):
|
|
103
|
+
rs = np.random.RandomState()
|
|
104
|
+
rs.set_state(subs[0])
|
|
105
|
+
return rs
|
|
106
|
+
|
|
107
|
+
|
|
98
108
|
NDArraySerializer.register(np.generic)
|
|
99
109
|
NDArraySerializer.register(np.ndarray)
|
|
110
|
+
RandomStateSerializer.register(np.random.RandomState)
|