maxframe 2.0.0b2__cp310-cp310-macosx_10_9_universal2.whl → 2.2.0__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import os
|
|
15
16
|
import time
|
|
16
17
|
from io import BytesIO
|
|
17
18
|
|
|
@@ -21,7 +22,7 @@ import pytest
|
|
|
21
22
|
from .. import oss
|
|
22
23
|
from .._oss_lib import glob as og
|
|
23
24
|
from .._oss_lib.common import OSSFileEntry
|
|
24
|
-
from ..oss import build_oss_path
|
|
25
|
+
from ..oss import HostEnforceType, _rewrite_internal_endpoint, build_oss_path
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
class OSSObjInfo:
|
|
@@ -54,19 +55,25 @@ class MockObject:
|
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
class SideEffectBucket:
|
|
58
|
+
cached_ctx = {}
|
|
59
|
+
|
|
57
60
|
def __init__(self, *_, **__):
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
61
|
+
cur_test = os.environ["PYTEST_CURRENT_TEST"]
|
|
62
|
+
if cur_test in self.cached_ctx:
|
|
63
|
+
self.obj_dict = self.cached_ctx[cur_test]
|
|
64
|
+
else:
|
|
65
|
+
self.obj_dict = self.cached_ctx[cur_test] = {
|
|
66
|
+
"file.csv": "id1,id2,id3\n1,2,3\n",
|
|
67
|
+
"dir/": "",
|
|
68
|
+
"dir/file1.csv": "2",
|
|
69
|
+
"dir/file2.csv": "3",
|
|
70
|
+
"dir/subdir/": "",
|
|
71
|
+
"dir/subdir/file3.csv": "s4",
|
|
72
|
+
"dir/subdir/file4.csv": "s5",
|
|
73
|
+
"dir2/": "",
|
|
74
|
+
"dir2/file6.csv": "6",
|
|
75
|
+
"dir2/file7.csv": "7",
|
|
76
|
+
}
|
|
70
77
|
|
|
71
78
|
def get_object_meta(self, key):
|
|
72
79
|
return ObjectMeta(key, self.obj_dict)
|
|
@@ -77,6 +84,17 @@ class SideEffectBucket:
|
|
|
77
84
|
def get_object(self, key, byte_range):
|
|
78
85
|
return MockObject(self.obj_dict, key, byte_range)
|
|
79
86
|
|
|
87
|
+
def copy_object(self, bucket, src_key, dst_key):
|
|
88
|
+
self.obj_dict[dst_key] = self.obj_dict[src_key]
|
|
89
|
+
|
|
90
|
+
def delete_object(self, key):
|
|
91
|
+
from oss2.exceptions import NoSuchKey
|
|
92
|
+
|
|
93
|
+
try:
|
|
94
|
+
del self.obj_dict[key]
|
|
95
|
+
except KeyError:
|
|
96
|
+
raise NoSuchKey(404, {}, key, {})
|
|
97
|
+
|
|
80
98
|
|
|
81
99
|
class SideEffectObjIter:
|
|
82
100
|
def __init__(self, *args, **kwargs):
|
|
@@ -96,28 +114,29 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
|
|
|
96
114
|
access_key_secret = "your_access_key_secret"
|
|
97
115
|
end_point = "your_endpoint"
|
|
98
116
|
|
|
99
|
-
file_path = f"oss://bucket/file.csv"
|
|
100
|
-
|
|
101
|
-
|
|
117
|
+
file_path = f"oss://your_endpoint/bucket/file.csv"
|
|
118
|
+
new_file_path = f"oss://your_endpoint/bucket/file1.csv"
|
|
119
|
+
dir_path = f"oss://your_endpoint/bucket/dir/"
|
|
120
|
+
dir_path_content_magic = f"oss://your_endpoint/bucket/dir*/"
|
|
102
121
|
other_scheme_path = f"scheme://netloc/path"
|
|
103
|
-
not_exist_file_path = f"oss://bucket/not_exist.csv"
|
|
122
|
+
not_exist_file_path = f"oss://your_endpoint/bucket/not_exist.csv"
|
|
104
123
|
|
|
105
124
|
fake_file_path = build_oss_path(
|
|
106
|
-
file_path, access_key_id, access_key_secret
|
|
125
|
+
file_path, end_point, access_key_id, access_key_secret
|
|
126
|
+
)
|
|
127
|
+
fake_new_file_path = build_oss_path(
|
|
128
|
+
new_file_path, end_point, access_key_id, access_key_secret
|
|
107
129
|
)
|
|
108
130
|
fake_dir_path = build_oss_path(
|
|
109
|
-
dir_path, access_key_id, access_key_secret
|
|
131
|
+
dir_path, end_point, access_key_id, access_key_secret
|
|
110
132
|
)
|
|
111
133
|
fake_dir_path_contains_magic = build_oss_path(
|
|
112
|
-
dir_path_content_magic, access_key_id, access_key_secret
|
|
113
|
-
)
|
|
114
|
-
fake_other_scheme_path = build_oss_path(
|
|
115
|
-
other_scheme_path, access_key_id, access_key_secret, end_point
|
|
134
|
+
dir_path_content_magic, end_point, access_key_id, access_key_secret
|
|
116
135
|
)
|
|
117
136
|
fake_not_exist_file_path = build_oss_path(
|
|
118
|
-
not_exist_file_path, access_key_id, access_key_secret
|
|
137
|
+
not_exist_file_path, end_point, access_key_id, access_key_secret
|
|
119
138
|
)
|
|
120
|
-
fs = oss.OSSFileSystem
|
|
139
|
+
fs = oss.OSSFileSystem()
|
|
121
140
|
|
|
122
141
|
# Test OSSFileSystem.
|
|
123
142
|
assert len(fs.ls(fake_dir_path)) == 4
|
|
@@ -131,21 +150,15 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
|
|
|
131
150
|
assert fs.stat(fake_dir_path)["type"] == "directory"
|
|
132
151
|
assert fs.glob(fake_dir_path) == [fake_dir_path]
|
|
133
152
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
assert (
|
|
138
|
-
msg1 == f"Except scheme oss, but got scheme: "
|
|
139
|
-
f"scheme in path: {fake_other_scheme_path}"
|
|
140
|
-
)
|
|
153
|
+
msg1 = f"Except scheme oss, but got scheme: scheme in path: {other_scheme_path}"
|
|
154
|
+
with pytest.raises(ValueError, match=msg1):
|
|
155
|
+
fs.exists(other_scheme_path)
|
|
141
156
|
|
|
142
|
-
with pytest.raises(
|
|
157
|
+
with pytest.raises(ValueError, match="No credentials provided"):
|
|
143
158
|
fs.exists(file_path)
|
|
144
|
-
msg2 = e.value.args[0]
|
|
145
|
-
assert msg2 == "Please use build_oss_path to add OSS info"
|
|
146
159
|
|
|
147
160
|
with pytest.raises(OSError):
|
|
148
|
-
|
|
161
|
+
fs.ls(fake_file_path)
|
|
149
162
|
|
|
150
163
|
assert len(fs.glob(fake_file_path)) == 1
|
|
151
164
|
assert len(fs.glob(fake_dir_path + "*", recursive=True)) == 4
|
|
@@ -180,3 +193,28 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
|
|
|
180
193
|
|
|
181
194
|
fe = OSSFileEntry(fake_file_path)
|
|
182
195
|
assert fe.path == fake_file_path
|
|
196
|
+
|
|
197
|
+
fs.rename(fake_file_path, fake_new_file_path)
|
|
198
|
+
assert not fs.exists(fake_file_path)
|
|
199
|
+
assert fs.exists(fake_new_file_path)
|
|
200
|
+
|
|
201
|
+
with pytest.raises(FileNotFoundError):
|
|
202
|
+
fs.delete(fake_not_exist_file_path)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def test_host_rewrite():
|
|
206
|
+
assert "cn-shanghai.oss.service.com" == _rewrite_internal_endpoint(
|
|
207
|
+
"cn-shanghai.oss.service.com", HostEnforceType.force_external
|
|
208
|
+
)
|
|
209
|
+
assert "cn-shanghai.oss.service.com" == _rewrite_internal_endpoint(
|
|
210
|
+
"cn-shanghai-internal.oss.service.com", HostEnforceType.force_external
|
|
211
|
+
)
|
|
212
|
+
assert "cn-shanghai-internal.oss.service.com" == _rewrite_internal_endpoint(
|
|
213
|
+
"cn-shanghai.oss.service.com", HostEnforceType.force_internal
|
|
214
|
+
)
|
|
215
|
+
assert "cn-shanghai-internal.oss.service.com" == _rewrite_internal_endpoint(
|
|
216
|
+
"cn-shanghai-internal.oss.service.com", HostEnforceType.force_internal
|
|
217
|
+
)
|
|
218
|
+
assert "1.2.3.4" == _rewrite_internal_endpoint(
|
|
219
|
+
"1.2.3.4", HostEnforceType.force_internal
|
|
220
|
+
)
|
|
Binary file
|
maxframe/lib/wrapped_pickle.py
CHANGED
|
@@ -75,6 +75,16 @@ class Unpickler(pickle_mod.Unpickler):
|
|
|
75
75
|
raise ValueError("Unpickle is forbidden here")
|
|
76
76
|
return super().load()
|
|
77
77
|
|
|
78
|
+
def find_class(self, module, name):
|
|
79
|
+
try:
|
|
80
|
+
return super().find_class(module, name)
|
|
81
|
+
except ImportError:
|
|
82
|
+
# workaround for pickle incompatibility since numpy>=2.0
|
|
83
|
+
if not module.startswith("numpy._core"):
|
|
84
|
+
raise
|
|
85
|
+
module = module.replace("numpy._core", "numpy.core")
|
|
86
|
+
return super().find_class(module, name)
|
|
87
|
+
|
|
78
88
|
|
|
79
89
|
@functools.wraps(pickle_mod.load)
|
|
80
90
|
def load(file, **kwargs):
|
maxframe/opcodes.py
CHANGED
|
@@ -271,6 +271,9 @@ SEM = 352
|
|
|
271
271
|
STR_CONCAT = 353
|
|
272
272
|
MAD = 354
|
|
273
273
|
MEDIAN = 355
|
|
274
|
+
RANK = 356
|
|
275
|
+
IDXMAX = 357
|
|
276
|
+
IDXMIN = 358
|
|
274
277
|
|
|
275
278
|
# tensor operator
|
|
276
279
|
RESHAPE = 401
|
|
@@ -389,6 +392,12 @@ ALIGN = 741
|
|
|
389
392
|
CASE_WHEN = 742
|
|
390
393
|
PIVOT = 743
|
|
391
394
|
PIVOT_TABLE = 744
|
|
395
|
+
TO_NUMERIC = 745
|
|
396
|
+
DATAFRAME_FILTER = 746
|
|
397
|
+
REORDER_LEVELS = 747
|
|
398
|
+
DATAFRAME_COMPARE = 748
|
|
399
|
+
DROPLEVEL = 749
|
|
400
|
+
DATAFRAME_UPDATE = 750
|
|
392
401
|
|
|
393
402
|
FUSE = 801
|
|
394
403
|
|
|
@@ -417,6 +426,7 @@ DATAFRAME_ILOC_GETITEM = 2021
|
|
|
417
426
|
DATAFRAME_ILOC_SETITEM = 2022
|
|
418
427
|
DATAFRAME_LOC_GETITEM = 2023
|
|
419
428
|
DATAFRAME_LOC_SETITEM = 2024
|
|
429
|
+
GET_LEVEL_VALUES = 2025
|
|
420
430
|
|
|
421
431
|
# merge
|
|
422
432
|
DATAFRAME_MERGE = 2010
|
|
@@ -426,7 +436,7 @@ DATAFRAME_SHUFFLE_MERGE_ALIGN = 2011
|
|
|
426
436
|
DATAFRAME_BLOOM_FILTER = 2014
|
|
427
437
|
|
|
428
438
|
# append
|
|
429
|
-
APPEND = 2015
|
|
439
|
+
APPEND = 2015 # deprecated since v2.2
|
|
430
440
|
|
|
431
441
|
# reset index
|
|
432
442
|
RESET_INDEX = 2028
|
|
@@ -439,12 +449,10 @@ GROUPBY_AGG = 2033
|
|
|
439
449
|
GROUPBY_CONCAT = 2034
|
|
440
450
|
GROUPBY_HEAD = 2035
|
|
441
451
|
GROUPBY_SAMPLE_ILOC = 2036
|
|
442
|
-
GROUPBY_SORT_REGULAR_SAMPLE = 2037
|
|
443
|
-
GROUPBY_SORT_PIVOT = 2038
|
|
444
452
|
GROUPBY_SORT_SHUFFLE = 2039
|
|
445
453
|
|
|
446
454
|
# parallel sorting by regular sampling
|
|
447
|
-
|
|
455
|
+
PSRS_SORT_REGULAR_SAMPLE = 2040
|
|
448
456
|
PSRS_CONCAT_PIVOT = 2041
|
|
449
457
|
PSRS_SHUFFLE = 2042
|
|
450
458
|
PSRS_ALIGN = 2043
|
|
@@ -475,6 +483,8 @@ TO_ODPS_TABLE = 20112
|
|
|
475
483
|
READ_ODPS_VOLUME = 20113
|
|
476
484
|
TO_ODPS_VOLUME = 20114
|
|
477
485
|
READ_ODPS_QUERY = 20115
|
|
486
|
+
READ_ODPS_MODEL = 20116 # reserved
|
|
487
|
+
TO_ODPS_MODEL = 20117
|
|
478
488
|
|
|
479
489
|
TO_CSV_STAT = 2102
|
|
480
490
|
|
|
@@ -506,9 +516,10 @@ FAISS_TRAIN_SAMPLED_INDEX = 2235
|
|
|
506
516
|
FAISS_QUERY = 2236
|
|
507
517
|
PROXIMA_SIMPLE_BUILDER = 2238
|
|
508
518
|
PROXIMA_SIMPLE_SEARCHER = 2239
|
|
509
|
-
|
|
519
|
+
K_NEIGHBORS_GRAPH = 2237
|
|
510
520
|
|
|
511
521
|
# cluster
|
|
522
|
+
# k-means related
|
|
512
523
|
KMEANS_PLUS_PLUS_INIT = 2250
|
|
513
524
|
KMEANS_SCALABLE_PLUS_PLUS_INIT = 2251
|
|
514
525
|
KMEANS_ELKAN_INIT_BOUNDS = 2252
|
|
@@ -517,7 +528,12 @@ KMEANS_ELKAN_POSTPROCESS = 2254
|
|
|
517
528
|
KMEANS_LLOYD_UPDATE = 2255
|
|
518
529
|
KMEANS_LLOYD_POSTPROCESS = 2256
|
|
519
530
|
KMEANS_INERTIA = 2257
|
|
520
|
-
|
|
531
|
+
KMEANS_RELOCATE_EMPTY_CLUSTERS = 2258
|
|
532
|
+
KMEANS_FIT = 2259
|
|
533
|
+
KMEANS_PREDICT = 2260
|
|
534
|
+
|
|
535
|
+
# linear models
|
|
536
|
+
LOGISTIC_REGRESSION_FIT = 2270
|
|
521
537
|
|
|
522
538
|
# XGBoost
|
|
523
539
|
XGBOOST_TRAIN = 3001
|
|
@@ -580,6 +596,10 @@ RUN_SCRIPT = 5002
|
|
|
580
596
|
# learn metrics
|
|
581
597
|
MULTILABEL_CONFUSION_MATRIX = 5201
|
|
582
598
|
PRECISION_RECALL_F_SCORE_SUPPORT = 5202
|
|
599
|
+
AUC = 5203
|
|
600
|
+
ROC_CURVE = 5204
|
|
601
|
+
ROC_AUC_SCORE = 5205
|
|
602
|
+
LOG_LOSS = 5206
|
|
583
603
|
|
|
584
604
|
CHOLESKY_FUSE = 999988
|
|
585
605
|
|
|
@@ -588,23 +608,21 @@ DATAFRAME_RESHUFFLE = 10001
|
|
|
588
608
|
FLATMAP = 10002
|
|
589
609
|
FLATJSON = 10003
|
|
590
610
|
APPLY_CHUNK = 10004
|
|
611
|
+
EXTRACT_KV = 10013
|
|
612
|
+
COLLECT_KV = 10014
|
|
613
|
+
|
|
614
|
+
SERIES_DICT_METHOD = 10011
|
|
615
|
+
SERIES_LIST_METHOD = 10012
|
|
616
|
+
SERIES_STRUCT_METHOD = 10015
|
|
591
617
|
|
|
618
|
+
# placeholders for compatibility, DO NOT REMOVE
|
|
592
619
|
SERIES_DICT_GETITEM = 10005
|
|
593
620
|
SERIES_DICT_SETITEM = 10006
|
|
594
621
|
SERIES_DICT_LENGTH = 10007
|
|
595
622
|
SERIES_DICT_REMOVE = 10008
|
|
596
623
|
SERIES_DICT_CONTAINS = 10009
|
|
597
|
-
SERIES_DICT_FLATTEN = 10010
|
|
598
|
-
|
|
599
624
|
SERIES_LIST_GETITEM = 10020
|
|
600
|
-
SERIES_LIST_SETITEM = 10021
|
|
601
|
-
SERIES_LIST_CONTAINS = 10022
|
|
602
625
|
SERIES_LIST_LENGTH = 10023
|
|
603
|
-
SERIES_LIST_INSERT = 10024
|
|
604
|
-
SERIES_LIST_EXTEND = 10025
|
|
605
|
-
SERIES_LIST_POP = 10026
|
|
606
|
-
SERIES_LIST_SORT = 10027
|
|
607
|
-
SERIES_LIST_FLATTEN = 10028
|
|
608
626
|
|
|
609
627
|
# MaxFrame internal operators
|
|
610
628
|
DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
|
maxframe/protocol.py
CHANGED
|
@@ -39,6 +39,7 @@ from .serialization.serializables import (
|
|
|
39
39
|
SeriesField,
|
|
40
40
|
StringField,
|
|
41
41
|
)
|
|
42
|
+
from .utils import combine_error_message_and_traceback
|
|
42
43
|
|
|
43
44
|
pickling_support.install()
|
|
44
45
|
|
|
@@ -244,6 +245,9 @@ class ErrorInfo(JsonSerializable):
|
|
|
244
245
|
"raw_error_source", ErrorSource, FieldTypes.int8, default=None
|
|
245
246
|
)
|
|
246
247
|
raw_error_data: Optional[Exception] = AnyField("raw_error_data", default=None)
|
|
248
|
+
displayed_error_message: Optional[str] = StringField(
|
|
249
|
+
"displayed_error_message", default=None
|
|
250
|
+
)
|
|
247
251
|
|
|
248
252
|
@classmethod
|
|
249
253
|
def from_exception(cls, exc: Exception):
|
|
@@ -282,6 +286,7 @@ class ErrorInfo(JsonSerializable):
|
|
|
282
286
|
"error_messages": self.error_messages,
|
|
283
287
|
"error_tracebacks": self.error_tracebacks,
|
|
284
288
|
"raw_error_source": self.raw_error_source.value,
|
|
289
|
+
"displayed_error_message": self.displayed_error_message,
|
|
285
290
|
}
|
|
286
291
|
err_data_bufs = None
|
|
287
292
|
if isinstance(self.raw_error_data, (PickleContainer, RemoteException)):
|
|
@@ -299,6 +304,13 @@ class ErrorInfo(JsonSerializable):
|
|
|
299
304
|
]
|
|
300
305
|
return ret
|
|
301
306
|
|
|
307
|
+
def get_displayed_error_message(self) -> str:
|
|
308
|
+
if self.displayed_error_message is not None:
|
|
309
|
+
return self.displayed_error_message
|
|
310
|
+
return combine_error_message_and_traceback(
|
|
311
|
+
self.error_messages, self.error_tracebacks
|
|
312
|
+
)
|
|
313
|
+
|
|
302
314
|
|
|
303
315
|
class DagInfo(JsonSerializable):
|
|
304
316
|
session_id: str = StringField("session_id", default=None)
|
|
@@ -12,7 +12,16 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from . import
|
|
15
|
+
from . import (
|
|
16
|
+
arrow,
|
|
17
|
+
blob,
|
|
18
|
+
exception,
|
|
19
|
+
maxframe_objects,
|
|
20
|
+
numpy,
|
|
21
|
+
pandas,
|
|
22
|
+
scipy,
|
|
23
|
+
serializables,
|
|
24
|
+
)
|
|
16
25
|
from .core import (
|
|
17
26
|
PickleContainer,
|
|
18
27
|
PickleHookOptions,
|
|
@@ -27,4 +36,4 @@ from .core import (
|
|
|
27
36
|
)
|
|
28
37
|
from .exception import RemoteException
|
|
29
38
|
|
|
30
|
-
del arrow,
|
|
39
|
+
del arrow, blob, exception, maxframe_objects, numpy, pandas, scipy
|
maxframe/serialization/arrow.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Dict, List, Union
|
|
16
16
|
|
|
17
|
-
from ..utils import arrow_type_from_str
|
|
17
|
+
from ..utils import arrow_type_from_str, extract_class_name
|
|
18
18
|
from .core import Serializer, buffered
|
|
19
19
|
|
|
20
20
|
try:
|
|
@@ -30,30 +30,54 @@ except ImportError: # pragma: no cover
|
|
|
30
30
|
|
|
31
31
|
_TYPE_CHAR_ARROW_ARRAY = "A"
|
|
32
32
|
_TYPE_CHAR_ARROW_CHUNKED_ARRAY = "C"
|
|
33
|
+
_TYPE_CHAR_ARROW_REDUCED = "R"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ArrowDataTypeSerializer(Serializer):
|
|
37
|
+
def serial(self, obj: pa.DataType, context):
|
|
38
|
+
return [str(obj)], [], True
|
|
39
|
+
|
|
40
|
+
def deserial(self, serialized, context, subs):
|
|
41
|
+
return arrow_type_from_str(serialized[0])
|
|
33
42
|
|
|
34
43
|
|
|
35
44
|
class ArrowArraySerializer(Serializer):
|
|
36
45
|
@buffered
|
|
37
46
|
def serial(self, obj: PA_ARRAY_TYPES, context: Dict):
|
|
38
|
-
|
|
39
|
-
if isinstance(obj, pa.Array):
|
|
40
|
-
array_type = _TYPE_CHAR_ARROW_ARRAY
|
|
41
|
-
buffers = obj.buffers()
|
|
42
|
-
sizes = len(obj)
|
|
43
|
-
elif isinstance(obj, pa.ChunkedArray):
|
|
44
|
-
array_type = _TYPE_CHAR_ARROW_CHUNKED_ARRAY
|
|
45
|
-
buffers = [c.buffers() for c in obj.chunks]
|
|
46
|
-
sizes = [len(c) for c in obj.chunks]
|
|
47
|
-
else: # pragma: no cover
|
|
47
|
+
if not isinstance(obj, (pa.Array, pa.ChunkedArray)):
|
|
48
48
|
raise NotImplementedError(f"Array type {type(obj)} not supported")
|
|
49
|
-
|
|
49
|
+
|
|
50
|
+
if obj.type.num_fields == 0:
|
|
51
|
+
# use legacy serialization in case arrow changes deserializer method
|
|
52
|
+
data_type = str(obj.type)
|
|
53
|
+
if isinstance(obj, pa.Array):
|
|
54
|
+
array_type = _TYPE_CHAR_ARROW_ARRAY
|
|
55
|
+
buffers = obj.buffers()
|
|
56
|
+
sizes = len(obj)
|
|
57
|
+
else: # ChunkedArray
|
|
58
|
+
array_type = _TYPE_CHAR_ARROW_CHUNKED_ARRAY
|
|
59
|
+
buffers = [c.buffers() for c in obj.chunks]
|
|
60
|
+
sizes = [len(c) for c in obj.chunks]
|
|
61
|
+
return [array_type, data_type, sizes], buffers, False
|
|
62
|
+
|
|
63
|
+
meth, extracted = obj.__reduce__()
|
|
64
|
+
meth_name = extract_class_name(meth)
|
|
65
|
+
return [_TYPE_CHAR_ARROW_REDUCED, meth_name, None], list(extracted), False
|
|
50
66
|
|
|
51
67
|
def deserial(self, serialized: List, context: Dict, subs: List):
|
|
52
68
|
array_type, data_type_str, sizes = serialized[:3]
|
|
53
|
-
|
|
69
|
+
if array_type == _TYPE_CHAR_ARROW_REDUCED:
|
|
70
|
+
if data_type_str == "pyarrow.lib#chunked_array":
|
|
71
|
+
return pa.chunked_array(*subs)
|
|
72
|
+
elif data_type_str == "pyarrow.lib#_restore_array":
|
|
73
|
+
return pa.lib._restore_array(*subs)
|
|
74
|
+
else:
|
|
75
|
+
raise NotImplementedError(f"Unknown array type: {array_type}")
|
|
54
76
|
if array_type == _TYPE_CHAR_ARROW_ARRAY:
|
|
77
|
+
data_type = arrow_type_from_str(data_type_str)
|
|
55
78
|
return pa.Array.from_buffers(data_type, sizes, subs)
|
|
56
79
|
elif array_type == _TYPE_CHAR_ARROW_CHUNKED_ARRAY:
|
|
80
|
+
data_type = arrow_type_from_str(data_type_str)
|
|
57
81
|
chunks = [
|
|
58
82
|
pa.Array.from_buffers(data_type, size, bufs)
|
|
59
83
|
for size, bufs in zip(sizes, subs)
|
|
@@ -89,6 +113,7 @@ class ArrowBatchSerializer(Serializer):
|
|
|
89
113
|
|
|
90
114
|
|
|
91
115
|
if pa is not None: # pragma: no branch
|
|
116
|
+
ArrowDataTypeSerializer.register(pa.DataType)
|
|
92
117
|
ArrowArraySerializer.register(pa.Array)
|
|
93
118
|
ArrowArraySerializer.register(pa.ChunkedArray)
|
|
94
119
|
ArrowBatchSerializer.register(pa.Table)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Dict
|
|
16
|
+
|
|
17
|
+
from ..lib.dtypes_extension.blob import AbstractExternalBlob, SolidBlob
|
|
18
|
+
from .core import Serializer
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ExternalBlobSerializer(Serializer):
|
|
22
|
+
def serial(self, obj: AbstractExternalBlob, context: Dict):
|
|
23
|
+
_, vals = obj.__reduce__()
|
|
24
|
+
return [type(obj).__name__], list(vals), False
|
|
25
|
+
|
|
26
|
+
def deserial(self, serialized, context, subs):
|
|
27
|
+
cls_name = serialized[0]
|
|
28
|
+
cls = AbstractExternalBlob.get_cls_by_name(cls_name)
|
|
29
|
+
return cls(*subs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
ExternalBlobSerializer.register(SolidBlob)
|
|
Binary file
|
maxframe/serialization/core.pyx
CHANGED
|
@@ -52,6 +52,11 @@ try:
|
|
|
52
52
|
except (ImportError, AttributeError):
|
|
53
53
|
pass
|
|
54
54
|
|
|
55
|
+
try:
|
|
56
|
+
import pyarrow as pa
|
|
57
|
+
except ImportError:
|
|
58
|
+
pa = None
|
|
59
|
+
|
|
55
60
|
try:
|
|
56
61
|
import pytz
|
|
57
62
|
from pytz import BaseTzInfo as PyTZ_BaseTzInfo
|
|
@@ -95,6 +100,8 @@ cdef:
|
|
|
95
100
|
int SLICE_SERIALIZER = 13
|
|
96
101
|
int REGEX_SERIALIZER = 14
|
|
97
102
|
int NO_DEFAULT_SERIALIZER = 15
|
|
103
|
+
int ARROW_BUFFER_SERIALIZER = 16
|
|
104
|
+
int RANGE_SERIALIZER = 17
|
|
98
105
|
int PLACEHOLDER_SERIALIZER = 4096
|
|
99
106
|
|
|
100
107
|
|
|
@@ -874,12 +881,28 @@ cdef class SliceSerializer(Serializer):
|
|
|
874
881
|
serializer_id = SLICE_SERIALIZER
|
|
875
882
|
|
|
876
883
|
cpdef serial(self, object obj: slice, dict context):
|
|
877
|
-
|
|
884
|
+
cdef list elems = [obj.start, obj.stop, obj.step]
|
|
885
|
+
for x in elems:
|
|
886
|
+
if x is not None and not isinstance(x, int):
|
|
887
|
+
return [], elems, False
|
|
888
|
+
return elems, [], True
|
|
878
889
|
|
|
879
890
|
cpdef deserial(self, list serialized, dict context, list subs):
|
|
891
|
+
if len(serialized) == 0:
|
|
892
|
+
return slice(subs[0], subs[1], subs[2])
|
|
880
893
|
return slice(*serialized[:3])
|
|
881
894
|
|
|
882
895
|
|
|
896
|
+
cdef class RangeSerializer(Serializer):
|
|
897
|
+
serializer_id = RANGE_SERIALIZER
|
|
898
|
+
|
|
899
|
+
cpdef serial(self, object obj: range, dict context):
|
|
900
|
+
return [obj.start, obj.stop, obj.step], [], True
|
|
901
|
+
|
|
902
|
+
cpdef deserial(self, list serialized, dict context, list subs):
|
|
903
|
+
return range(*serialized[:3])
|
|
904
|
+
|
|
905
|
+
|
|
883
906
|
cdef class RegexSerializer(Serializer):
|
|
884
907
|
serializer_id = REGEX_SERIALIZER
|
|
885
908
|
|
|
@@ -906,6 +929,18 @@ cdef class NoDefaultSerializer(Serializer):
|
|
|
906
929
|
return no_default
|
|
907
930
|
|
|
908
931
|
|
|
932
|
+
cdef class ArrowBufferSerializer(Serializer):
|
|
933
|
+
serializer_id = ARROW_BUFFER_SERIALIZER
|
|
934
|
+
|
|
935
|
+
cpdef serial(self, object obj, dict context):
|
|
936
|
+
return [], [obj], True
|
|
937
|
+
|
|
938
|
+
cpdef deserial(self, list obj, dict context, list subs):
|
|
939
|
+
if not isinstance(subs[0], pa.Buffer):
|
|
940
|
+
return pa.py_buffer(subs[0])
|
|
941
|
+
return subs[0]
|
|
942
|
+
|
|
943
|
+
|
|
909
944
|
cdef class Placeholder:
|
|
910
945
|
"""
|
|
911
946
|
Placeholder object to reduce duplicated serialization
|
|
@@ -959,8 +994,11 @@ DtypeSerializer.register(np.dtype)
|
|
|
959
994
|
DtypeSerializer.register(ExtensionDtype)
|
|
960
995
|
ComplexSerializer.register(complex)
|
|
961
996
|
SliceSerializer.register(slice)
|
|
997
|
+
RangeSerializer.register(range)
|
|
962
998
|
RegexSerializer.register(re.Pattern)
|
|
963
999
|
NoDefaultSerializer.register(NoDefault)
|
|
1000
|
+
if pa is not None:
|
|
1001
|
+
ArrowBufferSerializer.register(pa.Buffer)
|
|
964
1002
|
PlaceholderSerializer.register(Placeholder)
|
|
965
1003
|
|
|
966
1004
|
|
|
@@ -18,6 +18,7 @@ from typing import Dict, List
|
|
|
18
18
|
|
|
19
19
|
from ..errors import MaxFrameError
|
|
20
20
|
from ..lib import wrapped_pickle as pickle
|
|
21
|
+
from ..utils import combine_error_message_and_traceback
|
|
21
22
|
from .core import Serializer, buffered, pickle_buffers, unpickle_buffers
|
|
22
23
|
|
|
23
24
|
logger = logging.getLogger(__name__)
|
|
@@ -53,10 +54,7 @@ class RemoteException(MaxFrameError):
|
|
|
53
54
|
return unpickle_buffers(self.buffers) if self.buffers else self
|
|
54
55
|
|
|
55
56
|
def __str__(self):
|
|
56
|
-
|
|
57
|
-
for msg, tb in zip(self.messages, self.tracebacks):
|
|
58
|
-
tbs.append("".join([msg + "\n"] + tb))
|
|
59
|
-
return "\nCaused by:\n".join(tbs)
|
|
57
|
+
return combine_error_message_and_traceback(self.messages, self.tracebacks)
|
|
60
58
|
|
|
61
59
|
|
|
62
60
|
class ExceptionSerializer(Serializer):
|
maxframe/serialization/numpy.py
CHANGED
|
@@ -95,5 +95,16 @@ class NDArraySerializer(Serializer):
|
|
|
95
95
|
return val
|
|
96
96
|
|
|
97
97
|
|
|
98
|
+
class RandomStateSerializer(Serializer):
|
|
99
|
+
def serial(self, obj: np.random.RandomState, context: Dict):
|
|
100
|
+
return [], [obj.get_state()], False
|
|
101
|
+
|
|
102
|
+
def deserial(self, serialized, context: Dict, subs: List):
|
|
103
|
+
rs = np.random.RandomState()
|
|
104
|
+
rs.set_state(subs[0])
|
|
105
|
+
return rs
|
|
106
|
+
|
|
107
|
+
|
|
98
108
|
NDArraySerializer.register(np.generic)
|
|
99
109
|
NDArraySerializer.register(np.ndarray)
|
|
110
|
+
RandomStateSerializer.register(np.random.RandomState)
|