maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.2.0__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
maxframe/config/config.py
CHANGED
|
@@ -18,7 +18,7 @@ import os
|
|
|
18
18
|
import traceback
|
|
19
19
|
import warnings
|
|
20
20
|
from copy import deepcopy
|
|
21
|
-
from typing import Any, Dict, Optional, Union
|
|
21
|
+
from typing import Any, Callable, Dict, Optional, Union
|
|
22
22
|
|
|
23
23
|
from odps.lib import tzlocal
|
|
24
24
|
|
|
@@ -33,13 +33,17 @@ from ..utils import get_python_tag
|
|
|
33
33
|
from .validators import (
|
|
34
34
|
ValidatorType,
|
|
35
35
|
all_validator,
|
|
36
|
+
dtype_backend_validator,
|
|
36
37
|
is_all_dict_keys_in,
|
|
37
38
|
is_bool,
|
|
38
39
|
is_dict,
|
|
39
40
|
is_float,
|
|
41
|
+
is_great_than,
|
|
40
42
|
is_in,
|
|
41
43
|
is_integer,
|
|
44
|
+
is_less_than_or_equal_to,
|
|
42
45
|
is_non_negative_integer,
|
|
46
|
+
is_notnull,
|
|
43
47
|
is_null,
|
|
44
48
|
is_numeric,
|
|
45
49
|
is_string,
|
|
@@ -59,6 +63,7 @@ _DEFAULT_TASK_START_TIMEOUT = 60
|
|
|
59
63
|
_DEFAULT_TASK_RESTART_TIMEOUT = 300
|
|
60
64
|
_DEFAULT_LOGVIEW_HOURS = 24 * 30
|
|
61
65
|
_DEFAULT_FUNCTION_RUNNING_OPTIONS = {"cpu": 1, "memory": "4GiB", "gpu": 0}
|
|
66
|
+
_DEFAULT_MAX_MEMORY_CPU_RATIO = 12
|
|
62
67
|
|
|
63
68
|
|
|
64
69
|
class OptionError(Exception):
|
|
@@ -66,11 +71,19 @@ class OptionError(Exception):
|
|
|
66
71
|
|
|
67
72
|
|
|
68
73
|
class Redirection:
|
|
69
|
-
def __init__(
    self,
    item: str,
    warn: Optional[str] = None,
    getter: Optional[Callable] = None,
    setter: Optional[Callable] = None,
):
    """
    Describe an option that forwards to another option.

    Parameters
    ----------
    item : str
        Dotted path of the target option; it is split on "." into
        path segments.
    warn : str, optional
        Warning text stored for this redirection.
    getter : callable, optional
        Stored as-is. When callable, it is applied to the value read
        from the target option before that value is returned.
    setter : callable, optional
        Stored as-is. When callable, it is applied to a value before
        it is written to the target option.
    """
    self._items = item.split(".")
    self._warn = warn
    self._warned = True
    # The owning attribute dict is attached later through bind().
    self._parent = None
    self._getter = getter
    self._setter = setter
|
|
74
87
|
|
|
75
88
|
def bind(self, attr_dict):
|
|
76
89
|
self._parent = attr_dict
|
|
@@ -88,6 +101,8 @@ class Redirection:
|
|
|
88
101
|
conf = self._parent.root
|
|
89
102
|
for it in self._items:
|
|
90
103
|
conf = getattr(conf, it)
|
|
104
|
+
if callable(self._getter):
|
|
105
|
+
conf = self._getter(conf)
|
|
91
106
|
return conf
|
|
92
107
|
|
|
93
108
|
def setvalue(self, value: str, silent: bool = False) -> None:
|
|
@@ -97,6 +112,8 @@ class Redirection:
|
|
|
97
112
|
conf = self._parent.root
|
|
98
113
|
for it in self._items[:-1]:
|
|
99
114
|
conf = getattr(conf, it)
|
|
115
|
+
if callable(self._setter):
|
|
116
|
+
value = self._setter(value)
|
|
100
117
|
setattr(conf, self._items[-1], value)
|
|
101
118
|
|
|
102
119
|
|
|
@@ -251,9 +268,19 @@ class Config:
|
|
|
251
268
|
self._remote_options.add(option)
|
|
252
269
|
|
|
253
270
|
def redirect_option(
    self,
    option: str,
    target: str,
    warn: str = _DEFAULT_REDIRECT_WARN,
    getter: Optional[Callable] = None,
    setter: Optional[Callable] = None,
) -> None:
    """
    Register ``option`` as a redirection to ``target``.

    Parameters
    ----------
    option : str
        Name of the option being redirected.
    target : str
        Dotted name of the option that actually stores the value.
    warn : str
        Warning template; it is formatted with ``source=option`` and
        ``target=target`` before being handed to the redirection.
    getter : callable, optional
        Forwarded to the redirection, which applies it to values read
        from ``target``.
    setter : callable, optional
        Forwarded to the redirection, which applies it to values before
        they are written to ``target``.
    """
    redir = Redirection(
        target,
        warn=warn.format(source=option, target=target),
        getter=getter,
        setter=setter,
    )
    self.register_option(option, redir)
|
|
258
285
|
|
|
259
286
|
def unregister_option(self, option: str) -> None:
|
|
@@ -315,10 +342,18 @@ class Config:
|
|
|
315
342
|
|
|
316
343
|
|
|
317
344
|
def _get_legal_local_tz_name() -> Optional[str]:
    """
    Return the local timezone name when it is a known one, else None.

    Sometimes we may get illegal tz name from tzlocal.get_localzone().
    In some environments we can't get any tz name.
    """
    try:
        candidate = str(tzlocal.get_localzone())
        # Only hand back names that passed the availability check, so a
        # failure during the check can never leak an unvalidated name.
        if candidate in available_timezones():
            return candidate
    except Exception:
        # Best-effort lookup: any failure means "no usable tz name".
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        pass
    return None
|
|
323
358
|
|
|
324
359
|
|
|
@@ -400,6 +435,9 @@ default_options.register_option(
|
|
|
400
435
|
default_options.register_option(
|
|
401
436
|
"session.quota_name", None, validator=is_null | is_string, remote=True
|
|
402
437
|
)
|
|
438
|
+
default_options.register_option(
|
|
439
|
+
"session.region_id", None, validator=is_null | is_string, remote=True
|
|
440
|
+
)
|
|
403
441
|
default_options.register_option(
|
|
404
442
|
"session.enable_schema", None, validator=is_null | is_bool, remote=True
|
|
405
443
|
)
|
|
@@ -446,7 +484,15 @@ default_options.register_option(
|
|
|
446
484
|
)
|
|
447
485
|
|
|
448
486
|
default_options.register_option("warn_duplicated_execution", False, validator=is_bool)
|
|
449
|
-
default_options.register_option(
|
|
487
|
+
default_options.register_option(
|
|
488
|
+
"dataframe.dtype_backend", "numpy", validator=dtype_backend_validator
|
|
489
|
+
)
|
|
490
|
+
default_options.redirect_option(
|
|
491
|
+
"dataframe.use_arrow_dtype",
|
|
492
|
+
"dataframe.dtype_backend",
|
|
493
|
+
getter=lambda x: x == "pyarrow",
|
|
494
|
+
setter=lambda x: "pyarrow" if x else "numpy",
|
|
495
|
+
)
|
|
450
496
|
default_options.register_option(
|
|
451
497
|
"dataframe.arrow_array.pandas_only", True, validator=is_bool
|
|
452
498
|
)
|
|
@@ -464,6 +510,15 @@ default_options.register_option(
|
|
|
464
510
|
validator=is_dict | is_all_dict_keys_in("cpu", "memory", "gpu"),
|
|
465
511
|
)
|
|
466
512
|
|
|
513
|
+
default_options.register_option(
|
|
514
|
+
"function.allowed_max_memory_cpu_ratio",
|
|
515
|
+
_DEFAULT_MAX_MEMORY_CPU_RATIO,
|
|
516
|
+
validator=is_integer
|
|
517
|
+
& is_notnull
|
|
518
|
+
& is_less_than_or_equal_to(_DEFAULT_MAX_MEMORY_CPU_RATIO)
|
|
519
|
+
& is_great_than(0),
|
|
520
|
+
)
|
|
521
|
+
|
|
467
522
|
################
|
|
468
523
|
# DPE Settings #
|
|
469
524
|
################
|
|
@@ -505,9 +560,15 @@ default_options.register_option(
|
|
|
505
560
|
assume_finite = os.environ.get("SKLEARN_ASSUME_FINITE")
|
|
506
561
|
if assume_finite is not None:
|
|
507
562
|
assume_finite = bool(assume_finite)
|
|
563
|
+
working_memory = os.environ.get("SKLEARN_WORKING_MEMORY")
|
|
564
|
+
if working_memory is not None:
|
|
565
|
+
working_memory = int(working_memory)
|
|
508
566
|
default_options.register_option(
|
|
509
567
|
"learn.assume_finite", assume_finite, validator=is_null | is_bool
|
|
510
568
|
)
|
|
569
|
+
default_options.register_option(
|
|
570
|
+
"learn.working_memory", working_memory, validator=is_null | is_integer
|
|
571
|
+
)
|
|
511
572
|
|
|
512
573
|
_options_ctx_var = contextvars.ContextVar("_options_ctx_var")
|
|
513
574
|
|
|
@@ -14,7 +14,11 @@
|
|
|
14
14
|
|
|
15
15
|
import pytest
|
|
16
16
|
|
|
17
|
-
from ..validators import
|
|
17
|
+
from ..validators import (
|
|
18
|
+
is_less_than_or_equal_to,
|
|
19
|
+
is_positive_integer,
|
|
20
|
+
simple_yaml_str_validator,
|
|
21
|
+
)
|
|
18
22
|
|
|
19
23
|
|
|
20
24
|
@pytest.mark.parametrize("value", ["a", "http://127.0.0.1:1234", "a-b#", "ab_", "123"])
|
|
@@ -32,3 +36,11 @@ def test_simple_yaml_str_validator_invalid(value):
|
|
|
32
36
|
)
|
|
33
37
|
def test_is_positive_integer_validator(value, valid):
|
|
34
38
|
assert is_positive_integer(value) is valid
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@pytest.mark.parametrize(
    "value,upper_bound,valid",
    [
        (3, 5, True),
        (5, 5, True),
        (6, 5, False),
        (None, None, False),
        (None, 5, False),
    ],
)
def test_is_less_than_or_equal_to_validator(value, upper_bound, valid):
    """Check values below, at and above the bound, plus None inputs."""
    checker = is_less_than_or_equal_to(upper_bound)
    assert checker(value) is valid
|
maxframe/config/validators.py
CHANGED
|
@@ -12,9 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import os
|
|
15
16
|
from typing import Callable
|
|
16
17
|
from urllib.parse import urlparse
|
|
17
18
|
|
|
19
|
+
from .. import env
|
|
20
|
+
from ..utils import str_to_bool
|
|
21
|
+
|
|
18
22
|
ValidatorType = Callable[..., bool]
|
|
19
23
|
|
|
20
24
|
|
|
@@ -43,13 +47,22 @@ class Validator:
|
|
|
43
47
|
def __or__(self, other):
|
|
44
48
|
return OrValidator(self, other)
|
|
45
49
|
|
|
50
|
+
def __and__(self, other):
|
|
51
|
+
return AndValidator(self, other)
|
|
52
|
+
|
|
46
53
|
|
|
47
54
|
class OrValidator(Validator):
|
|
48
55
|
def __init__(self, lhs: Validator, rhs: Validator):
|
|
49
56
|
super().__init__(lambda x: lhs(x) or rhs(x))
|
|
50
57
|
|
|
51
58
|
|
|
59
|
+
class AndValidator(Validator):
    """Validator combining two validators with a short-circuit ``and``."""

    def __init__(self, lhs: Validator, rhs: Validator):
        def _both(value):
            # rhs is only consulted when lhs yields a truthy result.
            return lhs(value) and rhs(value)

        super().__init__(_both)
|
|
62
|
+
|
|
63
|
+
|
|
52
64
|
is_null = Validator(lambda x: x is None)
|
|
65
|
+
is_notnull = Validator(lambda x: x is not None)
|
|
53
66
|
is_bool = Validator(lambda x: isinstance(x, bool))
|
|
54
67
|
is_float = Validator(lambda x: isinstance(x, float))
|
|
55
68
|
is_integer = Validator(lambda x: isinstance(x, int))
|
|
@@ -69,6 +82,30 @@ def is_all_dict_keys_in(*keys):
|
|
|
69
82
|
return Validator(lambda x: x in keys_set)
|
|
70
83
|
|
|
71
84
|
|
|
85
|
+
def is_less_than(upper_bound):
    """
    Build a validator accepting values strictly below ``upper_bound``.

    The validator rejects the value when either the value or the bound
    is None.
    """

    def _check(value):
        return is_notnull(value) and is_notnull(upper_bound) and value < upper_bound

    return Validator(_check)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def is_less_than_or_equal_to(upper_bound):
    """
    Build a validator accepting values not greater than ``upper_bound``.

    The validator rejects the value when either the value or the bound
    is None.
    """

    def _check(value):
        return is_notnull(value) and is_notnull(upper_bound) and value <= upper_bound

    return Validator(_check)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def is_great_than(lower_bound):
    """
    Build a validator accepting values strictly above ``lower_bound``.

    The validator rejects the value when either the value or the bound
    is None.
    """

    def _check(value):
        return is_notnull(value) and is_notnull(lower_bound) and value > lower_bound

    return Validator(_check)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def is_great_than_or_equal_to(lower_bound):
    """
    Build a validator accepting values not smaller than ``lower_bound``.

    The validator rejects the value when either the value or the bound
    is None.
    """

    def _check(value):
        return is_notnull(value) and is_notnull(lower_bound) and value >= lower_bound

    return Validator(_check)
|
|
107
|
+
|
|
108
|
+
|
|
72
109
|
def _is_valid_cache_path(path: str) -> bool:
|
|
73
110
|
"""
|
|
74
111
|
path should look like oss://oss_endpoint/oss_bucket/path
|
|
@@ -91,3 +128,15 @@ _invalid_char_in_yaml_str = {'"', "'", "\n", "\\"}
|
|
|
91
128
|
def simple_yaml_str_validator(name: str) -> bool:
|
|
92
129
|
chars = set(name)
|
|
93
130
|
return len(_invalid_char_in_yaml_str & chars) == 0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def dtype_backend_validator(name: str) -> bool:
    """
    Validate a dtype backend name.

    None, "numpy" and "pyarrow" are accepted; "arrow" is treated as an
    alias of "pyarrow". Any other value is rejected by returning False.

    Raises
    ------
    ValueError
        When the pyarrow backend is requested, the pandas version check
        is enabled, and the pandas release is older than 1.5.
    """
    from ..utils import pd_release_version

    # The pandas version check is skipped when the MAXFRAME_INSIDE_TASK
    # environment value converts to True.
    inside_task = str_to_bool(os.getenv(env.MAXFRAME_INSIDE_TASK))
    backend = "pyarrow" if name == "arrow" else name

    if backend not in (None, "numpy", "pyarrow"):
        return False

    if backend == "pyarrow" and not inside_task:
        if pd_release_version[:2] < (1, 5):
            raise ValueError("Need pandas>=1.5 to use pyarrow backend")
    return True
|
maxframe/conftest.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import contextlib
|
|
15
16
|
import faulthandler
|
|
16
17
|
import os
|
|
17
18
|
from configparser import ConfigParser, NoOptionError, NoSectionError
|
|
@@ -87,17 +88,19 @@ def _get_bearer_token_env(test_config: ConfigParser, section_name: str) -> ODPS:
|
|
|
87
88
|
)
|
|
88
89
|
|
|
89
90
|
|
|
90
|
-
@
|
|
91
|
-
def
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
91
|
+
@contextlib.contextmanager
|
|
92
|
+
def _enter_odps_envs(entry, drop_temp_tables=True):
|
|
93
|
+
stored_envs = {}
|
|
94
|
+
for env_name in (
|
|
95
|
+
"ODPS_BEARER_TOKEN",
|
|
96
|
+
"ODPS_PROJECT_NAME",
|
|
97
|
+
"ODPS_ENDPOINT",
|
|
98
|
+
"RAY_ISOLATION_UT_ENV",
|
|
99
|
+
"ODPS_TUNNEL_ENDPOINT",
|
|
100
|
+
):
|
|
101
|
+
if env_name in os.environ:
|
|
102
|
+
stored_envs[env_name] = os.environ[env_name]
|
|
103
|
+
del os.environ[env_name]
|
|
101
104
|
|
|
102
105
|
os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
|
|
103
106
|
os.environ["ODPS_PROJECT_NAME"] = entry.project
|
|
@@ -115,13 +118,37 @@ def odps_envs(test_config):
|
|
|
115
118
|
os.environ.pop("ODPS_TUNNEL_ENDPOINT", None)
|
|
116
119
|
os.environ.pop("RAY_ISOLATION_UT_ENV", None)
|
|
117
120
|
|
|
118
|
-
|
|
121
|
+
for env_name, val in stored_envs.items():
|
|
122
|
+
os.environ[env_name] = val
|
|
123
|
+
|
|
124
|
+
if drop_temp_tables:
|
|
125
|
+
from .tests.utils import _test_tables_to_drop
|
|
126
|
+
|
|
127
|
+
for table_name in _test_tables_to_drop:
|
|
128
|
+
try:
|
|
129
|
+
entry.delete_table(table_name, wait=False)
|
|
130
|
+
except:
|
|
131
|
+
pass
|
|
132
|
+
|
|
119
133
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
134
|
+
@pytest.fixture
def odps_with_schema(test_config, request):
    """
    Yield the entry built from the ``odps_with_schema`` config section.

    The test is skipped when that section is missing. Temporary tables
    are not dropped when the environment is left.
    """
    try:
        entry = _get_bearer_token_env(test_config, "odps_with_schema")
    except NoSectionError:
        pytest.skip("Need to specify odps_with_schema section in test.conf")
        # pytest.skip() raises, so this re-raise is only a safeguard.
        raise
    else:
        with _enter_odps_envs(entry, drop_temp_tables=False):
            yield entry
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@pytest.fixture(scope="session", autouse=True)
|
|
147
|
+
def odps_envs(test_config):
|
|
148
|
+
entry = _get_bearer_token_env(test_config, "odps")
|
|
149
|
+
|
|
150
|
+
with _enter_odps_envs(entry):
|
|
151
|
+
yield
|
|
125
152
|
|
|
126
153
|
|
|
127
154
|
@pytest.fixture(scope="session")
|
maxframe/core/accessor.py
CHANGED
|
@@ -20,13 +20,13 @@ class BaseMaxFrameAccessor:
|
|
|
20
20
|
self.obj = obj
|
|
21
21
|
|
|
22
22
|
@classmethod
|
|
23
|
-
def _register(cls, name, func):
|
|
23
|
+
def _register(cls, name, func, is_property=False):
|
|
24
24
|
@functools.wraps(func)
|
|
25
25
|
def wrapped(self, *args, **kw):
|
|
26
26
|
return func(self.obj, *args, **kw)
|
|
27
27
|
|
|
28
28
|
wrapped.__name__ = name
|
|
29
|
-
setattr(cls, name, wrapped)
|
|
29
|
+
setattr(cls, name, wrapped if not is_property else property(wrapped))
|
|
30
30
|
if hasattr(cls, "_api_count"): # pragma: no branch
|
|
31
31
|
cls._api_count += 1
|
|
32
32
|
|
maxframe/core/entity/core.py
CHANGED
|
Binary file
|
maxframe/core/graph/entity.py
CHANGED
|
@@ -26,7 +26,6 @@ from ...serialization.serializables import (
|
|
|
26
26
|
)
|
|
27
27
|
from ...serialization.serializables.core import SerializableSerializer
|
|
28
28
|
from ...utils import extract_class_name, tokenize
|
|
29
|
-
from ..operator import Fetch
|
|
30
29
|
from .core import DAG
|
|
31
30
|
|
|
32
31
|
|
|
@@ -123,7 +122,7 @@ class SerializableGraph(Serializable):
|
|
|
123
122
|
def from_graph(cls, graph: EntityGraph) -> "SerializableGraph":
|
|
124
123
|
return SerializableGraph(
|
|
125
124
|
_is_chunk=False,
|
|
126
|
-
_fetch_nodes=[chunk for chunk in graph if
|
|
125
|
+
_fetch_nodes=[chunk for chunk in graph if chunk.is_fetch()],
|
|
127
126
|
_nodes=graph._nodes,
|
|
128
127
|
_predecessors=graph._predecessors,
|
|
129
128
|
_successors=graph._successors,
|
maxframe/core/operator/base.py
CHANGED
|
@@ -40,6 +40,7 @@ from ..entity.core import ENTITY_TYPE, Entity, EntityData
|
|
|
40
40
|
from ..entity.output_types import OutputType
|
|
41
41
|
from ..entity.tileables import Tileable
|
|
42
42
|
from ..mode import enter_mode
|
|
43
|
+
from .utils import rewrite_stop_iteration
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
class OperatorMetaclass(SerializableMeta):
|
|
@@ -91,6 +92,10 @@ class SchedulingHint(Serializable):
|
|
|
91
92
|
# id of gang scheduling for machine learning trainings
|
|
92
93
|
gang_scheduling_id = StringField("gang_scheduling_id", default=None)
|
|
93
94
|
|
|
95
|
+
def __init__(self, *, pure_depends=None, **kwargs):
|
|
96
|
+
kwargs["_pure_depends"] = pure_depends or kwargs.get("_pure_depends") or []
|
|
97
|
+
super().__init__(**kwargs)
|
|
98
|
+
|
|
94
99
|
@classproperty
|
|
95
100
|
@lru_cache(1)
|
|
96
101
|
def all_hint_names(cls):
|
|
@@ -306,7 +311,8 @@ class Operator(Base, OperatorLogicKeyGeneratorMixin, metaclass=OperatorMetaclass
|
|
|
306
311
|
The replaced input object.
|
|
307
312
|
"""
|
|
308
313
|
self.inputs[index] = replaced_input
|
|
309
|
-
|
|
314
|
+
with rewrite_stop_iteration():
|
|
315
|
+
self._set_inputs(self, self.inputs)
|
|
310
316
|
|
|
311
317
|
@property
|
|
312
318
|
def inputs(self) -> List[Union[ENTITY_TYPE]]:
|
|
@@ -317,7 +323,8 @@ class Operator(Base, OperatorLogicKeyGeneratorMixin, metaclass=OperatorMetaclass
|
|
|
317
323
|
|
|
318
324
|
@inputs.setter
|
|
319
325
|
def inputs(self, vals):
|
|
320
|
-
|
|
326
|
+
with rewrite_stop_iteration():
|
|
327
|
+
self._set_inputs(self, vals)
|
|
321
328
|
|
|
322
329
|
@property
|
|
323
330
|
def output_limit(self) -> int:
|
maxframe/core/operator/core.py
CHANGED
|
@@ -18,6 +18,9 @@ from collections import defaultdict
|
|
|
18
18
|
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, Union
|
|
19
19
|
|
|
20
20
|
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
|
|
23
|
+
from .utils import rewrite_stop_iteration
|
|
21
24
|
|
|
22
25
|
if TYPE_CHECKING:
|
|
23
26
|
from .. import TileableGraph
|
|
@@ -82,7 +85,7 @@ class TileableOperatorMixin:
|
|
|
82
85
|
nsplits = tuple(nsplits)
|
|
83
86
|
shape = list(shape)
|
|
84
87
|
for idx, (s, sp) in enumerate(zip(shape, nsplits)):
|
|
85
|
-
if not
|
|
88
|
+
if not pd.isna(s):
|
|
86
89
|
continue
|
|
87
90
|
s = sum(sp)
|
|
88
91
|
if not np.isnan(s):
|
|
@@ -118,11 +121,16 @@ class TileableOperatorMixin:
|
|
|
118
121
|
def _new_tileables(
|
|
119
122
|
self, inputs: List[TileableType], kws: List[dict] = None, **kw
|
|
120
123
|
) -> List[TileableType]:
|
|
124
|
+
assert (
|
|
125
|
+
isinstance(inputs, (list, tuple)) or inputs is None
|
|
126
|
+
), f"{inputs} is not a list"
|
|
127
|
+
|
|
121
128
|
output_limit = kw.pop("output_limit", None)
|
|
122
129
|
if output_limit is None:
|
|
123
130
|
output_limit = getattr(self, "output_limit")
|
|
124
131
|
|
|
125
|
-
|
|
132
|
+
with rewrite_stop_iteration():
|
|
133
|
+
self._set_inputs(self, inputs)
|
|
126
134
|
if self.gpu is None:
|
|
127
135
|
self.gpu = self._check_if_gpu(self._inputs)
|
|
128
136
|
if getattr(self, "_key", None) is None:
|
maxframe/core/operator/utils.py
CHANGED
|
@@ -12,6 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import contextlib
|
|
16
|
+
import sys
|
|
17
|
+
|
|
15
18
|
from ...typing_ import EntityType, TileableType
|
|
16
19
|
from ..entity import TILEABLE_TYPE
|
|
17
20
|
|
|
@@ -53,3 +56,13 @@ def build_fetch(entity: EntityType, **kw) -> EntityType:
|
|
|
53
56
|
|
|
54
57
|
def add_fetch_builder(entity_type, builder_func):
|
|
55
58
|
_type_to_builder.append((entity_type, builder_func))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@contextlib.contextmanager
|
|
62
|
+
def rewrite_stop_iteration():
|
|
63
|
+
try:
|
|
64
|
+
yield
|
|
65
|
+
except StopIteration:
|
|
66
|
+
raise RuntimeError("Unexpected StopIteration happened.").with_traceback(
|
|
67
|
+
sys.exc_info()[2]
|
|
68
|
+
) from None
|
maxframe/dataframe/__init__.py
CHANGED
|
@@ -21,6 +21,7 @@ from . import (
|
|
|
21
21
|
fetch,
|
|
22
22
|
groupby,
|
|
23
23
|
indexing,
|
|
24
|
+
merge,
|
|
24
25
|
misc,
|
|
25
26
|
missing,
|
|
26
27
|
reduction,
|
|
@@ -45,13 +46,13 @@ from .merge import concat, merge
|
|
|
45
46
|
from .misc.cut import cut
|
|
46
47
|
from .misc.eval import maxframe_eval as eval # pylint: disable=redefined-builtin
|
|
47
48
|
from .misc.get_dummies import get_dummies
|
|
48
|
-
from .misc.melt import melt
|
|
49
|
-
from .misc.pivot import pivot
|
|
50
|
-
from .misc.pivot_table import pivot_table
|
|
51
49
|
from .misc.qcut import qcut
|
|
52
50
|
from .misc.to_numeric import to_numeric
|
|
53
51
|
from .missing import isna, isnull, notna, notnull
|
|
54
52
|
from .reduction import CustomReduction, unique
|
|
53
|
+
from .reshape.melt import melt
|
|
54
|
+
from .reshape.pivot import pivot
|
|
55
|
+
from .reshape.pivot_table import pivot_table
|
|
55
56
|
from .tseries.to_datetime import to_datetime
|
|
56
57
|
|
|
57
58
|
try:
|
|
@@ -80,3 +81,9 @@ del (
|
|
|
80
81
|
)
|
|
81
82
|
|
|
82
83
|
from_pandas = read_pandas
|
|
84
|
+
|
|
85
|
+
# isort: off
|
|
86
|
+
from .typing_ import register_pandas_typing_funcs
|
|
87
|
+
|
|
88
|
+
register_pandas_typing_funcs()
|
|
89
|
+
del register_pandas_typing_funcs
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LegacySeriesMethodOperator(DataFrameOperator, DataFrameOperatorMixin):
|
|
23
|
+
_method_name = None
|
|
24
|
+
_method_cls = None
|
|
25
|
+
|
|
26
|
+
def __on_deserialize__(self):
|
|
27
|
+
cls = type(self)
|
|
28
|
+
local_fields = {
|
|
29
|
+
f
|
|
30
|
+
for f, name_hash in cls._FIELD_TO_NAME_HASH.items()
|
|
31
|
+
if name_hash == cls._NAME_HASH
|
|
32
|
+
}
|
|
33
|
+
kw = {
|
|
34
|
+
f: getattr(self, f)
|
|
35
|
+
for f in cls._FIELD_TO_NAME_HASH
|
|
36
|
+
if f not in local_fields and hasattr(self, f)
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
kw["method"] = self._method_name
|
|
40
|
+
kw["method_kwargs"] = {
|
|
41
|
+
f: getattr(self, f) for f in local_fields if hasattr(self, f)
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
logger.warning(f"Using deprecated operator class {cls.__name__}")
|
|
45
|
+
return self._method_cls(**kw)
|
|
@@ -12,7 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .core import SeriesDatetimeMethod
|
|
15
|
+
from .core import SeriesDatetimeMethod, datetime_method_to_handlers
|
|
16
|
+
|
|
17
|
+
methods = set(datetime_method_to_handlers.keys())
|
|
18
|
+
del datetime_method_to_handlers
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
def _install():
|
|
@@ -16,27 +16,16 @@ import pandas as pd
|
|
|
16
16
|
import pyarrow as pa
|
|
17
17
|
|
|
18
18
|
from .... import opcodes
|
|
19
|
-
from ....core.entity.output_types import OutputType
|
|
20
19
|
from ....serialization.serializables.field import AnyField
|
|
21
|
-
from
|
|
20
|
+
from .core import LegacySeriesDictOperator, SeriesDictMethod
|
|
22
21
|
|
|
23
22
|
|
|
24
|
-
class SeriesDictContainsOperator(
|
|
23
|
+
class SeriesDictContainsOperator(LegacySeriesDictOperator):
|
|
24
|
+
# operator class deprecated since v2.3.0
|
|
25
25
|
_op_type_ = opcodes.SERIES_DICT_CONTAINS
|
|
26
|
+
_method_name = "contains"
|
|
26
27
|
query_key = AnyField("query_key", default=None)
|
|
27
28
|
|
|
28
|
-
def __init__(self, **kw):
|
|
29
|
-
super().__init__(_output_types=[OutputType.series], **kw)
|
|
30
|
-
|
|
31
|
-
def __call__(self, series):
|
|
32
|
-
return self.new_series(
|
|
33
|
-
[series],
|
|
34
|
-
shape=series.shape,
|
|
35
|
-
dtype=pd.ArrowDtype(pa.bool_()),
|
|
36
|
-
index_value=series.index_value,
|
|
37
|
-
name=None,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
29
|
|
|
41
30
|
def series_dict_contains(series, query_key):
|
|
42
31
|
"""
|
|
@@ -78,4 +67,6 @@ def series_dict_contains(series, query_key):
|
|
|
78
67
|
3 <NA>
|
|
79
68
|
dtype: bool[pyarrow]
|
|
80
69
|
"""
|
|
81
|
-
return
|
|
70
|
+
return SeriesDictMethod(method="contains", method_kwargs=dict(query_key=query_key))(
|
|
71
|
+
series, dtype=pd.ArrowDtype(pa.bool_()), name=None
|
|
72
|
+
)
|