maxframe 2.0.0b1__cp37-cp37m-win_amd64.whl → 2.2.0__cp37-cp37m-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win_amd64.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/read_odps_query.py +76 -16
- maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +4 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/io/odpsio/tests/test_volumeio.py +4 -15
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +87 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +3 -13
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
maxframe/tensor/utils.py
CHANGED
|
@@ -167,7 +167,7 @@ def normalize_axis_tuple(axis, ndim, argname=None, allow_duplicate=False):
|
|
|
167
167
|
except TypeError:
|
|
168
168
|
pass
|
|
169
169
|
# Going via an iterator directly is slower than via list comprehension.
|
|
170
|
-
axis = tuple(
|
|
170
|
+
axis = tuple(validate_axis(ndim, ax, argname) for ax in axis)
|
|
171
171
|
if not allow_duplicate and len(set(axis)) != len(axis):
|
|
172
172
|
if argname:
|
|
173
173
|
raise ValueError(f"repeated axis in `{argname}` argument")
|
|
@@ -709,8 +709,8 @@ def implement_scipy(scipy_fun_name):
|
|
|
709
709
|
return wrapper
|
|
710
710
|
|
|
711
711
|
|
|
712
|
-
def infer_scipy_dtype(scipy_fun_name):
|
|
712
|
+
def infer_scipy_dtype(scipy_fun_name, **kw):
|
|
713
713
|
scipy_fun = _load_scipy_func(scipy_fun_name)
|
|
714
714
|
if scipy_fun is None:
|
|
715
715
|
return lambda x: x
|
|
716
|
-
return infer_dtype(scipy_fun)
|
|
716
|
+
return infer_dtype(scipy_fun, **kw)
|
maxframe/tests/test_utils.py
CHANGED
|
@@ -32,7 +32,7 @@ import pytest
|
|
|
32
32
|
|
|
33
33
|
from .. import utils
|
|
34
34
|
from ..serialization import PickleContainer
|
|
35
|
-
from ..utils import parse_size_to_megabytes
|
|
35
|
+
from ..utils import parse_size_to_megabytes, validate_and_adjust_resource_ratio
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def test_string_conversion():
|
|
@@ -369,6 +369,12 @@ def test_arrow_type_from_string():
|
|
|
369
369
|
_assert_arrow_type_convert(
|
|
370
370
|
pa.struct([("key", pa.string()), ("value", pa.list_(pa.int64()))])
|
|
371
371
|
)
|
|
372
|
+
_assert_arrow_type_convert(
|
|
373
|
+
pa.struct([("key", pa.string(), False), ("value", pa.list_(pa.int64()))])
|
|
374
|
+
)
|
|
375
|
+
_assert_arrow_type_convert(
|
|
376
|
+
pa.struct([("key", pa.string()), ("value", pa.list_(pa.int64()), False)])
|
|
377
|
+
)
|
|
372
378
|
|
|
373
379
|
|
|
374
380
|
@pytest.mark.parametrize("use_async", [False, True])
|
|
@@ -527,6 +533,7 @@ def test_numeric_inputs_with_default_units(value, default_unit):
|
|
|
527
533
|
"input_string, expected",
|
|
528
534
|
[
|
|
529
535
|
# Basic binary units
|
|
536
|
+
("1B", 1 / BYTES_PER_MIB),
|
|
530
537
|
("1KiB", BYTES_PER_KIB / BYTES_PER_MIB),
|
|
531
538
|
("5miB", 5),
|
|
532
539
|
("2giB", 2 * BYTES_PER_GIB / BYTES_PER_MIB),
|
|
@@ -571,3 +578,38 @@ def test_parse_size_to_mega_bytes_invalid_inputs(invalid_input, default_unit):
|
|
|
571
578
|
"""Test invalid inputs that should raise ValueError"""
|
|
572
579
|
with pytest.raises(ValueError): # Catch ValueError
|
|
573
580
|
parse_size_to_megabytes(invalid_input, default_number_unit=default_unit)
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
@pytest.mark.parametrize(
|
|
584
|
+
"udf_resources, max_memory_cpu_ratio, adjust, expected_resources, expected_adjusted, should_warn",
|
|
585
|
+
[
|
|
586
|
+
({"other": "value"}, 4, False, {"other": "value"}, False, False),
|
|
587
|
+
({"memory": 8}, 4, False, {"memory": 8}, False, False),
|
|
588
|
+
({"cpu": 2}, 4, False, {"cpu": 2}, False, False),
|
|
589
|
+
({"cpu": 2, "memory": 2}, 4, True, {"cpu": 2, "memory": 2}, False, False),
|
|
590
|
+
({"cpu": 2, "memory": 8}, 4, False, {"cpu": 2, "memory": 8}, False, False),
|
|
591
|
+
({"cpu": 1, "memory": 8}, 4, False, {"cpu": 1, "memory": 8}, False, False),
|
|
592
|
+
({"cpu": 2, "memory": 8}, 4, False, {"cpu": 2, "memory": 8}, False, False),
|
|
593
|
+
({"cpu": 2, "memory": 8}, 4, False, {"cpu": 2, "memory": 8}, False, False),
|
|
594
|
+
({"cpu": 1, "memory": 8}, 4, True, {"cpu": 2, "memory": 8}, True, True),
|
|
595
|
+
({"cpu": 1, "memory": 18}, 7, True, {"cpu": 3, "memory": 18}, True, True),
|
|
596
|
+
({"cpu": 1, "memory": 7.5}, 4, True, {"cpu": 2, "memory": 7.5}, True, True),
|
|
597
|
+
],
|
|
598
|
+
)
|
|
599
|
+
def test_validate_and_adjust_resource_ratio(
|
|
600
|
+
udf_resources,
|
|
601
|
+
max_memory_cpu_ratio,
|
|
602
|
+
adjust,
|
|
603
|
+
expected_resources,
|
|
604
|
+
expected_adjusted,
|
|
605
|
+
should_warn,
|
|
606
|
+
recwarn,
|
|
607
|
+
):
|
|
608
|
+
result_resources, was_adjusted = validate_and_adjust_resource_ratio(
|
|
609
|
+
udf_resources, max_memory_cpu_ratio, adjust
|
|
610
|
+
)
|
|
611
|
+
assert result_resources == expected_resources
|
|
612
|
+
assert was_adjusted == expected_adjusted
|
|
613
|
+
if should_warn:
|
|
614
|
+
# check warning
|
|
615
|
+
assert len(recwarn) == 1
|
maxframe/tests/utils.py
CHANGED
|
@@ -191,14 +191,8 @@ def assert_mf_index_dtype(idx_obj, dtype):
|
|
|
191
191
|
|
|
192
192
|
@contextlib.contextmanager
|
|
193
193
|
def create_test_volume(vol_name, oss_config):
|
|
194
|
-
test_vol_name = vol_name
|
|
195
194
|
odps_entry = ODPS.from_environments()
|
|
196
195
|
|
|
197
|
-
try:
|
|
198
|
-
odps_entry.delete_volume(test_vol_name, auto_remove_dir=True, recursive=True)
|
|
199
|
-
except:
|
|
200
|
-
pass
|
|
201
|
-
|
|
202
196
|
oss_test_dir_name = "test_dir_" + vol_name
|
|
203
197
|
if oss_config is None:
|
|
204
198
|
pytest.skip("Need oss and its config to run this test")
|
|
@@ -232,17 +226,13 @@ def create_test_volume(vol_name, oss_config):
|
|
|
232
226
|
rolearn = oss_config.oss_rolearn
|
|
233
227
|
|
|
234
228
|
oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
|
|
235
|
-
odps_entry.create_external_volume(
|
|
236
|
-
test_vol_name, location=test_location, rolearn=rolearn
|
|
237
|
-
)
|
|
229
|
+
odps_entry.create_external_volume(vol_name, location=test_location, rolearn=rolearn)
|
|
238
230
|
|
|
239
231
|
try:
|
|
240
|
-
yield
|
|
232
|
+
yield vol_name
|
|
241
233
|
finally:
|
|
242
234
|
try:
|
|
243
|
-
odps_entry.delete_volume(
|
|
244
|
-
test_vol_name, auto_remove_dir=True, recursive=True
|
|
245
|
-
)
|
|
235
|
+
odps_entry.delete_volume(vol_name, auto_remove_dir=True, recursive=True)
|
|
246
236
|
except:
|
|
247
237
|
pass
|
|
248
238
|
|
maxframe/typing_.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
from numbers import Integral
|
|
16
16
|
from typing import List, TypeVar, Union
|
|
17
17
|
|
|
18
|
+
import numpy as np
|
|
18
19
|
import pandas as pd
|
|
19
20
|
import pyarrow as pa
|
|
20
21
|
|
|
@@ -22,6 +23,7 @@ SlicesType = List[Union[None, Integral, slice]]
|
|
|
22
23
|
|
|
23
24
|
TimeoutType = Union[int, float, None]
|
|
24
25
|
|
|
26
|
+
PandasDType = Union[np.dtype, pd.api.extensions.ExtensionDtype]
|
|
25
27
|
|
|
26
28
|
ArrowTableType = Union[pa.Table, pa.RecordBatch]
|
|
27
29
|
PandasObjectTypes = Union[
|
maxframe/udf.py
CHANGED
|
@@ -17,6 +17,7 @@ from typing import Callable, List, Optional, Union
|
|
|
17
17
|
|
|
18
18
|
from odps.models import Resource
|
|
19
19
|
|
|
20
|
+
from .config.validators import is_positive_integer
|
|
20
21
|
from .serialization import load_member
|
|
21
22
|
from .serialization.serializables import (
|
|
22
23
|
BoolField,
|
|
@@ -106,6 +107,7 @@ class MarkedFunction(Serializable):
|
|
|
106
107
|
expect_resources = DictField(
|
|
107
108
|
"expect_resources", FieldTypes.string, default_factory=dict
|
|
108
109
|
)
|
|
110
|
+
gpu = BoolField("gpu", default=False)
|
|
109
111
|
|
|
110
112
|
def __init__(self, func: Optional[Callable] = None, **kw):
|
|
111
113
|
super().__init__(func=func, **kw)
|
|
@@ -177,6 +179,8 @@ def with_running_options(
|
|
|
177
179
|
engine: Optional[str] = None,
|
|
178
180
|
cpu: Optional[int] = None,
|
|
179
181
|
memory: Optional[Union[str, int]] = None,
|
|
182
|
+
gu: Optional[int] = None,
|
|
183
|
+
gu_quota: Optional[Union[str, List[str]]] = None,
|
|
180
184
|
**kwargs,
|
|
181
185
|
):
|
|
182
186
|
"""
|
|
@@ -191,6 +195,10 @@ def with_running_options(
|
|
|
191
195
|
memory: Optional[Union[str, int]]
|
|
192
196
|
The memory to run the UDF. If it is an int, it is in GB.
|
|
193
197
|
If it is a str, it is in the format of "10GiB", "30MiB", etc.
|
|
198
|
+
gu: Optional[int]
|
|
199
|
+
The GU number to run the UDF.
|
|
200
|
+
gu_quota: Optional[Union[str, List[str]]]
|
|
201
|
+
The GU quota nicknames to run the UDF. The order is the priority of the usage.
|
|
194
202
|
kwargs
|
|
195
203
|
Other running options.
|
|
196
204
|
"""
|
|
@@ -204,20 +212,37 @@ def with_running_options(
|
|
|
204
212
|
raise TypeError("memory must be an int or str")
|
|
205
213
|
if isinstance(memory, int) and memory <= 0:
|
|
206
214
|
raise ValueError("memory must be greater than 0")
|
|
215
|
+
if gu is not None and gu <= 0:
|
|
216
|
+
raise ValueError("gu must be greater than 0")
|
|
217
|
+
if gu is not None and (cpu or memory):
|
|
218
|
+
raise ValueError("gu can't be specified with cpu or memory")
|
|
207
219
|
|
|
208
220
|
if cpu:
|
|
209
221
|
resources["cpu"] = cpu
|
|
210
222
|
if memory:
|
|
211
223
|
resources["memory"] = memory
|
|
212
224
|
|
|
225
|
+
if isinstance(gu_quota, str):
|
|
226
|
+
gu_quota = [gu_quota]
|
|
227
|
+
|
|
228
|
+
resources["gpu"] = gu
|
|
229
|
+
resources["gu_quota"] = gu_quota
|
|
230
|
+
use_gpu = is_positive_integer(gu)
|
|
231
|
+
|
|
213
232
|
def func_wrapper(func):
|
|
214
|
-
if all(v is None for v in (engine, cpu, memory)):
|
|
233
|
+
if all(v is None for v in (engine, cpu, memory, gu, gu_quota)):
|
|
215
234
|
return func
|
|
216
235
|
if isinstance(func, MarkedFunction):
|
|
217
236
|
func.expect_engine = engine
|
|
218
237
|
func.expect_resources = resources
|
|
238
|
+
func.gpu = use_gpu
|
|
219
239
|
return func
|
|
220
|
-
return MarkedFunction(
|
|
240
|
+
return MarkedFunction(
|
|
241
|
+
func,
|
|
242
|
+
expect_engine=engine,
|
|
243
|
+
expect_resources=resources,
|
|
244
|
+
gpu=use_gpu,
|
|
245
|
+
)
|
|
221
246
|
|
|
222
247
|
return func_wrapper
|
|
223
248
|
|
maxframe/utils.py
CHANGED
|
@@ -25,6 +25,7 @@ import inspect
|
|
|
25
25
|
import io
|
|
26
26
|
import itertools
|
|
27
27
|
import logging
|
|
28
|
+
import math
|
|
28
29
|
import numbers
|
|
29
30
|
import os
|
|
30
31
|
import pkgutil
|
|
@@ -32,10 +33,12 @@ import random
|
|
|
32
33
|
import re
|
|
33
34
|
import struct
|
|
34
35
|
import sys
|
|
36
|
+
import tempfile
|
|
35
37
|
import threading
|
|
36
38
|
import time
|
|
37
39
|
import tokenize as pytokenize
|
|
38
40
|
import types
|
|
41
|
+
import warnings
|
|
39
42
|
import weakref
|
|
40
43
|
import zlib
|
|
41
44
|
from collections.abc import Hashable, Mapping
|
|
@@ -45,6 +48,7 @@ from typing import (
|
|
|
45
48
|
Awaitable,
|
|
46
49
|
Callable,
|
|
47
50
|
Dict,
|
|
51
|
+
Generator,
|
|
48
52
|
Iterable,
|
|
49
53
|
List,
|
|
50
54
|
Optional,
|
|
@@ -547,6 +551,20 @@ class ToThreadMixin:
|
|
|
547
551
|
return self.to_thread(func, *args, wait_on_cancel=wait_on_cancel, **kwargs)
|
|
548
552
|
|
|
549
553
|
|
|
554
|
+
class PatchableMixin:
|
|
555
|
+
"""Patch not None field to dest_obj"""
|
|
556
|
+
|
|
557
|
+
__slots__ = ()
|
|
558
|
+
|
|
559
|
+
_patchable_attrs = tuple()
|
|
560
|
+
|
|
561
|
+
def patch_to(self, dest_obj) -> None:
|
|
562
|
+
for attr in self._patchable_attrs:
|
|
563
|
+
val = getattr(self, attr, None)
|
|
564
|
+
if val is not None:
|
|
565
|
+
setattr(dest_obj, attr, val)
|
|
566
|
+
|
|
567
|
+
|
|
550
568
|
def config_odps_default_options():
|
|
551
569
|
from odps import options as odps_options
|
|
552
570
|
|
|
@@ -712,7 +730,10 @@ def sbytes(x: Any) -> bytes:
|
|
|
712
730
|
elif isinstance(x, str):
|
|
713
731
|
return bytes(x, encoding="utf-8")
|
|
714
732
|
else:
|
|
715
|
-
|
|
733
|
+
try:
|
|
734
|
+
return bytes(x)
|
|
735
|
+
except TypeError:
|
|
736
|
+
return bytes(str(x), encoding="utf-8")
|
|
716
737
|
|
|
717
738
|
|
|
718
739
|
def is_full_slice(slc: Any) -> bool:
|
|
@@ -914,7 +935,7 @@ def stringify_path(path: Union[str, os.PathLike]) -> str:
|
|
|
914
935
|
raise TypeError("not a path-like object")
|
|
915
936
|
|
|
916
937
|
|
|
917
|
-
_memory_size_indices = {"": 0, "k": 1, "m": 2, "g": 3, "t": 4}
|
|
938
|
+
_memory_size_indices = {"": 0, "b": 0, "k": 1, "m": 2, "g": 3, "t": 4}
|
|
918
939
|
|
|
919
940
|
_size_pattern = re.compile(r"^([0-9.-]+)\s*([a-z]*)$")
|
|
920
941
|
|
|
@@ -1050,13 +1071,19 @@ def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
|
|
|
1050
1071
|
return value, match
|
|
1051
1072
|
|
|
1052
1073
|
|
|
1053
|
-
def find_objects(
|
|
1074
|
+
def find_objects(
|
|
1075
|
+
nested: Union[List, Dict],
|
|
1076
|
+
types: Union[None, Type, Tuple[Type]] = None,
|
|
1077
|
+
checker: Callable[..., bool] = None,
|
|
1078
|
+
) -> List:
|
|
1054
1079
|
found = []
|
|
1055
1080
|
stack = [nested]
|
|
1056
1081
|
|
|
1057
1082
|
while len(stack) > 0:
|
|
1058
1083
|
it = stack.pop()
|
|
1059
|
-
if isinstance(it, types)
|
|
1084
|
+
if (types is not None and isinstance(it, types)) or (
|
|
1085
|
+
checker is not None and checker(it)
|
|
1086
|
+
):
|
|
1060
1087
|
found.append(it)
|
|
1061
1088
|
continue
|
|
1062
1089
|
|
|
@@ -1184,7 +1211,7 @@ def arrow_type_from_str(type_str: str) -> pa.DataType:
|
|
|
1184
1211
|
token_iter = pytokenize.tokenize(io.BytesIO(type_str.encode()).readline)
|
|
1185
1212
|
value_stack, op_stack = [], []
|
|
1186
1213
|
|
|
1187
|
-
def _pop_make_type(with_args: bool = False, combined: bool = True)
|
|
1214
|
+
def _pop_make_type(with_args: bool = False, combined: bool = True):
|
|
1188
1215
|
"""
|
|
1189
1216
|
Pops tops of value stacks, creates a DataType instance and push back
|
|
1190
1217
|
|
|
@@ -1208,6 +1235,23 @@ def arrow_type_from_str(type_str: str) -> pa.DataType:
|
|
|
1208
1235
|
else: # pragma: no cover
|
|
1209
1236
|
value_stack.append(type_name)
|
|
1210
1237
|
|
|
1238
|
+
def _pop_make_struct_field():
|
|
1239
|
+
"""parameterized sub-types need to be represented as tuples"""
|
|
1240
|
+
nonlocal value_stack
|
|
1241
|
+
|
|
1242
|
+
op_stack.pop(-1)
|
|
1243
|
+
if isinstance(value_stack[-1], str) and value_stack[-1].lower() in (
|
|
1244
|
+
"null",
|
|
1245
|
+
"not null",
|
|
1246
|
+
):
|
|
1247
|
+
values = value_stack[-3:]
|
|
1248
|
+
value_stack = value_stack[:-3]
|
|
1249
|
+
values[-1] = values[-1] == "null"
|
|
1250
|
+
else:
|
|
1251
|
+
values = value_stack[-2:]
|
|
1252
|
+
value_stack = value_stack[:-2]
|
|
1253
|
+
value_stack.append(tuple(values))
|
|
1254
|
+
|
|
1211
1255
|
for token in token_iter:
|
|
1212
1256
|
if token.type == pytokenize.OP:
|
|
1213
1257
|
if token.string == ":":
|
|
@@ -1216,13 +1260,9 @@ def arrow_type_from_str(type_str: str) -> pa.DataType:
|
|
|
1216
1260
|
# gather previous sub-types
|
|
1217
1261
|
if op_stack[-1] in ("<", ":"):
|
|
1218
1262
|
_pop_make_type()
|
|
1219
|
-
|
|
1220
1263
|
if op_stack[-1] == ":":
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
values = value_stack[-2:]
|
|
1224
|
-
value_stack = value_stack[:-2]
|
|
1225
|
-
value_stack.append(tuple(values))
|
|
1264
|
+
_pop_make_struct_field()
|
|
1265
|
+
|
|
1226
1266
|
# put generated item into the parameter list
|
|
1227
1267
|
val = value_stack.pop(-1)
|
|
1228
1268
|
value_stack[-1].append(val)
|
|
@@ -1239,22 +1279,20 @@ def arrow_type_from_str(type_str: str) -> pa.DataType:
|
|
|
1239
1279
|
op_stack.pop(-1)
|
|
1240
1280
|
elif token.string == ">":
|
|
1241
1281
|
_pop_make_type()
|
|
1242
|
-
|
|
1243
1282
|
if op_stack[-1] == ":":
|
|
1244
|
-
|
|
1245
|
-
op_stack.pop(-1)
|
|
1246
|
-
values = value_stack[-2:]
|
|
1247
|
-
value_stack = value_stack[:-2]
|
|
1248
|
-
value_stack.append(tuple(values))
|
|
1283
|
+
_pop_make_struct_field()
|
|
1249
1284
|
|
|
1250
1285
|
# put generated item into the parameter list
|
|
1251
1286
|
val = value_stack.pop(-1)
|
|
1252
1287
|
value_stack[-1].append(val)
|
|
1253
1288
|
# make DataType (i.e., list / map / struct) given args
|
|
1254
|
-
_pop_make_type(True)
|
|
1289
|
+
_pop_make_type(with_args=True)
|
|
1255
1290
|
op_stack.pop(-1)
|
|
1256
1291
|
elif token.type == pytokenize.NAME:
|
|
1257
|
-
value_stack
|
|
1292
|
+
if value_stack and value_stack[-1] == "not":
|
|
1293
|
+
value_stack[-1] += " " + token.string
|
|
1294
|
+
else:
|
|
1295
|
+
value_stack.append(token.string)
|
|
1258
1296
|
elif token.type == pytokenize.NUMBER:
|
|
1259
1297
|
value_stack.append(int(token.string))
|
|
1260
1298
|
elif token.type == pytokenize.ENDMARKER:
|
|
@@ -1545,3 +1583,139 @@ def cache_tileables(*tileables):
|
|
|
1545
1583
|
for t in tileables:
|
|
1546
1584
|
if isinstance(t, ENTITY_TYPE):
|
|
1547
1585
|
t.cache = True
|
|
1586
|
+
|
|
1587
|
+
|
|
1588
|
+
def ignore_warning(func: Callable):
    """Decorator that runs ``func`` with every warning filtered out.

    The warning filters are changed inside ``warnings.catch_warnings()``, so
    the previous filter state is restored once the call finishes.
    """

    def _silenced(*call_args, **call_kwargs):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            outcome = func(*call_args, **call_kwargs)
        return outcome

    # Preserve the wrapped function's name, docstring and other metadata.
    return functools.wraps(func)(_silenced)
|
|
1596
|
+
|
|
1597
|
+
|
|
1598
|
+
class ServiceLoggerAdapter(logging.LoggerAdapter):
    """Logger adapter that prefixes messages with ``[Key=value ...]`` context.

    The context is the adapter's ``extra`` mapping overlaid with the keyword
    arguments given to ``process``.
    """

    # Optional display labels for context keys. A key without a truthy entry
    # here is labelled with its capitalized name.
    extra_key_mapping = {}

    def process(self, msg, kwargs):
        """Return ``(prefixed_msg, kwargs)``; ``kwargs`` is passed through as is."""
        context = {**(self.extra or {}), **kwargs}

        labelled = []
        for key, value in context.items():
            label = self.extra_key_mapping.get(key) or key.capitalize()
            labelled.append(f"{label}={value}")

        return "[{}] {}".format(" ".join(labelled), msg), kwargs
|
|
1611
|
+
|
|
1612
|
+
|
|
1613
|
+
@contextmanager
def atomic_writer(filename, mode="w", **kwargs):
    """
    Context manager that stages writes in a temp file next to ``filename``.

    The temp file is created in the directory of ``filename`` (or "." when
    the name has no directory part), opened with ``mode`` and ``kwargs`` and
    yielded to the caller. If the body finishes without an exception, the
    temp file is moved over ``filename`` with ``os.replace``. In every case
    any temp file still present is removed afterwards.
    """
    staging_dir = os.path.dirname(filename) or "."
    staging_fd, staging_path = tempfile.mkstemp(dir=staging_dir)
    os.chmod(staging_path, 0o644)
    # Only the path is needed; the file is reopened below with the caller's mode.
    os.close(staging_fd)

    try:
        with open(staging_path, mode, **kwargs) as staging_file:
            yield staging_file

        # Reached only on success: swap the staged file into place.
        os.replace(staging_path, filename)
    finally:
        # After a successful replace the temp path is already gone, so a
        # failure to remove it is expected and ignored.
        try:
            os.remove(staging_path)
        except OSError:
            pass
|
|
1634
|
+
|
|
1635
|
+
|
|
1636
|
+
def prevent_called_from_pandas(level=2):
    """Prevent method from being called from pandas.

    Looks at the stack frame ``level`` steps above this function. If that
    frame's code lives inside the installed pandas package directory, raises
    ``AttributeError`` carrying the name of the function that called this
    helper. Otherwise returns None.

    Args:
        level: how many frames to walk up from this function to reach the
            frame to inspect (2 means the caller of the caller).

    Raises:
        AttributeError: when the inspected frame belongs to pandas.
        ValueError: raised by ``sys._getframe`` when ``level`` is deeper
            than the current call stack.
    """
    frame = sys._getframe(level)
    # Compare against the directory plus a trailing separator. A bare prefix
    # match would also hit sibling directories such as ``pandas_extras``.
    pd_pack_prefix = os.path.join(os.path.dirname(pd.__file__), "")
    if frame.f_code.co_filename.startswith(pd_pack_prefix):
        raise AttributeError(sys._getframe(1).f_code.co_name)
|
|
1643
|
+
|
|
1644
|
+
|
|
1645
|
+
def combine_error_message_and_traceback(
    messages: List[str], tracebacks: List[List[str]]
) -> str:
    """Render paired messages and traceback lines as one chained report.

    Each message is followed by a newline and then its traceback lines,
    concatenated without extra separators. The per-error sections are joined
    with a "Caused by:" line. Pairing stops at the shorter of the two lists.
    """
    sections = (
        "".join([message + "\n"] + tb_lines)
        for message, tb_lines in zip(messages, tracebacks)
    )
    return "\nCaused by:\n".join(sections)
|
|
1652
|
+
|
|
1653
|
+
|
|
1654
|
+
def generate_unique_id(byte_len: int) -> Generator[str, None, None]:
    """
    Yield hex ids that never repeat within this generator.

    Every id handed out is remembered in a set that lives as long as the
    generator, so DO NOT keep such a generator in global scope or in
    singleton class members: the set may never be freed.
    """
    seen = set()
    while True:
        candidate = new_random_id(byte_len).hex()
        if candidate in seen:
            # Collision with an earlier id: draw again.
            continue
        seen.add(candidate)
        yield candidate
|
|
1666
|
+
|
|
1667
|
+
|
|
1668
|
+
def validate_and_adjust_resource_ratio(
    expect_resources: Dict[str, Any],
    max_memory_cpu_ratio: float = None,
    adjust: bool = False,
) -> Tuple[Dict[str, Any], bool]:
    """
    Validate and optionally adjust the memory:CPU ratio of a resource spec.

    Args:
        expect_resources: Dictionary containing resource specifications.
            Only the "cpu" and "memory" entries are read; a missing or falsy
            "cpu" is treated as 1.
        max_memory_cpu_ratio: Maximum memory (GiB) per CPU. If None, no
            validation is performed and the input is returned unchanged.
        adjust: Whether to automatically raise the CPU count to meet the
            ratio. When False, only a warning is emitted.

    Returns:
        Tuple of (resources, was_adjusted). When an adjustment is made,
        ``resources`` is a shallow copy with "cpu" replaced; otherwise it is
        the ``expect_resources`` object itself.
    """
    memory = expect_resources.get("memory")
    if memory is None or max_memory_cpu_ratio is None:
        return expect_resources, False

    # A missing, falsy or sub-1 CPU count is treated as one CPU.
    cpu = max(expect_resources.get("cpu") or 1, 1)
    # Convert memory to GiB; unit-less values are passed on as GiB.
    # NOTE(review): assumes parse_size_to_megabytes returns MiB - confirm.
    memory_gib = parse_size_to_megabytes(memory, default_number_unit="GiB") / 1024
    current_ratio = memory_gib / cpu

    if current_ratio > max_memory_cpu_ratio:
        # Adjust CPU to meet maximum ratio, don't reduce resources
        recommended_cpu = math.ceil(memory_gib / max_memory_cpu_ratio)
        new_ratio = memory_gib / recommended_cpu
        if adjust:
            adjusted_resources = expect_resources.copy()
            adjusted_resources["cpu"] = recommended_cpu

            warnings.warn(
                f"UDF resource auto-adjustment: Current UDF settings"
                f" (CPU: {cpu}, Memory: {memory_gib}Gib, Ratio: {current_ratio:.2f})"
                f" exceed maximum allowed ratio {max_memory_cpu_ratio:.1f}. "
                f"Automatically adjusted to (CPU: {recommended_cpu},"
                f" Memory: {memory_gib:.2f}Gib,"
                f" Ratio: {new_ratio:.2f}:1) to meet requirements."
            )
            return adjusted_resources, True
        else:
            warnings.warn(
                f"UDF resource ratio warning: Current UDF settings"
                f" (CPU: {cpu}, Memory: {memory_gib}Gib, Ratio: {current_ratio:.2f})"
                f" exceed maximum allowed ratio {max_memory_cpu_ratio:.1f}. "
                f"Consider adjusting CPU to at least {recommended_cpu}"
                f" (which would result in Ratio: {new_ratio:.2f}) to meet requirements."
            )

    return expect_resources, False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maxframe
|
|
3
|
-
Version: 2.0.0b1
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: MaxFrame operator-based data analyze framework
|
|
5
5
|
Requires-Dist: numpy <2.0.0,>=1.19.0
|
|
6
6
|
Requires-Dist: pandas >=1.0.0
|
|
@@ -19,6 +19,7 @@ Requires-Dist: pickle5 ; python_version < "3.8"
|
|
|
19
19
|
Provides-Extra: dev
|
|
20
20
|
Requires-Dist: black >=22.3.0 ; extra == 'dev'
|
|
21
21
|
Requires-Dist: flake8 >=5.0.4 ; extra == 'dev'
|
|
22
|
+
Requires-Dist: flake8-type-checking >=1.0.3 ; extra == 'dev'
|
|
22
23
|
Requires-Dist: pre-commit >=2.15.0 ; extra == 'dev'
|
|
23
24
|
Requires-Dist: graphviz >=0.20.1 ; extra == 'dev'
|
|
24
25
|
Provides-Extra: test
|
|
@@ -30,7 +31,7 @@ Requires-Dist: pytest-timeout >=2.1.0 ; extra == 'test'
|
|
|
30
31
|
Requires-Dist: matplotlib >=2.0.0 ; extra == 'test'
|
|
31
32
|
Requires-Dist: lightgbm <4.0.0,>=3.0.0 ; extra == 'test'
|
|
32
33
|
Requires-Dist: scikit-learn >=1.0 ; extra == 'test'
|
|
33
|
-
Requires-Dist: xgboost <
|
|
34
|
+
Requires-Dist: xgboost <2.1.0,>=1.6.2 ; extra == 'test'
|
|
34
35
|
|
|
35
36
|
MaxCompute MaxFrame Client
|
|
36
37
|
==========================
|