maxframe 2.0.0b2__cp311-cp311-win32.whl → 2.2.0__cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp311-win32.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp311-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp311-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....config import options
|
|
21
|
+
from ....core import EntityData
|
|
22
|
+
from ....core.operator import OperatorStage
|
|
23
|
+
from ....serialization.serializables import (
|
|
24
|
+
AnyField,
|
|
25
|
+
BoolField,
|
|
26
|
+
DictField,
|
|
27
|
+
Int64Field,
|
|
28
|
+
KeyField,
|
|
29
|
+
)
|
|
30
|
+
from ....tensor.core import TensorOrder
|
|
31
|
+
from .core import PairwiseDistances
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class PairwiseDistancesTopk(PairwiseDistances):
    """Operator describing a top-``k`` selection over pairwise distances.

    Calling the operator validates ``X`` and ``Y`` through
    ``pairwise_distances`` and declares one output tensor of distances
    (``float64``) with shape ``(n_rows_X, min(n_rows_Y, k))``.  When
    ``return_index`` is truthy a second ``int64`` tensor of the same shape
    is declared for the indices.

    NOTE(review): the selection itself is not implemented in this block;
    which end of the ordering "top" refers to must be confirmed against the
    executor of ``opcodes.PAIRWISE_DISTANCES_TOPK``.
    """

    _op_type_ = opcodes.PAIRWISE_DISTANCES_TOPK

    # the two input tensors; cleared for the ``agg`` stage in _set_inputs
    x = KeyField("x")
    y = KeyField("y")
    # number of entries kept per row of ``x``
    k = Int64Field("k", default=None)
    # metric name or callable forwarded to ``pairwise_distances``
    metric = AnyField("metric", default=None)
    # extra keyword arguments for the metric; may be left as None
    metric_kwargs = DictField("metric_kwargs", default=None)
    # whether an index tensor is produced next to the distances
    return_index = BoolField("return_index", default=None)
    working_memory = AnyField("working_memory", default=None)

    @property
    def output_limit(self):
        """Number of outputs: 2 when indices are requested, otherwise 1."""
        return 2 if self.return_index else 1

    @classmethod
    def _set_inputs(cls, op: "PairwiseDistancesTopk", inputs: List[EntityData]):
        """Rebind ``op.x`` / ``op.y`` from ``inputs`` unless ``op`` is in the agg stage."""
        super()._set_inputs(op, inputs)
        if op.stage != OperatorStage.agg:
            op.x, op.y = inputs[:2]
        else:
            op.x = op.y = None

    def __call__(self, X, Y):
        """Build the output tensor(s) for inputs ``X`` and ``Y``.

        Returns a list ``[distance, index]`` when ``return_index`` is truthy,
        otherwise only the distance tensor.
        """
        from .pairwise import pairwise_distances

        # leverage pairwise_distances for checks; ``metric_kwargs`` defaults
        # to None on the field, so guard before unpacking it
        d = pairwise_distances(
            X, Y, metric=self.metric, **(self.metric_kwargs or {})
        )
        # continue with the validated inputs so ``.shape`` is available even
        # when the caller handed in plain array-likes
        X, Y = d.op.inputs

        # never ask for more neighbours than there are rows in Y
        if self.k > Y.shape[0]:
            self.k = Y.shape[0]

        shape = (X.shape[0], min(Y.shape[0], self.k))
        kws = [
            {
                "shape": shape,
                "order": TensorOrder.C_ORDER,
                "dtype": np.dtype(np.float64),
                "_type_": "distance",
            },
        ]
        if self.return_index:
            kws.append(
                {
                    "shape": shape,
                    "order": TensorOrder.C_ORDER,
                    "dtype": np.dtype(np.int64),
                    "_type_": "index",
                }
            )

        outputs = self.new_tensors([X, Y], kws=kws)
        return outputs if self.return_index else outputs[0]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def pairwise_distances_topk(
    X,
    Y=None,
    k=None,
    metric="euclidean",
    return_index=True,
    axis=1,
    working_memory=None,
    **kwds
):
    """Create and call a ``PairwiseDistancesTopk`` operator for ``X`` and ``Y``.

    Parameters
    ----------
    X : array-like or tensor
        First input.
    Y : array-like or tensor, optional
        Second input; ``X`` is used when omitted.
    k : int
        Number of entries to keep; required.
    metric : str or callable, default "euclidean"
        Stored on the operator as ``metric``.
    return_index : bool, default True
        Stored on the operator as ``return_index``.
    axis : int, default 1
        When 0, ``X`` and ``Y`` are swapped before the operator is built.
    working_memory : optional
        Falls back to ``options.learn.working_memory`` when None.
    **kwds
        Stored on the operator as ``metric_kwargs``.

    Returns
    -------
    The result of calling the operator on ``(X, Y)``.

    Raises
    ------
    ValueError
        If ``k`` is None.
    """
    if k is None:  # pragma: no cover
        raise ValueError("`k` has to be specified")

    Y = X if Y is None else Y
    if axis == 0:
        X, Y = Y, X
    if working_memory is None:
        working_memory = options.learn.working_memory

    op_params = dict(
        x=X,
        y=Y,
        k=k,
        metric=metric,
        metric_kwargs=kwds,
        return_index=return_index,
        working_memory=working_memory,
    )
    return PairwiseDistancesTopk(**op_params)(X, Y)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .... import tensor as mt
|
|
16
|
+
from .core import PairwiseDistances
|
|
17
|
+
from .euclidean import euclidean_distances
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def rbf_kernel(X, Y=None, gamma=None):
    """
    Compute the rbf (gaussian) kernel between X and Y::

        K(x, y) = exp(-gamma ||x-y||^2)

    for each pair of rows x in X and y in Y.

    Read more in the :ref:`User Guide <rbf_kernel>`.

    Parameters
    ----------
    X : tensor of shape (n_samples_X, n_features)

    Y : tensor of shape (n_samples_Y, n_features)

    gamma : float, default None
        If None, defaults to 1.0 / n_features

    Returns
    -------
    kernel_matrix : tensor of shape (n_samples_X, n_samples_Y)
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)
    # default bandwidth is the reciprocal of the feature count
    gamma = 1.0 / X.shape[1] if gamma is None else gamma

    # squared distances, scaled in place by -gamma, then exponentiated
    scaled = euclidean_distances(X, Y, squared=True)
    scaled *= -gamma
    return mt.exp(scaled)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
from sklearn.metrics import r2_score
|
|
17
|
+
|
|
18
|
+
from .. import get_scorer
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_get_scorer():
    # an unknown scorer name must be rejected
    with pytest.raises(ValueError):
        get_scorer("unknown")

    # both a scorer name and a metric callable must resolve to something
    for scoring in ("r2", r2_score):
        assert get_scorer(scoring) is not None
|
maxframe/learn/utils/__init__.py
CHANGED
|
@@ -16,4 +16,4 @@ from .core import convert_to_tensor_or_dataframe
|
|
|
16
16
|
from .multiclass import check_classification_targets
|
|
17
17
|
from .shuffle import shuffle
|
|
18
18
|
from .sparsefuncs import count_nonzero
|
|
19
|
-
from .validation import check_consistent_length
|
|
19
|
+
from .validation import check_array, check_consistent_length
|
maxframe/learn/utils/checks.py
CHANGED
|
@@ -20,7 +20,7 @@ from ... import opcodes
|
|
|
20
20
|
from ... import tensor as mt
|
|
21
21
|
from ...config import options
|
|
22
22
|
from ...core import ENTITY_TYPE, EntityData, OutputType, get_output_types
|
|
23
|
-
from ...core.operator import Operator
|
|
23
|
+
from ...core.operator import Operator
|
|
24
24
|
from ...serialization.serializables import (
|
|
25
25
|
BoolField,
|
|
26
26
|
DataTypeField,
|
|
@@ -56,7 +56,6 @@ class CheckBase(Operator, LearnOperatorMixin):
|
|
|
56
56
|
# output input if value not specified
|
|
57
57
|
self.value = value = value if value is not None else x
|
|
58
58
|
self.output_types = get_output_types(value)
|
|
59
|
-
self.stage = OperatorStage.agg
|
|
60
59
|
return self.new_tileable([x, value], kws=[value.params])
|
|
61
60
|
|
|
62
61
|
|
maxframe/learn/utils/core.py
CHANGED
|
@@ -14,9 +14,19 @@
|
|
|
14
14
|
|
|
15
15
|
import math
|
|
16
16
|
import numbers
|
|
17
|
+
import warnings
|
|
17
18
|
|
|
19
|
+
import numpy as np
|
|
18
20
|
import pandas as pd
|
|
19
21
|
|
|
22
|
+
from ...utils import parse_readable_size
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from sklearn import get_config as sklearn_get_config
|
|
26
|
+
except ImportError:
|
|
27
|
+
sklearn_get_config = None
|
|
28
|
+
|
|
29
|
+
from ...config import options
|
|
20
30
|
from ...dataframe import DataFrame, Series
|
|
21
31
|
from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
22
32
|
from ...tensor import tensor as astensor
|
|
@@ -60,3 +70,52 @@ def is_scalar_nan(x):
|
|
|
60
70
|
False
|
|
61
71
|
"""
|
|
62
72
|
return isinstance(x, numbers.Real) and math.isnan(x)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_chunk_n_rows(row_bytes, max_n_rows=None, working_memory=None):
    """Calculates how many rows can be processed within working_memory

    Parameters
    ----------
    row_bytes : int
        The expected number of bytes of memory that will be consumed
        during the processing of each row.
    max_n_rows : int, optional
        The maximum return value.
    working_memory : int, float or str, optional
        A plain number is interpreted as MiB; any other value is handed to
        ``parse_readable_size``.  When None (default),
        ``options.learn.working_memory`` is used, then
        ``sklearn.get_config()['working_memory']`` if scikit-learn is
        importable, and finally 1024 MiB.

    Returns
    -------
    int
        Number of rows per chunk, at least 1 and at most ``max_n_rows``
        when that is given.

    Warns
    -----
    Issues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB.
    """
    if working_memory is None:  # pragma: no cover
        working_memory = options.learn.working_memory
        if working_memory is None:
            if sklearn_get_config is not None:
                working_memory = sklearn_get_config()["working_memory"]
            else:
                working_memory = 1024

    # Keep the byte count in its own variable so the warning below can still
    # report MiB.  Any real number (not just ``int``) is a MiB count, as the
    # parameter documentation promises.
    if isinstance(working_memory, numbers.Real):
        working_memory_bytes = working_memory * 2**20
    else:
        working_memory_bytes = parse_readable_size(working_memory)[0]

    chunk_n_rows = int(working_memory_bytes // row_bytes)
    if max_n_rows is not None:
        chunk_n_rows = min(chunk_n_rows, max_n_rows)
    if chunk_n_rows < 1:  # pragma: no cover
        warnings.warn(
            "Could not adhere to working_memory config. "
            "Currently %.0fMiB, %.0fMiB required."
            % (working_memory_bytes * 2**-20, np.ceil(row_bytes * 2**-20))
        )
        # always make progress with at least one row per chunk
        chunk_n_rows = 1
    return chunk_n_rows
|
maxframe/learn/utils/extmath.py
CHANGED
|
@@ -49,6 +49,19 @@ def _safe_accumulator_op(op, x, *args, **kwargs):
|
|
|
49
49
|
return result
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
def logsumexp_real(a, axis=None, keepdims=False):
    """Simplified logsumexp for real arrays without biases

    Computes ``log(sum(exp(a)))`` over ``axis`` after subtracting the
    maximum along that axis, then adds the maximum back.

    Parameters
    ----------
    a : array_like
        Input values; converted with ``mt.tensor``.
    axis : None, int or tuple of ints, optional
        Axis or axes to reduce over. None reduces over all axes.
    keepdims : bool, default False
        If True, the reduced axes are kept with length one.

    Returns
    -------
    tensor
        The reduced result.
    """
    from ... import tensor as mt

    x = mt.tensor(a)
    # shift by the maximum before exponentiating
    x_max = mt.amax(x, axis=axis, keepdims=True)
    exp_x_shifted = mt.exp(x - x_max)
    ret = mt.log(mt.sum(exp_x_shifted, axis=axis, keepdims=True)) + x_max
    if keepdims:
        return ret
    # drop exactly the axes that were reduced, not a hard-coded axis 1
    return mt.squeeze(ret, axis=axis)
|
|
63
|
+
|
|
64
|
+
|
|
52
65
|
def _incremental_mean_and_var(
|
|
53
66
|
X, last_mean, last_variance, last_sample_count, sample_weight=None
|
|
54
67
|
):
|
|
@@ -174,3 +187,27 @@ def _incremental_mean_and_var(
|
|
|
174
187
|
updated_variance = updated_unnormalized_variance / updated_sample_count
|
|
175
188
|
|
|
176
189
|
return updated_mean, updated_variance, updated_sample_count
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def row_norms(X, squared=False):
    """Compute the Euclidean norm of each row of X, optionally squared.

    The input is used as given; nothing is validated or converted.

    Parameters
    ----------
    X : array_like
        The input tensor
    squared : bool, optional (default = False)
        When True the squared norms are returned and no square root
        is taken.

    Returns
    -------
    array_like
        Row-wise Euclidean norms of X, squared if requested.
    """
    squared_norms = (X**2).sum(axis=1)
    return squared_norms if squared else mt.sqrt(squared_norms)
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict, List, NamedTuple, Optional
|
|
16
|
+
|
|
17
|
+
from ... import opcodes
|
|
18
|
+
from ...core import ENTITY_TYPE, EntityData, OutputType
|
|
19
|
+
from ...core.operator import ObjectOperator
|
|
20
|
+
from ...serialization.serializables import (
|
|
21
|
+
AnyField,
|
|
22
|
+
BoolField,
|
|
23
|
+
DictField,
|
|
24
|
+
Int32Field,
|
|
25
|
+
StringField,
|
|
26
|
+
)
|
|
27
|
+
from ...utils import find_objects, replace_objects
|
|
28
|
+
from ..core import LearnOperatorMixin
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ToODPSModel(ObjectOperator, LearnOperatorMixin):
    """Object operator describing a "save model to ODPS" request.

    The operator carries the target model name/version and storage settings
    as serializable fields. ``training_info`` and any entities nested inside
    ``params`` are registered as operator inputs.
    """

    _op_type_ = opcodes.TO_ODPS_MODEL

    # target model identity
    model_name = StringField("model_name", default=None)
    model_version = StringField("model_version", default=None)
    # model payload; both may be or contain entities (see __call__)
    training_info = AnyField("training_info", default=None)
    params = AnyField("params", default=None)
    format = StringField("format", default=None)
    # lifecycle and description settings for the model and this version
    lifecycle = Int32Field("lifecycle", default=None)
    version_lifecycle = Int32Field("version_lifecycle", default=None)
    description = StringField("description", default=None)
    version_description = StringField("version_description", default=None)
    create_model = BoolField("create_model", default=True)
    set_default_version = BoolField("set_default_version", default=True)
    # storage settings
    location = StringField("location", default=None)
    storage_options = DictField("storage_options", default=None)

    def __init__(self, **kw):
        super().__init__(_output_types=[OutputType.object], **kw)

    @classmethod
    def _set_inputs(cls, op: "ToODPSModel", inputs: List[EntityData]):
        """Rebind ``training_info`` and entities in ``params`` to ``inputs``.

        Mirrors the input order built in ``__call__``: the training info
        (when it is an entity) comes first, followed by the entities found
        in ``params``.
        """
        super()._set_inputs(op, inputs)

        # Position of the first params entity within ``inputs``.
        params_start = 0
        if isinstance(op.training_info, ENTITY_TYPE):
            op.training_info = inputs[0]
            params_start = 1

        found_entities = find_objects([op.params], ENTITY_TYPE)
        mapping = dict(zip(found_entities, inputs[params_start:]))
        (op.params,) = replace_objects([op.params], mapping)

    def __call__(self, training_info, params):
        """Record the arguments on the operator and create the output tileable.

        Parameters
        ----------
        training_info
            Stored on the operator; added as the first input when it is an
            entity.
        params
            Stored on the operator; every entity found inside it is appended
            to the inputs.
        """
        self.training_info = training_info
        self.params = params

        inputs = [training_info] if isinstance(training_info, ENTITY_TYPE) else []
        inputs.extend(find_objects([params], ENTITY_TYPE))
        return self.new_tileable(inputs, shape=())
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class ToODPSModelMixin:
    """Mixin adding ``to_odps_model`` to estimators.

    Subclasses implement ``_get_odps_model_info`` and are expected to expose
    a ``training_info_`` attribute, which ``to_odps_model`` reads.
    """

    class ODPSModelInfo(NamedTuple):
        # format passed to the operator as ``format``
        model_format: str
        # parameters passed to the operator call as ``params``
        model_params: Any

    def _get_odps_model_info(self) -> ODPSModelInfo:
        """Return the model format and parameters; must be overridden."""
        raise NotImplementedError

    def to_odps_model(
        self,
        model_name: str = None,
        model_version: str = None,
        schema: str = None,
        project: str = None,
        lifecycle: Optional[int] = None,
        version_lifecycle: Optional[int] = None,
        description: Optional[str] = None,
        version_description: Optional[str] = None,
        create_model: bool = True,
        set_default_version: bool = False,
        location: Optional[str] = None,
        storage_options: Dict[str, Any] = None,
    ):
        """
        Save trained model to MaxCompute.

        Parameters
        ----------
        model_name : str
            Name of the model. Must be provided. Can be a fully qualified
            name with format "project.schema.model" or just "model" if
            project and schema are specified separately.
        model_version : str, optional
            Version of the model. If not provided, a default version will be used.
        schema : str, optional
            Schema name where the model will be stored. If not provided and
            project is specified, "default" schema will be used. Ignored
            when ``model_name`` already contains a ".".
        project : str, optional
            Project name where the model will be stored. Ignored when
            ``model_name`` already contains a ".".
        lifecycle : int, optional
            Lifecycle of the model in days. After this period, the model will
            be automatically deleted.
        version_lifecycle : int, optional
            Lifecycle of the model version in days. After this period, the
            model version will be automatically deleted.
        description : str, optional
            Description of the model.
        version_description : str, optional
            Description of the model version.
        create_model : bool, default True
            Whether to create the model if it doesn't exist.
        set_default_version : bool, default False
            Whether to set this version as the default version of the model.
        location : str, optional
            Storage location for the model. If specified, the model can be stored
            into a customized location. Can be an OSS path with format
            oss://endpoint/bucket/path.
        storage_options : dict, optional
            Extra options for storage, such as role_arn or policy for OSS storage.

        Returns
        -------
        Scalar
            A scalar that can be executed to save the model.

        Raises
        ------
        TypeError
            If ``model_name`` is not a string (including when it is omitted).

        Examples
        --------
        First we fit an XGBoost model.

        >>> import maxframe.dataframe as md
        >>> from maxframe.learn.datasets import make_classification
        >>> from maxframe.learn.contrib.xgboost import XGBClassifier
        >>> X, y = make_classification(1000, n_features=10, n_classes=2)
        >>> cols = [f"f{idx}" for idx in range(10)]
        >>> clf = XGBClassifier(n_estimators=10)
        >>> X_df = md.DataFrame(X, columns=cols)
        >>> clf.fit(X_df, y)

        Trigger execution and save model with fully qualified name.

        >>> clf.to_odps_model(model_name="project.schema.my_model",
        ...                   model_version="v1.0").execute()

        You can also save model with a customized path. Need to change `<my_bucket>`
        and `<user_id>` into your own bucket and user ID.

        >>> clf.to_odps_model(model_name="project.schema.my_model",
        ...                   model_version="v1.0",
        ...                   location="oss://oss-cn-shanghai.aliyuncs.com/<my_bucket>/model_name",
        ...                   storage_options={
        ...                       "role_arn": "acs:ram::<user_id>:role/aliyunodpsdefaultrole"
        ...                   }).execute()
        """
        # The signature defaults model_name to None, which previously failed
        # on the membership test below with an opaque "NoneType is not
        # iterable" error. Fail early with the same exception type and a
        # message that names the offending argument.
        if not isinstance(model_name, str):
            raise TypeError(
                "model_name must be specified as a string, got "
                f"{type(model_name).__name__}"
            )

        # Only unqualified names are prefixed; a name that already contains
        # a "." is used as given.
        if "." not in model_name:
            if project and not schema:
                schema = "default"
            if schema:
                model_name = f"{schema}.{model_name}"
            if project:
                model_name = f"{project}.{model_name}"

        model_info = self._get_odps_model_info()

        op = ToODPSModel(
            model_name=model_name,
            model_version=model_version,
            format=model_info.model_format,
            lifecycle=lifecycle,
            version_lifecycle=version_lifecycle,
            description=description,
            version_description=version_description,
            create_model=create_model,
            set_default_version=set_default_version,
            location=location,
            storage_options=storage_options,
        )
        return op(getattr(self, "training_info_"), model_info.model_params)
|
|
@@ -695,9 +695,9 @@ def _check_sample_weight(
|
|
|
695
695
|
dtype = np.float64
|
|
696
696
|
|
|
697
697
|
if sample_weight is None:
|
|
698
|
-
sample_weight =
|
|
698
|
+
sample_weight = mt.ones(n_samples, dtype=dtype)
|
|
699
699
|
elif isinstance(sample_weight, numbers.Number):
|
|
700
|
-
sample_weight =
|
|
700
|
+
sample_weight = mt.full(n_samples, sample_weight, dtype=dtype)
|
|
701
701
|
else:
|
|
702
702
|
if dtype is None:
|
|
703
703
|
dtype = [np.float64, np.float32]
|
maxframe/lib/compat.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import functools
|
|
1
3
|
from typing import TYPE_CHECKING, Callable, List, Sequence, Tuple, Union
|
|
2
4
|
|
|
3
5
|
import numpy as np
|
|
@@ -143,3 +145,41 @@ def case_when(
|
|
|
143
145
|
def patch_pandas():
|
|
144
146
|
if not hasattr(pd.Series, "case_when"):
|
|
145
147
|
pd.Series.case_when = case_when
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class cached_property:
    """
    A property that is only computed once per instance and then replaces itself
    with an ordinary attribute. Deleting the attribute resets the property.
    Source: https://github.com/bottlepy/bottle/commit/fa7733e075da0d790d809aa3d2f53071897e6f76

    The computed value is stored in the instance ``__dict__`` under the
    wrapped function's name. For coroutine functions, the stored value is the
    future returned by ``asyncio.ensure_future``.
    """  # noqa

    def __init__(self, func):
        self.__doc__ = getattr(func, "__doc__")
        self.func = func

    def __get__(self, obj, cls):
        # Accessed on the class itself: expose the descriptor object.
        if obj is None:
            return self

        if asyncio.iscoroutinefunction(self.func):
            return self._wrap_in_coroutine(obj)

        # Storing the value in the instance __dict__ shadows this descriptor
        # (it defines no __set__), so later lookups skip __get__ entirely.
        value = obj.__dict__[self.func.__name__] = self.func(obj)
        return value

    def _wrap_in_coroutine(self, obj):
        """Schedule the coroutine for ``obj`` and cache the resulting future."""

        # Copy metadata from the wrapped function; the original wrapped the
        # instance ``obj``, which copied the instance's __dict__ onto the
        # helper instead.
        @functools.wraps(self.func)
        def wrapper():
            future = asyncio.ensure_future(self.func(obj))
            obj.__dict__[self.func.__name__] = future
            return future

        return wrapper()
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# isort: off
|
|
182
|
+
try:
|
|
183
|
+
from functools import cached_property # noqa: F811, F401
|
|
184
|
+
except ImportError:
|
|
185
|
+
pass
|
|
@@ -12,4 +12,19 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .
|
|
15
|
+
from .blob import (
|
|
16
|
+
ArrowBlobType,
|
|
17
|
+
ExternalBlobDtype,
|
|
18
|
+
ExternalBlobExtensionArray,
|
|
19
|
+
SolidBlob,
|
|
20
|
+
new_blob,
|
|
21
|
+
)
|
|
22
|
+
from .dtypes import (
|
|
23
|
+
ArrowDtype,
|
|
24
|
+
dict_,
|
|
25
|
+
is_list_dtype,
|
|
26
|
+
is_map_dtype,
|
|
27
|
+
is_struct_dtype,
|
|
28
|
+
list_,
|
|
29
|
+
struct_,
|
|
30
|
+
)
|