maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....config import options
|
|
21
|
+
from ....core import EntityData
|
|
22
|
+
from ....core.operator import OperatorStage
|
|
23
|
+
from ....serialization.serializables import (
|
|
24
|
+
AnyField,
|
|
25
|
+
BoolField,
|
|
26
|
+
DictField,
|
|
27
|
+
Int64Field,
|
|
28
|
+
KeyField,
|
|
29
|
+
)
|
|
30
|
+
from ....tensor.core import TensorOrder
|
|
31
|
+
from .core import PairwiseDistances
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class PairwiseDistancesTopk(PairwiseDistances):
    """Operator producing per-row top-k pairwise distances between ``x`` and ``y``.

    Calling the operator yields a float64 "distance" tensor of shape
    ``(X.shape[0], min(Y.shape[0], k))`` and, when ``return_index`` is truthy,
    an int64 "index" tensor of the same shape.

    NOTE(review): which ``k`` entries are selected (presumably the smallest
    distances) is decided by the execution backend, not by this class -- confirm.
    """

    _op_type_ = opcodes.PAIRWISE_DISTANCES_TOPK

    # input entities; cleared in the agg stage (see ``_set_inputs``)
    x = KeyField("x")
    y = KeyField("y")
    # number of entries kept per row; clamped to ``Y.shape[0]`` in ``__call__``
    k = Int64Field("k", default=None)
    # metric and extra keyword arguments forwarded to ``pairwise_distances``
    metric = AnyField("metric", default=None)
    metric_kwargs = DictField("metric_kwargs", default=None)
    # whether an index tensor is emitted next to the distance tensor
    return_index = BoolField("return_index", default=None)
    # stored on the operator only; not interpreted by this class
    working_memory = AnyField("working_memory", default=None)

    @property
    def output_limit(self):
        """Number of outputs: 2 when indices are returned, otherwise 1."""
        return 2 if self.return_index else 1

    @classmethod
    def _set_inputs(cls, op: "PairwiseDistancesTopk", inputs: List[EntityData]):
        """Bind ``op.x`` / ``op.y`` to the first two inputs, except in the agg stage."""
        super()._set_inputs(op, inputs)
        if op.stage != OperatorStage.agg:
            op.x, op.y = inputs[:2]
        else:
            op.x = op.y = None

    def __call__(self, X, Y):
        """Validate ``X`` and ``Y`` and create the output tensor(s).

        Parameters
        ----------
        X, Y
            Inputs accepted by ``pairwise_distances``.

        Returns
        -------
        A list ``[distance, index]`` of tensors when ``return_index`` is truthy,
        otherwise the single distance tensor.
        """
        from .pairwise import pairwise_distances

        # ``metric_kwargs`` defaults to None; unpacking None with ``**`` raises
        # TypeError, so fall back to an empty mapping.
        metric_kwargs = self.metric_kwargs or {}

        # leverage pairwise_distances for checks
        d = pairwise_distances(X, Y, metric=self.metric, **metric_kwargs)

        # Switch to the checked inputs before reading any shape, so that raw
        # array-likes without a ``shape`` attribute are handled as well.
        X, Y = d.op.inputs

        # k cannot exceed the number of rows available in Y
        if self.k > Y.shape[0]:
            self.k = Y.shape[0]

        shape = (X.shape[0], min(Y.shape[0], self.k))
        kws = [
            {
                "shape": shape,
                "order": TensorOrder.C_ORDER,
                "dtype": np.dtype(np.float64),
                "_type_": "distance",
            },
        ]
        if self.return_index:
            kws.append(
                {
                    "shape": shape,
                    "order": TensorOrder.C_ORDER,
                    "dtype": np.dtype(np.int64),
                    "_type_": "index",
                }
            )

        outputs = self.new_tensors([X, Y], kws=kws)
        return outputs if self.return_index else outputs[0]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def pairwise_distances_topk(
    X,
    Y=None,
    k=None,
    metric="euclidean",
    return_index=True,
    axis=1,
    working_memory=None,
    **kwds
):
    """Create a ``PairwiseDistancesTopk`` operator and apply it to ``X`` and ``Y``.

    Parameters
    ----------
    X
        First input.
    Y
        Second input; ``X`` is used when omitted.
    k
        Number of entries to keep per row. Required.
    metric
        Metric passed on to the operator.
    return_index
        Whether the operator also returns an index tensor.
    axis
        When equal to 0, ``X`` and ``Y`` are swapped before the operator is built.
    working_memory
        Falls back to ``options.learn.working_memory`` when None.
    **kwds
        Stored on the operator as ``metric_kwargs``.

    Returns
    -------
    Whatever calling the operator returns.

    Raises
    ------
    ValueError
        If ``k`` is None.
    """
    if k is None:  # pragma: no cover
        raise ValueError("`k` has to be specified")

    left = X
    right = X if Y is None else Y
    if axis == 0:
        left, right = right, left

    memory = options.learn.working_memory if working_memory is None else working_memory

    topk_op = PairwiseDistancesTopk(
        x=left,
        y=right,
        k=k,
        metric=metric,
        metric_kwargs=kwds,
        return_index=return_index,
        working_memory=memory,
    )
    return topk_op(left, right)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .... import tensor as mt
|
|
16
|
+
from .core import PairwiseDistances
|
|
17
|
+
from .euclidean import euclidean_distances
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def rbf_kernel(X, Y=None, gamma=None):
    """
    Evaluate the RBF (Gaussian) kernel for every pair of rows of X and Y::

        K(x, y) = exp(-gamma ||x-y||^2)

    Parameters
    ----------
    X : tensor of shape (n_samples_X, n_features)

    Y : tensor of shape (n_samples_Y, n_features)
        Handed to ``PairwiseDistances.check_pairwise_arrays`` together with X;
        a None value is resolved there (presumably to X -- confirm).

    gamma : float, default None
        Scale applied to the squared distances. When None, ``1.0 / n_features``
        (taken from ``X.shape[1]``) is used.

    Returns
    -------
    kernel_matrix : tensor of shape (n_samples_X, n_samples_Y)
    """

    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)
    if gamma is None:
        gamma = 1.0 / X.shape[1]

    # squared euclidean distances, scaled by -gamma before exponentiation
    scaled = euclidean_distances(X, Y, squared=True)
    scaled *= -gamma
    return mt.exp(scaled)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
from sklearn.metrics import r2_score
|
|
17
|
+
|
|
18
|
+
from .. import get_scorer
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_get_scorer():
    """Check that get_scorer rejects an unknown name and resolves valid inputs."""
    # an unrecognised scorer name is an error
    with pytest.raises(ValueError):
        get_scorer("unknown")

    # a known name and a metric callable both yield a scorer object
    for scoring in ("r2", r2_score):
        assert get_scorer(scoring) is not None
|
|
@@ -106,10 +106,11 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
|
|
|
106
106
|
<sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
|
|
107
107
|
"""
|
|
108
108
|
|
|
109
|
-
def __init__(self, feature_range=(0, 1), copy=True, clip=False):
|
|
109
|
+
def __init__(self, feature_range=(0, 1), copy=True, clip=False, validate=True):
|
|
110
110
|
self.feature_range = feature_range
|
|
111
111
|
self.copy = copy
|
|
112
112
|
self.clip = clip
|
|
113
|
+
self.validate = validate
|
|
113
114
|
|
|
114
115
|
def _reset(self): # pragma: no cover
|
|
115
116
|
"""Reset internal data-dependent state of the scaler, if necessary.
|
|
@@ -186,13 +187,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
|
|
|
186
187
|
)
|
|
187
188
|
|
|
188
189
|
first_pass = not hasattr(self, "n_samples_seen_")
|
|
189
|
-
|
|
190
|
-
X
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
190
|
+
if self.validate:
|
|
191
|
+
X = self._validate_data(
|
|
192
|
+
X,
|
|
193
|
+
reset=first_pass,
|
|
194
|
+
estimator=self,
|
|
195
|
+
dtype=FLOAT_DTYPES,
|
|
196
|
+
force_all_finite="allow-nan",
|
|
197
|
+
)
|
|
196
198
|
|
|
197
199
|
if isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE, INDEX_TYPE)):
|
|
198
200
|
data_min = X.min(axis=0)
|
|
@@ -239,13 +241,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
|
|
|
239
241
|
"""
|
|
240
242
|
check_is_fitted(self)
|
|
241
243
|
|
|
242
|
-
|
|
243
|
-
X
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
244
|
+
if self.validate:
|
|
245
|
+
X = self._validate_data(
|
|
246
|
+
X,
|
|
247
|
+
copy=self.copy,
|
|
248
|
+
dtype=FLOAT_DTYPES,
|
|
249
|
+
force_all_finite="allow-nan",
|
|
250
|
+
reset=False,
|
|
251
|
+
)
|
|
249
252
|
|
|
250
253
|
X *= self.scale_
|
|
251
254
|
X += self.min_
|
|
@@ -290,6 +293,7 @@ def minmax_scale(
|
|
|
290
293
|
*,
|
|
291
294
|
axis=0,
|
|
292
295
|
copy=True,
|
|
296
|
+
validate=True,
|
|
293
297
|
execute=False,
|
|
294
298
|
session=None,
|
|
295
299
|
run_kwargs=None
|
|
@@ -368,21 +372,28 @@ def minmax_scale(
|
|
|
368
372
|
""" # noqa
|
|
369
373
|
# Unlike the scaler object, this function allows 1d input.
|
|
370
374
|
# If copy is required, it will be done inside the scaler object.
|
|
371
|
-
|
|
372
|
-
X
|
|
373
|
-
|
|
374
|
-
|
|
375
|
+
if validate:
|
|
376
|
+
X = check_array(
|
|
377
|
+
X,
|
|
378
|
+
copy=False,
|
|
379
|
+
ensure_2d=False,
|
|
380
|
+
dtype=FLOAT_DTYPES,
|
|
381
|
+
force_all_finite="allow-nan",
|
|
382
|
+
)
|
|
383
|
+
original_ndim = X.ndim
|
|
375
384
|
|
|
376
|
-
|
|
377
|
-
|
|
385
|
+
if original_ndim == 1:
|
|
386
|
+
X = X.reshape(X.shape[0], 1)
|
|
387
|
+
else:
|
|
388
|
+
original_ndim = X.ndim
|
|
378
389
|
|
|
379
|
-
s = MinMaxScaler(feature_range=feature_range, copy=copy)
|
|
390
|
+
s = MinMaxScaler(feature_range=feature_range, copy=copy, validate=validate)
|
|
380
391
|
if axis == 0:
|
|
381
392
|
X = s.fit_transform(X)
|
|
382
393
|
else:
|
|
383
394
|
X = s.fit_transform(X.T).T
|
|
384
395
|
|
|
385
|
-
if original_ndim == 1:
|
|
396
|
+
if validate and original_ndim == 1:
|
|
386
397
|
X = X.ravel()
|
|
387
398
|
|
|
388
399
|
if not execute:
|
|
@@ -156,10 +156,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
|
|
|
156
156
|
[[3. 3.]]
|
|
157
157
|
"""
|
|
158
158
|
|
|
159
|
-
def __init__(self, *, copy=True, with_mean=True, with_std=True):
|
|
159
|
+
def __init__(self, *, copy=True, with_mean=True, with_std=True, validate=True):
|
|
160
160
|
self.with_mean = with_mean
|
|
161
161
|
self.with_std = with_std
|
|
162
162
|
self.copy = copy
|
|
163
|
+
self.validate = validate
|
|
163
164
|
|
|
164
165
|
def _reset(self):
|
|
165
166
|
"""Reset internal data-dependent state of the scaler, if necessary.
|
|
@@ -246,14 +247,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
|
|
|
246
247
|
Fitted scaler.
|
|
247
248
|
"""
|
|
248
249
|
first_call = not hasattr(self, "n_samples_seen_")
|
|
249
|
-
|
|
250
|
-
X
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
250
|
+
if self.validate:
|
|
251
|
+
X = self._validate_data(
|
|
252
|
+
X,
|
|
253
|
+
accept_sparse=("csr", "csc"),
|
|
254
|
+
dtype=FLOAT_DTYPES,
|
|
255
|
+
force_all_finite="allow-nan",
|
|
256
|
+
reset=first_call,
|
|
257
|
+
)
|
|
258
|
+
n_features = X.shape[1] if X.ndim == 2 else 1
|
|
257
259
|
|
|
258
260
|
if sample_weight is not None:
|
|
259
261
|
sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
|
|
@@ -267,7 +269,9 @@ class StandardScaler(TransformerMixin, BaseEstimator):
|
|
|
267
269
|
# incr_mean_variance_axis and _incremental_variance_axis
|
|
268
270
|
dtype = np.int64 if sample_weight is None else X.dtype
|
|
269
271
|
if not hasattr(self, "n_samples_seen_"):
|
|
270
|
-
self.n_samples_seen_ =
|
|
272
|
+
self.n_samples_seen_ = (
|
|
273
|
+
mt.zeros(n_features, dtype=dtype) if X.ndim == 2 else 0
|
|
274
|
+
)
|
|
271
275
|
# elif np.size(self.n_samples_seen_) == 1:
|
|
272
276
|
# self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])
|
|
273
277
|
# self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)
|
|
@@ -309,9 +313,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
|
|
|
309
313
|
constant_mask = _is_constant_feature(
|
|
310
314
|
self.var_, self.mean_, self.n_samples_seen_
|
|
311
315
|
)
|
|
312
|
-
self.scale_ =
|
|
313
|
-
|
|
314
|
-
|
|
316
|
+
self.scale_ = mt.sqrt(self.var_)
|
|
317
|
+
if self.validate:
|
|
318
|
+
self.scale_ = _handle_zeros_in_scale(
|
|
319
|
+
self.scale_, copy=False, constant_mask=constant_mask
|
|
320
|
+
)
|
|
315
321
|
else:
|
|
316
322
|
self.scale_ = None
|
|
317
323
|
|
|
@@ -337,14 +343,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
|
|
|
337
343
|
check_is_fitted(self)
|
|
338
344
|
|
|
339
345
|
copy = copy if copy is not None else self.copy
|
|
340
|
-
|
|
341
|
-
X
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
346
|
+
if self.validate:
|
|
347
|
+
X = self._validate_data(
|
|
348
|
+
X,
|
|
349
|
+
reset=False,
|
|
350
|
+
accept_sparse="csr",
|
|
351
|
+
copy=copy,
|
|
352
|
+
dtype=FLOAT_DTYPES,
|
|
353
|
+
force_all_finite="allow-nan",
|
|
354
|
+
)
|
|
348
355
|
|
|
349
356
|
if sparse.issparse(X):
|
|
350
357
|
raise NotImplementedError("Scaling on sparse tensors is not supported")
|
|
@@ -397,7 +404,7 @@ class StandardScaler(TransformerMixin, BaseEstimator):
|
|
|
397
404
|
return X
|
|
398
405
|
|
|
399
406
|
|
|
400
|
-
def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
|
|
407
|
+
def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True, validate=True):
|
|
401
408
|
"""Standardize a dataset along any axis.
|
|
402
409
|
|
|
403
410
|
Center to the mean and component wise scale to unit variance.
|
|
@@ -488,16 +495,18 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
|
|
|
488
495
|
X = mt.tensor(X)
|
|
489
496
|
|
|
490
497
|
ndim = X.ndim
|
|
491
|
-
if ndim == 1:
|
|
498
|
+
if validate and ndim == 1:
|
|
492
499
|
X = X.reshape((X.shape[0], 1))
|
|
493
500
|
if axis == 1:
|
|
494
501
|
X = X.T
|
|
495
502
|
|
|
496
|
-
scaler = StandardScaler(
|
|
503
|
+
scaler = StandardScaler(
|
|
504
|
+
with_mean=with_mean, with_std=with_std, copy=copy, validate=validate
|
|
505
|
+
)
|
|
497
506
|
transformed = scaler.fit_transform(X)
|
|
498
507
|
|
|
499
508
|
if axis == 1:
|
|
500
509
|
transformed = transformed.T
|
|
501
|
-
if ndim == 1:
|
|
510
|
+
if validate and ndim == 1:
|
|
502
511
|
transformed = transformed.reshape(transformed.shape[0])
|
|
503
512
|
return transformed
|
maxframe/learn/utils/__init__.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
from .core import convert_to_tensor_or_dataframe
|
|
16
16
|
from .multiclass import check_classification_targets
|
|
17
|
+
from .odpsio import read_odps_model
|
|
17
18
|
from .shuffle import shuffle
|
|
18
19
|
from .sparsefuncs import count_nonzero
|
|
19
|
-
from .validation import check_consistent_length
|
|
20
|
+
from .validation import check_array, check_consistent_length
|
maxframe/learn/utils/checks.py
CHANGED
|
@@ -20,7 +20,7 @@ from ... import opcodes
|
|
|
20
20
|
from ... import tensor as mt
|
|
21
21
|
from ...config import options
|
|
22
22
|
from ...core import ENTITY_TYPE, EntityData, OutputType, get_output_types
|
|
23
|
-
from ...core.operator import Operator
|
|
23
|
+
from ...core.operator import Operator
|
|
24
24
|
from ...serialization.serializables import (
|
|
25
25
|
BoolField,
|
|
26
26
|
DataTypeField,
|
|
@@ -56,7 +56,6 @@ class CheckBase(Operator, LearnOperatorMixin):
|
|
|
56
56
|
# output input if value not specified
|
|
57
57
|
self.value = value = value if value is not None else x
|
|
58
58
|
self.output_types = get_output_types(value)
|
|
59
|
-
self.stage = OperatorStage.agg
|
|
60
59
|
return self.new_tileable([x, value], kws=[value.params])
|
|
61
60
|
|
|
62
61
|
|
maxframe/learn/utils/core.py
CHANGED
|
@@ -14,9 +14,19 @@
|
|
|
14
14
|
|
|
15
15
|
import math
|
|
16
16
|
import numbers
|
|
17
|
+
import warnings
|
|
17
18
|
|
|
19
|
+
import numpy as np
|
|
18
20
|
import pandas as pd
|
|
19
21
|
|
|
22
|
+
from ...utils import parse_readable_size
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from sklearn import get_config as sklearn_get_config
|
|
26
|
+
except ImportError:
|
|
27
|
+
sklearn_get_config = None
|
|
28
|
+
|
|
29
|
+
from ...config import options
|
|
20
30
|
from ...dataframe import DataFrame, Series
|
|
21
31
|
from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
22
32
|
from ...tensor import tensor as astensor
|
|
@@ -60,3 +70,52 @@ def is_scalar_nan(x):
|
|
|
60
70
|
False
|
|
61
71
|
"""
|
|
62
72
|
return isinstance(x, numbers.Real) and math.isnan(x)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_chunk_n_rows(row_bytes, max_n_rows=None, working_memory=None):
    """Calculate how many rows can be processed within ``working_memory``.

    Parameters
    ----------
    row_bytes : int
        The expected number of bytes of memory that will be consumed
        during the processing of each row.
    max_n_rows : int, optional
        The maximum return value.
    working_memory : int, float or str, optional
        Memory budget. A plain number is interpreted as MiB. Any other
        value is handed to ``parse_readable_size`` and the first element
        of its result is used as the budget in bytes.
        When None (default), ``options.learn.working_memory`` is used;
        if that is also None, ``sklearn.get_config()['working_memory']``
        is used when scikit-learn is importable, otherwise 1024 MiB.

    Returns
    -------
    int
        Number of rows that fit into the budget, never less than 1 and
        never more than ``max_n_rows`` when that is given.

    Warns
    -----
    Issues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB.
    """
    if working_memory is None:  # pragma: no cover
        working_memory = options.learn.working_memory
    if working_memory is None:
        if sklearn_get_config is not None:
            working_memory = sklearn_get_config()["working_memory"]
        else:
            working_memory = 1024

    # Normalise the budget to bytes. Every plain number (not only ``int``)
    # is taken as MiB so that float budgets honour the documented unit.
    if isinstance(working_memory, numbers.Real):
        working_memory_bytes = working_memory * 2**20
    else:
        working_memory_bytes = parse_readable_size(working_memory)[0]

    chunk_n_rows = int(working_memory_bytes // row_bytes)
    if max_n_rows is not None:
        chunk_n_rows = min(chunk_n_rows, max_n_rows)
    if chunk_n_rows < 1:  # pragma: no cover
        # Report both figures in MiB; the budget is held in bytes here.
        warnings.warn(
            "Could not adhere to working_memory config. "
            "Currently %.0fMiB, %.0fMiB required."
            % (working_memory_bytes / 2**20, np.ceil(row_bytes * 2**-20))
        )
        chunk_n_rows = 1
    return chunk_n_rows
|
maxframe/learn/utils/extmath.py
CHANGED
|
@@ -15,6 +15,9 @@
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
|
|
17
17
|
from ... import tensor as mt
|
|
18
|
+
from ...core import ENTITY_TYPE
|
|
19
|
+
from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
20
|
+
from ...tensor.datasource import TensorZeros
|
|
18
21
|
|
|
19
22
|
|
|
20
23
|
# Use at least float64 for the accumulating functions to avoid precision issue
|
|
@@ -42,13 +45,30 @@ def _safe_accumulator_op(op, x, *args, **kwargs):
|
|
|
42
45
|
-------
|
|
43
46
|
result : The output of the accumulator function passed to this function
|
|
44
47
|
"""
|
|
45
|
-
if
|
|
48
|
+
if (
|
|
49
|
+
hasattr(x, "dtype")
|
|
50
|
+
and np.issubdtype(x.dtype, np.floating)
|
|
51
|
+
and x.dtype.itemsize < 8
|
|
52
|
+
):
|
|
46
53
|
result = op(x, *args, **kwargs, dtype=np.float64)
|
|
47
54
|
else:
|
|
48
55
|
result = op(x, *args, **kwargs)
|
|
49
56
|
return result
|
|
50
57
|
|
|
51
58
|
|
|
59
|
+
def logsumexp_real(a, axis=None, keepdims=False):
    """Simplified logsumexp for real arrays without biases.

    Computes ``log(sum(exp(a)))`` along ``axis`` by first subtracting the
    maximum along that axis and adding it back after the logarithm.

    Parameters
    ----------
    a : array_like
        Input values; converted with ``mt.tensor``.
    axis : None, int or tuple of ints, optional
        Axis or axes to reduce over. None (default) reduces over all axes.
    keepdims : bool, optional
        If True, the reduced axes are kept in the result with size one.

    Returns
    -------
    Tensor
        The log of the sum of exponentials of ``a`` along ``axis``.
    """
    from ... import tensor as mt

    x = mt.tensor(a)
    # Reductions keep their dimensions so the max broadcasts against ``x``
    # and against the summed result.
    x_max = mt.amax(x, axis=axis, keepdims=True)
    exp_x_shifted = mt.exp(x - x_max)
    ret = mt.log(mt.sum(exp_x_shifted, axis=axis, keepdims=True)) + x_max
    if keepdims:
        return ret
    # Drop exactly the reduced axes (all of them when ``axis`` is None)
    # instead of a hard-coded axis.
    return mt.squeeze(ret, axis=axis)
|
|
70
|
+
|
|
71
|
+
|
|
52
72
|
def _incremental_mean_and_var(
|
|
53
73
|
X, last_mean, last_variance, last_sample_count, sample_weight=None
|
|
54
74
|
):
|
|
@@ -104,16 +124,31 @@ def _incremental_mean_and_var(
|
|
|
104
124
|
`utils.sparsefuncs.incr_mean_variance_axis` and
|
|
105
125
|
`utils.sparsefuncs_fast.incr_mean_variance_axis0`
|
|
106
126
|
"""
|
|
127
|
+
has_last_sample = isinstance(last_sample_count, ENTITY_TYPE) and not isinstance(
|
|
128
|
+
last_sample_count.op, TensorZeros
|
|
129
|
+
)
|
|
130
|
+
is_df_type = isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE))
|
|
131
|
+
|
|
107
132
|
# old = stats until now
|
|
108
133
|
# new = the current increment
|
|
109
134
|
# updated = the aggregated stats
|
|
110
|
-
last_sum = last_mean * last_sample_count
|
|
135
|
+
last_sum = last_mean * last_sample_count if has_last_sample else 0
|
|
111
136
|
X_nan_mask = mt.isnan(X)
|
|
112
137
|
# if mt.any(X_nan_mask):
|
|
113
138
|
# sum_op = mt.nansum
|
|
114
139
|
# else:
|
|
115
140
|
# sum_op = mt.sum
|
|
116
|
-
|
|
141
|
+
|
|
142
|
+
def df_sum(val, **kw):
|
|
143
|
+
if "dtype" in kw:
|
|
144
|
+
val = val.astype(kw.pop("dtype"))
|
|
145
|
+
return val.sum(**kw)
|
|
146
|
+
|
|
147
|
+
if is_df_type:
|
|
148
|
+
sum_op = df_sum
|
|
149
|
+
else:
|
|
150
|
+
sum_op = mt.nansum
|
|
151
|
+
|
|
117
152
|
if sample_weight is not None:
|
|
118
153
|
# equivalent to np.nansum(X * sample_weight, axis=0)
|
|
119
154
|
# safer because np.float64(X*W) != np.float64(X)*np.float64(W)
|
|
@@ -125,10 +160,16 @@ def _incremental_mean_and_var(
|
|
|
125
160
|
)
|
|
126
161
|
else:
|
|
127
162
|
new_sum = _safe_accumulator_op(sum_op, X, axis=0)
|
|
128
|
-
|
|
129
|
-
|
|
163
|
+
if is_df_type:
|
|
164
|
+
new_sample_count = X.count()
|
|
165
|
+
else:
|
|
166
|
+
n_samples = X.shape[0]
|
|
167
|
+
new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
|
|
130
168
|
|
|
131
|
-
|
|
169
|
+
if not has_last_sample:
|
|
170
|
+
updated_sample_count = new_sample_count
|
|
171
|
+
else:
|
|
172
|
+
updated_sample_count = last_sample_count + new_sample_count
|
|
132
173
|
|
|
133
174
|
updated_mean = (last_sum + new_sum) / updated_sample_count
|
|
134
175
|
|
|
@@ -157,7 +198,9 @@ def _incremental_mean_and_var(
|
|
|
157
198
|
# and recommendations", by Chan, Golub, and LeVeque.
|
|
158
199
|
new_unnormalized_variance -= correction**2 / new_sample_count
|
|
159
200
|
|
|
160
|
-
last_unnormalized_variance =
|
|
201
|
+
last_unnormalized_variance = (
|
|
202
|
+
last_variance * last_sample_count if has_last_sample else 0
|
|
203
|
+
)
|
|
161
204
|
|
|
162
205
|
with mt.errstate(divide="ignore", invalid="ignore"):
|
|
163
206
|
last_over_new_count = last_sample_count / new_sample_count
|
|
@@ -169,8 +212,35 @@ def _incremental_mean_and_var(
|
|
|
169
212
|
* (last_sum / last_over_new_count - new_sum) ** 2
|
|
170
213
|
)
|
|
171
214
|
|
|
172
|
-
|
|
173
|
-
|
|
215
|
+
if not has_last_sample:
|
|
216
|
+
updated_unnormalized_variance = new_unnormalized_variance
|
|
217
|
+
else:
|
|
218
|
+
zeros = last_sample_count == 0
|
|
219
|
+
updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
|
|
174
220
|
updated_variance = updated_unnormalized_variance / updated_sample_count
|
|
175
221
|
|
|
176
222
|
return updated_mean, updated_variance, updated_sample_count
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def row_norms(X, squared=False):
    """Compute the Euclidean norm of every row of X.

    No input validation is performed.

    Parameters
    ----------
    X : array_like
        The input tensor.
    squared : bool, optional (default = False)
        When True, the squared norms are returned and no square root
        is taken.

    Returns
    -------
    array_like
        The row-wise (squared) Euclidean norm of X.
    """
    squared_norms = (X**2).sum(axis=1)
    if squared:
        return squared_norms
    return mt.sqrt(squared_norms)
|