maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from ... import opcodes
|
|
20
|
+
from ...core import ENTITY_TYPE, ExecutableTuple
|
|
21
|
+
from ...serialization.serializables import (
|
|
22
|
+
AnyField,
|
|
23
|
+
BoolField,
|
|
24
|
+
FieldTypes,
|
|
25
|
+
Int32Field,
|
|
26
|
+
ListField,
|
|
27
|
+
StringField,
|
|
28
|
+
)
|
|
29
|
+
from ...typing_ import EntityType
|
|
30
|
+
from ..core import TENSOR_TYPE, TensorOrder
|
|
31
|
+
from ..datasource import tensor as astensor
|
|
32
|
+
from ..operators import TensorOperator, TensorOperatorMixin
|
|
33
|
+
from ..utils import validate_axis, validate_order
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TensorPartition(TensorOperatorMixin, TensorOperator):
    """Operator describing a partition of a tensor around k-th positions.

    The operator produces one output per enabled flag: the partitioned
    values when ``return_value`` is set and the int64 index tensor when
    ``return_indices`` is set.
    """

    _op_type_ = opcodes.PARTITION

    # ``kth`` holds either concrete indices or, once inputs are bound,
    # the second input tensor (see ``_set_inputs``).
    kth = AnyField("kth")
    axis = Int32Field("axis")
    kind = StringField("kind")
    order = ListField("order", FieldTypes.string)
    need_align = BoolField("need_align")
    return_value = BoolField("return_value")
    return_indices = BoolField("return_indices")

    @classmethod
    def _set_inputs(cls, op: "TensorPartition", inputs: List[EntityType]):
        """Bind ``inputs`` to ``op`` and point ``op.kth`` at the second input, if any."""
        super()._set_inputs(op, inputs)
        if len(op._inputs) >= 2:
            op.kth = op._inputs[1]

    @property
    def psrs_kinds(self):
        """Sort kinds exposed for compatibility with PSRS."""
        # The last entry is None because merging data in
        # PSRSShuffle(reduce) does not need a sort.
        return ["quicksort", "mergesort", None]

    @property
    def output_limit(self):
        """Number of outputs: one for each of the two ``return_*`` flags that is truthy."""
        flags = (self.return_value, self.return_indices)
        return sum(1 for flag in flags if flag)

    def __call__(self, a, kth):
        """Create the output tensor(s) for partitioning ``a`` by ``kth``.

        ``kth`` is registered as an extra input only when it is itself a
        tensor.  A single tensor is returned when exactly one output is
        requested; otherwise the outputs are wrapped in an
        ``ExecutableTuple``.
        """
        op_inputs = [a, kth] if isinstance(kth, TENSOR_TYPE) else [a]

        output_specs = []
        if self.return_value:
            # partitioned values keep the shape, order and dtype of ``a``
            output_specs.append(
                dict(shape=a.shape, order=a.order, type="sorted", dtype=a.dtype)
            )
        if self.return_indices:
            # indices share the shape of ``a`` but are always C-ordered int64
            output_specs.append(
                dict(
                    shape=a.shape,
                    order=TensorOrder.C_ORDER,
                    type="argsort",
                    dtype=np.dtype(np.int64),
                )
            )

        outputs = self.new_tensors(op_inputs, kws=output_specs)
        if len(output_specs) == 1:
            return outputs[0]
        return ExecutableTuple(outputs)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _check_kth_dtype(dtype):
    """Ensure ``dtype`` is an integer dtype.

    Raises
    ------
    TypeError
        If ``dtype`` is not a sub-dtype of ``numpy.integer``.
    """
    if np.issubdtype(dtype, np.integer):
        return
    raise TypeError("Partition index must be integer")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _validate_kth_value(kth, size):
    """Normalize negative ``kth`` indices and check them against ``size``.

    Parameters
    ----------
    kth : numpy.ndarray
        Partition indices; negative entries count from the end of the axis.
    size : int
        Length of the axis being partitioned.

    Returns
    -------
    numpy.ndarray
        ``kth`` with every negative entry shifted up by ``size``.

    Raises
    ------
    ValueError
        If any index is still outside ``[0, size)`` after normalization.
        The message reports the first offending (normalized) index.
    """
    kth = np.where(kth < 0, kth + size, kth)
    out_of_bounds = (kth < 0) | (kth >= size)
    if np.any(out_of_bounds):
        # Pick the offender through the mask instead of iterating ``kth``:
        # iteration raises TypeError on 0-d input and yields whole rows on
        # multi-dimensional input, hiding the real out-of-bounds error.
        invalid_kth = kth[out_of_bounds].ravel()[0]
        raise ValueError(f"kth(={invalid_kth}) out of bounds ({size})")
    return kth
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _validate_partition_arguments(a, kth, axis, kind, order, kw):
    """Validate and normalize the arguments of a partition call.

    Parameters
    ----------
    a : array_like
        Data to partition; converted to a tensor.  It is flattened when
        ``axis`` is None.
    kth : entity or int or sequence of ints
        Partition indices.  Entities are converted to tensors; anything
        else becomes a 1-d (or higher) numpy array whose values are
        normalized and bounds-checked against the partitioned axis.
    axis : int or None
        Axis to partition along; None means "flatten, then use axis 0".
    kind : str
        Selection algorithm; only ``"introselect"`` is accepted.
    order : str or list of str or None
        Field order, validated against ``a.dtype``.
    kw : dict
        Remaining keyword arguments.  ``need_align`` is popped from it
        (the dict is mutated); any other key is an error.

    Returns
    -------
    tuple
        ``(a, kth, axis, kind, order, need_align)`` after normalization.

    Raises
    ------
    TypeError
        If ``kth`` does not have an integer dtype, or ``kw`` contains an
        unexpected keyword.
    ValueError
        If ``kth`` has more than one dimension, a concrete ``kth`` is out
        of bounds, or ``kind`` is not recognized.
    """
    a = astensor(a)
    if axis is None:
        a = a.flatten()
        axis = 0
    else:
        axis = validate_axis(a.ndim, axis)

    kth_is_entity = isinstance(kth, ENTITY_TYPE)
    kth = astensor(kth) if kth_is_entity else np.atleast_1d(kth)
    # The integer check applies to concrete indices as well as entities;
    # otherwise a float ``kth`` slips through and only fails at execution.
    _check_kth_dtype(kth.dtype)
    # Check dimensionality before the values so a too-deep ``kth`` is
    # reported as such rather than as a confusing bounds failure.
    if kth.ndim > 1:
        raise ValueError("object too deep for desired array")
    if not kth_is_entity:
        # only concrete indices can be bounds-checked at graph-build time
        kth = _validate_kth_value(kth, a.shape[axis])

    if kind != "introselect":
        raise ValueError(f"{kind} is an unrecognized kind of select")
    # if a is structure type and order is not None
    order = validate_order(a.dtype, order)
    need_align = kw.pop("need_align", None)
    if len(kw) > 0:
        raise TypeError(
            f"partition() got an unexpected keyword argument '{next(iter(kw))}'"
        )

    return a, kth, axis, kind, order, need_align
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def partition(a, kth, axis=-1, kind="introselect", order=None, **kw):
    r"""
    Return a partitioned copy of a tensor.

    The copy is rearranged so that the element at the k-th position is
    the one that would occupy that position in a sorted tensor.  Every
    element smaller than it is placed before it, and every element equal
    to or greater than it is placed after it.  The ordering inside the two
    partitions is undefined.

    Parameters
    ----------
    a : array_like
        Tensor to be partitioned.
    kth : int or sequence of ints
        Element index to partition by.  The k-th element ends up in its
        final sorted position, with all smaller elements before it and
        all equal or greater elements behind it.  The order of elements
        within the partitions is undefined.  When a sequence of k-th
        is given, all of the indexed elements are moved into their sorted
        positions at once.
    axis : int or None, optional
        Axis along which to partition.  If None, the tensor is flattened
        first.  The default is -1, i.e. the last axis.
    kind : {'introselect'}, optional
        Selection algorithm.  Default is 'introselect'.
    order : str or list of str, optional
        When `a` is a tensor with fields defined, this argument
        specifies which fields to compare first, second, etc.  A single
        field can be given as a string.  Not all fields need be
        specified, but unspecified fields will still be used, in the
        order in which they come up in the dtype, to break ties.
    return_index : bool, optional
        Keyword-only.  When true, an int64 index tensor is produced in
        addition to the partitioned values and both are returned as a
        tuple.  Default is False.
    need_align : optional
        Keyword-only.  Forwarded unchanged to the underlying operator.

    Returns
    -------
    partitioned_tensor : Tensor
        Tensor of the same type and shape as `a`.

    See Also
    --------
    Tensor.partition : Method to sort a tensor in-place.
    argpartition : Indirect partition.
    sort : Full sorting

    Notes
    -----
    Selection algorithms are characterized by their average speed, worst
    case performance, work space size, and whether they are stable.  A
    stable sort keeps items with the same key in the same relative
    order.  The available algorithms have the following properties:

    ================= ======= ============= ============ =======
       kind            speed   worst case    work space  stable
    ================= ======= ============= ============ =======
    'introselect'        1        O(n)           0         no
    ================= ======= ============= ============ =======

    All the partition algorithms make temporary copies of the data when
    partitioning along any but the last axis.  Consequently,
    partitioning along the last axis is faster and uses less space than
    partitioning along any other axis.

    The sort order for complex numbers is lexicographic.  If both the
    real and imaginary parts are non-nan then the order is determined by
    the real parts except when they are equal, in which case the order
    is determined by the imaginary parts.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> a = mt.array([3, 4, 2, 1])
    >>> mt.partition(a, 3).execute()
    array([2, 1, 3, 4])

    >>> mt.partition(a, (1, 3)).execute()
    array([1, 2, 3, 4])
    """
    # ``return_index`` must be removed before validation, which rejects
    # every keyword other than ``need_align``.
    want_indices = kw.pop("return_index", False)
    a, kth, axis, kind, order, need_align = _validate_partition_arguments(
        a, kth, axis, kind, order, kw
    )
    op_kwargs = dict(
        kth=kth,
        axis=axis,
        kind=kind,
        order=order,
        need_align=need_align,
        return_value=True,
        return_indices=want_indices,
        dtype=a.dtype,
        gpu=a.op.gpu,
    )
    partition_op = TensorPartition(**op_kwargs)
    return partition_op(a, kth)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from . import distance
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .cdist import cdist
|
|
16
|
+
from .pdist import pdist
|
|
17
|
+
from .squareform import squareform
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List, Tuple
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....core import EntityData
|
|
21
|
+
from ....serialization import PickleContainer
|
|
22
|
+
from ....serialization.serializables import AnyField, Float16Field, KeyField
|
|
23
|
+
from ....udf import BuiltinFunction
|
|
24
|
+
from ...core import TensorOrder
|
|
25
|
+
from ...datasource import tensor as astensor
|
|
26
|
+
from ...operators import TensorOperator, TensorOperatorMixin
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TensorCDist(TensorOperator, TensorOperatorMixin):
    """
    Operator describing a pairwise distance computation between the rows
    of two tensors.

    The operator's inputs are always ordered as ``XA``, ``XB`` followed by
    whichever of ``w``, ``V`` and ``VI`` were supplied, in that order.
    """

    _op_type_ = opcodes.CDIST

    # The two collections of observations.
    xa = KeyField("XA", default=None)
    xb = KeyField("XB", default=None)
    # Metric name, or a callable taking two vectors.
    metric = AnyField("metric", default=None)
    # NOTE(review): declared as a 16-bit float field; whether a non-integer
    # ``p`` loses precision depends on how the serializer treats this field
    # type -- confirm.
    p = Float16Field(
        "p",
        default=None,
        on_serialize=lambda value: None if value is None else float(value),
    )
    # Optional tensor arguments forwarded to the metric.
    w = KeyField("w", default=None)
    v = KeyField("V", default=None)
    vi = KeyField("VI", default=None)

    @classmethod
    def _set_inputs(cls, op: "TensorCDist", inputs: List[EntityData]):
        """
        Rebind the tensor-valued fields of ``op`` to the entries of ``inputs``.

        ``inputs`` is consumed in the same order ``__call__`` builds it:
        ``xa``, ``xb`` and then each of ``w``, ``v``, ``vi`` that is set.
        """
        super()._set_inputs(op, inputs)
        remaining = iter(inputs)
        op.xa = next(remaining)
        op.xb = next(remaining)
        # Only fields that were populated contributed an input.
        for attr in ("w", "v", "vi"):
            if getattr(op, attr) is not None:
                setattr(op, attr, next(remaining))

    def __call__(self, xa, xb, shape: Tuple):
        """
        Create the output tensor of the given ``shape`` in C order, taking
        ``xa``, ``xb`` and every non-``None`` optional tensor as inputs.
        """
        optional = [
            tensor for tensor in (self.w, self.v, self.vi) if tensor is not None
        ]
        return self.new_tensor(
            [xa, xb] + optional, shape=shape, order=TensorOrder.C_ORDER
        )

    def has_custom_code(self) -> bool:
        """
        Return True when the metric is a ``PickleContainer`` or a callable
        that is not a ``BuiltinFunction``.
        """
        metric = self.metric
        if isinstance(metric, PickleContainer):
            return True
        return callable(metric) and not isinstance(metric, BuiltinFunction)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def cdist(XA, XB, metric="euclidean", **kwargs):
    """
    Compute distance between each pair of the two collections of inputs.

    See Notes for common calling conventions.

    Parameters
    ----------
    XA : Tensor
        An :math:`m_A` by :math:`n` tensor of :math:`m_A`
        original observations in an :math:`n`-dimensional space.
        Inputs are converted to float type.
    XB : Tensor
        An :math:`m_B` by :math:`n` tensor of :math:`m_B`
        original observations in an :math:`n`-dimensional space.
        Inputs are converted to float type.
    metric : str or callable, optional
        The distance metric to use.  If a string, the distance function can be
        'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
        'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'jensenshannon',
        'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
        'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
        'wminkowski', 'yule'.  'wminkowski' is passed on as 'minkowski'.
    **kwargs : dict, optional
        Extra arguments to `metric`: refer to each metric documentation for a
        list of all possible arguments.

        Some possible arguments:

        p : scalar
        The p-norm to apply for Minkowski, weighted and unweighted.
        Default: 2.

        w : Tensor
        The weight vector for metrics that support weights (e.g., Minkowski).

        V : Tensor
        The variance vector for standardized Euclidean.
        Default: var(vstack([XA, XB]), axis=0, ddof=1)

        VI : Tensor
        The inverse of the covariance matrix for Mahalanobis.
        Default: inv(cov(vstack([XA, XB].T))).T

        out : Tensor
        The output tensor
        If not None, the distance matrix Y is stored in this tensor.
        Note: metric independent, it will become a regular keyword arg in a
        future scipy version

    Returns
    -------
    Y : Tensor
        A :math:`m_A` by :math:`m_B` distance matrix is returned.
        For each :math:`i` and :math:`j`, the metric
        ``dist(u=XA[i], v=XB[j])`` is computed and stored in the
        :math:`ij` th entry.

    Raises
    ------
    ValueError
        An exception is thrown if `XA` and `XB` do not have
        the same number of columns.

    Notes
    -----
    The following are common calling conventions:

    1. ``Y = cdist(XA, XB, 'euclidean')``

       Computes the distance between :math:`m` points using
       Euclidean distance (2-norm) as the distance metric between the
       points. The points are arranged as :math:`m`
       :math:`n`-dimensional row vectors in the matrix X.

    2. ``Y = cdist(XA, XB, 'minkowski', p=2.)``

       Computes the distances using the Minkowski distance
       :math:`||u-v||_p` (:math:`p`-norm) where :math:`p \\geq 1`.

    3. ``Y = cdist(XA, XB, 'cityblock')``

       Computes the city block or Manhattan distance between the
       points.

    4. ``Y = cdist(XA, XB, 'seuclidean', V=None)``

       Computes the standardized Euclidean distance. The standardized
       Euclidean distance between two n-vectors ``u`` and ``v`` is

       .. math::

          \\sqrt{\\sum {(u_i-v_i)^2 / V[x_i]}}.

       V is the variance vector; V[i] is the variance computed over all
       the i'th components of the points. If not passed, it is
       automatically computed.

    5. ``Y = cdist(XA, XB, 'sqeuclidean')``

       Computes the squared Euclidean distance :math:`||u-v||_2^2` between
       the vectors.

    6. ``Y = cdist(XA, XB, 'cosine')``

       Computes the cosine distance between vectors u and v,

       .. math::

          1 - \\frac{u \\cdot v}
                   {{||u||}_2 {||v||}_2}

       where :math:`||*||_2` is the 2-norm of its argument ``*``, and
       :math:`u \\cdot v` is the dot product of :math:`u` and :math:`v`.

    7. ``Y = cdist(XA, XB, 'correlation')``

       Computes the correlation distance between vectors u and v. This is

       .. math::

          1 - \\frac{(u - \\bar{u}) \\cdot (v - \\bar{v})}
                   {{||(u - \\bar{u})||}_2 {||(v - \\bar{v})||}_2}

       where :math:`\\bar{v}` is the mean of the elements of vector v,
       and :math:`x \\cdot y` is the dot product of :math:`x` and :math:`y`.


    8. ``Y = cdist(XA, XB, 'hamming')``

       Computes the normalized Hamming distance, or the proportion of
       those vector elements between two n-vectors ``u`` and ``v``
       which disagree. To save memory, the matrix ``X`` can be of type
       boolean.

    9. ``Y = cdist(XA, XB, 'jaccard')``

       Computes the Jaccard distance between the points. Given two
       vectors, ``u`` and ``v``, the Jaccard distance is the
       proportion of those elements ``u[i]`` and ``v[i]`` that
       disagree where at least one of them is non-zero.

    10. ``Y = cdist(XA, XB, 'chebyshev')``

       Computes the Chebyshev distance between the points. The
       Chebyshev distance between two n-vectors ``u`` and ``v`` is the
       maximum norm-1 distance between their respective elements. More
       precisely, the distance is given by

       .. math::

          d(u,v) = \\max_i {|u_i-v_i|}.

    11. ``Y = cdist(XA, XB, 'canberra')``

       Computes the Canberra distance between the points. The
       Canberra distance between two points ``u`` and ``v`` is

       .. math::

         d(u,v) = \\sum_i \\frac{|u_i-v_i|}
                              {|u_i|+|v_i|}.

    12. ``Y = cdist(XA, XB, 'braycurtis')``

       Computes the Bray-Curtis distance between the points. The
       Bray-Curtis distance between two points ``u`` and ``v`` is


       .. math::

            d(u,v) = \\frac{\\sum_i (|u_i-v_i|)}
                          {\\sum_i (|u_i+v_i|)}

    13. ``Y = cdist(XA, XB, 'mahalanobis', VI=None)``

       Computes the Mahalanobis distance between the points. The
       Mahalanobis distance between two points ``u`` and ``v`` is
       :math:`\\sqrt{(u-v)(1/V)(u-v)^T}` where :math:`(1/V)` (the ``VI``
       variable) is the inverse covariance. If ``VI`` is not None,
       ``VI`` will be used as the inverse covariance matrix.

    14. ``Y = cdist(XA, XB, 'yule')``

       Computes the Yule distance between the boolean
       vectors. (see `yule` function documentation)

    15. ``Y = cdist(XA, XB, 'matching')``

       Synonym for 'hamming'.

    16. ``Y = cdist(XA, XB, 'dice')``

       Computes the Dice distance between the boolean vectors. (see
       `dice` function documentation)

    17. ``Y = cdist(XA, XB, 'kulsinski')``

       Computes the Kulsinski distance between the boolean
       vectors. (see `kulsinski` function documentation)

    18. ``Y = cdist(XA, XB, 'rogerstanimoto')``

       Computes the Rogers-Tanimoto distance between the boolean
       vectors. (see `rogerstanimoto` function documentation)

    19. ``Y = cdist(XA, XB, 'russellrao')``

       Computes the Russell-Rao distance between the boolean
       vectors. (see `russellrao` function documentation)

    20. ``Y = cdist(XA, XB, 'sokalmichener')``

       Computes the Sokal-Michener distance between the boolean
       vectors. (see `sokalmichener` function documentation)

    21. ``Y = cdist(XA, XB, 'sokalsneath')``

       Computes the Sokal-Sneath distance between the vectors. (see
       `sokalsneath` function documentation)


    22. ``Y = cdist(XA, XB, 'wminkowski', p=2., w=w)``

       Computes the weighted Minkowski distance between the
       vectors. (see `wminkowski` function documentation)

    23. ``Y = cdist(XA, XB, f)``

       Computes the distance between all pairs of vectors in X
       using the user supplied 2-arity function f. For example,
       Euclidean distance between the vectors could be computed
       as follows::

         dm = cdist(XA, XB, lambda u, v: np.sqrt(((u-v)**2).sum()))

       Note that you should avoid passing a reference to one of
       the distance functions defined in this library. For example,::

         dm = cdist(XA, XB, sokalsneath)

       would calculate the pair-wise distances between the vectors in
       X using the Python function `sokalsneath`. This would result in
       sokalsneath being called :math:`{n \\choose 2}` times, which
       is inefficient. Instead, the optimized C version is more
       efficient, and we call it using the following syntax::

         dm = cdist(XA, XB, 'sokalsneath')

    Examples
    --------
    Find the Euclidean distances between four 2-D coordinates:

    >>> from maxframe.tensor.spatial import distance
    >>> coords = [(35.0456, -85.2672),
    ...           (35.1174, -89.9711),
    ...           (35.9728, -83.9422),
    ...           (36.1667, -86.7833)]
    >>> distance.cdist(coords, coords, 'euclidean').execute()
    array([[ 0.    ,  4.7044,  1.6172,  1.8856],
           [ 4.7044,  0.    ,  6.0893,  3.3561],
           [ 1.6172,  6.0893,  0.    ,  2.8477],
           [ 1.8856,  3.3561,  2.8477,  0.    ]])


    Find the Manhattan distance from a 3-D point to the corners of the unit
    cube:

    >>> import maxframe.tensor as mt
    >>> a = mt.array([[0, 0, 0],
    ...               [0, 0, 1],
    ...               [0, 1, 0],
    ...               [0, 1, 1],
    ...               [1, 0, 0],
    ...               [1, 0, 1],
    ...               [1, 1, 0],
    ...               [1, 1, 1]])
    >>> b = mt.array([[ 0.1,  0.2,  0.4]])
    >>> distance.cdist(a, b, 'cityblock').execute()
    array([[ 0.7],
           [ 0.9],
           [ 1.3],
           [ 1.5],
           [ 1.5],
           [ 1.7],
           [ 2.1],
           [ 2.3]])

    """
    XA = astensor(XA, order="C")
    XB = astensor(XB, order="C")

    if XA.issparse() or XB.issparse():
        raise ValueError("Sparse tensors are not supported by this function.")

    # Both operands must be matrices sharing the feature dimension.
    shape_a, shape_b = XA.shape, XB.shape
    if len(shape_a) != 2:
        raise ValueError("XA must be a 2-dimensional array.")
    if len(shape_b) != 2:
        raise ValueError("XB must be a 2-dimensional array.")
    if shape_a[1] != shape_b[1]:
        raise ValueError(
            "XA and XB must have the same number of columns "
            "(i.e. feature dimension.)"
        )

    # One row per observation of XA, one column per observation of XB.
    result_shape = (shape_a[0], shape_b[0])

    out = kwargs.pop("out", None)
    if out is not None:
        if not hasattr(out, "shape"):
            raise TypeError("return arrays must be a tensor")
        if out.shape != result_shape:
            raise ValueError("Output tensor has incorrect shape.")
        if out.dtype != np.double:
            raise ValueError("Output tensor must be double type.")

    if not (isinstance(metric, str) or callable(metric)):
        raise TypeError(
            "3rd argument metric must be a string identifier or a function."
        )

    # scipy removed "wminkowski" in v1.8.0; the replacement is "minkowski"
    # combined with the `w=` keyword argument carrying the weights.
    if metric == "wminkowski":
        metric = "minkowski"

    p = kwargs.pop("p", None)

    # Wrap each supplied optional array argument as a tensor, leaving
    # absent ones as None.
    tensor_args = {}
    for key in ("w", "V", "VI"):
        raw = kwargs.pop(key, None)
        tensor_args[key] = astensor(raw) if raw is not None else None

    # Anything still left in kwargs is not a recognised option.
    if kwargs:
        raise TypeError(
            f"`cdist` got an unexpected keyword argument '{next(iter(kwargs))}'"
        )

    op = TensorCDist(
        metric=metric,
        p=p,
        w=tensor_args["w"],
        v=tensor_args["V"],
        vi=tensor_args["VI"],
        dtype=np.dtype(float),
    )
    result = op(XA, XB, result_shape)

    if out is None:
        return result
    # Point the caller-provided tensor at the newly built result.
    out.data = result.data
    return out
|