maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....core import EntityData
|
|
21
|
+
from ....serialization.serializables import KeyField
|
|
22
|
+
from ....tensor.core import TensorOrder
|
|
23
|
+
from ...preprocessing import normalize
|
|
24
|
+
from .core import PairwiseDistances
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class CosineDistances(PairwiseDistances):
    """Operator node for pairwise cosine distances between two tensors.

    Calling an instance validates the two operands through
    ``check_pairwise_arrays`` and creates the output tensor; no distance
    arithmetic is performed in this class itself.
    """

    _op_type_ = opcodes.PAIRWISE_COSINE_DISTANCES

    # Serialized references to the two operands.
    x = KeyField("x")
    y = KeyField("y")

    @classmethod
    def _set_inputs(cls, op: "CosineDistances", inputs: List[EntityData]):
        """Rebind ``op.x`` and ``op.y`` to the first two entries of ``inputs``."""
        super()._set_inputs(op, inputs)
        first, second = inputs[:2]
        op.x = first
        op.y = second

    def __call__(self, x, y=None):
        """Create the output tensor for operands ``x`` and ``y``.

        Parameters
        ----------
        x : tensor-like
            First operand; its leading dimension gives the number of output rows.
        y : tensor-like, optional
            Second operand; its leading dimension gives the number of output
            columns.  It is passed through ``check_pairwise_arrays`` together
            with ``x`` before use.

        Returns
        -------
        Tensor
            Tensor of shape ``(x.shape[0], y.shape[0])`` in C order.
        """
        x, y = self.check_pairwise_arrays(x, y)
        out_shape = (x.shape[0], y.shape[0])
        return self.new_tensor([x, y], shape=out_shape, order=TensorOrder.C_ORDER)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def cosine_similarity(X, Y=None, dense_output=True):
    """Return the cosine similarity between the rows of X and the rows of Y.

    Both operands are passed through ``normalize`` and the result is the
    dot product of the normalized operands::

        K(X, Y) = <X, Y> / (||X||*||Y||)

    On L2-normalized data, this function is equivalent to linear_kernel.

    Read more in the :ref:`User Guide <cosine_similarity>`.

    Parameters
    ----------
    X : Tensor or sparse tensor, shape: (n_samples_X, n_features)
        Input data.

    Y : Tensor or sparse tensor, shape: (n_samples_Y, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.

    dense_output : boolean (optional), default True
        Whether to return dense output even when the input is sparse. If
        ``False``, the output is sparse if both input tensors are sparse.

    Returns
    -------
    kernel matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    x_unit = normalize(X, copy=True)
    # When both operands are the same object, reuse the normalized X
    # instead of normalizing the same data a second time.
    y_unit = x_unit if X is Y else normalize(Y, copy=True)

    kernel = x_unit.dot(y_unit.T)
    return kernel.todense() if dense_output else kernel
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def cosine_distances(X, Y=None):
    """Return the cosine distance between the rows of X and the rows of Y.

    Cosine distance is defined as 1.0 minus the cosine similarity.  This
    function only builds a ``CosineDistances`` operator with a float64
    result dtype and applies it to the operands.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).

    See also
    --------
    maxframe.learn.metrics.pairwise.cosine_similarity
    maxframe.tensor.spatial.distance.cosine : dense matrices only
    """
    result_dtype = np.dtype(np.float64)
    distance_op = CosineDistances(x=X, y=Y, dtype=result_dtype)
    return distance_op(X, y=Y)
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from .... import tensor as mt
|
|
21
|
+
from ....config import options
|
|
22
|
+
from ....core import EntityData
|
|
23
|
+
from ....serialization.serializables import BoolField, KeyField
|
|
24
|
+
from ....tensor.core import TensorOrder
|
|
25
|
+
from ...utils.validation import check_array
|
|
26
|
+
from .core import PairwiseDistances
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class EuclideanDistances(PairwiseDistances):
    """Operator node for pairwise Euclidean distances between two tensors.

    Besides the two operands, the operator can carry optional pre-computed
    squared row norms for each of them and a ``squared`` flag.
    """

    _op_type_ = opcodes.PAIRWISE_EUCLIDEAN_DISTANCES

    # Operands and optional pre-computed squared norms; unset ones are None.
    x = KeyField("X")
    y = KeyField("Y", default=None)
    x_norm_squared = KeyField("X_norm_squared", default=None)
    y_norm_squared = KeyField("Y_norm_squared", default=None)
    # Flag stored on the operator; it is not read inside this class.
    squared = BoolField("squared", default=None)

    @classmethod
    def _set_inputs(cls, op: "EuclideanDistances", inputs: List[EntityData]):
        """Rebind the key fields of ``op`` from ``inputs`` in declaration order.

        ``x`` always takes the first input; each of ``y``,
        ``x_norm_squared`` and ``y_norm_squared`` takes the next input only
        when the field is currently not ``None``.
        """
        super()._set_inputs(op, inputs)
        remaining = iter(inputs)
        op.x = next(remaining)
        for field_name in ("y", "x_norm_squared", "y_norm_squared"):
            if getattr(op, field_name) is not None:
                setattr(op, field_name, next(remaining))

    def __call__(self, X, Y=None, Y_norm_squared=None, X_norm_squared=None):
        """Validate the optional norms and create the output tensor.

        Parameters
        ----------
        X, Y : tensor-like
            Operands; their leading dimensions give the output shape.
        Y_norm_squared : tensor-like, optional
            Pre-computed squared norms for ``Y``; must have shape
            ``(1, Y.shape[0])`` after ``atleast_2d``.
        X_norm_squared : tensor-like, optional
            Pre-computed squared norms for ``X``; must have shape
            ``(X.shape[0], 1)`` or ``(1, X.shape[0])``.

        Returns
        -------
        Tensor
            Tensor of shape ``(X.shape[0], Y.shape[0])`` in C order.

        Raises
        ------
        ValueError
            If a supplied norm does not have a compatible shape.
        """
        # If norms are passed as float32, they are unused. If arrays are passed as
        # float32, norms needs to be recomputed on upcast chunks.
        # TODO: use a float64 accumulator in row_norms to avoid the latter.
        x_norms = None
        if X_norm_squared is not None:
            x_norms = check_array(X_norm_squared)
            if x_norms.shape == (1, X.shape[0]):
                # Row vector supplied: turn it into a column vector.
                x_norms = x_norms.T
            elif x_norms.shape != (X.shape[0], 1):
                raise ValueError("Incompatible dimensions for X and X_norm_squared")
            if x_norms.dtype == np.float32:
                # float32 norms are discarded, both locally and on the operator.
                x_norms = None
                self.x_norm_squared = None

        y_norms = None
        if X is Y and x_norms is not None:
            # shortcut in the common case euclidean_distances(X, X)
            y_norms = x_norms.T
        elif Y_norm_squared is not None:
            y_norms = mt.atleast_2d(Y_norm_squared)
            if y_norms.shape != (1, Y.shape[0]):
                raise ValueError("Incompatible dimensions for Y and Y_norm_squared")
            if y_norms.dtype == np.float32:
                y_norms = None
                self.y_norm_squared = None

        # Operands first, then whichever norms survived validation.
        inputs = [X, Y] + [norm for norm in (x_norms, y_norms) if norm is not None]
        out_shape = (X.shape[0], Y.shape[0])
        return self.new_tensor(inputs, shape=out_shape, order=TensorOrder.C_ORDER)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def euclidean_distances(
    X, Y=None, Y_norm_squared=None, squared=False, X_norm_squared=None
):
    """
    Considering the rows of X (and Y=X) as vectors, compute the
    distance matrix between each pair of vectors.

    For efficiency reasons, the euclidean distance between a pair of row
    vector x and y is computed as::

        dist(x, y) = sqrt(dot(x, x) - 2 * dot(x, y) + dot(y, y))

    This formulation has two advantages over other ways of computing distances.
    First, it is computationally efficient when dealing with sparse data.
    Second, if one argument varies but the other remains unchanged, then
    `dot(x, x)` and/or `dot(y, y)` can be pre-computed.

    However, this is not the most precise way of doing this computation, and
    the distance matrix returned by this function may not be exactly
    symmetric as required by, e.g., ``scipy.spatial.distance`` functions.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples_1, n_features)

    Y : {array-like, sparse matrix}, shape (n_samples_2, n_features)

    Y_norm_squared : array-like, shape (n_samples_2, ), optional
        Pre-computed dot-products of vectors in Y (e.g.,
        ``(Y**2).sum(axis=1)``)
        May be ignored in some cases, see the note below.

    squared : boolean, optional
        Return squared Euclidean distances.

    X_norm_squared : array-like, shape = [n_samples_1], optional
        Pre-computed dot-products of vectors in X (e.g.,
        ``(X**2).sum(axis=1)``)
        May be ignored in some cases, see the note below.

    Notes
    -----
    To achieve better accuracy, `X_norm_squared` and `Y_norm_squared` may be
    unused if they are passed as ``float32``.

    The result dtype is ``float32`` only when ``X`` (and ``Y``, if given)
    carry a ``float32`` dtype; in every other case, including inputs that
    have no ``dtype`` attribute such as nested lists, it is ``float64``.

    Returns
    -------
    distances : tensor, shape (n_samples_1, n_samples_2)

    Examples
    --------
    >>> from maxframe.learn.metrics.pairwise import euclidean_distances
    >>> X = [[0, 1], [1, 1]]
    >>> # distance between rows of X
    >>> euclidean_distances(X, X).execute()
    array([[0., 1.],
           [1., 0.]])
    >>> # get distance to origin
    >>> euclidean_distances(X, [[0, 0]]).execute()
    array([[1.        ],
           [1.41421356]])

    See also
    --------
    paired_distances : distances between pairs of elements of X and Y.
    """
    # The inputs are inspected before check_pairwise_arrays() runs, so they may
    # still be plain sequences (the examples above pass nested lists). Read
    # ``dtype`` defensively instead of assuming the attribute exists.
    x_is_float32 = getattr(X, "dtype", None) == np.float32
    y_is_float32 = Y is None or getattr(Y, "dtype", None) == np.float32
    dtype = np.float32 if x_is_float32 and y_is_float32 else np.float64

    X, Y = EuclideanDistances.check_pairwise_arrays(X, Y)
    op = EuclideanDistances(
        x=X,
        y=Y,
        x_norm_squared=X_norm_squared,
        y_norm_squared=Y_norm_squared,
        squared=squared,
        dtype=np.dtype(dtype),
        chunk_store_limit=options.chunk_store_limit,
    )
    return op(X, Y=Y, Y_norm_squared=Y_norm_squared, X_norm_squared=X_norm_squared)
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....core import EntityData
|
|
21
|
+
from ....serialization.serializables import BoolField, KeyField
|
|
22
|
+
from ....tensor.core import TensorOrder
|
|
23
|
+
from .core import PairwiseDistances
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class HaversineDistances(PairwiseDistances):
    """Operator for pairwise haversine distances between two 2-column inputs."""

    _op_type_ = opcodes.PAIRWISE_HAVERSINE_DISTANCES

    # input tensors of the operator
    x = KeyField("x")
    y = KeyField("y")
    # for test purpose
    use_sklearn = BoolField("use_sklearn", default=True)

    @classmethod
    def _set_inputs(cls, op: "HaversineDistances", inputs: List[EntityData]):
        """Point ``op.x`` / ``op.y`` at the first two registered inputs."""
        super()._set_inputs(op, inputs)
        # Read back from ``op._inputs`` (as populated by the parent call)
        # rather than from the ``inputs`` argument.
        first, second = op._inputs[:2]
        op.x = first
        op.y = second

    def __call__(self, X, Y=None):
        """Validate ``X`` / ``Y`` and create the distance tensor.

        Raises
        ------
        ValueError
            If either validated input does not have exactly 2 columns.
        TypeError
            If either validated input reports itself as sparse.
        """
        X, Y = self.check_pairwise_arrays(X, Y)
        if self.y is None:
            # no ``y`` was recorded on the operator; use the validated one
            self.y = Y

        if not (X.shape[1] == 2 and Y.shape[1] == 2):
            raise ValueError("Haversine distance only valid in 2 dimensions")
        if any(operand.issparse() for operand in (X, Y)):
            raise TypeError("Haversine distance requires inputs dense")

        out_shape = (X.shape[0], Y.shape[0])
        return self.new_tensor([X, Y], shape=out_shape, order=TensorOrder.C_ORDER)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def haversine_distances(X, Y=None):
    """Compute the Haversine distance between samples in X and Y

    The Haversine (or great circle) distance is the angular distance between
    two points on the surface of a sphere. The first coordinate of each point
    is assumed to be the latitude, the second is the longitude, given in
    radians. The dimension of the data must be 2.

    .. math::
       D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2)
                                + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}]

    Parameters
    ----------
    X : array_like, shape (n_samples_1, 2)

    Y : array_like, shape (n_samples_2, 2), optional

    Returns
    -------
    distance : {Tensor}, shape (n_samples_1, n_samples_2)

    Notes
    -----
    As the Earth is nearly spherical, the haversine formula provides a good
    approximation of the distance between two points of the Earth surface, with
    a less than 1% error on average.

    Examples
    --------
    We want to calculate the distance between the Ezeiza Airport
    (Buenos Aires, Argentina) and the Charles de Gaulle Airport (Paris, France).
    The coordinates are given in degrees, so they are converted to radians
    first.

    >>> from math import radians
    >>> from maxframe.learn.metrics.pairwise import haversine_distances
    >>> bsas = [-34.83333, -58.5166646]
    >>> paris = [49.0083899664, 2.53844117956]
    >>> bsas_in_radians = [radians(_) for _ in bsas]
    >>> paris_in_radians = [radians(_) for _ in paris]
    >>> result = haversine_distances([bsas_in_radians, paris_in_radians])
    >>> (result * 6371000/1000).execute()  # multiply by Earth radius to get kilometers
    array([[    0.        , 11099.54035582],
           [11099.54035582,     0.        ]])
    """
    # The result is always declared as float64, whatever the input dtype.
    op = HaversineDistances(x=X, y=Y, dtype=np.dtype(np.float64))
    return op(X, Y=Y)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
from .... import opcodes
|
|
20
|
+
from ....core import EntityData
|
|
21
|
+
from ....serialization.serializables import KeyField
|
|
22
|
+
from ....tensor.core import TensorOrder
|
|
23
|
+
from .core import PairwiseDistances
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ManhattanDistances(PairwiseDistances):
    """Operator for pairwise Manhattan (L1) distances between two inputs."""

    _op_type_ = opcodes.PAIRWISE_MANHATTAN_DISTANCES

    # input tensors of the operator
    x = KeyField("x")
    y = KeyField("y")

    @classmethod
    def _set_inputs(cls, op: "ManhattanDistances", inputs: List[EntityData]):
        """Point ``op.x`` / ``op.y`` at the first two entries of ``inputs``."""
        super()._set_inputs(op, inputs)
        first, second = inputs[:2]
        op.x = first
        op.y = second

    def __call__(self, X, Y=None):
        """Validate ``X`` / ``Y`` and create the ``(n_X, n_Y)`` distance tensor."""
        X, Y = self.check_pairwise_arrays(X, Y)
        if self.y is None:
            # no ``y`` was recorded on the operator; use the validated one
            self.y = Y

        return self.new_tensor(
            [X, Y], shape=(X.shape[0], Y.shape[0]), order=TensorOrder.C_ORDER
        )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def manhattan_distances(X, Y=None):
    """Compute the L1 distances between the vectors in X and Y.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like
        A tensor with shape (n_samples_X, n_features).

    Y : array_like, optional
        A tensor with shape (n_samples_Y, n_features).

    Returns
    -------
    D : Tensor
        Shape is (n_samples_X, n_samples_Y) and D contains
        the pairwise L1 distances.

    Examples
    --------
    >>> from maxframe.learn.metrics.pairwise import manhattan_distances
    >>> manhattan_distances([[3]], [[3]]).execute() #doctest:+ELLIPSIS
    array([[0.]])
    >>> manhattan_distances([[3]], [[2]]).execute() #doctest:+ELLIPSIS
    array([[1.]])
    >>> manhattan_distances([[2]], [[3]]).execute() #doctest:+ELLIPSIS
    array([[1.]])
    >>> manhattan_distances([[1, 2], [3, 4]],
    ...                     [[1, 2], [0, 3]]).execute() #doctest:+ELLIPSIS
    array([[0., 2.],
           [4., 4.]])
    """
    # The result is always declared as float64, whatever the input dtype.
    float64 = np.dtype(np.float64)
    return ManhattanDistances(x=X, y=Y, dtype=float64)(X, Y=Y)
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import warnings
|
|
16
|
+
from functools import partial
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from sklearn.exceptions import DataConversionWarning
|
|
20
|
+
except ImportError: # pragma: no cover
|
|
21
|
+
DataConversionWarning = None
|
|
22
|
+
|
|
23
|
+
from ....tensor.spatial import distance
|
|
24
|
+
from ...utils.validation import check_non_negative
|
|
25
|
+
from .core import PairwiseDistances
|
|
26
|
+
from .cosine import cosine_distances
|
|
27
|
+
from .euclidean import euclidean_distances
|
|
28
|
+
from .haversine import haversine_distances
|
|
29
|
+
from .manhattan import manhattan_distances
|
|
30
|
+
|
|
31
|
+
_VALID_METRICS = [
|
|
32
|
+
"euclidean",
|
|
33
|
+
"l2",
|
|
34
|
+
"l1",
|
|
35
|
+
"manhattan",
|
|
36
|
+
"cityblock",
|
|
37
|
+
"braycurtis",
|
|
38
|
+
"canberra",
|
|
39
|
+
"chebyshev",
|
|
40
|
+
"correlation",
|
|
41
|
+
"cosine",
|
|
42
|
+
"dice",
|
|
43
|
+
"hamming",
|
|
44
|
+
"jaccard",
|
|
45
|
+
"kulsinski",
|
|
46
|
+
"mahalanobis",
|
|
47
|
+
"matching",
|
|
48
|
+
"minkowski",
|
|
49
|
+
"rogerstanimoto",
|
|
50
|
+
"russellrao",
|
|
51
|
+
"seuclidean",
|
|
52
|
+
"sokalmichener",
|
|
53
|
+
"sokalsneath",
|
|
54
|
+
"sqeuclidean",
|
|
55
|
+
"yule",
|
|
56
|
+
"wminkowski",
|
|
57
|
+
"haversine",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
# Helper functions - distance
# Maps a metric name to the function of this package that implements it;
# several names are aliases of the same function.
PAIRWISE_DISTANCE_FUNCTIONS = {
    # If updating this dictionary, update the doc in both distance_metrics()
    # and also in pairwise_distances()!
    "cityblock": manhattan_distances,
    "cosine": cosine_distances,
    "euclidean": euclidean_distances,
    "haversine": haversine_distances,
    "l2": euclidean_distances,
    "l1": manhattan_distances,
    "manhattan": manhattan_distances,
    # HACK: precomputed is always allowed, never called
    "precomputed": None,
}
|
|
73
|
+
|
|
74
|
+
# These distances require boolean tensors, when using maxframe.tensor.spatial.distance
PAIRWISE_BOOLEAN_FUNCTIONS = [
    "dice", "jaccard", "kulsinski",
    "matching", "rogerstanimoto", "russellrao",
    "sokalmichener", "sokalsneath", "yule",
]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def pairwise_distances(X, Y=None, metric="euclidean", **kwds):
    """Compute a distance matrix from X and optional Y using ``metric``.

    Parameters
    ----------
    X : array-like
        Input samples, or the distance matrix itself when
        ``metric == "precomputed"``.
    Y : array-like, optional
        Second set of samples.
    metric : str or callable
        One of ``_VALID_METRICS``, ``"precomputed"`` or a callable.
    **kwds
        Extra keyword arguments forwarded to the selected distance function.

    Returns
    -------
    The validated ``X`` for ``"precomputed"``; otherwise the result of the
    function registered in ``PAIRWISE_DISTANCE_FUNCTIONS`` or, for any other
    metric, of ``distance.pdist`` + ``distance.squareform`` (when the
    validated ``X`` and ``Y`` are the same object) or ``distance.cdist``.

    Raises
    ------
    ValueError
        If ``metric`` is neither a known name, ``"precomputed"`` nor callable.
    """
    recognized = (
        metric in _VALID_METRICS or callable(metric) or metric == "precomputed"
    )
    if not recognized:
        raise ValueError(
            f"Unknown metric {metric}. Valid metrics are {_VALID_METRICS}, "
            "or 'precomputed', or a callable"
        )

    if metric == "precomputed":
        X, _ = PairwiseDistances.check_pairwise_arrays(X, Y, precomputed=True)
        whom = (
            "`pairwise_distances`. Precomputed distance "
            " need to have non-negative values."
        )
        return check_non_negative(X, whom=whom)

    if metric in PAIRWISE_DISTANCE_FUNCTIONS:
        dedicated = PAIRWISE_DISTANCE_FUNCTIONS[metric]
        return dedicated(X, Y, **kwds)

    # Everything else, including when metric is callable, is delegated to the
    # spatial distance module.
    dtype = bool if metric in PAIRWISE_BOOLEAN_FUNCTIONS else None
    if (
        dtype == bool
        and (X.dtype != bool or (Y is not None and Y.dtype != bool))
        and DataConversionWarning is not None
    ):
        # DataConversionWarning is None when sklearn could not be imported.
        warnings.warn(
            f"Data was converted to boolean for metric {metric}",
            DataConversionWarning,
        )

    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y, dtype=dtype)
    if X is Y:
        return distance.squareform(distance.pdist(X, metric=metric, **kwds))
    return distance.cdist(X, Y, metric=metric, **kwds)
|