maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import functools
|
|
16
16
|
import inspect
|
|
17
|
-
from collections import OrderedDict
|
|
17
|
+
from collections import OrderedDict, namedtuple
|
|
18
18
|
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
|
|
19
19
|
|
|
20
20
|
import msgpack
|
|
@@ -26,11 +26,13 @@ from ...serialization.serializables import (
|
|
|
26
26
|
AnyField,
|
|
27
27
|
BoolField,
|
|
28
28
|
DataTypeField,
|
|
29
|
+
DictField,
|
|
29
30
|
Int32Field,
|
|
31
|
+
Serializable,
|
|
30
32
|
StringField,
|
|
31
33
|
)
|
|
32
34
|
from ...typing_ import TileableType
|
|
33
|
-
from ...utils import get_item_if_scalar, pd_release_version, tokenize
|
|
35
|
+
from ...utils import get_item_if_scalar, get_pd_option, pd_release_version, tokenize
|
|
34
36
|
from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
|
|
35
37
|
from ..utils import (
|
|
36
38
|
build_df,
|
|
@@ -48,8 +50,14 @@ _level_reduction_keep_object = pd_release_version[:2] < (1, 3)
|
|
|
48
50
|
# results in object.
|
|
49
51
|
_reduce_bool_as_object = pd_release_version[:2] != (1, 2)
|
|
50
52
|
|
|
53
|
+
_idx_reduction_without_numeric_only = pd_release_version[:2] < (1, 5)
|
|
54
|
+
|
|
55
|
+
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DataFrameReduction(DataFrameOperator):
|
|
59
|
+
_legacy_name = "DataFrameReductionOperator" # since v2.2.0
|
|
51
60
|
|
|
52
|
-
class DataFrameReductionOperator(DataFrameOperator):
|
|
53
61
|
axis = AnyField("axis", default=None)
|
|
54
62
|
skipna = BoolField("skipna", default=True)
|
|
55
63
|
level = AnyField("level", default=None)
|
|
@@ -59,8 +67,13 @@ class DataFrameReductionOperator(DataFrameOperator):
|
|
|
59
67
|
method = StringField("method", default=None)
|
|
60
68
|
|
|
61
69
|
dtype = DataTypeField("dtype", default=None)
|
|
70
|
+
combine_size = Int32Field("combine_size", default=None)
|
|
71
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=None)
|
|
62
72
|
|
|
63
73
|
def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
|
|
74
|
+
kw["use_inf_as_na"] = kw.pop(
|
|
75
|
+
"use_inf_as_na", get_pd_option("mode.use_inf_as_na", False)
|
|
76
|
+
)
|
|
64
77
|
super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
|
|
65
78
|
|
|
66
79
|
@property
|
|
@@ -78,23 +91,28 @@ class DataFrameReductionOperator(DataFrameOperator):
|
|
|
78
91
|
return {k: v for k, v in args.items() if v is not None}
|
|
79
92
|
|
|
80
93
|
|
|
81
|
-
|
|
94
|
+
# Keep for import compatibility
|
|
95
|
+
DataFrameReductionOperator = DataFrameReduction
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class DataFrameCumReduction(DataFrameOperator):
|
|
99
|
+
_legacy_name = "DataFrameCumReductionOperator" # since v2.2.0
|
|
100
|
+
|
|
82
101
|
axis = AnyField("axis", default=None)
|
|
83
102
|
skipna = BoolField("skipna", default=None)
|
|
84
103
|
|
|
85
104
|
dtype = DataTypeField("dtype", default=None)
|
|
105
|
+
use_inf_as_na = BoolField("use_inf_as_na", default=None)
|
|
86
106
|
|
|
87
107
|
def __init__(self, gpu=None, sparse=None, output_types=None, **kw):
|
|
108
|
+
kw["use_inf_as_na"] = kw.pop(
|
|
109
|
+
"use_inf_as_na", get_pd_option("mode.use_inf_as_na", False)
|
|
110
|
+
)
|
|
88
111
|
super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
|
|
89
112
|
|
|
90
113
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
kw.pop("bool_only", None)
|
|
94
|
-
kw.pop("numeric_only", None)
|
|
95
|
-
return getattr(value, func_name)(**kw)
|
|
96
|
-
else:
|
|
97
|
-
return getattr(value, func_name)(**kw)
|
|
114
|
+
# Keep for import compatibility
|
|
115
|
+
DataFrameCumReductionOperator = DataFrameCumReduction
|
|
98
116
|
|
|
99
117
|
|
|
100
118
|
@functools.lru_cache(100)
|
|
@@ -117,6 +135,8 @@ def _get_series_reduction_dtype(
|
|
|
117
135
|
reduced = test_series.size
|
|
118
136
|
elif func_name == "str_concat":
|
|
119
137
|
reduced = pd.Series([test_series.str.cat()])
|
|
138
|
+
elif func_name in ("idxmin", "idxmax", "argmin", "argmax"):
|
|
139
|
+
reduced = getattr(test_series, func_name)(axis=axis, skipna=skipna)
|
|
120
140
|
else:
|
|
121
141
|
reduced = getattr(test_series, func_name)(
|
|
122
142
|
axis=axis, skipna=skipna, numeric_only=numeric_only
|
|
@@ -135,6 +155,8 @@ def _get_df_reduction_dtype(
|
|
|
135
155
|
reduced = getattr(test_df, func_name)(axis=axis)
|
|
136
156
|
elif func_name in ("all", "any"):
|
|
137
157
|
reduced = getattr(test_df, func_name)(axis=axis, bool_only=bool_only)
|
|
158
|
+
elif _idx_reduction_without_numeric_only and func_name in ("idxmin", "idxmax"):
|
|
159
|
+
reduced = getattr(test_df, func_name)(axis=axis, skipna=skipna)
|
|
138
160
|
elif func_name == "str_concat":
|
|
139
161
|
reduced = test_df.apply(lambda s: s.str.cat(), axis=axis)
|
|
140
162
|
else:
|
|
@@ -146,6 +168,27 @@ def _get_df_reduction_dtype(
|
|
|
146
168
|
return reduced.dtype
|
|
147
169
|
|
|
148
170
|
|
|
171
|
+
class ReductionCallable(Serializable):
|
|
172
|
+
func_name = StringField("func_name")
|
|
173
|
+
kwargs = DictField("kwargs", default=None)
|
|
174
|
+
|
|
175
|
+
def __name__(self):
|
|
176
|
+
return self.func_name
|
|
177
|
+
|
|
178
|
+
def __call__(self, value):
|
|
179
|
+
kw = self.kwargs.copy()
|
|
180
|
+
if value.ndim == 1:
|
|
181
|
+
kw.pop("bool_only", None)
|
|
182
|
+
kw.pop("numeric_only", None)
|
|
183
|
+
return getattr(value, self.func_name)(**kw)
|
|
184
|
+
else:
|
|
185
|
+
return getattr(value, self.func_name)(**kw)
|
|
186
|
+
|
|
187
|
+
def __maxframe_tokenize__(self):
|
|
188
|
+
# make sure compiled functions are correctly cached
|
|
189
|
+
return type(self), self.func_name, self.kwargs
|
|
190
|
+
|
|
191
|
+
|
|
149
192
|
class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
150
193
|
@classmethod
|
|
151
194
|
def get_reduction_callable(cls, op):
|
|
@@ -154,9 +197,7 @@ class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
|
154
197
|
skipna=op.skipna, numeric_only=op.numeric_only, bool_only=op.bool_only
|
|
155
198
|
)
|
|
156
199
|
kw = {k: v for k, v in kw.items() if v is not None}
|
|
157
|
-
|
|
158
|
-
fun.__name__ = func_name
|
|
159
|
-
return fun
|
|
200
|
+
return ReductionCallable(func_name=func_name, kwargs=kw)
|
|
160
201
|
|
|
161
202
|
def _call_groupby_level(self, df, level):
|
|
162
203
|
return df.groupby(level=level).agg(
|
|
@@ -261,10 +302,13 @@ class DataFrameReductionMixin(DataFrameOperatorMixin):
|
|
|
261
302
|
|
|
262
303
|
if func_name == "custom_reduction":
|
|
263
304
|
empty_series = build_series(series, ensure_string=True)
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
305
|
+
custom_reduction_obj = getattr(self, "custom_reduction")
|
|
306
|
+
result_dtype = getattr(custom_reduction_obj, "result_dtype", None)
|
|
307
|
+
if result_dtype is None:
|
|
308
|
+
result_scalar = custom_reduction_obj.__call_agg__(empty_series)
|
|
309
|
+
if hasattr(result_scalar, "to_pandas"): # pragma: no cover
|
|
310
|
+
result_scalar = result_scalar.to_pandas()
|
|
311
|
+
result_dtype = pd.Series(result_scalar).dtype
|
|
268
312
|
else:
|
|
269
313
|
result_dtype = _get_series_reduction_dtype(
|
|
270
314
|
series.dtype,
|
|
@@ -339,6 +383,10 @@ class CustomReduction:
|
|
|
339
383
|
def __name__(self):
|
|
340
384
|
return self.name
|
|
341
385
|
|
|
386
|
+
@property
|
|
387
|
+
def result_dtype(self):
|
|
388
|
+
return None
|
|
389
|
+
|
|
342
390
|
def __call__(self, value):
|
|
343
391
|
if isinstance(value, ENTITY_TYPE):
|
|
344
392
|
from .custom_reduction import build_custom_reduction_result
|
|
@@ -426,6 +474,8 @@ _func_name_converts = dict(
|
|
|
426
474
|
true_divide="truediv",
|
|
427
475
|
floor_divide="floordiv",
|
|
428
476
|
power="pow",
|
|
477
|
+
subtract="sub",
|
|
478
|
+
multiply="mul",
|
|
429
479
|
)
|
|
430
480
|
_func_compile_cache = dict() # type: Dict[str, ReductionSteps]
|
|
431
481
|
|
|
@@ -442,8 +492,8 @@ _idl_primitive_types = (
|
|
|
442
492
|
|
|
443
493
|
IN_VAR_IDL_OP = "in_var"
|
|
444
494
|
OUT_VAR_IDL_OP = "out_var"
|
|
445
|
-
|
|
446
|
-
|
|
495
|
+
MASK_VAR_IDL_OP = "mask"
|
|
496
|
+
WHERE_VAR_IDL_OP = "where"
|
|
447
497
|
LET_VAR_OP = "let"
|
|
448
498
|
UNARY_IDL_OP_PREFIX = "unary:"
|
|
449
499
|
BINARY_IDL_OP_PREFIX = "bin:"
|
|
@@ -471,7 +521,7 @@ class ReductionCompiler:
|
|
|
471
521
|
def _check_function_valid(cls, func):
|
|
472
522
|
if isinstance(func, functools.partial):
|
|
473
523
|
return cls._check_function_valid(func.func)
|
|
474
|
-
elif
|
|
524
|
+
elif not hasattr(func, "__code__"):
|
|
475
525
|
return
|
|
476
526
|
|
|
477
527
|
func_code = func.__code__
|
|
@@ -569,6 +619,7 @@ class ReductionCompiler:
|
|
|
569
619
|
from ..datasource.dataframe import DataFrameDataSource
|
|
570
620
|
from ..datasource.series import SeriesDataSource
|
|
571
621
|
from ..indexing.where import DataFrameWhere
|
|
622
|
+
from .custom_reduction import DataFrameCustomReduction
|
|
572
623
|
|
|
573
624
|
func_token = tokenize(func, self._axis, func_name, ndim)
|
|
574
625
|
if func_token in _func_compile_cache:
|
|
@@ -639,6 +690,9 @@ class ReductionCompiler:
|
|
|
639
690
|
else:
|
|
640
691
|
map_func_name, agg_func_name = step_func_name, step_func_name
|
|
641
692
|
|
|
693
|
+
if isinstance(t.op, DataFrameCustomReduction):
|
|
694
|
+
custom_reduction = custom_reduction or t.op.custom_reduction
|
|
695
|
+
|
|
642
696
|
# build agg description
|
|
643
697
|
agg_funcs.append(
|
|
644
698
|
ReductionAggStep(
|
|
@@ -705,7 +759,7 @@ class ReductionCompiler:
|
|
|
705
759
|
input_op_types = (
|
|
706
760
|
DataFrameDataSource,
|
|
707
761
|
SeriesDataSource,
|
|
708
|
-
|
|
762
|
+
DataFrameReduction,
|
|
709
763
|
)
|
|
710
764
|
|
|
711
765
|
def _gen_expr_str(t):
|
|
@@ -776,9 +830,11 @@ class ReductionCompiler:
|
|
|
776
830
|
cond = _interpret_var(t.op.condition)
|
|
777
831
|
x = _interpret_var(t.op.x)
|
|
778
832
|
y = _interpret_var(t.op.y)
|
|
779
|
-
statements = [[
|
|
833
|
+
statements = [[WHERE_VAR_IDL_OP, var_name, [cond, x, y], {}]]
|
|
780
834
|
elif isinstance(t.op, DataFrameWhere):
|
|
781
|
-
func_name =
|
|
835
|
+
func_name = (
|
|
836
|
+
MASK_VAR_IDL_OP if t.op.replace_true else WHERE_VAR_IDL_OP
|
|
837
|
+
)
|
|
782
838
|
inp = _interpret_var(t.op.input)
|
|
783
839
|
cond = _interpret_var(t.op.cond)
|
|
784
840
|
other = _interpret_var(t.op.other)
|
|
@@ -14,10 +14,18 @@
|
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
|
-
from .core import DataFrameReductionMixin,
|
|
17
|
+
from .core import DataFrameReduction, DataFrameReductionMixin, ReductionCallable
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class
|
|
20
|
+
class CountReductionCallable(ReductionCallable):
    """Reduction callable that delegates to the ``count`` method of its input.

    The ``skipna`` and ``numeric_only`` options are read from ``self.kwargs``.
    """

    def __call__(self, value):
        """Return ``value.count(...)``, forwarding options for non-1-d input."""
        # Both options are looked up up front, so a missing key raises
        # KeyError regardless of the dimensionality of ``value``.
        skipna = self.kwargs["skipna"]
        numeric_only = self.kwargs["numeric_only"]

        if value.ndim != 1:
            return value.count(skipna=skipna, numeric_only=numeric_only)
        # 1-d input: ``count`` is invoked without any options.
        return value.count()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DataFrameCount(DataFrameReduction, DataFrameReductionMixin):
|
|
21
29
|
_op_type_ = opcodes.COUNT
|
|
22
30
|
_func_name = "count"
|
|
23
31
|
|
|
@@ -28,13 +36,9 @@ class DataFrameCount(DataFrameReductionOperator, DataFrameReductionMixin):
|
|
|
28
36
|
@classmethod
|
|
29
37
|
def get_reduction_callable(cls, op):
|
|
30
38
|
skipna, numeric_only = op.skipna, op.numeric_only
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
return value.count()
|
|
35
|
-
return value.count(skipna=skipna, numeric_only=numeric_only)
|
|
36
|
-
|
|
37
|
-
return count
|
|
39
|
+
return CountReductionCallable(
|
|
40
|
+
func_name="count", kwargs={"skipna": skipna, "numeric_only": numeric_only}
|
|
41
|
+
)
|
|
38
42
|
|
|
39
43
|
|
|
40
44
|
def count_series(series, level=None, **kw):
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def cov_dataframe(df, min_periods=None, ddof=1, numeric_only=True):
    """
    Compute pairwise covariance of columns, excluding NA/null values.

    Compute the pairwise covariance among the series of a DataFrame.
    The returned data frame is the `covariance matrix
    <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
    of the DataFrame.

    Both NA and null values are automatically excluded from the
    calculation. (See the note below about bias from missing values.)
    A threshold can be set for the minimum number of
    observations for each value created. Comparisons with observations
    below this threshold will be returned as ``NaN``.

    This method is generally used for the analysis of time series data to
    understand the relationship between different measures
    across time.

    Parameters
    ----------
    min_periods : int, optional
        Minimum number of observations required per pair of columns
        to have a valid result.

    ddof : int, default 1
        Delta degrees of freedom.  The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of elements.
        This argument is applicable only when no ``nan`` is in the dataframe.

    numeric_only : bool, default True
        Include only `float`, `int` or `boolean` data. Only ``True`` is
        currently supported.

    Returns
    -------
    DataFrame
        The covariance matrix of the series of the DataFrame.

    Raises
    ------
    NotImplementedError
        If ``numeric_only`` is false.

    See Also
    --------
    Series.cov : Compute covariance with another Series.
    core.window.ewm.ExponentialMovingWindow.cov : Exponential weighted sample
        covariance.
    core.window.expanding.Expanding.cov : Expanding sample covariance.
    core.window.rolling.Rolling.cov : Rolling sample covariance.

    Notes
    -----
    Returns the covariance matrix of the DataFrame's time series.
    The covariance is normalized by N-ddof.

    For DataFrames that have Series that are missing data (assuming that
    data is `missing at random
    <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
    the returned covariance matrix will be an unbiased estimate
    of the variance and covariance between the member Series.

    However, for many applications this estimate may not be acceptable
    because the estimated covariance matrix is not guaranteed to be positive
    semi-definite. This could lead to estimated correlations having
    absolute values which are greater than one, and/or a non-invertible
    covariance matrix. See `Estimation of covariance matrices
    <https://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_matrices>`__
    for more details.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
    ...                   columns=['dogs', 'cats'])
    >>> df.cov().execute()
              dogs      cats
    dogs  0.666667 -1.000000
    cats -1.000000  1.666667

    >>> mt.random.seed(42)
    >>> df = md.DataFrame(mt.random.randn(1000, 5),
    ...                   columns=['a', 'b', 'c', 'd', 'e'])
    >>> df.cov().execute()
              a         b         c         d         e
    a  0.998438 -0.020161  0.059277 -0.008943  0.014144
    b -0.020161  1.059352 -0.008543 -0.024738  0.009826
    c  0.059277 -0.008543  1.010670 -0.001486 -0.000271
    d -0.008943 -0.024738 -0.001486  0.921297 -0.013692
    e  0.014144  0.009826 -0.000271 -0.013692  0.977795

    **Minimum number of periods**

    This method also supports an optional ``min_periods`` keyword
    that specifies the required minimum number of non-NA observations for
    each column pair in order to have a valid result:

    >>> mt.random.seed(42)
    >>> df = md.DataFrame(mt.random.randn(20, 3),
    ...                   columns=['a', 'b', 'c'])
    >>> df.loc[df.index[:5], 'a'] = mt.nan
    >>> df.loc[df.index[5:10], 'b'] = mt.nan
    >>> df.cov(min_periods=12).execute()
              a         b         c
    a  0.316741       NaN -0.150812
    b       NaN  1.248003  0.191417
    c -0.150812  0.191417  0.895202
    """
    # Validate first so the unsupported option is rejected before any other
    # work. The message names the value that is actually unsupported (False).
    if not numeric_only:
        raise NotImplementedError("numeric_only==False not supported")

    # Kept as a function-local import, as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from ..statistics.corr import DataFrameCorr

    op = DataFrameCorr(method="cov", min_periods=min_periods, ddof=ddof)
    return op(df)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def cov_series(series, other, min_periods=None, ddof=1):
    """
    Compute the covariance of this Series with another, skipping missing values.

    The two `Series` need not have the same length; they are aligned
    internally before the covariance is calculated.

    Parameters
    ----------
    other : Series
        Series with which to compute the covariance.
    min_periods : int, optional
        Minimum number of observations needed to have a valid result.
    ddof : int, default 1
        Delta degrees of freedom.  The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of elements.

    Returns
    -------
    float
        Covariance between Series and other normalized by N-1
        (unbiased estimator).

    See Also
    --------
    DataFrame.cov : Compute pairwise covariance of columns.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s1 = md.Series([0.90010907, 0.13484424, 0.62036035])
    >>> s2 = md.Series([0.12528585, 0.26962463, 0.51111198])
    >>> s1.cov(s2).execute()
    -0.01685762652715874
    """
    from ..statistics.corr import DataFrameCorr

    # Build the "cov" operator against ``other`` and apply it to ``series``.
    cov_op = DataFrameCorr(
        method="cov",
        other=other,
        min_periods=min_periods,
        ddof=ddof,
    )
    result = cov_op(series)
    return result
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCummax(
|
|
19
|
+
class DataFrameCummax(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMMAX
|
|
21
21
|
_func_name = "cummax"
|
|
22
22
|
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCummin(
|
|
19
|
+
class DataFrameCummin(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMMIN
|
|
21
21
|
_func_name = "cummin"
|
|
22
22
|
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCumprod(
|
|
19
|
+
class DataFrameCumprod(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMPROD
|
|
21
21
|
_func_name = "cumprod"
|
|
22
22
|
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from .core import
|
|
16
|
+
from .core import DataFrameCumReduction, DataFrameCumReductionMixin
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
class DataFrameCumsum(
|
|
19
|
+
class DataFrameCumsum(DataFrameCumReduction, DataFrameCumReductionMixin):
|
|
20
20
|
_op_type_ = opcodes.CUMSUM
|
|
21
21
|
_func_name = "cumsum"
|
|
22
22
|
|
|
@@ -15,10 +15,10 @@
|
|
|
15
15
|
from ... import opcodes
|
|
16
16
|
from ...core import OutputType
|
|
17
17
|
from ...serialization.serializables import AnyField
|
|
18
|
-
from .core import
|
|
18
|
+
from .core import DataFrameReduction, DataFrameReductionMixin
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class DataFrameCustomReduction(
|
|
21
|
+
class DataFrameCustomReduction(DataFrameReduction, DataFrameReductionMixin):
|
|
22
22
|
_op_type_ = opcodes.CUSTOM_REDUCTION
|
|
23
23
|
_func_name = "custom_reduction"
|
|
24
24
|
|