maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/dataframe/utils.py
CHANGED
|
@@ -25,21 +25,22 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional
|
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
import pandas as pd
|
|
28
|
-
from pandas.api.types import is_string_dtype
|
|
29
28
|
from pandas.core.dtypes.inference import is_dict_like, is_list_like
|
|
30
29
|
|
|
31
|
-
from ..
|
|
30
|
+
from ..config.validators import dtype_backend_validator
|
|
31
|
+
from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
|
|
32
|
+
from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
|
|
32
33
|
from ..lib.mmh3 import hash as mmh_hash
|
|
33
34
|
from ..udf import MarkedFunction
|
|
34
35
|
from ..utils import (
|
|
35
36
|
ModulePlaceholder,
|
|
36
37
|
is_full_slice,
|
|
37
38
|
lazy_import,
|
|
38
|
-
make_dtype,
|
|
39
39
|
make_dtypes,
|
|
40
40
|
quiet_stdio,
|
|
41
41
|
sbytes,
|
|
42
42
|
tokenize,
|
|
43
|
+
validate_and_adjust_resource_ratio,
|
|
43
44
|
)
|
|
44
45
|
|
|
45
46
|
if TYPE_CHECKING:
|
|
@@ -57,7 +58,7 @@ cudf = lazy_import("cudf", rename="cudf")
|
|
|
57
58
|
logger = logging.getLogger(__name__)
|
|
58
59
|
|
|
59
60
|
try:
|
|
60
|
-
from
|
|
61
|
+
from ..lib.dtypes_extension import ArrowDtype
|
|
61
62
|
except ImportError:
|
|
62
63
|
ArrowDtype = None
|
|
63
64
|
|
|
@@ -103,9 +104,9 @@ def hash_dtypes(dtypes, size):
|
|
|
103
104
|
return [dtypes[index] for index in hashed_indexes]
|
|
104
105
|
|
|
105
106
|
|
|
106
|
-
def sort_dataframe_inplace(df, *axis):
|
|
107
|
+
def sort_dataframe_inplace(df, *axis, **kw):
|
|
107
108
|
for ax in axis:
|
|
108
|
-
df.sort_index(axis=ax, inplace=True)
|
|
109
|
+
df.sort_index(axis=ax, inplace=True, **kw)
|
|
109
110
|
return df
|
|
110
111
|
|
|
111
112
|
|
|
@@ -456,7 +457,7 @@ def build_split_idx_to_origin_idx(splits, increase=True):
|
|
|
456
457
|
|
|
457
458
|
|
|
458
459
|
def _generate_value(dtype, fill_value):
|
|
459
|
-
if ArrowDtype and isinstance(dtype,
|
|
460
|
+
if ArrowDtype and isinstance(dtype, ArrowDtype):
|
|
460
461
|
return _generate_value(dtype.pyarrow_dtype, fill_value)
|
|
461
462
|
|
|
462
463
|
if isinstance(dtype, pa.ListType):
|
|
@@ -470,9 +471,19 @@ def _generate_value(dtype, fill_value):
|
|
|
470
471
|
)
|
|
471
472
|
]
|
|
472
473
|
|
|
474
|
+
if isinstance(dtype, pa.StructType):
|
|
475
|
+
result = {}
|
|
476
|
+
for i in range(dtype.num_fields):
|
|
477
|
+
field = dtype[i]
|
|
478
|
+
result[field.name] = _generate_value(field.type, fill_value)
|
|
479
|
+
return result
|
|
480
|
+
|
|
473
481
|
if isinstance(dtype, pa.DataType):
|
|
474
482
|
return _generate_value(dtype.to_pandas_dtype(), fill_value)
|
|
475
483
|
|
|
484
|
+
if isinstance(dtype, ExternalBlobDtype):
|
|
485
|
+
return SolidBlob(str(fill_value).encode())
|
|
486
|
+
|
|
476
487
|
# special handle for datetime64 and timedelta64
|
|
477
488
|
dispatch = {
|
|
478
489
|
np.datetime64: pd.Timestamp,
|
|
@@ -1012,27 +1023,21 @@ def create_sa_connection(con, **kwargs):
|
|
|
1012
1023
|
engine.dispose()
|
|
1013
1024
|
|
|
1014
1025
|
|
|
1015
|
-
def to_arrow_dtypes(dtypes
|
|
1016
|
-
from .
|
|
1026
|
+
def to_arrow_dtypes(dtypes):
|
|
1027
|
+
from ..io.odpsio.schema import pandas_dtypes_to_arrow_schema
|
|
1017
1028
|
|
|
1029
|
+
arrow_schema = pandas_dtypes_to_arrow_schema(dtypes)
|
|
1018
1030
|
new_dtypes = dtypes.copy()
|
|
1019
1031
|
for i in range(len(dtypes)):
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
new_dtypes.iloc[i] = ArrowStringDtype()
|
|
1030
|
-
else: # pragma: no cover
|
|
1031
|
-
# empty, set arrow string dtype
|
|
1032
|
-
new_dtypes.iloc[i] = ArrowStringDtype()
|
|
1033
|
-
else:
|
|
1034
|
-
# empty, set arrow string dtype
|
|
1035
|
-
new_dtypes.iloc[i] = ArrowStringDtype()
|
|
1032
|
+
arrow_type = arrow_schema.types[i]
|
|
1033
|
+
dt = dtypes.iloc[i]
|
|
1034
|
+
if isinstance(dt, pd.api.extensions.ExtensionDtype):
|
|
1035
|
+
# make existing extension dtype consistent
|
|
1036
|
+
new_dtypes.iloc[i] = dt
|
|
1037
|
+
elif arrow_type == pa.string():
|
|
1038
|
+
new_dtypes.iloc[i] = pd.StringDtype("pyarrow")
|
|
1039
|
+
else:
|
|
1040
|
+
new_dtypes.iloc[i] = ArrowDtype(arrow_type)
|
|
1036
1041
|
return new_dtypes
|
|
1037
1042
|
|
|
1038
1043
|
|
|
@@ -1305,7 +1310,7 @@ def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
|
|
|
1305
1310
|
if is_dict_like(funcs):
|
|
1306
1311
|
return {k: pack_func_args(df, v, *args, **kwargs) for k, v in funcs.items()}
|
|
1307
1312
|
|
|
1308
|
-
if is_list_like(funcs):
|
|
1313
|
+
if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
|
|
1309
1314
|
return [pack_func_args(df, v, *args, **kwargs) for v in funcs]
|
|
1310
1315
|
|
|
1311
1316
|
f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs
|
|
@@ -1406,23 +1411,54 @@ def infer_dataframe_return_value(
|
|
|
1406
1411
|
inherit_index=False,
|
|
1407
1412
|
build_kw=None,
|
|
1408
1413
|
elementwise=None,
|
|
1414
|
+
skip_infer=False,
|
|
1409
1415
|
) -> InferredDataFrameMeta:
|
|
1410
|
-
from .core import GROUPBY_TYPE
|
|
1416
|
+
from .core import GROUPBY_TYPE, INDEX_TYPE
|
|
1417
|
+
from .typing_ import get_function_output_meta
|
|
1418
|
+
|
|
1419
|
+
unwrapped_func = func
|
|
1420
|
+
if isinstance(unwrapped_func, MarkedFunction):
|
|
1421
|
+
unwrapped_func = unwrapped_func.func
|
|
1422
|
+
while True:
|
|
1423
|
+
if isinstance(unwrapped_func, functools.partial):
|
|
1424
|
+
unwrapped_func = unwrapped_func.func
|
|
1425
|
+
elif hasattr(unwrapped_func, "__wrapped__"):
|
|
1426
|
+
unwrapped_func = unwrapped_func.__wrapped__
|
|
1427
|
+
else:
|
|
1428
|
+
break
|
|
1429
|
+
|
|
1430
|
+
func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
|
|
1431
|
+
func_index_value = None
|
|
1432
|
+
if func_annotation_meta:
|
|
1433
|
+
output_type = output_type or func_annotation_meta.output_type
|
|
1434
|
+
dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
|
|
1435
|
+
dtype = dtype if dtype is not None else func_annotation_meta.dtype
|
|
1436
|
+
name = name if name is not None else func_annotation_meta.name
|
|
1437
|
+
func_index_value = func_annotation_meta.index_value
|
|
1438
|
+
|
|
1439
|
+
if skip_infer:
|
|
1440
|
+
if isinstance(index, INDEX_TYPE):
|
|
1441
|
+
ret_index_value = index.index_value
|
|
1442
|
+
elif index is not None:
|
|
1443
|
+
ret_index_value = parse_index(index, df_obj.key)
|
|
1444
|
+
else:
|
|
1445
|
+
ret_index_value = func_index_value
|
|
1446
|
+
|
|
1447
|
+
return InferredDataFrameMeta(
|
|
1448
|
+
output_type=output_type,
|
|
1449
|
+
dtypes=dtypes,
|
|
1450
|
+
dtype=dtype,
|
|
1451
|
+
name=name,
|
|
1452
|
+
index_value=ret_index_value,
|
|
1453
|
+
)
|
|
1454
|
+
|
|
1455
|
+
if isinstance(index, INDEX_TYPE):
|
|
1456
|
+
index = index.index_value
|
|
1411
1457
|
|
|
1412
1458
|
if elementwise is None:
|
|
1413
|
-
unwrapped_func = func
|
|
1414
|
-
if isinstance(unwrapped_func, MarkedFunction):
|
|
1415
|
-
unwrapped_func = unwrapped_func.func
|
|
1416
|
-
while True:
|
|
1417
|
-
if isinstance(unwrapped_func, functools.partial):
|
|
1418
|
-
unwrapped_func = unwrapped_func.func
|
|
1419
|
-
elif hasattr(unwrapped_func, "__wrapped__"):
|
|
1420
|
-
unwrapped_func = unwrapped_func.__wrapped__
|
|
1421
|
-
else:
|
|
1422
|
-
break
|
|
1423
1459
|
elementwise = isinstance(unwrapped_func, np.ufunc)
|
|
1424
1460
|
|
|
1425
|
-
ret_index_value =
|
|
1461
|
+
ret_index_value = func_index_value
|
|
1426
1462
|
if output_type is not None and (dtypes is not None or dtype is not None):
|
|
1427
1463
|
if inherit_index:
|
|
1428
1464
|
ret_index_value = df_obj.index_value
|
|
@@ -1439,7 +1475,8 @@ def infer_dataframe_return_value(
|
|
|
1439
1475
|
elementwise=elementwise or False,
|
|
1440
1476
|
)
|
|
1441
1477
|
|
|
1442
|
-
ret_output_type =
|
|
1478
|
+
ret_output_type = None
|
|
1479
|
+
ret_dtypes = dtypes
|
|
1443
1480
|
maybe_agg = False
|
|
1444
1481
|
build_kw = build_kw or {}
|
|
1445
1482
|
obj_key = df_obj.key
|
|
@@ -1486,7 +1523,8 @@ def infer_dataframe_return_value(
|
|
|
1486
1523
|
f'please specify `output_type` as "dataframe"'
|
|
1487
1524
|
)
|
|
1488
1525
|
ret_output_type = ret_output_type or OutputType.dataframe
|
|
1489
|
-
|
|
1526
|
+
if ret_dtypes is None:
|
|
1527
|
+
ret_dtypes = infer_df_obj.dtypes
|
|
1490
1528
|
else:
|
|
1491
1529
|
if output_type is not None and output_type == OutputType.dataframe:
|
|
1492
1530
|
raise TypeError(
|
|
@@ -1506,7 +1544,7 @@ def infer_dataframe_return_value(
|
|
|
1506
1544
|
return InferredDataFrameMeta(
|
|
1507
1545
|
ret_output_type,
|
|
1508
1546
|
make_dtypes(ret_dtypes),
|
|
1509
|
-
|
|
1547
|
+
make_dtypes(dtype),
|
|
1510
1548
|
name,
|
|
1511
1549
|
ret_index_value,
|
|
1512
1550
|
maybe_agg,
|
|
@@ -1519,7 +1557,7 @@ def infer_dataframe_return_value(
|
|
|
1519
1557
|
return InferredDataFrameMeta(
|
|
1520
1558
|
output_type,
|
|
1521
1559
|
make_dtypes(dtypes),
|
|
1522
|
-
|
|
1560
|
+
make_dtypes(dtype),
|
|
1523
1561
|
name,
|
|
1524
1562
|
ret_index_value,
|
|
1525
1563
|
maybe_agg,
|
|
@@ -1530,20 +1568,37 @@ def infer_dataframe_return_value(
|
|
|
1530
1568
|
def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
    """
    Copy scheduling hints (engine, resources, GPU flag) onto an operator.

    When ``func`` is a ``MarkedFunction`` its ``expect_engine``,
    ``expect_resources`` and ``gpu`` attributes are used, and resource keys
    that are missing or ``None`` are filled in from
    ``options.function.default_running_options``. For any other callable only
    the default running options are used. The resulting resources are passed
    through ``validate_and_adjust_resource_ratio`` before being stored.

    Parameters
    ----------
    func
        Callable whose scheduling hints should be propagated.
    op : DataFrameOperator
        Operator receiving ``expect_engine``, ``expect_resources`` and
        ``gpu``. Each attribute is assigned only when its value is truthy.
    """
    from ..config import options

    expect_engine = None
    expect_gpu = None
    default_options = options.function.default_running_options or {}

    if isinstance(func, MarkedFunction):
        # copy from marked function
        expect_engine = func.expect_engine
        expect_gpu = func.gpu
        # Work on a copy: merging defaults must not write back into the
        # marked function's own mapping, otherwise the defaults active now
        # would stick to the function for every later call.
        expect_resources = dict(func.expect_resources or {})

        # merge default options if not set
        for key, value in default_options.items():
            if expect_resources.get(key) is None:
                expect_resources[key] = value
    else:
        # copy from default options; a copy keeps the adjustment below from
        # touching the globally configured mapping
        expect_resources = dict(default_options)

    # Validate and adjust resource ratio constraints on client side
    expect_resources, _ = validate_and_adjust_resource_ratio(
        expect_resources,
        max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
        adjust=True,
    )

    if expect_engine:
        op.expect_engine = expect_engine
    if expect_resources:
        op.expect_resources = expect_resources
    if expect_gpu:
        op.gpu = expect_gpu
|
|
1547
1602
|
|
|
1548
1603
|
|
|
1549
1604
|
def make_column_list(col, dtypes_or_columns, level=None):
|
|
@@ -1576,3 +1631,21 @@ def make_column_list(col, dtypes_or_columns, level=None):
|
|
|
1576
1631
|
return idx[mask]
|
|
1577
1632
|
except (IndexError, TypeError, ValueError):
|
|
1578
1633
|
return col
|
|
1634
|
+
|
|
1635
|
+
|
|
1636
|
+
def call_groupby_with_params(df_or_series, groupby_params: dict):
    """
    Call ``groupby`` on an object using a stored parameter dict.

    Parameters
    ----------
    df_or_series
        Object exposing a ``groupby(**kwargs)`` method.
    groupby_params : dict
        Keyword arguments for ``groupby``. An optional ``"selection"`` entry
        is not passed to ``groupby``; it is applied afterwards via
        ``result[selection]``. The dict itself is left unmodified.

    Returns
    -------
    The groupby object, narrowed to ``selection`` when one is given.
    """
    params = groupby_params.copy()
    selection = params.pop("selection", None)
    res = df_or_series.groupby(**params)

    # Compare against None instead of relying on truthiness: a column label
    # such as 0 is a valid selection, and array-like selections cannot be
    # evaluated as a boolean. An empty list/tuple still means "no selection".
    no_selection = selection is None or (
        isinstance(selection, (list, tuple)) and len(selection) == 0
    )
    if no_selection:
        return res
    return res[selection]
|
|
1643
|
+
|
|
1644
|
+
|
|
1645
|
+
def validate_dtype_backend(value):
    """
    Normalize and check a ``dtype_backend`` setting.

    Booleans are accepted for compatibility with the legacy
    ``use_arrow_dtype`` property: ``True`` becomes ``"pyarrow"`` and
    ``False`` becomes ``"numpy"``. The (possibly converted) value is then
    checked with ``dtype_backend_validator``.

    Returns
    -------
    The normalized backend value.

    Raises
    ------
    ValueError
        If ``dtype_backend_validator`` rejects the value.
    """
    backend = value
    if isinstance(backend, bool):
        # legacy flag: index the pair with the boolean (False -> 0, True -> 1)
        backend = ("numpy", "pyarrow")[backend]
    if dtype_backend_validator(backend):
        return backend
    raise ValueError(f"Invalid dtype_backend: {backend}")
|
|
@@ -19,14 +19,14 @@ import numpy as np
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
|
|
21
21
|
from ...serialization.serializables import AnyField, BoolField, Int32Field, Int64Field
|
|
22
|
-
from ..core import DATAFRAME_TYPE
|
|
22
|
+
from ..core import DATAFRAME_TYPE, ENTITY_TYPE
|
|
23
23
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
24
|
from ..utils import build_df, build_empty_series, parse_index
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
28
28
|
min_periods = Int64Field("min_periods", default=None)
|
|
29
|
-
axis = Int32Field("axis", default=
|
|
29
|
+
axis = Int32Field("axis", default=0)
|
|
30
30
|
func = AnyField("func", default=None)
|
|
31
31
|
|
|
32
32
|
# always treat count as valid. this behavior is cancelled in pandas 1.0
|
|
@@ -52,7 +52,7 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
52
52
|
index_value = parse_index(
|
|
53
53
|
test_df.index, expanding.params, inp, store_data=False
|
|
54
54
|
)
|
|
55
|
-
self.
|
|
55
|
+
self.append_index = test_df.columns.nlevels != empty_df.columns.nlevels
|
|
56
56
|
return self.new_dataframe(
|
|
57
57
|
[inp],
|
|
58
58
|
shape=(inp.shape[0], test_df.shape[1]),
|
|
@@ -92,5 +92,9 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
92
92
|
else:
|
|
93
93
|
new_func[k] = v
|
|
94
94
|
self.func = new_func
|
|
95
|
-
elif
|
|
95
|
+
elif (
|
|
96
|
+
isinstance(self.func, Iterable)
|
|
97
|
+
and not isinstance(self.func, ENTITY_TYPE)
|
|
98
|
+
and not isinstance(self.func, str)
|
|
99
|
+
):
|
|
96
100
|
self.func = list(self.func)
|
|
@@ -12,11 +12,24 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from ...serialization.serializables import KeyField, Serializable
|
|
15
|
+
from ...serialization.serializables import FieldTypes, KeyField, ListField, Serializable
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Window(Serializable):
|
|
19
|
+
_mf_specific_fields = ["order_cols", "ascending"]
|
|
20
|
+
|
|
19
21
|
input = KeyField("input", default=None)
|
|
22
|
+
order_cols = ListField("order_cols", default=None)
|
|
23
|
+
ascending = ListField("ascending", FieldTypes.bool, default=None)
|
|
24
|
+
|
|
25
|
+
def __init__(self, *, order_cols=None, ascending=True, **kwargs):
    """
    Build the window, normalizing ``order_cols`` and ``ascending`` to lists.

    A truthy non-list ``order_cols`` is wrapped into a single-element list.
    A non-list ``ascending`` is wrapped the same way; when ``ascending`` is
    already a list and ``order_cols`` is given, both must have equal length.

    Raises
    ------
    ValueError
        If ``ascending`` is a list whose length differs from ``order_cols``.
    """
    if order_cols and not isinstance(order_cols, list):
        order_cols = [order_cols]

    if isinstance(ascending, list):
        # the length check only applies to an explicitly given list
        if order_cols and len(order_cols) != len(ascending):
            raise ValueError("order_cols and ascending must have same length")
    else:
        ascending = [ascending]

    super().__init__(order_cols=order_cols, ascending=ascending, **kwargs)
|
|
20
33
|
|
|
21
34
|
@property
|
|
22
35
|
def params(self):
|
maxframe/dataframe/window/ewm.py
CHANGED
|
@@ -36,8 +36,6 @@ _window_has_method = pd_release_version >= (1, 4, 0)
|
|
|
36
36
|
class DataFrameEwmAgg(BaseDataFrameExpandingAgg):
|
|
37
37
|
_op_type_ = opcodes.EWM_AGG
|
|
38
38
|
|
|
39
|
-
_exec_cache = dict()
|
|
40
|
-
|
|
41
39
|
alpha = Float64Field("alpha")
|
|
42
40
|
adjust = BoolField("adjust")
|
|
43
41
|
alpha_ignore_na = BoolField("alpha_ignore_na")
|
|
@@ -234,7 +232,7 @@ def ewm(
|
|
|
234
232
|
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
|
|
235
233
|
|
|
236
234
|
if alpha == 1:
|
|
237
|
-
return obj.expanding(min_periods=min_periods
|
|
235
|
+
return obj.expanding(min_periods=min_periods)
|
|
238
236
|
|
|
239
237
|
if _default_min_period_1:
|
|
240
238
|
min_periods = min_periods or 1
|
|
@@ -12,49 +12,53 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
from collections import OrderedDict
|
|
17
16
|
|
|
18
17
|
from ... import opcodes
|
|
19
|
-
from ...serialization.serializables import
|
|
20
|
-
BoolField,
|
|
21
|
-
Int32Field,
|
|
22
|
-
Int64Field,
|
|
23
|
-
StringField,
|
|
24
|
-
)
|
|
25
|
-
from ...utils import pd_release_version
|
|
26
|
-
from ..utils import validate_axis
|
|
18
|
+
from ...serialization.serializables import BoolField, Int64Field
|
|
27
19
|
from .aggregation import BaseDataFrameExpandingAgg
|
|
28
20
|
from .core import Window
|
|
29
21
|
|
|
30
|
-
_window_has_method = pd_release_version >= (1, 3, 0)
|
|
31
|
-
_window_has_center = pd_release_version < (2, 0, 0)
|
|
32
|
-
|
|
33
22
|
|
|
34
23
|
class DataFrameExpandingAgg(BaseDataFrameExpandingAgg):
    """Expanding-window aggregation operator (``opcodes.EXPANDING_AGG``)."""

    _op_type_ = opcodes.EXPANDING_AGG

    def __init__(self, *args, **kw):
        # MaxFrame-specific window arguments are set aside for now: they are
        # filtered out of the keyword arguments before the base initializer
        # sees them.
        mf_only = Expanding._mf_specific_fields
        forwarded = {name: val for name, val in kw.items() if name not in mf_only}
        super().__init__(*args, **forwarded)
|
|
38
31
|
|
|
39
32
|
|
|
40
33
|
class Expanding(Window):
|
|
34
|
+
_mf_specific_fields = Window._mf_specific_fields + ["shift", "reverse_range"]
|
|
35
|
+
|
|
41
36
|
min_periods = Int64Field("min_periods")
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
# MF specific argument for position shift of window
|
|
38
|
+
shift = Int64Field("shift", default=None)
|
|
39
|
+
# MF specific argument for reversed window (sort of "narrowing")
|
|
40
|
+
reverse_range = BoolField("reverse_range", default=False)
|
|
45
41
|
|
|
46
42
|
def __call__(self, df):
    """
    Apply this window's parameters to ``df`` via ``df.expanding``.

    All parameters are passed first. If that call raises ``TypeError``,
    the call is repeated without the entries named in
    ``_mf_specific_fields``.
    """
    try:
        return df.expanding(**self.params)
    except TypeError:
        # presumably the target does not accept the MaxFrame-only keywords;
        # retry with those removed
        mf_only = self._mf_specific_fields
        all_params = self.params or dict()
        plain_params = {
            name: val for name, val in all_params.items() if name not in mf_only
        }
        return df.expanding(**plain_params)
|
|
48
50
|
|
|
49
51
|
@property
|
|
50
52
|
def params(self):
|
|
51
53
|
p = OrderedDict()
|
|
52
54
|
|
|
53
|
-
args = [
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
args = [
|
|
56
|
+
"min_periods",
|
|
57
|
+
"shift",
|
|
58
|
+
"reverse_range",
|
|
59
|
+
"order_cols",
|
|
60
|
+
"ascending",
|
|
61
|
+
]
|
|
58
62
|
|
|
59
63
|
for k in args:
|
|
60
64
|
p[k] = getattr(self, k)
|
|
@@ -73,6 +77,9 @@ class Expanding(Window):
|
|
|
73
77
|
def sum(self):
|
|
74
78
|
return self.aggregate("sum")
|
|
75
79
|
|
|
80
|
+
def prod(self):
|
|
81
|
+
return self.aggregate("prod")
|
|
82
|
+
|
|
76
83
|
def count(self):
|
|
77
84
|
return self.aggregate("count")
|
|
78
85
|
|
|
@@ -85,14 +92,14 @@ class Expanding(Window):
|
|
|
85
92
|
def mean(self):
|
|
86
93
|
return self.aggregate("mean")
|
|
87
94
|
|
|
88
|
-
def var(self):
|
|
89
|
-
return self.aggregate("var")
|
|
95
|
+
def var(self, **kwargs):
|
|
96
|
+
return self.aggregate("var", **kwargs)
|
|
90
97
|
|
|
91
|
-
def std(self):
|
|
92
|
-
return self.aggregate("std")
|
|
98
|
+
def std(self, **kwargs):
|
|
99
|
+
return self.aggregate("std", **kwargs)
|
|
93
100
|
|
|
94
101
|
|
|
95
|
-
def expanding(obj, min_periods=1,
|
|
102
|
+
def expanding(obj, min_periods=1, shift=0, reverse_range=False):
|
|
96
103
|
"""
|
|
97
104
|
Provide expanding transformations.
|
|
98
105
|
|
|
@@ -139,11 +146,6 @@ def expanding(obj, min_periods=1, center=False, axis=0):
|
|
|
139
146
|
3 3.0
|
|
140
147
|
4 7.0
|
|
141
148
|
"""
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
raise NotImplementedError("center == True is not supported")
|
|
146
|
-
if axis == 1:
|
|
147
|
-
raise NotImplementedError("axis other than 0 is not supported")
|
|
148
|
-
|
|
149
|
-
return Expanding(input=obj, min_periods=min_periods, center=center, axis=axis)
|
|
149
|
+
return Expanding(
|
|
150
|
+
input=obj, min_periods=min_periods, shift=shift, reverse_range=reverse_range
|
|
151
|
+
)
|
|
@@ -23,38 +23,42 @@ from ...serialization.serializables import (
|
|
|
23
23
|
AnyField,
|
|
24
24
|
BoolField,
|
|
25
25
|
DictField,
|
|
26
|
+
FieldTypes,
|
|
26
27
|
Int32Field,
|
|
27
28
|
Int64Field,
|
|
28
29
|
KeyField,
|
|
30
|
+
ListField,
|
|
29
31
|
StringField,
|
|
30
32
|
TupleField,
|
|
31
33
|
)
|
|
32
|
-
from ...utils import pd_release_version
|
|
33
34
|
from ..core import DATAFRAME_TYPE
|
|
34
35
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
35
36
|
from ..utils import build_empty_df, build_empty_series, parse_index, validate_axis
|
|
36
37
|
from .core import Window
|
|
37
38
|
|
|
38
|
-
_window_has_method = pd_release_version >= (1, 3, 0)
|
|
39
|
-
_with_pandas_issue_38908 = pd_release_version == (1, 2, 0)
|
|
40
|
-
|
|
41
39
|
|
|
42
40
|
class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
43
41
|
_op_type_ = opcodes.ROLLING_AGG
|
|
44
42
|
|
|
45
43
|
input = KeyField("input")
|
|
46
|
-
window = AnyField("window")
|
|
47
|
-
min_periods = Int64Field("min_periods")
|
|
48
|
-
center = BoolField("center")
|
|
49
|
-
win_type = StringField("win_type")
|
|
50
|
-
on = StringField("on")
|
|
51
|
-
axis = Int32Field("axis")
|
|
52
|
-
closed = StringField("closed")
|
|
53
|
-
func = AnyField("func")
|
|
54
|
-
func_args = TupleField("func_args")
|
|
55
|
-
func_kwargs = DictField("func_kwargs")
|
|
44
|
+
window = AnyField("window", default=None)
|
|
45
|
+
min_periods = Int64Field("min_periods", default=None)
|
|
46
|
+
center = BoolField("center", default=None)
|
|
47
|
+
win_type = StringField("win_type", default=None)
|
|
48
|
+
on = StringField("on", default=None)
|
|
49
|
+
axis = Int32Field("axis", default=None)
|
|
50
|
+
closed = StringField("closed", default=None)
|
|
51
|
+
func = AnyField("func", default=None)
|
|
52
|
+
func_args = TupleField("func_args", default=None)
|
|
53
|
+
func_kwargs = DictField("func_kwargs", default=None)
|
|
54
|
+
# for chunks
|
|
55
|
+
preds = ListField("preds", FieldTypes.key, default=None)
|
|
56
|
+
succs = ListField("succs", FieldTypes.key, default=None)
|
|
56
57
|
|
|
57
58
|
def __init__(self, output_types=None, **kw):
    """
    Initialize the operator, discarding MaxFrame-specific window arguments.

    Keys listed in ``Rolling._mf_specific_fields`` are removed from ``kw``;
    ``output_types`` is forwarded to the base initializer as
    ``_output_types``.
    """
    # MaxFrame-specific arguments are set aside for now
    mf_only = Rolling._mf_specific_fields
    forwarded = {name: val for name, val in kw.items() if name not in mf_only}
    super().__init__(_output_types=output_types, **forwarded)
|
|
59
63
|
|
|
60
64
|
@classmethod
|
|
@@ -62,6 +66,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
62
66
|
super()._set_inputs(op, inputs)
|
|
63
67
|
input_iter = iter(op._inputs)
|
|
64
68
|
op.input = next(input_iter)
|
|
69
|
+
if op.preds is not None:
|
|
70
|
+
op.preds = [next(input_iter) for _ in op.preds]
|
|
71
|
+
if op.succs is not None:
|
|
72
|
+
op.succs = [next(input_iter) for _ in op.succs]
|
|
65
73
|
|
|
66
74
|
def __call__(self, rolling):
|
|
67
75
|
inp = rolling.input
|
|
@@ -74,6 +82,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
74
82
|
params["win_type"] = None
|
|
75
83
|
if self.func != "count":
|
|
76
84
|
empty_df = empty_df._get_numeric_data()
|
|
85
|
+
for key in Rolling._mf_specific_fields:
|
|
86
|
+
params.pop(key, None)
|
|
77
87
|
test_df = empty_df.rolling(**params).agg(self.func)
|
|
78
88
|
if self.axis == 0:
|
|
79
89
|
index_value = inp.index_value
|
|
@@ -93,7 +103,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
93
103
|
empty_series = build_empty_series(
|
|
94
104
|
inp.dtype, index=pd_index[:0], name=inp.name
|
|
95
105
|
)
|
|
96
|
-
|
|
106
|
+
rolling_params = rolling.params.copy()
|
|
107
|
+
for k in Rolling._mf_specific_fields:
|
|
108
|
+
rolling_params.pop(k, None)
|
|
109
|
+
test_obj = empty_series.rolling(**rolling_params).agg(self.func)
|
|
97
110
|
if isinstance(test_obj, pd.DataFrame):
|
|
98
111
|
return self.new_dataframe(
|
|
99
112
|
[inp],
|
|
@@ -113,6 +126,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
113
126
|
|
|
114
127
|
|
|
115
128
|
class Rolling(Window):
|
|
129
|
+
_mf_specific_fields = Window._mf_specific_fields + ["shift"]
|
|
130
|
+
|
|
116
131
|
window = AnyField("window", default=None)
|
|
117
132
|
min_periods = Int64Field("min_periods", default=None)
|
|
118
133
|
center = BoolField("center", default=None)
|
|
@@ -120,33 +135,25 @@ class Rolling(Window):
|
|
|
120
135
|
on = StringField("on", default=None)
|
|
121
136
|
axis = Int32Field("axis", default=None)
|
|
122
137
|
closed = StringField("closed", default=None)
|
|
123
|
-
|
|
138
|
+
# MF specific argument for position shift of window
|
|
139
|
+
shift = Int64Field("shift", default=None)
|
|
124
140
|
|
|
125
141
|
@property
|
|
126
142
|
def params(self):
|
|
127
143
|
p = OrderedDict()
|
|
128
144
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
"window",
|
|
142
|
-
"min_periods",
|
|
143
|
-
"center",
|
|
144
|
-
"win_type",
|
|
145
|
-
"axis",
|
|
146
|
-
"on",
|
|
147
|
-
"closed",
|
|
148
|
-
"method",
|
|
149
|
-
]
|
|
145
|
+
args = [
|
|
146
|
+
"window",
|
|
147
|
+
"min_periods",
|
|
148
|
+
"center",
|
|
149
|
+
"win_type",
|
|
150
|
+
"axis",
|
|
151
|
+
"on",
|
|
152
|
+
"closed",
|
|
153
|
+
"shift",
|
|
154
|
+
"order_cols",
|
|
155
|
+
"ascending",
|
|
156
|
+
]
|
|
150
157
|
|
|
151
158
|
for attr in args:
|
|
152
159
|
p[attr] = getattr(self, attr)
|
|
@@ -164,8 +171,11 @@ class Rolling(Window):
|
|
|
164
171
|
empty_obj = build_empty_series(
|
|
165
172
|
self.input.dtype, index=pd_index[:0], name=self.input.name
|
|
166
173
|
)
|
|
167
|
-
|
|
168
|
-
for
|
|
174
|
+
params = (self.params or dict()).copy()
|
|
175
|
+
for key in self._mf_specific_fields:
|
|
176
|
+
params.pop(key, None)
|
|
177
|
+
pd_rolling = empty_obj.rolling(**params)
|
|
178
|
+
for k in params:
|
|
169
179
|
# update value according to pandas rolling
|
|
170
180
|
setattr(self, k, getattr(pd_rolling, k))
|
|
171
181
|
|
|
@@ -23,15 +23,9 @@ def test_expanding():
|
|
|
23
23
|
df = pd.DataFrame(np.random.rand(4, 3), columns=list("abc"))
|
|
24
24
|
df2 = md.DataFrame(df)
|
|
25
25
|
|
|
26
|
-
with pytest.raises(NotImplementedError):
|
|
27
|
-
_ = df2.expanding(3, center=True)
|
|
28
|
-
|
|
29
|
-
with pytest.raises(NotImplementedError):
|
|
30
|
-
_ = df2.expanding(3, axis=1)
|
|
31
|
-
|
|
32
26
|
r = df2.expanding(3)
|
|
33
27
|
expected = df.expanding(3)
|
|
34
|
-
assert repr(r) == repr(expected)
|
|
28
|
+
assert repr(r).split(",", 1)[0] == repr(expected).split(",", 1)[0]
|
|
35
29
|
|
|
36
30
|
assert "b" in dir(r)
|
|
37
31
|
|