maxframe 2.0.0b2__cp311-cp311-win32.whl → 2.2.0__cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cp311-win32.pyd +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +86 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp311-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp311-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
maxframe/dataframe/utils.py
CHANGED
|
@@ -28,7 +28,8 @@ import pandas as pd
|
|
|
28
28
|
from pandas.api.types import is_string_dtype
|
|
29
29
|
from pandas.core.dtypes.inference import is_dict_like, is_list_like
|
|
30
30
|
|
|
31
|
-
from ..core import Entity, ExecutableTuple, OutputType, get_output_types
|
|
31
|
+
from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
|
|
32
|
+
from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
|
|
32
33
|
from ..lib.mmh3 import hash as mmh_hash
|
|
33
34
|
from ..udf import MarkedFunction
|
|
34
35
|
from ..utils import (
|
|
@@ -40,6 +41,7 @@ from ..utils import (
|
|
|
40
41
|
quiet_stdio,
|
|
41
42
|
sbytes,
|
|
42
43
|
tokenize,
|
|
44
|
+
validate_and_adjust_resource_ratio,
|
|
43
45
|
)
|
|
44
46
|
|
|
45
47
|
if TYPE_CHECKING:
|
|
@@ -57,7 +59,7 @@ cudf = lazy_import("cudf", rename="cudf")
|
|
|
57
59
|
logger = logging.getLogger(__name__)
|
|
58
60
|
|
|
59
61
|
try:
|
|
60
|
-
from
|
|
62
|
+
from ..lib.dtypes_extension import ArrowDtype
|
|
61
63
|
except ImportError:
|
|
62
64
|
ArrowDtype = None
|
|
63
65
|
|
|
@@ -456,7 +458,7 @@ def build_split_idx_to_origin_idx(splits, increase=True):
|
|
|
456
458
|
|
|
457
459
|
|
|
458
460
|
def _generate_value(dtype, fill_value):
|
|
459
|
-
if ArrowDtype and isinstance(dtype,
|
|
461
|
+
if ArrowDtype and isinstance(dtype, ArrowDtype):
|
|
460
462
|
return _generate_value(dtype.pyarrow_dtype, fill_value)
|
|
461
463
|
|
|
462
464
|
if isinstance(dtype, pa.ListType):
|
|
@@ -470,9 +472,19 @@ def _generate_value(dtype, fill_value):
|
|
|
470
472
|
)
|
|
471
473
|
]
|
|
472
474
|
|
|
475
|
+
if isinstance(dtype, pa.StructType):
|
|
476
|
+
result = {}
|
|
477
|
+
for i in range(dtype.num_fields):
|
|
478
|
+
field = dtype[i]
|
|
479
|
+
result[field.name] = _generate_value(field.type, fill_value)
|
|
480
|
+
return result
|
|
481
|
+
|
|
473
482
|
if isinstance(dtype, pa.DataType):
|
|
474
483
|
return _generate_value(dtype.to_pandas_dtype(), fill_value)
|
|
475
484
|
|
|
485
|
+
if isinstance(dtype, ExternalBlobDtype):
|
|
486
|
+
return SolidBlob(str(fill_value).encode())
|
|
487
|
+
|
|
476
488
|
# special handle for datetime64 and timedelta64
|
|
477
489
|
dispatch = {
|
|
478
490
|
np.datetime64: pd.Timestamp,
|
|
@@ -1305,7 +1317,7 @@ def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
|
|
|
1305
1317
|
if is_dict_like(funcs):
|
|
1306
1318
|
return {k: pack_func_args(df, v, *args, **kwargs) for k, v in funcs.items()}
|
|
1307
1319
|
|
|
1308
|
-
if is_list_like(funcs):
|
|
1320
|
+
if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
|
|
1309
1321
|
return [pack_func_args(df, v, *args, **kwargs) for v in funcs]
|
|
1310
1322
|
|
|
1311
1323
|
f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs
|
|
@@ -1406,23 +1418,54 @@ def infer_dataframe_return_value(
|
|
|
1406
1418
|
inherit_index=False,
|
|
1407
1419
|
build_kw=None,
|
|
1408
1420
|
elementwise=None,
|
|
1421
|
+
skip_infer=False,
|
|
1409
1422
|
) -> InferredDataFrameMeta:
|
|
1410
|
-
from .core import GROUPBY_TYPE
|
|
1423
|
+
from .core import GROUPBY_TYPE, INDEX_TYPE
|
|
1424
|
+
from .typing_ import get_function_output_meta
|
|
1425
|
+
|
|
1426
|
+
unwrapped_func = func
|
|
1427
|
+
if isinstance(unwrapped_func, MarkedFunction):
|
|
1428
|
+
unwrapped_func = unwrapped_func.func
|
|
1429
|
+
while True:
|
|
1430
|
+
if isinstance(unwrapped_func, functools.partial):
|
|
1431
|
+
unwrapped_func = unwrapped_func.func
|
|
1432
|
+
elif hasattr(unwrapped_func, "__wrapped__"):
|
|
1433
|
+
unwrapped_func = unwrapped_func.__wrapped__
|
|
1434
|
+
else:
|
|
1435
|
+
break
|
|
1436
|
+
|
|
1437
|
+
func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
|
|
1438
|
+
func_index_value = None
|
|
1439
|
+
if func_annotation_meta:
|
|
1440
|
+
output_type = output_type or func_annotation_meta.output_type
|
|
1441
|
+
dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
|
|
1442
|
+
dtype = dtype if dtype is not None else func_annotation_meta.dtype
|
|
1443
|
+
name = name if name is not None else func_annotation_meta.name
|
|
1444
|
+
func_index_value = func_annotation_meta.index_value
|
|
1445
|
+
|
|
1446
|
+
if skip_infer:
|
|
1447
|
+
if isinstance(index, INDEX_TYPE):
|
|
1448
|
+
ret_index_value = index.index_value
|
|
1449
|
+
elif index is not None:
|
|
1450
|
+
ret_index_value = parse_index(index, df_obj.key)
|
|
1451
|
+
else:
|
|
1452
|
+
ret_index_value = func_index_value
|
|
1453
|
+
|
|
1454
|
+
return InferredDataFrameMeta(
|
|
1455
|
+
output_type=output_type,
|
|
1456
|
+
dtypes=dtypes,
|
|
1457
|
+
dtype=dtype,
|
|
1458
|
+
name=name,
|
|
1459
|
+
index_value=ret_index_value,
|
|
1460
|
+
)
|
|
1461
|
+
|
|
1462
|
+
if isinstance(index, INDEX_TYPE):
|
|
1463
|
+
index = index.index_value
|
|
1411
1464
|
|
|
1412
1465
|
if elementwise is None:
|
|
1413
|
-
unwrapped_func = func
|
|
1414
|
-
if isinstance(unwrapped_func, MarkedFunction):
|
|
1415
|
-
unwrapped_func = unwrapped_func.func
|
|
1416
|
-
while True:
|
|
1417
|
-
if isinstance(unwrapped_func, functools.partial):
|
|
1418
|
-
unwrapped_func = unwrapped_func.func
|
|
1419
|
-
elif hasattr(unwrapped_func, "__wrapped__"):
|
|
1420
|
-
unwrapped_func = unwrapped_func.__wrapped__
|
|
1421
|
-
else:
|
|
1422
|
-
break
|
|
1423
1466
|
elementwise = isinstance(unwrapped_func, np.ufunc)
|
|
1424
1467
|
|
|
1425
|
-
ret_index_value =
|
|
1468
|
+
ret_index_value = func_index_value
|
|
1426
1469
|
if output_type is not None and (dtypes is not None or dtype is not None):
|
|
1427
1470
|
if inherit_index:
|
|
1428
1471
|
ret_index_value = df_obj.index_value
|
|
@@ -1530,20 +1573,37 @@ def infer_dataframe_return_value(
|
|
|
1530
1573
|
def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
|
|
1531
1574
|
from ..config import options
|
|
1532
1575
|
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
op.expect_engine = func.expect_engine
|
|
1576
|
+
expect_engine = None
|
|
1577
|
+
expect_gpu = None
|
|
1578
|
+
default_options = options.function.default_running_options or {}
|
|
1537
1579
|
|
|
1538
|
-
|
|
1539
|
-
|
|
1580
|
+
if isinstance(func, MarkedFunction):
|
|
1581
|
+
# copy from marked function
|
|
1582
|
+
expect_engine = func.expect_engine
|
|
1583
|
+
expect_resources = func.expect_resources or {}
|
|
1584
|
+
expect_gpu = func.gpu
|
|
1540
1585
|
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
expect_resources
|
|
1586
|
+
# merge default options if not set
|
|
1587
|
+
for key, value in default_options.items():
|
|
1588
|
+
if key not in expect_resources or expect_resources.get(key) is None:
|
|
1589
|
+
expect_resources[key] = value
|
|
1590
|
+
else:
|
|
1591
|
+
# copy from default options
|
|
1592
|
+
expect_resources = default_options
|
|
1593
|
+
|
|
1594
|
+
# Validate and adjust resource ratio constraints on client side
|
|
1595
|
+
expect_resources, _ = validate_and_adjust_resource_ratio(
|
|
1596
|
+
expect_resources,
|
|
1597
|
+
max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
|
|
1598
|
+
adjust=True,
|
|
1599
|
+
)
|
|
1544
1600
|
|
|
1545
|
-
if
|
|
1601
|
+
if expect_engine:
|
|
1602
|
+
op.expect_engine = expect_engine
|
|
1603
|
+
if expect_resources:
|
|
1546
1604
|
op.expect_resources = expect_resources
|
|
1605
|
+
if expect_gpu:
|
|
1606
|
+
op.gpu = expect_gpu
|
|
1547
1607
|
|
|
1548
1608
|
|
|
1549
1609
|
def make_column_list(col, dtypes_or_columns, level=None):
|
|
@@ -1576,3 +1636,12 @@ def make_column_list(col, dtypes_or_columns, level=None):
|
|
|
1576
1636
|
return idx[mask]
|
|
1577
1637
|
except (IndexError, TypeError, ValueError):
|
|
1578
1638
|
return col
|
|
1639
|
+
|
|
1640
|
+
|
|
1641
|
+
def call_groupby_with_params(df_or_series, groupby_params: dict):
|
|
1642
|
+
params = groupby_params.copy()
|
|
1643
|
+
selection = params.pop("selection", None)
|
|
1644
|
+
res = df_or_series.groupby(**params)
|
|
1645
|
+
if selection:
|
|
1646
|
+
res = res[selection]
|
|
1647
|
+
return res
|
|
@@ -19,14 +19,14 @@ import numpy as np
|
|
|
19
19
|
import pandas as pd
|
|
20
20
|
|
|
21
21
|
from ...serialization.serializables import AnyField, BoolField, Int32Field, Int64Field
|
|
22
|
-
from ..core import DATAFRAME_TYPE
|
|
22
|
+
from ..core import DATAFRAME_TYPE, ENTITY_TYPE
|
|
23
23
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
24
24
|
from ..utils import build_df, build_empty_series, parse_index
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
28
28
|
min_periods = Int64Field("min_periods", default=None)
|
|
29
|
-
axis = Int32Field("axis", default=
|
|
29
|
+
axis = Int32Field("axis", default=0)
|
|
30
30
|
func = AnyField("func", default=None)
|
|
31
31
|
|
|
32
32
|
# always treat count as valid. this behavior is cancelled in pandas 1.0
|
|
@@ -52,7 +52,7 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
52
52
|
index_value = parse_index(
|
|
53
53
|
test_df.index, expanding.params, inp, store_data=False
|
|
54
54
|
)
|
|
55
|
-
self.
|
|
55
|
+
self.append_index = test_df.columns.nlevels != empty_df.columns.nlevels
|
|
56
56
|
return self.new_dataframe(
|
|
57
57
|
[inp],
|
|
58
58
|
shape=(inp.shape[0], test_df.shape[1]),
|
|
@@ -92,5 +92,9 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
92
92
|
else:
|
|
93
93
|
new_func[k] = v
|
|
94
94
|
self.func = new_func
|
|
95
|
-
elif
|
|
95
|
+
elif (
|
|
96
|
+
isinstance(self.func, Iterable)
|
|
97
|
+
and not isinstance(self.func, ENTITY_TYPE)
|
|
98
|
+
and not isinstance(self.func, str)
|
|
99
|
+
):
|
|
96
100
|
self.func = list(self.func)
|
|
@@ -12,11 +12,24 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from ...serialization.serializables import KeyField, Serializable
|
|
15
|
+
from ...serialization.serializables import FieldTypes, KeyField, ListField, Serializable
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class Window(Serializable):
|
|
19
|
+
_mf_specific_fields = ["order_cols", "ascending"]
|
|
20
|
+
|
|
19
21
|
input = KeyField("input", default=None)
|
|
22
|
+
order_cols = ListField("order_cols", default=None)
|
|
23
|
+
ascending = ListField("ascending", FieldTypes.bool, default=None)
|
|
24
|
+
|
|
25
|
+
def __init__(self, *, order_cols=None, ascending=True, **kwargs):
|
|
26
|
+
if order_cols and not isinstance(order_cols, list):
|
|
27
|
+
order_cols = [order_cols]
|
|
28
|
+
if not isinstance(ascending, list):
|
|
29
|
+
ascending = [ascending]
|
|
30
|
+
elif order_cols and len(order_cols) != len(ascending):
|
|
31
|
+
raise ValueError("order_cols and ascending must have same length")
|
|
32
|
+
super().__init__(order_cols=order_cols, ascending=ascending, **kwargs)
|
|
20
33
|
|
|
21
34
|
@property
|
|
22
35
|
def params(self):
|
maxframe/dataframe/window/ewm.py
CHANGED
|
@@ -36,8 +36,6 @@ _window_has_method = pd_release_version >= (1, 4, 0)
|
|
|
36
36
|
class DataFrameEwmAgg(BaseDataFrameExpandingAgg):
|
|
37
37
|
_op_type_ = opcodes.EWM_AGG
|
|
38
38
|
|
|
39
|
-
_exec_cache = dict()
|
|
40
|
-
|
|
41
39
|
alpha = Float64Field("alpha")
|
|
42
40
|
adjust = BoolField("adjust")
|
|
43
41
|
alpha_ignore_na = BoolField("alpha_ignore_na")
|
|
@@ -234,7 +232,7 @@ def ewm(
|
|
|
234
232
|
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
|
|
235
233
|
|
|
236
234
|
if alpha == 1:
|
|
237
|
-
return obj.expanding(min_periods=min_periods
|
|
235
|
+
return obj.expanding(min_periods=min_periods)
|
|
238
236
|
|
|
239
237
|
if _default_min_period_1:
|
|
240
238
|
min_periods = min_periods or 1
|
|
@@ -12,49 +12,53 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
from collections import OrderedDict
|
|
17
16
|
|
|
18
17
|
from ... import opcodes
|
|
19
|
-
from ...serialization.serializables import
|
|
20
|
-
BoolField,
|
|
21
|
-
Int32Field,
|
|
22
|
-
Int64Field,
|
|
23
|
-
StringField,
|
|
24
|
-
)
|
|
25
|
-
from ...utils import pd_release_version
|
|
26
|
-
from ..utils import validate_axis
|
|
18
|
+
from ...serialization.serializables import BoolField, Int64Field
|
|
27
19
|
from .aggregation import BaseDataFrameExpandingAgg
|
|
28
20
|
from .core import Window
|
|
29
21
|
|
|
30
|
-
_window_has_method = pd_release_version >= (1, 3, 0)
|
|
31
|
-
_window_has_center = pd_release_version < (2, 0, 0)
|
|
32
|
-
|
|
33
22
|
|
|
34
23
|
class DataFrameExpandingAgg(BaseDataFrameExpandingAgg):
|
|
35
24
|
_op_type_ = opcodes.EXPANDING_AGG
|
|
36
25
|
|
|
37
|
-
|
|
26
|
+
def __init__(self, *args, **kw):
|
|
27
|
+
# suspend MF-specific args by now
|
|
28
|
+
for key in Expanding._mf_specific_fields:
|
|
29
|
+
kw.pop(key, None)
|
|
30
|
+
super().__init__(*args, **kw)
|
|
38
31
|
|
|
39
32
|
|
|
40
33
|
class Expanding(Window):
|
|
34
|
+
_mf_specific_fields = Window._mf_specific_fields + ["shift", "reverse_range"]
|
|
35
|
+
|
|
41
36
|
min_periods = Int64Field("min_periods")
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
# MF specific argument for position shift of window
|
|
38
|
+
shift = Int64Field("shift", default=None)
|
|
39
|
+
# MF specific argument for reversed window (sort of "narrowing")
|
|
40
|
+
reverse_range = BoolField("reverse_range", default=False)
|
|
45
41
|
|
|
46
42
|
def __call__(self, df):
|
|
47
|
-
|
|
43
|
+
try:
|
|
44
|
+
return df.expanding(**self.params)
|
|
45
|
+
except TypeError:
|
|
46
|
+
params = (self.params or dict()).copy()
|
|
47
|
+
for key in self._mf_specific_fields:
|
|
48
|
+
params.pop(key, None)
|
|
49
|
+
return df.expanding(**params)
|
|
48
50
|
|
|
49
51
|
@property
|
|
50
52
|
def params(self):
|
|
51
53
|
p = OrderedDict()
|
|
52
54
|
|
|
53
|
-
args = [
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
args = [
|
|
56
|
+
"min_periods",
|
|
57
|
+
"shift",
|
|
58
|
+
"reverse_range",
|
|
59
|
+
"order_cols",
|
|
60
|
+
"ascending",
|
|
61
|
+
]
|
|
58
62
|
|
|
59
63
|
for k in args:
|
|
60
64
|
p[k] = getattr(self, k)
|
|
@@ -73,6 +77,9 @@ class Expanding(Window):
|
|
|
73
77
|
def sum(self):
|
|
74
78
|
return self.aggregate("sum")
|
|
75
79
|
|
|
80
|
+
def prod(self):
|
|
81
|
+
return self.aggregate("prod")
|
|
82
|
+
|
|
76
83
|
def count(self):
|
|
77
84
|
return self.aggregate("count")
|
|
78
85
|
|
|
@@ -85,14 +92,14 @@ class Expanding(Window):
|
|
|
85
92
|
def mean(self):
|
|
86
93
|
return self.aggregate("mean")
|
|
87
94
|
|
|
88
|
-
def var(self):
|
|
89
|
-
return self.aggregate("var")
|
|
95
|
+
def var(self, **kwargs):
|
|
96
|
+
return self.aggregate("var", **kwargs)
|
|
90
97
|
|
|
91
|
-
def std(self):
|
|
92
|
-
return self.aggregate("std")
|
|
98
|
+
def std(self, **kwargs):
|
|
99
|
+
return self.aggregate("std", **kwargs)
|
|
93
100
|
|
|
94
101
|
|
|
95
|
-
def expanding(obj, min_periods=1,
|
|
102
|
+
def expanding(obj, min_periods=1, shift=0, reverse_range=False):
|
|
96
103
|
"""
|
|
97
104
|
Provide expanding transformations.
|
|
98
105
|
|
|
@@ -139,11 +146,6 @@ def expanding(obj, min_periods=1, center=False, axis=0):
|
|
|
139
146
|
3 3.0
|
|
140
147
|
4 7.0
|
|
141
148
|
"""
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
raise NotImplementedError("center == True is not supported")
|
|
146
|
-
if axis == 1:
|
|
147
|
-
raise NotImplementedError("axis other than 0 is not supported")
|
|
148
|
-
|
|
149
|
-
return Expanding(input=obj, min_periods=min_periods, center=center, axis=axis)
|
|
149
|
+
return Expanding(
|
|
150
|
+
input=obj, min_periods=min_periods, shift=shift, reverse_range=reverse_range
|
|
151
|
+
)
|
|
@@ -23,38 +23,42 @@ from ...serialization.serializables import (
|
|
|
23
23
|
AnyField,
|
|
24
24
|
BoolField,
|
|
25
25
|
DictField,
|
|
26
|
+
FieldTypes,
|
|
26
27
|
Int32Field,
|
|
27
28
|
Int64Field,
|
|
28
29
|
KeyField,
|
|
30
|
+
ListField,
|
|
29
31
|
StringField,
|
|
30
32
|
TupleField,
|
|
31
33
|
)
|
|
32
|
-
from ...utils import pd_release_version
|
|
33
34
|
from ..core import DATAFRAME_TYPE
|
|
34
35
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
35
36
|
from ..utils import build_empty_df, build_empty_series, parse_index, validate_axis
|
|
36
37
|
from .core import Window
|
|
37
38
|
|
|
38
|
-
_window_has_method = pd_release_version >= (1, 3, 0)
|
|
39
|
-
_with_pandas_issue_38908 = pd_release_version == (1, 2, 0)
|
|
40
|
-
|
|
41
39
|
|
|
42
40
|
class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
43
41
|
_op_type_ = opcodes.ROLLING_AGG
|
|
44
42
|
|
|
45
43
|
input = KeyField("input")
|
|
46
|
-
window = AnyField("window")
|
|
47
|
-
min_periods = Int64Field("min_periods")
|
|
48
|
-
center = BoolField("center")
|
|
49
|
-
win_type = StringField("win_type")
|
|
50
|
-
on = StringField("on")
|
|
51
|
-
axis = Int32Field("axis")
|
|
52
|
-
closed = StringField("closed")
|
|
53
|
-
func = AnyField("func")
|
|
54
|
-
func_args = TupleField("func_args")
|
|
55
|
-
func_kwargs = DictField("func_kwargs")
|
|
44
|
+
window = AnyField("window", default=None)
|
|
45
|
+
min_periods = Int64Field("min_periods", default=None)
|
|
46
|
+
center = BoolField("center", default=None)
|
|
47
|
+
win_type = StringField("win_type", default=None)
|
|
48
|
+
on = StringField("on", default=None)
|
|
49
|
+
axis = Int32Field("axis", default=None)
|
|
50
|
+
closed = StringField("closed", default=None)
|
|
51
|
+
func = AnyField("func", default=None)
|
|
52
|
+
func_args = TupleField("func_args", default=None)
|
|
53
|
+
func_kwargs = DictField("func_kwargs", default=None)
|
|
54
|
+
# for chunks
|
|
55
|
+
preds = ListField("preds", FieldTypes.key, default=None)
|
|
56
|
+
succs = ListField("succs", FieldTypes.key, default=None)
|
|
56
57
|
|
|
57
58
|
def __init__(self, output_types=None, **kw):
|
|
59
|
+
# suspend MF-specific args by now
|
|
60
|
+
for key in Rolling._mf_specific_fields:
|
|
61
|
+
kw.pop(key, None)
|
|
58
62
|
super().__init__(_output_types=output_types, **kw)
|
|
59
63
|
|
|
60
64
|
@classmethod
|
|
@@ -62,6 +66,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
62
66
|
super()._set_inputs(op, inputs)
|
|
63
67
|
input_iter = iter(op._inputs)
|
|
64
68
|
op.input = next(input_iter)
|
|
69
|
+
if op.preds is not None:
|
|
70
|
+
op.preds = [next(input_iter) for _ in op.preds]
|
|
71
|
+
if op.succs is not None:
|
|
72
|
+
op.succs = [next(input_iter) for _ in op.succs]
|
|
65
73
|
|
|
66
74
|
def __call__(self, rolling):
|
|
67
75
|
inp = rolling.input
|
|
@@ -74,6 +82,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
74
82
|
params["win_type"] = None
|
|
75
83
|
if self.func != "count":
|
|
76
84
|
empty_df = empty_df._get_numeric_data()
|
|
85
|
+
for key in Rolling._mf_specific_fields:
|
|
86
|
+
params.pop(key, None)
|
|
77
87
|
test_df = empty_df.rolling(**params).agg(self.func)
|
|
78
88
|
if self.axis == 0:
|
|
79
89
|
index_value = inp.index_value
|
|
@@ -93,7 +103,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
93
103
|
empty_series = build_empty_series(
|
|
94
104
|
inp.dtype, index=pd_index[:0], name=inp.name
|
|
95
105
|
)
|
|
96
|
-
|
|
106
|
+
rolling_params = rolling.params.copy()
|
|
107
|
+
for k in Rolling._mf_specific_fields:
|
|
108
|
+
rolling_params.pop(k, None)
|
|
109
|
+
test_obj = empty_series.rolling(**rolling_params).agg(self.func)
|
|
97
110
|
if isinstance(test_obj, pd.DataFrame):
|
|
98
111
|
return self.new_dataframe(
|
|
99
112
|
[inp],
|
|
@@ -113,6 +126,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
113
126
|
|
|
114
127
|
|
|
115
128
|
class Rolling(Window):
|
|
129
|
+
_mf_specific_fields = Window._mf_specific_fields + ["shift"]
|
|
130
|
+
|
|
116
131
|
window = AnyField("window", default=None)
|
|
117
132
|
min_periods = Int64Field("min_periods", default=None)
|
|
118
133
|
center = BoolField("center", default=None)
|
|
@@ -120,33 +135,25 @@ class Rolling(Window):
|
|
|
120
135
|
on = StringField("on", default=None)
|
|
121
136
|
axis = Int32Field("axis", default=None)
|
|
122
137
|
closed = StringField("closed", default=None)
|
|
123
|
-
|
|
138
|
+
# MF specific argument for position shift of window
|
|
139
|
+
shift = Int64Field("shift", default=None)
|
|
124
140
|
|
|
125
141
|
@property
|
|
126
142
|
def params(self):
|
|
127
143
|
p = OrderedDict()
|
|
128
144
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
"window",
|
|
142
|
-
"min_periods",
|
|
143
|
-
"center",
|
|
144
|
-
"win_type",
|
|
145
|
-
"axis",
|
|
146
|
-
"on",
|
|
147
|
-
"closed",
|
|
148
|
-
"method",
|
|
149
|
-
]
|
|
145
|
+
args = [
|
|
146
|
+
"window",
|
|
147
|
+
"min_periods",
|
|
148
|
+
"center",
|
|
149
|
+
"win_type",
|
|
150
|
+
"axis",
|
|
151
|
+
"on",
|
|
152
|
+
"closed",
|
|
153
|
+
"shift",
|
|
154
|
+
"order_cols",
|
|
155
|
+
"ascending",
|
|
156
|
+
]
|
|
150
157
|
|
|
151
158
|
for attr in args:
|
|
152
159
|
p[attr] = getattr(self, attr)
|
|
@@ -164,8 +171,11 @@ class Rolling(Window):
|
|
|
164
171
|
empty_obj = build_empty_series(
|
|
165
172
|
self.input.dtype, index=pd_index[:0], name=self.input.name
|
|
166
173
|
)
|
|
167
|
-
|
|
168
|
-
for
|
|
174
|
+
params = (self.params or dict()).copy()
|
|
175
|
+
for key in self._mf_specific_fields:
|
|
176
|
+
params.pop(key, None)
|
|
177
|
+
pd_rolling = empty_obj.rolling(**params)
|
|
178
|
+
for k in params:
|
|
169
179
|
# update value according to pandas rolling
|
|
170
180
|
setattr(self, k, getattr(pd_rolling, k))
|
|
171
181
|
|
|
@@ -23,15 +23,9 @@ def test_expanding():
|
|
|
23
23
|
df = pd.DataFrame(np.random.rand(4, 3), columns=list("abc"))
|
|
24
24
|
df2 = md.DataFrame(df)
|
|
25
25
|
|
|
26
|
-
with pytest.raises(NotImplementedError):
|
|
27
|
-
_ = df2.expanding(3, center=True)
|
|
28
|
-
|
|
29
|
-
with pytest.raises(NotImplementedError):
|
|
30
|
-
_ = df2.expanding(3, axis=1)
|
|
31
|
-
|
|
32
26
|
r = df2.expanding(3)
|
|
33
27
|
expected = df.expanding(3)
|
|
34
|
-
assert repr(r) == repr(expected)
|
|
28
|
+
assert repr(r).split(",", 1)[0] == repr(expected).split(",", 1)[0]
|
|
35
29
|
|
|
36
30
|
assert "b" in dir(r)
|
|
37
31
|
|
maxframe/env.py
CHANGED
|
@@ -17,12 +17,14 @@ MAXFRAME_NAMESPACE = "MAXFRAME_NAMESPACE"
|
|
|
17
17
|
|
|
18
18
|
# Maxframe Service common envs
|
|
19
19
|
MAXFRAME_HTTP_PORT_FILE = "MAXFRAME_PROXY_PORT_FILE"
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
MAXFRAME_INSIDE_TASK = "MAXFRAME_INSIDE_TASK"
|
|
21
|
+
MAXFRAME_SERVICE_BASE_URL = "MF_SERVICE_BASE_URL"
|
|
22
|
+
MAXFRAME_SERVICE_ALLOW_ORIGIN = "MAXFRAME_SERVICE_ALLOW_ORIGIN"
|
|
22
23
|
MAXFRAME_SERVICE_LISTEN_ADDRESS = "MAXFRAME_SERVICE_LISTEN_ADDRESS"
|
|
23
24
|
MAXFRAME_SERVICE_LOG_CONFIG_FILE = "MAXFRAME_SERVICE_LOG_CONFIG_FILE"
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
MAXFRAME_SERVICE_PORT = "MAXFRAME_SERVICE_PORT"
|
|
26
|
+
MAXFRAME_SERVICE_PORT_RETRIES = "MAXFRAME_SERVICE_PORT_RETRIES"
|
|
27
|
+
MAXFRAME_USER_LOG_CONFIG_FILE = "MAXFRAME_USER_LOG_CONFIG_FILE"
|
|
26
28
|
|
|
27
29
|
# ODPS envs
|
|
28
30
|
ODPS_BEARER_TOKEN = "ODPS_BEARER_TOKEN"
|
|
@@ -31,4 +33,5 @@ ODPS_BEARER_TOKEN_TIMESTAMP_FILE = "ODPS_BEARER_TOKEN_TIMESTAMP_FILE"
|
|
|
31
33
|
ODPS_PROJECT_NAME = "ODPS_PROJECT_NAME"
|
|
32
34
|
ODPS_ENDPOINT = "ODPS_ENDPOINT"
|
|
33
35
|
ODPS_TUNNEL_ENDPOINT = "ODPS_TUNNEL_ENDPOINT"
|
|
36
|
+
ODPS_NAMESPACE = "ODPS_NAMESPACE"
|
|
34
37
|
ODPS_STORAGE_API_ENDPOINT = "ODPS_STORAGE_API_ENDPOINT"
|
maxframe/errors.py
CHANGED
|
@@ -43,5 +43,5 @@ class SessionAlreadyClosedError(MaxFrameError):
|
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class EngineUnavailableError(MaxFrameIntentionalError):
|
|
46
|
-
def __init__(self,
|
|
47
|
-
super().__init__(
|
|
46
|
+
def __init__(self, msg: str):
|
|
47
|
+
super().__init__(msg)
|
maxframe/io/odpsio/schema.py
CHANGED
|
@@ -22,9 +22,10 @@ import pyarrow as pa
|
|
|
22
22
|
from odps import types as odps_types
|
|
23
23
|
from pandas.api import types as pd_types
|
|
24
24
|
|
|
25
|
+
from ...config import options
|
|
25
26
|
from ...core import TILEABLE_TYPE, OutputType
|
|
26
27
|
from ...dataframe.core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
27
|
-
from ...lib.dtypes_extension import ArrowDtype
|
|
28
|
+
from ...lib.dtypes_extension import ArrowBlobType, ArrowDtype
|
|
28
29
|
from ...protocol import DataFrameTableMeta
|
|
29
30
|
from ...tensor.core import TENSOR_TYPE
|
|
30
31
|
from ...utils import build_temp_table_name
|
|
@@ -65,7 +66,11 @@ _odps_type_to_arrow = {
|
|
|
65
66
|
odps_types.timestamp_ntz: pa.timestamp("ns"),
|
|
66
67
|
}
|
|
67
68
|
|
|
68
|
-
|
|
69
|
+
if hasattr(odps_types, "blob"):
|
|
70
|
+
_arrow_to_odps_types[ArrowBlobType()] = odps_types.blob
|
|
71
|
+
_odps_type_to_arrow[odps_types.blob] = ArrowBlobType()
|
|
72
|
+
|
|
73
|
+
_based_for_pandas_pa_types = (pa.ListType, pa.MapType, pa.StructType)
|
|
69
74
|
|
|
70
75
|
|
|
71
76
|
def is_based_for_pandas_dtype(arrow_type: pa.DataType) -> bool:
|
|
@@ -204,9 +209,10 @@ def odps_schema_to_pandas_dtypes(
|
|
|
204
209
|
def arrow_table_to_pandas_dataframe(
|
|
205
210
|
table: pa.Table, meta: DataFrameTableMeta = None
|
|
206
211
|
) -> pd.DataFrame:
|
|
212
|
+
use_arrow_backend = options.dataframe.dtype_backend == "pyarrow"
|
|
207
213
|
df = table.to_pandas(
|
|
208
214
|
types_mapper=lambda x: (
|
|
209
|
-
ArrowDtype(x) if is_based_for_pandas_dtype(x) else None
|
|
215
|
+
ArrowDtype(x) if is_based_for_pandas_dtype(x) or use_arrow_backend else None
|
|
210
216
|
),
|
|
211
217
|
ignore_metadata=True,
|
|
212
218
|
)
|