maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import pyarrow as pa
|
|
17
|
+
import pytest
|
|
18
|
+
|
|
19
|
+
from ...... import dataframe as md
|
|
20
|
+
from ......utils import ARROW_DTYPE_NOT_SUPPORTED
|
|
21
|
+
from ....core import SPECodeContext
|
|
22
|
+
from ...accessors.struct_ import SeriesStructMethodAdapter
|
|
23
|
+
|
|
24
|
+
pytestmark = pytest.mark.skipif(
|
|
25
|
+
ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported"
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _run_generated_code(
|
|
30
|
+
code: str, ctx: SPECodeContext, input_val: pd.DataFrame
|
|
31
|
+
) -> dict:
|
|
32
|
+
local_vars = ctx.constants.copy()
|
|
33
|
+
local_vars["var_0"] = input_val
|
|
34
|
+
import_code = "import pandas as pd\nimport numpy as np\n"
|
|
35
|
+
exec(import_code + code, local_vars, local_vars)
|
|
36
|
+
return local_vars
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.fixture
|
|
40
|
+
def series():
|
|
41
|
+
version_type = pa.struct(
|
|
42
|
+
[
|
|
43
|
+
("major", pa.int64()),
|
|
44
|
+
("minor", pa.int64()),
|
|
45
|
+
]
|
|
46
|
+
)
|
|
47
|
+
return md.Series(
|
|
48
|
+
[
|
|
49
|
+
{"version": {"major": 1, "minor": 5}, "project": "pandas"},
|
|
50
|
+
{"version": {"major": 2, "minor": 1}, "project": "pandas"},
|
|
51
|
+
{"version": {"major": 1, "minor": 26}, "project": "numpy"},
|
|
52
|
+
],
|
|
53
|
+
dtype=pd.ArrowDtype(
|
|
54
|
+
pa.struct([("version", version_type), ("project", pa.string())])
|
|
55
|
+
),
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_field(series):
|
|
60
|
+
s1 = series.struct.field(["version", "minor"])
|
|
61
|
+
context = SPECodeContext()
|
|
62
|
+
adapter = SeriesStructMethodAdapter()
|
|
63
|
+
results = adapter.generate_code(s1.op, context)
|
|
64
|
+
|
|
65
|
+
expected_results = [
|
|
66
|
+
"""
|
|
67
|
+
var_1 = var_0.struct.field(['version', 'minor'])
|
|
68
|
+
"""
|
|
69
|
+
]
|
|
70
|
+
assert results == expected_results
|
|
71
|
+
local_vars = _run_generated_code(results[0], context, series.op.data)
|
|
72
|
+
expected_series = pd.Series(
|
|
73
|
+
[5, 1, 26], dtype=pd.ArrowDtype(pa.int64()), name="minor"
|
|
74
|
+
)
|
|
75
|
+
pd.testing.assert_series_equal(expected_series, local_vars["var_1"])
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ....core import SPECodeContext
|
|
16
|
-
from ...indexing import DataFrameIlocGetItemAdapter
|
|
16
|
+
from ...indexing import DataFrameIlocGetItemAdapter, DataFrameIlocSetItemAdapter
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def test_series_iloc_single_row(s1):
|
|
@@ -24,6 +24,14 @@ def test_series_iloc_single_row(s1):
|
|
|
24
24
|
assert results == expected_results
|
|
25
25
|
|
|
26
26
|
|
|
27
|
+
def test_series_iloc_set_single_row(s1):
|
|
28
|
+
s1.iloc[2] = 2
|
|
29
|
+
context = SPECodeContext()
|
|
30
|
+
results = DataFrameIlocSetItemAdapter().generate_code(s1.op, context)
|
|
31
|
+
expected_results = ["var_1 = var_0.copy()", "var_1.iloc[2] = 2"]
|
|
32
|
+
assert results == expected_results
|
|
33
|
+
|
|
34
|
+
|
|
27
35
|
def test_series_iloc_multi_rows(s1):
|
|
28
36
|
df = s1.iloc[:2]
|
|
29
37
|
context = SPECodeContext()
|
|
@@ -48,6 +56,17 @@ def test_dataframe_iloc_only_columns(df1):
|
|
|
48
56
|
assert results == expected_results
|
|
49
57
|
|
|
50
58
|
|
|
59
|
+
def test_dataframe_iloc_set_only_column(df1):
|
|
60
|
+
df1.iloc[:, 0] = 5
|
|
61
|
+
context = SPECodeContext()
|
|
62
|
+
results = DataFrameIlocSetItemAdapter().generate_code(df1.op, context)
|
|
63
|
+
expected_results = [
|
|
64
|
+
"var_1 = var_0.copy()",
|
|
65
|
+
"var_1.iloc[slice(None, None, None), 0] = 5",
|
|
66
|
+
]
|
|
67
|
+
assert results == expected_results
|
|
68
|
+
|
|
69
|
+
|
|
51
70
|
def test_dataframe_iloc_only_rows(df1):
|
|
52
71
|
df = df1.iloc[1]
|
|
53
72
|
context = SPECodeContext()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ....core import SPECodeContext
|
|
16
|
+
from ...indexing import DataFrameLocGetItemAdapter, DataFrameLocSetItemAdapter
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_dataframe_iloc_only_columns(df1):
|
|
20
|
+
df = df1.iloc[:, [0, 1]]
|
|
21
|
+
context = SPECodeContext()
|
|
22
|
+
results = DataFrameLocGetItemAdapter().generate_code(df.op, context)
|
|
23
|
+
expected_results = ["var_1 = var_0.loc[slice(None, None, None), [0, 1]]"]
|
|
24
|
+
assert results == expected_results
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_dataframe_iloc_set_only_column(df1):
|
|
28
|
+
df1.iloc[:, 0] = 5
|
|
29
|
+
context = SPECodeContext()
|
|
30
|
+
results = DataFrameLocSetItemAdapter().generate_code(df1.op, context)
|
|
31
|
+
expected_results = [
|
|
32
|
+
"var_1 = var_0.copy()",
|
|
33
|
+
"var_1.loc[slice(None, None, None), 0] = 5",
|
|
34
|
+
]
|
|
35
|
+
assert results == expected_results
|
|
@@ -30,11 +30,8 @@ from ...misc import (
|
|
|
30
30
|
DataFrameExplodeAdapter,
|
|
31
31
|
DataFrameIsinAdapter,
|
|
32
32
|
DataFrameMapAdapter,
|
|
33
|
-
DataFrameMeltAdapter,
|
|
34
33
|
DataFrameMemoryUsageAdapter,
|
|
35
|
-
DataFramePivotTableAdapter,
|
|
36
34
|
DataFrameShiftAdapter,
|
|
37
|
-
DataFrameStackAdapter,
|
|
38
35
|
DataFrameToNumericAdapter,
|
|
39
36
|
)
|
|
40
37
|
|
|
@@ -181,17 +178,6 @@ def test_map(df1):
|
|
|
181
178
|
assert results == expected_results
|
|
182
179
|
|
|
183
180
|
|
|
184
|
-
def test_melt(df1):
|
|
185
|
-
context = SPECodeContext()
|
|
186
|
-
|
|
187
|
-
v1 = df1.melt(id_vars=["A"], value_vars=["B"])
|
|
188
|
-
results = DataFrameMeltAdapter().generate_code(v1.op, context)
|
|
189
|
-
assert (
|
|
190
|
-
results[0]
|
|
191
|
-
== "var_1 = var_0.melt(id_vars=['A'], value_vars=['B'], value_name='value')"
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
|
|
195
181
|
def test_memory_usage(df1):
|
|
196
182
|
context = SPECodeContext()
|
|
197
183
|
v1 = df1.memory_usage(index=False)
|
|
@@ -199,17 +185,6 @@ def test_memory_usage(df1):
|
|
|
199
185
|
assert results[0] == "var_1 = var_0.memory_usage(index=False, deep=False)"
|
|
200
186
|
|
|
201
187
|
|
|
202
|
-
def test_pivot_table(midx_df1):
|
|
203
|
-
df = midx_df1.pivot_table(columns="B", values="C")
|
|
204
|
-
context = SPECodeContext()
|
|
205
|
-
results = DataFramePivotTableAdapter().generate_code(df.op, context)
|
|
206
|
-
assert results[0] == (
|
|
207
|
-
"var_1 = var_0.pivot_table(values='C', columns='B', "
|
|
208
|
-
"aggfunc='mean', margins=False, dropna=True, margins_name='All', "
|
|
209
|
-
"sort=True)"
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
|
|
213
188
|
def test_shift(df1):
|
|
214
189
|
df = df1.shift(periods=1)
|
|
215
190
|
context = SPECodeContext()
|
|
@@ -217,13 +192,6 @@ def test_shift(df1):
|
|
|
217
192
|
assert results[0] == "var_1 = var_0.shift(periods=1, axis=0)"
|
|
218
193
|
|
|
219
194
|
|
|
220
|
-
def test_stack(midx_df1):
|
|
221
|
-
df = midx_df1.stack([0])
|
|
222
|
-
context = SPECodeContext()
|
|
223
|
-
results = DataFrameStackAdapter().generate_code(df.op, context)
|
|
224
|
-
assert results[0] == "var_1 = var_0.stack(level=[0], dropna=True)"
|
|
225
|
-
|
|
226
|
-
|
|
227
195
|
def test_to_numeric():
|
|
228
196
|
v0 = md.Series([1, 2, 3, 4, 5, 6])
|
|
229
197
|
v1 = md.to_numeric(v0, downcast="float")
|
|
@@ -21,10 +21,11 @@ from ..groupby import (
|
|
|
21
21
|
DataFrameGroupByOpAdapter,
|
|
22
22
|
GroupByApplyAdapter,
|
|
23
23
|
GroupByApplyChunkAdapter,
|
|
24
|
-
|
|
24
|
+
GroupByExpandingAggAdapter,
|
|
25
25
|
GroupByFillOperatorAdapter,
|
|
26
26
|
GroupByHeadAdapter,
|
|
27
27
|
GroupByIndexAdapter,
|
|
28
|
+
GroupByRollingAggAdapter,
|
|
28
29
|
GroupBySampleAdapter,
|
|
29
30
|
GroupByTransformAdapter,
|
|
30
31
|
_need_enforce_group_keys,
|
|
@@ -73,30 +74,92 @@ def test_dataframe_groupby_agg(df1):
|
|
|
73
74
|
@pytest.mark.parametrize(
|
|
74
75
|
"func, func_kwargs, expected_results",
|
|
75
76
|
[
|
|
76
|
-
("cumcount", {}, ["var_1 =
|
|
77
|
-
("cumcount", {"ascending": True}, ["var_1 =
|
|
78
|
-
("cumcount", {"ascending": False}, ["var_1 =
|
|
79
|
-
("cumsum", {}, ["var_1 =
|
|
80
|
-
("
|
|
81
|
-
("
|
|
82
|
-
("
|
|
83
|
-
("cumprod", {"axis": 0}, ["var_1 = var_0.cumprod(axis=0)"]),
|
|
84
|
-
("cumprod", {"axis": 1}, ["var_1 = var_0.cumprod(axis=1)"]),
|
|
85
|
-
("cummax", {}, ["var_1 = var_0.cummax(axis=0)"]),
|
|
86
|
-
("cummax", {"axis": 0}, ["var_1 = var_0.cummax(axis=0)"]),
|
|
87
|
-
("cummax", {"axis": 1}, ["var_1 = var_0.cummax(axis=1)"]),
|
|
88
|
-
("cummin", {}, ["var_1 = var_0.cummin(axis=0)"]),
|
|
89
|
-
("cummin", {"axis": 0}, ["var_1 = var_0.cummin(axis=0)"]),
|
|
90
|
-
("cummin", {"axis": 1}, ["var_1 = var_0.cummin(axis=1)"]),
|
|
77
|
+
("cumcount", {}, ["var_1 = {G}.cumcount(ascending=True)"]),
|
|
78
|
+
("cumcount", {"ascending": True}, ["var_1 = {G}.cumcount(ascending=True)"]),
|
|
79
|
+
("cumcount", {"ascending": False}, ["var_1 = {G}.cumcount(ascending=False)"]),
|
|
80
|
+
("cumsum", {}, ["var_1 = {G}.cumsum()"]),
|
|
81
|
+
("cumprod", {}, ["var_1 = {G}.cumprod()"]),
|
|
82
|
+
("cummax", {}, ["var_1 = {G}.cummax()"]),
|
|
83
|
+
("cummin", {}, ["var_1 = {G}.cummin()"]),
|
|
91
84
|
],
|
|
92
85
|
)
|
|
93
86
|
def test_dataframe_groupby_cum(df1, func, func_kwargs, expected_results):
|
|
94
87
|
context = SPECodeContext()
|
|
95
|
-
adapter =
|
|
88
|
+
adapter = GroupByExpandingAggAdapter()
|
|
96
89
|
|
|
90
|
+
groupby_code = "var_0.groupby(by=['A'], as_index=True, sort=True, group_keys=True)"
|
|
97
91
|
res = getattr(df1.groupby("A"), func)(**func_kwargs)
|
|
98
92
|
results = adapter.generate_code(res.op, context)
|
|
99
|
-
assert results == expected_results
|
|
93
|
+
assert results == [s.replace("{G}", groupby_code) for s in expected_results]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_dataframe_groupby_expanding(df1):
|
|
97
|
+
context = SPECodeContext()
|
|
98
|
+
adapter = GroupByExpandingAggAdapter()
|
|
99
|
+
res = df1.groupby("A").expanding(2).sum()
|
|
100
|
+
results = adapter.generate_code(res.op, context)
|
|
101
|
+
assert results == [
|
|
102
|
+
"def _exp_fun_var_1(frame, **_):",
|
|
103
|
+
" func = 'sum' if func != \"prod\" else lambda x: x.prod()",
|
|
104
|
+
" out_frame = frame.expanding(min_periods=2).agg(func)",
|
|
105
|
+
" return out_frame",
|
|
106
|
+
"var_1 = var_0.groupby(by=['A'], as_index=True, sort=True, group_keys=True"
|
|
107
|
+
").apply(_exp_fun_var_1, include_groups=False)",
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
res = df1.groupby("A").expanding(2, shift=1).sum()
|
|
111
|
+
results = adapter.generate_code(res.op, context)
|
|
112
|
+
assert results == [
|
|
113
|
+
"def _exp_fun_var_2(frame, **_):",
|
|
114
|
+
" func = 'sum' if func != \"prod\" else lambda x: x.prod()",
|
|
115
|
+
" frame = frame.shift(-1)",
|
|
116
|
+
" out_frame = frame.expanding(min_periods=2).agg(func)",
|
|
117
|
+
" return out_frame",
|
|
118
|
+
"var_2 = var_0.groupby(by=['A'], as_index=True, sort=True, "
|
|
119
|
+
"group_keys=True).apply(_exp_fun_var_2, include_groups=False)",
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
res = df1.groupby("A").expanding(2, shift=1, reverse_range=True).sum()
|
|
123
|
+
results = adapter.generate_code(res.op, context)
|
|
124
|
+
assert results == [
|
|
125
|
+
"def _exp_fun_var_3(frame, **_):",
|
|
126
|
+
" func = 'sum' if func != \"prod\" else lambda x: x.prod()",
|
|
127
|
+
" frame = frame.shift(-1)",
|
|
128
|
+
" frame = frame.iloc[::-1]",
|
|
129
|
+
" out_frame = frame.expanding(min_periods=2).agg(func)",
|
|
130
|
+
" out_frame = out_frame.iloc[::-1]",
|
|
131
|
+
" return out_frame",
|
|
132
|
+
"var_3 = var_0.groupby(by=['A'], as_index=True, sort=True, "
|
|
133
|
+
"group_keys=True).apply(_exp_fun_var_3, include_groups=False)",
|
|
134
|
+
]
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def test_dataframe_groupby_rolling(df1):
|
|
138
|
+
context = SPECodeContext()
|
|
139
|
+
adapter = GroupByRollingAggAdapter()
|
|
140
|
+
res = df1.groupby("A").rolling(3).sum()
|
|
141
|
+
results = adapter.generate_code(res.op, context)
|
|
142
|
+
assert results == [
|
|
143
|
+
"def _roll_fun_var_1(frame, **_):",
|
|
144
|
+
" func = 'sum' if func != \"prod\" else lambda x: x.prod()",
|
|
145
|
+
" frame = frame.shift(0)",
|
|
146
|
+
" return frame.rolling(window=3, min_periods=None, center=False, "
|
|
147
|
+
"win_type=None, axis=0, on=None, closed=None).agg(func)",
|
|
148
|
+
"var_1 = var_0.groupby(by=['A'], as_index=True, sort=True, "
|
|
149
|
+
"group_keys=True).apply(_roll_fun_var_1, include_groups=False)",
|
|
150
|
+
]
|
|
151
|
+
|
|
152
|
+
res = df1.groupby("A").rolling(3, shift=1).sum()
|
|
153
|
+
results = adapter.generate_code(res.op, context)
|
|
154
|
+
assert results == [
|
|
155
|
+
"def _roll_fun_var_2(frame, **_):",
|
|
156
|
+
" func = 'sum' if func != \"prod\" else lambda x: x.prod()",
|
|
157
|
+
" frame = frame.shift(-1)",
|
|
158
|
+
" return frame.rolling(window=3, min_periods=None, center=False, "
|
|
159
|
+
"win_type=None, axis=0, on=None, closed=None).agg(func)",
|
|
160
|
+
"var_2 = var_0.groupby(by=['A'], as_index=True, sort=True, "
|
|
161
|
+
"group_keys=True).apply(_roll_fun_var_2, include_groups=False)",
|
|
162
|
+
]
|
|
100
163
|
|
|
101
164
|
|
|
102
165
|
def test_dataframe_groupby_fill(df1):
|
|
@@ -17,7 +17,11 @@ import pytest
|
|
|
17
17
|
|
|
18
18
|
from ..... import dataframe as md
|
|
19
19
|
from ...core import SPECodeContext
|
|
20
|
-
from ..merge import
|
|
20
|
+
from ..merge import (
|
|
21
|
+
DataFrameConcatAdapter,
|
|
22
|
+
DataFrameMergeAdapter,
|
|
23
|
+
DataFrameUpdateAdapter,
|
|
24
|
+
)
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
@pytest.fixture
|
|
@@ -398,3 +402,25 @@ def test_concat_columns_from_dataframes(df2, df6):
|
|
|
398
402
|
" objs=[var_1, var_2], copy=True)"
|
|
399
403
|
]
|
|
400
404
|
assert results == expected_results
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def test_update_dataframe(df5, df6):
|
|
408
|
+
adapter = DataFrameUpdateAdapter()
|
|
409
|
+
df5.update(df6)
|
|
410
|
+
context = SPECodeContext()
|
|
411
|
+
results = adapter.generate_code(df5.op, context)
|
|
412
|
+
expected_results = [
|
|
413
|
+
"var_1 = var_0.copy()",
|
|
414
|
+
"var_1.update(var_2, join='left', overwrite=True, filter_func=None,"
|
|
415
|
+
" errors='ignore')",
|
|
416
|
+
]
|
|
417
|
+
assert results == expected_results
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def test_combine_first_dataframe(df5, df6):
|
|
421
|
+
adapter = DataFrameUpdateAdapter()
|
|
422
|
+
df = df5.combine_first(df6)
|
|
423
|
+
context = SPECodeContext()
|
|
424
|
+
results = adapter.generate_code(df.op, context)
|
|
425
|
+
expected_results = ["var_1 = var_0.combine_first(var_2)"]
|
|
426
|
+
assert results == expected_results
|
|
@@ -19,6 +19,7 @@ from ...core import SPECodeContext
|
|
|
19
19
|
from ..reduction import (
|
|
20
20
|
DataFrameAggregateAdapter,
|
|
21
21
|
DataFrameKurtosisAdapter,
|
|
22
|
+
DataFrameModeAdapter,
|
|
22
23
|
DataFrameNuniqueAdapter,
|
|
23
24
|
DataFrameReductionAdapter,
|
|
24
25
|
DataFrameUniqueAdapter,
|
|
@@ -96,6 +97,18 @@ def test_median(df1):
|
|
|
96
97
|
assert results == expected_results
|
|
97
98
|
|
|
98
99
|
|
|
100
|
+
def test_mode(df1, s1):
|
|
101
|
+
df = df1.mode(dropna=False, numeric_only=True)
|
|
102
|
+
results = DataFrameModeAdapter().generate_code(df.op, SPECodeContext())
|
|
103
|
+
expected_results = ["var_1 = var_0.mode(axis=0, numeric_only=True, dropna=False)"]
|
|
104
|
+
assert results == expected_results
|
|
105
|
+
|
|
106
|
+
s = s1.mode()
|
|
107
|
+
results = DataFrameModeAdapter().generate_code(s.op, SPECodeContext())
|
|
108
|
+
expected_results = ["var_1 = var_0.mode(dropna=True)"]
|
|
109
|
+
assert results == expected_results
|
|
110
|
+
|
|
111
|
+
|
|
99
112
|
def test_aggregate(df1):
|
|
100
113
|
df = df1.agg(["sum", "mean"])
|
|
101
114
|
context = SPECodeContext()
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
import pytest
|
|
18
|
+
|
|
19
|
+
from ..... import dataframe as md
|
|
20
|
+
from ...core import SPECodeContext
|
|
21
|
+
from ..reshape import (
|
|
22
|
+
DataFrameMeltAdapter,
|
|
23
|
+
DataFramePivotTableAdapter,
|
|
24
|
+
DataFrameStackAdapter,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.fixture
|
|
29
|
+
def df1():
|
|
30
|
+
return md.DataFrame(
|
|
31
|
+
np.random.randn(6, 4),
|
|
32
|
+
index=md.Index(["1", "2", "3", "4", "5", "6"], name="test_idx"),
|
|
33
|
+
columns=list("ABCD"),
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@pytest.fixture
|
|
38
|
+
def midx_df1():
|
|
39
|
+
return md.DataFrame(
|
|
40
|
+
[
|
|
41
|
+
[1, 10, 101],
|
|
42
|
+
[9, 90, 909],
|
|
43
|
+
[2, 20, 202],
|
|
44
|
+
],
|
|
45
|
+
index=pd.MultiIndex.from_arrays(
|
|
46
|
+
[[1, 9, 2], [11, 99, 22], [111, 999, 222]],
|
|
47
|
+
names=("idx_0", "idx_1", "idx_2"),
|
|
48
|
+
),
|
|
49
|
+
columns=list("ABC"),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_melt(df1):
|
|
54
|
+
context = SPECodeContext()
|
|
55
|
+
|
|
56
|
+
v1 = df1.melt(id_vars=["A"], value_vars=["B"])
|
|
57
|
+
results = DataFrameMeltAdapter().generate_code(v1.op, context)
|
|
58
|
+
assert (
|
|
59
|
+
results[0]
|
|
60
|
+
== "var_1 = var_0.melt(id_vars=['A'], value_vars=['B'], value_name='value')"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_pivot_table(midx_df1):
|
|
65
|
+
df = midx_df1.pivot_table(columns="B", values="C")
|
|
66
|
+
context = SPECodeContext()
|
|
67
|
+
results = DataFramePivotTableAdapter().generate_code(df.op, context)
|
|
68
|
+
assert results[0] == (
|
|
69
|
+
"var_1 = var_0.pivot_table(values='C', columns='B', "
|
|
70
|
+
"aggfunc='mean', margins=False, dropna=True, margins_name='All', "
|
|
71
|
+
"sort=True)"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_stack(midx_df1):
|
|
76
|
+
df = midx_df1.stack([0])
|
|
77
|
+
context = SPECodeContext()
|
|
78
|
+
results = DataFrameStackAdapter().generate_code(df.op, context)
|
|
79
|
+
assert results[0] == "var_1 = var_0.stack(level=[0], dropna=True)"
|
|
@@ -157,3 +157,23 @@ def test_dataframe_sort_index_by_all(df2):
|
|
|
157
157
|
" na_position='last', ignore_index=False, level=None, sort_remaining=True)"
|
|
158
158
|
]
|
|
159
159
|
assert results == expected_results
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def test_dataframe_nlargest(df1):
|
|
163
|
+
df = df1.nlargest(10, "A", keep="last")
|
|
164
|
+
adapter = DataFrameSortValuesAdapter()
|
|
165
|
+
context = SPECodeContext()
|
|
166
|
+
assert adapter.accepts(df.op) == EngineAcceptance.ACCEPT
|
|
167
|
+
results = adapter.generate_code(df.op, context)
|
|
168
|
+
expected_results = ["var_1 = var_0.nlargest(10, keep='last', columns='A')"]
|
|
169
|
+
assert results == expected_results
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def test_series_nsmallest(s2):
|
|
173
|
+
s = s2.nsmallest(10, keep="last")
|
|
174
|
+
adapter = DataFrameSortValuesAdapter()
|
|
175
|
+
context = SPECodeContext()
|
|
176
|
+
assert adapter.accepts(s.op) == EngineAcceptance.ACCEPT
|
|
177
|
+
results = adapter.generate_code(s.op, context)
|
|
178
|
+
expected_results = ["var_1 = var_0.nsmallest(10, keep='last')"]
|
|
179
|
+
assert results == expected_results
|
|
@@ -14,9 +14,18 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import List
|
|
16
16
|
|
|
17
|
+
from ....dataframe.tseries.between_time import DataFrameBetweenTime
|
|
17
18
|
from ....dataframe.tseries.to_datetime import DataFrameToDatetime
|
|
18
19
|
from ....utils import no_default
|
|
19
20
|
from ..core import SPECodeContext, SPEOperatorAdapter, register_op_adapter
|
|
21
|
+
from ..utils import build_method_call_adapter
|
|
22
|
+
|
|
23
|
+
DataFrameBetweenTimeAdapter = build_method_call_adapter(
|
|
24
|
+
DataFrameBetweenTime,
|
|
25
|
+
"between_time",
|
|
26
|
+
kw_keys=["start_time", "end_time", "inclusive", "axis"],
|
|
27
|
+
skip_none=True,
|
|
28
|
+
)
|
|
20
29
|
|
|
21
30
|
|
|
22
31
|
@register_op_adapter(DataFrameToDatetime)
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import inspect
|
|
16
|
-
from typing import List
|
|
16
|
+
from typing import List, Optional
|
|
17
17
|
|
|
18
18
|
from .....learn.contrib.lightgbm._predict import LGBMPredict
|
|
19
19
|
from .....learn.contrib.lightgbm._train import LGBMTrain
|
|
@@ -23,11 +23,12 @@ from ...core import SPECodeContext, SPEOperatorAdapter, register_op_adapter
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class _LightGBMAdapter(SPEOperatorAdapter):
|
|
26
|
-
def generate_code(
|
|
26
|
+
def generate_code(
|
|
27
|
+
self, op: OperatorType, context: SPECodeContext
|
|
28
|
+
) -> Optional[List[str]]:
|
|
27
29
|
context.register_import("lightgbm")
|
|
28
30
|
context.register_import("pandas", "pd")
|
|
29
31
|
context.register_import("numpy", "np")
|
|
30
|
-
return []
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
@register_op_adapter(ToLGBMDataset)
|
|
@@ -50,8 +50,9 @@ def test_spe_xgb_train_script():
|
|
|
50
50
|
results = adapter.generate_code(model.op, context)
|
|
51
51
|
assert results == [
|
|
52
52
|
"start_time = time.time()",
|
|
53
|
+
"var_2 = dict()",
|
|
53
54
|
"logger.info('Trained data size: (%s, %s)', var_1.num_row(), var_1.num_col())",
|
|
54
|
-
"var_0 = xgboost.train(params={}, dtrain=var_1, evals=[], num_boost_round=10)",
|
|
55
|
+
"var_0 = xgboost.train(params={}, dtrain=var_1, evals=[], num_boost_round=10, evals_result=var_2)",
|
|
55
56
|
"logger.info('Train cost: %.2f s', time.time() - start_time)",
|
|
56
57
|
]
|
|
57
58
|
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
from .....learn.metrics._ranking import AucOp, RocAucScore, RocCurve
|
|
18
|
+
from ...core import SPECodeContext, SPEOperatorAdapter, register_op_adapter
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_op_adapter(AucOp)
|
|
22
|
+
class AucOpAdapter(SPEOperatorAdapter):
|
|
23
|
+
def generate_code(self, op: AucOp, context: SPECodeContext) -> List[str]:
|
|
24
|
+
context.register_import(
|
|
25
|
+
"sklearn.metrics",
|
|
26
|
+
from_item="auc",
|
|
27
|
+
alias="sk_auc",
|
|
28
|
+
)
|
|
29
|
+
call_args = self.generate_call_args_with_attributes(
|
|
30
|
+
op, context, "x", "y", skip_none=True
|
|
31
|
+
)
|
|
32
|
+
out_vars = [context.get_output_tileable_variable(out) for out in op.outputs]
|
|
33
|
+
out_vars_str = ", ".join(out_vars)
|
|
34
|
+
return [f"{out_vars_str} = sk_auc({call_args})"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@register_op_adapter(RocAucScore)
|
|
38
|
+
class RocAucScoreAdapter(SPEOperatorAdapter):
|
|
39
|
+
def generate_code(self, op: RocAucScore, context: SPECodeContext) -> List[str]:
|
|
40
|
+
context.register_import(
|
|
41
|
+
"sklearn.metrics",
|
|
42
|
+
from_item="roc_auc_score",
|
|
43
|
+
alias="sk_roc_auc_score",
|
|
44
|
+
)
|
|
45
|
+
call_args = self.generate_call_args_with_attributes(
|
|
46
|
+
op,
|
|
47
|
+
context,
|
|
48
|
+
"y_true",
|
|
49
|
+
"y_score",
|
|
50
|
+
skip_none=True,
|
|
51
|
+
kw_keys=["average", "sample_weight", "max_fpr", "multi_class", "labels"],
|
|
52
|
+
)
|
|
53
|
+
out_vars = [context.get_output_tileable_variable(out) for out in op.outputs]
|
|
54
|
+
out_vars_str = ", ".join(out_vars)
|
|
55
|
+
return [f"{out_vars_str} = sk_roc_auc_score({call_args})"]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@register_op_adapter(RocCurve)
|
|
59
|
+
class RocCurveAdapter(SPEOperatorAdapter):
|
|
60
|
+
def generate_code(self, op: RocCurve, context: SPECodeContext) -> List[str]:
|
|
61
|
+
context.register_import(
|
|
62
|
+
"sklearn.metrics",
|
|
63
|
+
from_item="roc_curve",
|
|
64
|
+
alias="sk_roc_curve",
|
|
65
|
+
)
|
|
66
|
+
call_args = self.generate_call_args_with_attributes(
|
|
67
|
+
op,
|
|
68
|
+
context,
|
|
69
|
+
"y_true",
|
|
70
|
+
"y_score",
|
|
71
|
+
skip_none=True,
|
|
72
|
+
kw_keys=["sample_weight", "drop_intermediate", "pos_label"],
|
|
73
|
+
)
|
|
74
|
+
out_vars = [context.get_output_tileable_variable(out) for out in op.outputs]
|
|
75
|
+
out_vars_str = ", ".join(out_vars)
|
|
76
|
+
return [f"{out_vars_str} = sk_roc_curve({call_args})"]
|