maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -18,13 +18,15 @@ import pyarrow as pa
|
|
|
18
18
|
import pytest
|
|
19
19
|
|
|
20
20
|
from ..... import dataframe as md
|
|
21
|
+
from .....core import OutputType, TileableGraph, build_fetch
|
|
21
22
|
from .....lib.dtypes_extension import dict_
|
|
22
|
-
from .....utils import
|
|
23
|
-
|
|
23
|
+
from .....utils import (
|
|
24
|
+
ARROW_DTYPE_NOT_SUPPORTED,
|
|
25
|
+
deserialize_serializable,
|
|
26
|
+
serialize_serializable,
|
|
27
|
+
)
|
|
28
|
+
from ..core import SeriesDictMethod
|
|
24
29
|
from ..getitem import SeriesDictGetItemOperator
|
|
25
|
-
from ..length import SeriesDictLengthOperator
|
|
26
|
-
from ..remove import SeriesDictRemoveOperator
|
|
27
|
-
from ..setitem import SeriesDictSetItemOperator
|
|
28
30
|
|
|
29
31
|
pytestmark = pytest.mark.skipif(
|
|
30
32
|
ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported"
|
|
@@ -58,10 +60,11 @@ def test_getitem(df):
|
|
|
58
60
|
assert s1.shape == (1,)
|
|
59
61
|
assert s1.index_value == df.index_value
|
|
60
62
|
op = s1.op
|
|
61
|
-
assert isinstance(op,
|
|
62
|
-
assert op.
|
|
63
|
-
assert op.
|
|
64
|
-
assert op.
|
|
63
|
+
assert isinstance(op, SeriesDictMethod)
|
|
64
|
+
assert op.method == "getitem"
|
|
65
|
+
assert op.method_kwargs["query_key"] == "k1"
|
|
66
|
+
assert op.method_kwargs["default_value"] is None
|
|
67
|
+
assert op.method_kwargs["ignore_key_error"] is False
|
|
65
68
|
|
|
66
69
|
|
|
67
70
|
def test_getitem_with_default_value(df):
|
|
@@ -72,10 +75,11 @@ def test_getitem_with_default_value(df):
|
|
|
72
75
|
assert s1.shape == (1,)
|
|
73
76
|
assert s1.index_value == df.index_value
|
|
74
77
|
op = s1.op
|
|
75
|
-
assert isinstance(op,
|
|
76
|
-
assert op.
|
|
77
|
-
assert op.
|
|
78
|
-
assert op.
|
|
78
|
+
assert isinstance(op, SeriesDictMethod)
|
|
79
|
+
assert op.method == "getitem"
|
|
80
|
+
assert op.method_kwargs["query_key"] == "k1"
|
|
81
|
+
assert op.method_kwargs["default_value"] == 1
|
|
82
|
+
assert op.method_kwargs["ignore_key_error"] is True
|
|
79
83
|
|
|
80
84
|
|
|
81
85
|
def test_setitem(df):
|
|
@@ -87,9 +91,10 @@ def test_setitem(df):
|
|
|
87
91
|
assert s1.index_value == df.index_value
|
|
88
92
|
assert s1.shape == (1,)
|
|
89
93
|
op = s1.op
|
|
90
|
-
assert isinstance(op,
|
|
91
|
-
assert op.
|
|
92
|
-
assert op.
|
|
94
|
+
assert isinstance(op, SeriesDictMethod)
|
|
95
|
+
assert op.method == "setitem"
|
|
96
|
+
assert op.method_kwargs["query_key"] == "k1"
|
|
97
|
+
assert op.method_kwargs["value"] == "v3"
|
|
93
98
|
|
|
94
99
|
|
|
95
100
|
def test_length(df):
|
|
@@ -100,7 +105,8 @@ def test_length(df):
|
|
|
100
105
|
assert s1.shape == (1,)
|
|
101
106
|
assert s1.index_value == df.index_value
|
|
102
107
|
op = s1.op
|
|
103
|
-
assert
|
|
108
|
+
assert op.method == "len"
|
|
109
|
+
assert isinstance(op, SeriesDictMethod)
|
|
104
110
|
|
|
105
111
|
|
|
106
112
|
def test_remove(df):
|
|
@@ -111,9 +117,10 @@ def test_remove(df):
|
|
|
111
117
|
assert s1.index_value == df.index_value
|
|
112
118
|
assert s1.shape == (1,)
|
|
113
119
|
op = s1.op
|
|
114
|
-
assert isinstance(op,
|
|
115
|
-
assert op.
|
|
116
|
-
assert op.
|
|
120
|
+
assert isinstance(op, SeriesDictMethod)
|
|
121
|
+
assert op.method == "remove"
|
|
122
|
+
assert op.method_kwargs["query_key"] == "k1"
|
|
123
|
+
assert op.method_kwargs["ignore_key_error"] is True
|
|
117
124
|
|
|
118
125
|
|
|
119
126
|
def test_contains(df):
|
|
@@ -124,5 +131,38 @@ def test_contains(df):
|
|
|
124
131
|
assert s1.index_value == df.index_value
|
|
125
132
|
assert s1.shape == (1,)
|
|
126
133
|
op = s1.op
|
|
127
|
-
assert isinstance(op,
|
|
128
|
-
assert op.
|
|
134
|
+
assert isinstance(op, SeriesDictMethod)
|
|
135
|
+
assert op.method == "contains"
|
|
136
|
+
assert op.method_kwargs["query_key"] == "k1"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_legacy_compatibility(df):
|
|
140
|
+
in_series = df["A"]
|
|
141
|
+
legacy_op = SeriesDictGetItemOperator(
|
|
142
|
+
query_key="k1",
|
|
143
|
+
default_value=1,
|
|
144
|
+
ignore_key_error=True,
|
|
145
|
+
_output_types=[OutputType.series],
|
|
146
|
+
)
|
|
147
|
+
out = legacy_op.new_tileable(
|
|
148
|
+
[in_series],
|
|
149
|
+
shape=in_series.shape,
|
|
150
|
+
index_value=in_series.index_value,
|
|
151
|
+
name="k1",
|
|
152
|
+
dtype=pd.ArrowDtype(pa.int64()),
|
|
153
|
+
)
|
|
154
|
+
fetch_node = build_fetch(in_series).data
|
|
155
|
+
out.op.inputs = [fetch_node]
|
|
156
|
+
|
|
157
|
+
dag = TileableGraph([out.data])
|
|
158
|
+
dag.add_node(fetch_node)
|
|
159
|
+
dag.add_node(out.data)
|
|
160
|
+
dag.add_edge(fetch_node, out.data)
|
|
161
|
+
dag_out = deserialize_serializable(serialize_serializable(dag))
|
|
162
|
+
|
|
163
|
+
out_op = dag_out.results[0].op
|
|
164
|
+
assert isinstance(out_op, SeriesDictMethod)
|
|
165
|
+
assert out_op.method == "getitem"
|
|
166
|
+
assert out_op.method_kwargs["ignore_key_error"] == True
|
|
167
|
+
assert out_op.method_kwargs["default_value"] == 1
|
|
168
|
+
assert out_op.method_kwargs["query_key"] == "k1"
|
|
@@ -20,13 +20,13 @@ def _install():
|
|
|
20
20
|
from .getitem import series_list_getitem, series_list_getitem_with_index_error
|
|
21
21
|
from .length import series_list_length
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
list_method_to_handlers = {
|
|
24
24
|
"__getitem__": series_list_getitem_with_index_error,
|
|
25
25
|
"get": series_list_getitem,
|
|
26
26
|
"len": series_list_length,
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
for name, handler in
|
|
29
|
+
for name, handler in list_method_to_handlers.items():
|
|
30
30
|
ListAccessor._register(name, handler)
|
|
31
31
|
|
|
32
32
|
for series in SERIES_TYPE:
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .... import opcodes
|
|
16
|
+
from ....core import OutputType
|
|
17
|
+
from ....serialization.serializables import DictField, StringField, TupleField
|
|
18
|
+
from ....utils import no_default
|
|
19
|
+
from ...operators import DataFrameOperator, DataFrameOperatorMixin
|
|
20
|
+
from ..compat import LegacySeriesMethodOperator
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SeriesListMethod(DataFrameOperator, DataFrameOperatorMixin):
|
|
24
|
+
_op_type_ = opcodes.SERIES_LIST_METHOD
|
|
25
|
+
|
|
26
|
+
method = StringField("method", default=None)
|
|
27
|
+
method_args = TupleField("method_args", default_factory=list)
|
|
28
|
+
method_kwargs = DictField("method_kwargs", default_factory=dict)
|
|
29
|
+
|
|
30
|
+
def __init__(self, output_types=None, **kw):
|
|
31
|
+
output_types = output_types or [OutputType.series]
|
|
32
|
+
kw["_output_types"] = kw.get("_output_types") or output_types
|
|
33
|
+
super().__init__(**kw)
|
|
34
|
+
|
|
35
|
+
def __call__(self, inp, dtype=None, name=no_default):
|
|
36
|
+
dtype = dtype or inp.dtype
|
|
37
|
+
name = inp.name if name is no_default else name
|
|
38
|
+
return self.new_series(
|
|
39
|
+
[inp],
|
|
40
|
+
shape=inp.shape,
|
|
41
|
+
dtype=dtype,
|
|
42
|
+
index_value=inp.index_value,
|
|
43
|
+
name=name,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class LegacySeriesListOperator(LegacySeriesMethodOperator):
|
|
48
|
+
_method_cls = SeriesListMethod
|
|
@@ -15,28 +15,17 @@
|
|
|
15
15
|
import pandas as pd
|
|
16
16
|
|
|
17
17
|
from .... import opcodes
|
|
18
|
-
from ....core.entity.output_types import OutputType
|
|
19
18
|
from ....serialization.serializables.field import AnyField, BoolField
|
|
20
|
-
from
|
|
19
|
+
from .core import LegacySeriesListOperator, SeriesListMethod
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
class SeriesListGetItemOperator(
|
|
22
|
+
class SeriesListGetItemOperator(LegacySeriesListOperator):
|
|
23
|
+
# operator class deprecated since v2.3.0
|
|
24
24
|
_op_type_ = opcodes.SERIES_LIST_GETITEM
|
|
25
|
+
_method_name = "getitem"
|
|
25
26
|
query_index = AnyField("query_index", default=None)
|
|
26
27
|
ignore_index_error = BoolField("ignore_index_error", default=False)
|
|
27
28
|
|
|
28
|
-
def __init__(self, **kw):
|
|
29
|
-
super().__init__(_output_types=[OutputType.series], **kw)
|
|
30
|
-
|
|
31
|
-
def __call__(self, series):
|
|
32
|
-
arrow_list_type = series.dtype.pyarrow_dtype
|
|
33
|
-
return self.new_series(
|
|
34
|
-
[series],
|
|
35
|
-
shape=series.shape,
|
|
36
|
-
dtype=pd.ArrowDtype(arrow_list_type.value_type),
|
|
37
|
-
index_value=series.index_value,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
29
|
|
|
41
30
|
def series_list_getitem(series, query_index):
|
|
42
31
|
"""
|
|
@@ -77,8 +66,10 @@ def series_list_getitem(series, query_index):
|
|
|
77
66
|
3 <NA>
|
|
78
67
|
dtype: int64[pyarrow]
|
|
79
68
|
"""
|
|
80
|
-
|
|
81
|
-
|
|
69
|
+
kw = dict(query_index=query_index, ignore_index_error=True)
|
|
70
|
+
arrow_list_type = series.dtype.pyarrow_dtype
|
|
71
|
+
return SeriesListMethod(method="getitem", method_kwargs=kw)(
|
|
72
|
+
series, dtype=pd.ArrowDtype(arrow_list_type.value_type)
|
|
82
73
|
)
|
|
83
74
|
|
|
84
75
|
|
|
@@ -130,6 +121,8 @@ def series_list_getitem_with_index_error(series, query_index):
|
|
|
130
121
|
3 <NA>
|
|
131
122
|
dtype: int64[pyarrow]
|
|
132
123
|
"""
|
|
133
|
-
|
|
134
|
-
|
|
124
|
+
kw = dict(query_index=query_index, ignore_index_error=False)
|
|
125
|
+
arrow_list_type = series.dtype.pyarrow_dtype
|
|
126
|
+
return SeriesListMethod(method="getitem", method_kwargs=kw)(
|
|
127
|
+
series, dtype=pd.ArrowDtype(arrow_list_type.value_type)
|
|
135
128
|
)
|
|
@@ -16,24 +16,13 @@ import pandas as pd
|
|
|
16
16
|
import pyarrow as pa
|
|
17
17
|
|
|
18
18
|
from .... import opcodes
|
|
19
|
-
from
|
|
20
|
-
from ...operators import DataFrameOperator, DataFrameOperatorMixin
|
|
19
|
+
from .core import LegacySeriesListOperator, SeriesListMethod
|
|
21
20
|
|
|
22
21
|
|
|
23
|
-
class SeriesListLengthOperator(
|
|
22
|
+
class SeriesListLengthOperator(LegacySeriesListOperator):
|
|
23
|
+
# operator class deprecated since v2.3.0
|
|
24
24
|
_op_type_ = opcodes.SERIES_LIST_LENGTH
|
|
25
|
-
|
|
26
|
-
def __init__(self, **kw):
|
|
27
|
-
super().__init__(_output_types=[OutputType.series], **kw)
|
|
28
|
-
|
|
29
|
-
def __call__(self, series):
|
|
30
|
-
return self.new_series(
|
|
31
|
-
[series],
|
|
32
|
-
shape=series.shape,
|
|
33
|
-
index_value=series.index_value,
|
|
34
|
-
dtype=pd.ArrowDtype(pa.int64()),
|
|
35
|
-
name=None,
|
|
36
|
-
)
|
|
25
|
+
_method_name = "len"
|
|
37
26
|
|
|
38
27
|
|
|
39
28
|
def series_list_length(series):
|
|
@@ -70,4 +59,6 @@ def series_list_length(series):
|
|
|
70
59
|
3 <NA>
|
|
71
60
|
dtype: int64[pyarrow]
|
|
72
61
|
"""
|
|
73
|
-
return
|
|
62
|
+
return SeriesListMethod(method="len")(
|
|
63
|
+
series, name=None, dtype=pd.ArrowDtype(pa.int64())
|
|
64
|
+
)
|
|
@@ -20,8 +20,7 @@ import pytest
|
|
|
20
20
|
from ..... import dataframe as md
|
|
21
21
|
from .....lib.dtypes_extension import list_
|
|
22
22
|
from .....utils import ARROW_DTYPE_NOT_SUPPORTED
|
|
23
|
-
from ..
|
|
24
|
-
from ..length import SeriesListLengthOperator
|
|
23
|
+
from ..core import SeriesListMethod
|
|
25
24
|
|
|
26
25
|
pytestmark = pytest.mark.skipif(
|
|
27
26
|
ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported"
|
|
@@ -52,9 +51,10 @@ def test_getitem(df):
|
|
|
52
51
|
assert s1.shape == (1,)
|
|
53
52
|
assert s1.index_value == df.index_value
|
|
54
53
|
op = s1.op
|
|
55
|
-
assert isinstance(op,
|
|
56
|
-
assert op.
|
|
57
|
-
assert op.
|
|
54
|
+
assert isinstance(op, SeriesListMethod)
|
|
55
|
+
assert op.method == "getitem"
|
|
56
|
+
assert op.method_kwargs["query_index"] == 1
|
|
57
|
+
assert op.method_kwargs["ignore_index_error"] is False
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def test_getitem_ignore_index_err(df):
|
|
@@ -64,9 +64,10 @@ def test_getitem_ignore_index_err(df):
|
|
|
64
64
|
assert s1.shape == (1,)
|
|
65
65
|
assert s1.index_value == df.index_value
|
|
66
66
|
op = s1.op
|
|
67
|
-
assert isinstance(op,
|
|
68
|
-
assert op.
|
|
69
|
-
assert op.
|
|
67
|
+
assert isinstance(op, SeriesListMethod)
|
|
68
|
+
assert op.method == "getitem"
|
|
69
|
+
assert op.method_kwargs["query_index"] == 1
|
|
70
|
+
assert op.method_kwargs["ignore_index_error"] is True
|
|
70
71
|
|
|
71
72
|
|
|
72
73
|
def test_length(df):
|
|
@@ -76,4 +77,5 @@ def test_length(df):
|
|
|
76
77
|
assert s1.shape == (1,)
|
|
77
78
|
assert s1.index_value == df.index_value
|
|
78
79
|
op = s1.op
|
|
79
|
-
assert isinstance(op,
|
|
80
|
+
assert isinstance(op, SeriesListMethod)
|
|
81
|
+
assert op.method == "len"
|
|
@@ -12,7 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .core import SeriesStringMethod
|
|
15
|
+
from .core import SeriesStringMethod, string_method_to_handlers
|
|
16
|
+
|
|
17
|
+
methods = set(string_method_to_handlers.keys())
|
|
18
|
+
del string_method_to_handlers
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
def _install():
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _install():
|
|
17
|
+
from ....core import CachedAccessor
|
|
18
|
+
from ...core import SERIES_TYPE
|
|
19
|
+
from .accessor import StructAccessor
|
|
20
|
+
from .dtypes import struct_dtypes
|
|
21
|
+
from .field import struct_field
|
|
22
|
+
|
|
23
|
+
struct_method_to_handlers = {
|
|
24
|
+
"dtypes": struct_dtypes,
|
|
25
|
+
"field": struct_field,
|
|
26
|
+
}
|
|
27
|
+
property_names = {"dtypes"}
|
|
28
|
+
|
|
29
|
+
for name, handler in struct_method_to_handlers.items():
|
|
30
|
+
StructAccessor._register(name, handler, is_property=name in property_names)
|
|
31
|
+
|
|
32
|
+
for series in SERIES_TYPE:
|
|
33
|
+
series.struct = CachedAccessor("struct", StructAccessor)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
_install()
|
|
37
|
+
del _install
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import pyarrow as pa
|
|
19
|
+
|
|
20
|
+
from ....core import BaseMaxFrameAccessor
|
|
21
|
+
from ....utils import ARROW_DTYPE_NOT_SUPPORTED
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from ...core import Series
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class StructAccessor(BaseMaxFrameAccessor):
|
|
28
|
+
obj: "Series"
|
|
29
|
+
_api_count: int = 0
|
|
30
|
+
|
|
31
|
+
def __init__(self, series):
|
|
32
|
+
super().__init__(series)
|
|
33
|
+
if ARROW_DTYPE_NOT_SUPPORTED:
|
|
34
|
+
raise ImportError("pd.ArrowDtype is not supported in current environment")
|
|
35
|
+
|
|
36
|
+
if not isinstance(series.dtype, pd.ArrowDtype) or not isinstance(
|
|
37
|
+
series.dtype.pyarrow_dtype, pa.StructType
|
|
38
|
+
):
|
|
39
|
+
raise AttributeError("Can only use .struct accessor with struct values")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .... import opcodes
|
|
16
|
+
from ....core import OutputType
|
|
17
|
+
from ....serialization.serializables import DictField, StringField, TupleField
|
|
18
|
+
from ....utils import no_default
|
|
19
|
+
from ...operators import DataFrameOperator, DataFrameOperatorMixin
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SeriesStructMethod(DataFrameOperator, DataFrameOperatorMixin):
|
|
23
|
+
_op_type_ = opcodes.SERIES_STRUCT_METHOD
|
|
24
|
+
|
|
25
|
+
method = StringField("method", default=None)
|
|
26
|
+
method_args = TupleField("method_args", default_factory=list)
|
|
27
|
+
method_kwargs = DictField("method_kwargs", default_factory=dict)
|
|
28
|
+
|
|
29
|
+
def __init__(self, output_types=None, **kw):
|
|
30
|
+
output_types = output_types or [OutputType.series]
|
|
31
|
+
kw["_output_types"] = kw.get("_output_types") or output_types
|
|
32
|
+
super().__init__(**kw)
|
|
33
|
+
|
|
34
|
+
def __call__(self, inp, dtype=None, name=no_default):
|
|
35
|
+
dtype = dtype if dtype is not None else inp.dtype
|
|
36
|
+
name = inp.name if name is no_default else name
|
|
37
|
+
return self.new_series(
|
|
38
|
+
[inp],
|
|
39
|
+
shape=inp.shape,
|
|
40
|
+
dtype=dtype,
|
|
41
|
+
index_value=inp.index_value,
|
|
42
|
+
name=name,
|
|
43
|
+
)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ....lib.dtypes_extension import ArrowDtype
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def struct_dtypes(series):
|
|
21
|
+
"""
|
|
22
|
+
Return the dtype object of each child field of the struct.
|
|
23
|
+
|
|
24
|
+
Returns
|
|
25
|
+
-------
|
|
26
|
+
pandas.Series
|
|
27
|
+
The data type of each child field.
|
|
28
|
+
|
|
29
|
+
Examples
|
|
30
|
+
--------
|
|
31
|
+
>>> import maxframe.dataframe as md
|
|
32
|
+
>>> import pandas as pd
|
|
33
|
+
>>> import pyarrow as pa
|
|
34
|
+
>>> s = md.Series(
|
|
35
|
+
... [
|
|
36
|
+
... {"version": 1, "project": "pandas"},
|
|
37
|
+
... {"version": 2, "project": "pandas"},
|
|
38
|
+
... {"version": 1, "project": "numpy"},
|
|
39
|
+
... ],
|
|
40
|
+
... dtype=pd.ArrowDtype(pa.struct(
|
|
41
|
+
... [("version", pa.int64()), ("project", pa.string())]
|
|
42
|
+
... ))
|
|
43
|
+
... )
|
|
44
|
+
>>> s.struct.dtypes.execute()
|
|
45
|
+
version int64[pyarrow]
|
|
46
|
+
project string[pyarrow]
|
|
47
|
+
dtype: object
|
|
48
|
+
"""
|
|
49
|
+
pa_type = series.dtype.pyarrow_dtype
|
|
50
|
+
fields = [pa_type[idx] for idx in range(pa_type.num_fields)]
|
|
51
|
+
dtypes_list = [ArrowDtype(ft.type) for ft in fields]
|
|
52
|
+
dt_name = [ft.name for ft in fields]
|
|
53
|
+
return pd.Series(dtypes_list, index=dt_name)
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ....lib.dtypes_extension import ArrowDtype
|
|
16
|
+
from .core import SeriesStructMethod
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def struct_field(series, name_or_index):
|
|
20
|
+
"""
|
|
21
|
+
Extract a child field of a struct as a Series.
|
|
22
|
+
|
|
23
|
+
Parameters
|
|
24
|
+
----------
|
|
25
|
+
name_or_index : str | bytes | int | expression | list
|
|
26
|
+
Name or index of the child field to extract.
|
|
27
|
+
|
|
28
|
+
For list-like inputs, this will index into a nested
|
|
29
|
+
struct.
|
|
30
|
+
|
|
31
|
+
Returns
|
|
32
|
+
-------
|
|
33
|
+
pandas.Series
|
|
34
|
+
The data corresponding to the selected child field.
|
|
35
|
+
|
|
36
|
+
See Also
|
|
37
|
+
--------
|
|
38
|
+
Series.struct.explode : Return all child fields as a DataFrame.
|
|
39
|
+
|
|
40
|
+
Notes
|
|
41
|
+
-----
|
|
42
|
+
The name of the resulting Series will be set using the following
|
|
43
|
+
rules:
|
|
44
|
+
|
|
45
|
+
- For string, bytes, or integer `name_or_index` (or a list of these, for
|
|
46
|
+
a nested selection), the Series name is set to the selected
|
|
47
|
+
field's name.
|
|
48
|
+
- For a :class:`pyarrow.compute.Expression`, this is set to
|
|
49
|
+
the string form of the expression.
|
|
50
|
+
- For list-like `name_or_index`, the name will be set to the
|
|
51
|
+
name of the final field selected.
|
|
52
|
+
|
|
53
|
+
Examples
|
|
54
|
+
--------
|
|
55
|
+
>>> import maxframe.dataframe as md
|
|
56
|
+
>>> import pandas as pd
|
|
57
|
+
>>> import pyarrow as pa
|
|
58
|
+
>>> s = md.Series(
|
|
59
|
+
... [
|
|
60
|
+
... {"version": 1, "project": "pandas"},
|
|
61
|
+
... {"version": 2, "project": "pandas"},
|
|
62
|
+
... {"version": 1, "project": "numpy"},
|
|
63
|
+
... ],
|
|
64
|
+
... dtype=pd.ArrowDtype(pa.struct(
|
|
65
|
+
... [("version", pa.int64()), ("project", pa.string())]
|
|
66
|
+
... ))
|
|
67
|
+
... )
|
|
68
|
+
|
|
69
|
+
Extract by field name.
|
|
70
|
+
|
|
71
|
+
>>> s.struct.field("project").execute()
|
|
72
|
+
0 pandas
|
|
73
|
+
1 pandas
|
|
74
|
+
2 numpy
|
|
75
|
+
Name: project, dtype: string[pyarrow]
|
|
76
|
+
|
|
77
|
+
Extract by field index.
|
|
78
|
+
|
|
79
|
+
>>> s.struct.field(0).execute()
|
|
80
|
+
0 1
|
|
81
|
+
1 2
|
|
82
|
+
2 1
|
|
83
|
+
Name: version, dtype: int64[pyarrow]
|
|
84
|
+
|
|
85
|
+
For nested struct types, you can pass a list of values to index
|
|
86
|
+
multiple levels:
|
|
87
|
+
|
|
88
|
+
>>> version_type = pa.struct([
|
|
89
|
+
... ("major", pa.int64()),
|
|
90
|
+
... ("minor", pa.int64()),
|
|
91
|
+
... ])
|
|
92
|
+
>>> s = md.Series(
|
|
93
|
+
... [
|
|
94
|
+
... {"version": {"major": 1, "minor": 5}, "project": "pandas"},
|
|
95
|
+
... {"version": {"major": 2, "minor": 1}, "project": "pandas"},
|
|
96
|
+
... {"version": {"major": 1, "minor": 26}, "project": "numpy"},
|
|
97
|
+
... ],
|
|
98
|
+
... dtype=pd.ArrowDtype(pa.struct(
|
|
99
|
+
... [("version", version_type), ("project", pa.string())]
|
|
100
|
+
... ))
|
|
101
|
+
... )
|
|
102
|
+
>>> s.struct.field(["version", "minor"]).execute()
|
|
103
|
+
0 5
|
|
104
|
+
1 1
|
|
105
|
+
2 26
|
|
106
|
+
Name: minor, dtype: int64[pyarrow]
|
|
107
|
+
>>> s.struct.field([0, 0]).execute()
|
|
108
|
+
0 1
|
|
109
|
+
1 2
|
|
110
|
+
2 1
|
|
111
|
+
Name: major, dtype: int64[pyarrow]
|
|
112
|
+
"""
|
|
113
|
+
op = SeriesStructMethod(
|
|
114
|
+
method="field",
|
|
115
|
+
method_kwargs={"name_or_index": name_or_index},
|
|
116
|
+
)
|
|
117
|
+
names = name_or_index if isinstance(name_or_index, list) else [name_or_index]
|
|
118
|
+
arrow_type = series.dtype.pyarrow_dtype
|
|
119
|
+
arrow_name = None
|
|
120
|
+
for n in names:
|
|
121
|
+
arrow_name = arrow_type[n].name
|
|
122
|
+
arrow_type = arrow_type[n].type
|
|
123
|
+
return op(series, dtype=ArrowDtype(arrow_type), name=arrow_name)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|