maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,604 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from datetime import date, datetime, time, timedelta
|
|
17
|
+
from decimal import Decimal
|
|
18
|
+
from typing import Union
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
from pandas import DatetimeTZDtype, Timedelta, Timestamp
|
|
24
|
+
from pandas.api.extensions import (
|
|
25
|
+
ExtensionArray,
|
|
26
|
+
ExtensionDtype,
|
|
27
|
+
register_extension_dtype,
|
|
28
|
+
)
|
|
29
|
+
from pandas.api.types import is_integer, is_scalar
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
from pandas._libs.arrays import NDArrayBacked
|
|
33
|
+
except ImportError:
|
|
34
|
+
NDArrayBacked = type("NDArrayBacked", (object,), {"is_fake": True})
|
|
35
|
+
|
|
36
|
+
from ...lib.version import parse as parse_version
|
|
37
|
+
from ..compat import cached_property
|
|
38
|
+
|
|
39
|
+
"""
|
|
40
|
+
This module is copied from pandas to use in framedriver as it can only run on python3.7,
|
|
41
|
+
which has the highest supported pandas version 1.3.5 without ArrowDtype defined.
|
|
42
|
+
Once the framedriver can run on python3.11, this class can be removed.
|
|
43
|
+
|
|
44
|
+
This module will be removed in the released SDK.
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
# Matches the span from the first "[" or "(" to the last "]" or ")" on a line,
# i.e. a bracketed / parenthesised section of a string.
_dtype_search_re = re.compile(r"[\[\(].*[\]\)]")
# when pyarrow<4.0, it fixes type of generated pandas block as ExtensionBlock
# which needs special handling
_pyarrow_fix_extension_block = parse_version(pa.__version__).major < 4
# True when the installed pandas release is 1.3 or newer.
# NOTE(review): presumably the first release whose StringDtype accepts a
# storage option -- confirm against the code that reads this flag.
_pd_string_with_storage_option = parse_version(pd.__version__).release[:2] >= (1, 3)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class FakeCategoricalDtypeType(type):
    """
    The type of FakeCategoricalDtype; this metaclass determines subclass ability.

    It defines no attributes or methods of its own.
    """
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _unpack_tuple_and_ellipses(item: tuple):
|
|
63
|
+
"""
|
|
64
|
+
Possibly unpack arr[..., n] to arr[n]
|
|
65
|
+
"""
|
|
66
|
+
if len(item) > 1:
|
|
67
|
+
# Note: we are assuming this indexing is being done on a 1D arraylike
|
|
68
|
+
if item[0] is Ellipsis:
|
|
69
|
+
item = item[1:]
|
|
70
|
+
elif item[-1] is Ellipsis:
|
|
71
|
+
item = item[:-1]
|
|
72
|
+
|
|
73
|
+
if len(item) > 1:
|
|
74
|
+
raise IndexError("too many indices for array.")
|
|
75
|
+
|
|
76
|
+
item = item[0]
|
|
77
|
+
return item
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _validate_indices(indices: np.ndarray, n: int) -> None:
|
|
81
|
+
if len(indices):
|
|
82
|
+
min_idx = indices.min()
|
|
83
|
+
if min_idx < -1:
|
|
84
|
+
msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
|
|
85
|
+
raise ValueError(msg)
|
|
86
|
+
|
|
87
|
+
max_idx = indices.max()
|
|
88
|
+
if max_idx >= n:
|
|
89
|
+
raise IndexError("indices are out-of-bounds")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def to_pyarrow_type(dtype):
    """
    Translate ``dtype`` into a pyarrow ``DataType``.

    Handled inputs, checked in this order: ``FakeArrowDtype`` (its
    ``pyarrow_dtype`` is returned), an existing ``pa.DataType`` (returned
    unchanged), ``DatetimeTZDtype`` (a pyarrow timestamp with the same unit
    and timezone), ``pd.StringDtype`` (pyarrow string), and any other truthy
    value, which is passed to ``pa.from_numpy_dtype``.

    Returns ``None`` when ``dtype`` is falsy or when pyarrow raises
    ``ArrowNotImplementedError`` for it.
    """
    if isinstance(dtype, FakeArrowDtype):
        return dtype.pyarrow_dtype
    if isinstance(dtype, pa.DataType):
        return dtype
    if isinstance(dtype, DatetimeTZDtype):
        return pa.timestamp(dtype.unit, dtype.tz)
    if isinstance(dtype, pd.StringDtype):
        return pa.string()
    if not dtype:
        return None

    try:
        # Python types are accepted as well; not every numpy type is supported.
        return pa.from_numpy_dtype(dtype)
    except pa.ArrowNotImplementedError:
        return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class FakeArrowExtensionArray(ExtensionArray, NDArrayBacked):
|
|
115
|
+
"""
|
|
116
|
+
In framedriver, we use arrow array as a bridge between pandas and odps datatypes,
|
|
117
|
+
so we only generate empty array here to simplify the code.
|
|
118
|
+
"""
|
|
119
|
+
|
|
120
|
+
def __init__(self, values: Union[pa.Array, pa.ChunkedArray]) -> None:
    """
    Wrap a pyarrow array.

    A ``pa.Array`` is wrapped into a single-chunk ``pa.ChunkedArray``; a
    ``pa.ChunkedArray`` is stored as given.

    Raises
    ------
    ValueError
        If ``values`` is neither a ``pa.Array`` nor a ``pa.ChunkedArray``.
    """
    if isinstance(values, pa.Array):
        chunked = pa.chunked_array([values])
    elif isinstance(values, pa.ChunkedArray):
        chunked = values
    else:
        raise ValueError(
            f"Unsupported type '{type(values)}' for ArrowExtensionArray"
        )
    self._pa_array = chunked

    if getattr(NDArrayBacked, "is_fake", False):
        # Stand-in NDArrayBacked (pandas internals not importable): keep the
        # dtype on the instance directly.
        self._dtype = FakeArrowDtype(chunked.type)
    else:
        # Real NDArrayBacked: initialise it with an empty ndarray and the dtype.
        NDArrayBacked.__init__(self, np.array([]), FakeArrowDtype(chunked.type))
|
|
135
|
+
|
|
136
|
+
@classmethod
def _from_sequence(
    cls, scalars, dtype=None, copy: bool = False
) -> "FakeArrowExtensionArray":
    """
    Build an array from a sequence of scalars.

    ``scalars`` that already are a ``pa.Array`` / ``pa.ChunkedArray`` are
    wrapped as they are (``dtype`` is not applied to them).  Anything else is
    converted with ``pa.array`` using the pyarrow type derived from ``dtype``.
    ``copy`` is accepted for interface compatibility and is not used.
    """
    pa_type = to_pyarrow_type(dtype)
    if isinstance(scalars, (pa.Array, pa.ChunkedArray)):
        return cls(scalars)

    # pyarrow raises on empty numpy arrays, so hand it a plain empty list.
    values = [] if len(scalars) == 0 else scalars
    return cls(pa.array(values, type=pa_type))
|
|
153
|
+
|
|
154
|
+
@classmethod
def _from_sequence_of_strings(
    cls, strings, dtype=None, copy: bool = False
) -> "FakeArrowExtensionArray":
    """
    Build an array from a sequence of strings.

    This simply forwards all arguments to ``_from_sequence``.
    """
    built = cls._from_sequence(strings, dtype, copy)
    return built
|
|
159
|
+
|
|
160
|
+
@property
def dtype(self) -> ExtensionDtype:
    """
    The extension dtype stored on this array (``ExtensionArray.dtype``).
    """
    stored_dtype = self._dtype
    return stored_dtype
|
|
166
|
+
|
|
167
|
+
@property
def shape(self):
    """
    One-element tuple holding the length of the wrapped pyarrow array.
    """
    n_rows = self._pa_array.length()
    return (n_rows,)
|
|
170
|
+
|
|
171
|
+
@property
def nbytes(self) -> int:
    """
    Byte size reported by the wrapped pyarrow array (its ``nbytes``).
    """
    size_in_bytes = self._pa_array.nbytes
    return size_in_bytes
|
|
177
|
+
|
|
178
|
+
def __len__(self) -> int:
    """
    Return the number of elements in the wrapped pyarrow array.
    """
    wrapped = self._pa_array
    return len(wrapped)
|
|
183
|
+
|
|
184
|
+
def _from_pyarrow_array(self, pa_array):
    """
    Wrap ``pa_array`` in a new instance of this array's own class.
    """
    array_cls = type(self)
    return array_cls(pa_array)
|
|
186
|
+
|
|
187
|
+
def __getitem__(self, item):
    """
    Select a single element or a sub-array.

    Parameters
    ----------
    item : int, slice, Ellipsis, tuple or numpy.ndarray
        * integer ndarray -> delegated to ``take``
        * boolean ndarray -> used as a pyarrow filter mask
        * empty ndarray   -> an empty array of the same pyarrow type
        * tuple           -> reduced with ``_unpack_tuple_and_ellipses``
        * Ellipsis        -> treated as ``slice(None)``
        * int / slice     -> dispatched to the pyarrow chunked array

    Returns
    -------
    A new array of the same class for array-like selections, otherwise a
    scalar: ``self._dtype.na_value`` for nulls, a ``Timestamp`` /
    ``Timedelta`` for non-"ns" timestamp / duration types, and the plain
    ``as_py()`` value in every other case.

    Raises
    ------
    IndexError
        For ndarray indexers of an unsupported kind and for non-integer
        scalars such as ``"foo"`` or ``2.5``.
    """

    def _to_unit(value, unit):
        # ``Timestamp.as_unit`` / ``Timedelta.as_unit`` only exist in
        # pandas>=2.0.  This module is meant to run with pandas 1.3.5 as
        # well, where calling them raised AttributeError; older pandas has
        # no other resolution to convert to, so the value is returned as is.
        convert = getattr(value, "as_unit", None)
        return value if convert is None else convert(unit)

    # code from ArrowExtensionArray in pandas>=1.5
    if isinstance(item, np.ndarray):
        if not len(item):
            pa_dtype = self._dtype.pyarrow_dtype
            result = pa.chunked_array([], type=pa_dtype)
            return self._from_pyarrow_array(result)
        elif item.dtype.kind in "iu":
            return self.take(item)
        elif item.dtype.kind == "b":
            return self._from_pyarrow_array(self._pa_array.filter(item))
        else:
            raise IndexError(
                "Only integers, slices and integer or "
                "boolean arrays are valid indices."
            )
    elif isinstance(item, tuple):
        item = _unpack_tuple_and_ellipses(item)

    if item is Ellipsis:
        # TODO: should be handled by pyarrow?
        item = slice(None)

    if is_scalar(item) and not is_integer(item):
        # e.g. "foo" or 2.5
        # exception message copied from numpy
        raise IndexError(
            r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
            r"(`None`) and integer or boolean arrays are valid indices"
        )
    # We are not an array indexer, so maybe e.g. a slice or integer
    # indexer. We dispatch to pyarrow.
    if isinstance(item, slice):
        # Arrow bug https://github.com/apache/arrow/issues/38768
        if item.start == item.stop:
            pass
        elif (
            item.stop is not None
            and item.stop < -len(self)
            and item.step is not None
            and item.step < 0
        ):
            # A negative-step slice whose stop lies before the start of the
            # array is rewritten with an open stop.
            item = slice(item.start, None, item.step)

    value = self._pa_array[item]
    if isinstance(value, pa.ChunkedArray):
        return self._from_pyarrow_array(value)

    pa_type = self._pa_array.type
    scalar = value.as_py()
    if scalar is None:
        return self._dtype.na_value
    elif pa.types.is_timestamp(pa_type) and pa_type.unit != "ns":
        # GH 53326
        return _to_unit(Timestamp(scalar), pa_type.unit)
    elif pa.types.is_duration(pa_type) and pa_type.unit != "ns":
        # GH 53326
        return _to_unit(Timedelta(scalar), pa_type.unit)
    else:
        return scalar
|
|
246
|
+
|
|
247
|
+
def __arrow_array__(self, type=None):
    """
    Return the wrapped pyarrow ChunkedArray.

    The ``type`` argument is accepted but not used.
    """
    chunked = self._pa_array
    return chunked
|
|
250
|
+
|
|
251
|
+
def copy(self) -> "FakeArrowExtensionArray":
    """
    Return a new wrapper around the same pyarrow ChunkedArray.

    The underlying pyarrow object is passed on as is, not duplicated.
    """
    shared = self._pa_array
    return self._from_pyarrow_array(shared)
|
|
253
|
+
|
|
254
|
+
def isna(self) -> np.ndarray:
    """
    Return a boolean ndarray that is True where the element is null.

    When the array holds no nulls, or only nulls, the mask is built directly
    with numpy; otherwise pyarrow's ``is_null`` result is converted to numpy.
    """
    # code from ArrowExtensionArray in pandas>=1.5
    n_null = self._pa_array.null_count
    size = len(self)
    if n_null == 0:
        return np.zeros(size, dtype=np.bool_)
    if n_null == size:
        return np.ones(size, dtype=np.bool_)
    return self._pa_array.is_null().to_numpy()
|
|
263
|
+
|
|
264
|
+
def take(self, indices, allow_fill=False, fill_value=None):
    """
    Select elements by position and return them as a new array.

    Parameters
    ----------
    indices : sequence of int
        Positions to take.
    allow_fill : bool, default False
        When False, negative positions count from the end of the array.
        When True, negative positions mark slots that are filled with
        ``fill_value`` (they are validated with ``_validate_indices``).
    fill_value : any, optional
        Value written into the filled slots. When ``pd.isna(fill_value)``
        is true the slots are left as nulls.

    Raises
    ------
    IndexError
        If the array is empty and a non-negative position is requested,
        or if any position is greater than or equal to the array length.
    """
    # code from ArrowExtensionArray in pandas>=1.5
    positions = np.asanyarray(indices)
    length = len(self._pa_array)

    if length == 0 and (positions >= 0).any():
        raise IndexError("cannot do a non-empty take")
    if positions.size > 0 and positions.max() >= length:
        raise IndexError("out of bounds value in 'indices'.")

    if not allow_fill:
        # TODO(ARROW-9432): Treat negative indices as indices from the right.
        negative = positions < 0
        if negative.any():
            # Don't modify in-place
            positions = np.copy(positions)
            positions[negative] += length
        return self._from_pyarrow_array(self._pa_array.take(positions))

    fill_mask = positions < 0
    if not fill_mask.any():
        # Nothing to fill
        return self._from_pyarrow_array(self._pa_array.take(indices))

    _validate_indices(positions, length)
    # TODO(ARROW-9433): Treat negative indices as NULL
    masked_positions = pa.array(positions, mask=fill_mask)
    taken = self._from_pyarrow_array(self._pa_array.take(masked_positions))
    if pd.isna(fill_value):
        return taken
    # TODO: ArrowNotImplementedError: Function fill_null has no
    # kernel matching input types (array[string], scalar[string]),
    # so the fill value is assigned through __setitem__ instead.
    taken[fill_mask] = fill_value
    return taken
|
|
298
|
+
|
|
299
|
+
@classmethod
def _concat_same_type(cls, to_concat):
    """
    Concatenate several arrays of this type into a single array.

    All pyarrow chunks of the inputs are gathered in order and wrapped in
    one chunked array. The arrow type is taken from the first input's
    dtype, except that a dtype equal to "string" maps to
    ``pa.large_string()``.
    """
    # code from ArrowExtensionArray in pandas>=1.5
    chunks = []
    for ea in to_concat:
        chunks.extend(ea._pa_array.iterchunks())

    first = to_concat[0]
    if first.dtype == "string":
        # StringDtype has no attribute pyarrow_dtype
        arrow_type = pa.large_string()
    else:
        arrow_type = first.dtype.pyarrow_dtype

    combined = pa.chunked_array(chunks, type=arrow_type)
    return first._from_pyarrow_array(combined)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
@register_extension_dtype
class FakeArrowDtype(ExtensionDtype):
    """
    Extension dtype that wraps a pyarrow data type.

    The wrapped type is stored in ``pyarrow_dtype`` and the dtype name is
    ``f"{pyarrow_dtype}[pyarrow]"``.
    """

    def __new__(cls, pyarrow_dtype):
        # TODO: here we avoid returning FakeDatetimeTZArrowDtype to make
        #  the behavior of timestamp consistent with other types when
        #  ExtensionDtype is fixed on pyarrow. Remove `_pyarrow_fix_extension_block`
        #  condition once we can enforce pyarrow>=4.0
        if (
            not _pyarrow_fix_extension_block
            and cls is FakeArrowDtype
            and pa.types.is_timestamp(pyarrow_dtype)
            and pyarrow_dtype.unit == "ns"
        ):
            # Need special logic for DatetimeTZDtype
            return FakeDatetimeTZArrowDtype(pyarrow_dtype)
        elif _pd_string_with_storage_option and pyarrow_dtype == pa.string():
            # Use builtin StringDtype with arrow support to
            # avoid compatibility issues
            return pd.StringDtype(storage="pyarrow")
        return object.__new__(cls)

    def __init__(self, pyarrow_dtype):
        super().__init__()
        self.pyarrow_dtype = pyarrow_dtype

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return FakeArrowExtensionArray

    @classmethod
    def construct_from_string(cls, string: str):
        """
        Construct this type from a string.

        Parameters
        ----------
        string : str
            string should follow the format f"{pyarrow_type}[pyarrow]"
            e.g. int64[pyarrow]

        Raises
        ------
        TypeError
            If ``string`` is not a str, does not end with "[pyarrow]", is
            exactly "string[pyarrow]", or names an unknown pyarrow type.
        NotImplementedError
            If the string carries type parameters that cannot be parsed.
        """
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        if not string.endswith("[pyarrow]"):
            raise TypeError(f"'{string}' must end with '[pyarrow]'")
        if string == "string[pyarrow]":
            # Ensure Registry.find skips ArrowDtype to use StringDtype instead
            raise TypeError("string[pyarrow] should be constructed by StringDtype")

        base_type = string[:-9]  # get rid of "[pyarrow]"
        try:
            pa_dtype = pa.type_for_alias(base_type)
        except ValueError as err:
            has_parameters = _dtype_search_re.search(base_type)
            if has_parameters:
                # Fallback to try common temporal types
                try:
                    return cls._parse_temporal_dtype_string(base_type)
                except (NotImplementedError, ValueError):
                    # Fall through to raise with nice exception message below
                    pass

                raise NotImplementedError(
                    "Passing pyarrow type specific parameters "
                    f"({has_parameters.group()}) in the string is not supported. "
                    "Please construct an ArrowDtype object with a pyarrow_dtype "
                    "instance with specific parameters."
                ) from err
            raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
        return cls(pa_dtype)

    @classmethod
    def _parse_temporal_dtype_string(cls, string: str):
        """
        Construct a temporal dtype from a parameterized type string.

        ``string`` is the type name with the trailing "[pyarrow]" already
        removed, e.g. "timestamp[s, tz=UTC]". Only timestamps carrying a
        unit and a timezone are handled.

        Raises
        ------
        ValueError
            If the bracketed parameter section is malformed.
        NotImplementedError
            If the string is not a supported temporal type.
        """
        # Unpacking fails with ValueError when "[" is absent; the caller
        # treats that the same as any other unparsable string.
        head, tail = string.split("[", 1)
        if not tail.endswith("]"):
            raise ValueError(string)
        tail = tail[:-1]

        # A timestamp without "," is a plain alias that pa.type_for_alias
        # already covers, so only the "unit, tz" form is parsed here.
        if head == "timestamp" and "," in tail:
            unit, tz = (part.strip() for part in tail.split(",", 1))
            if tz.startswith("tz="):
                tz = tz[3:]
            return cls(pa.timestamp(unit, tz=tz))

        raise NotImplementedError(string)

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.
        """
        # TODO: pa.types.is_boolean?
        return (
            pa.types.is_integer(self.pyarrow_dtype)
            or pa.types.is_floating(self.pyarrow_dtype)
            or pa.types.is_decimal(self.pyarrow_dtype)
        )

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.
        """
        return pa.types.is_boolean(self.pyarrow_dtype)

    def _get_common_dtype(self, dtypes):
        """
        Return the dtype the given dtypes can be combined into, or None.

        None is returned when the common type is not a numpy dtype or when
        pyarrow cannot represent it.
        """
        # We unwrap any masked dtypes, find the common dtype we would use
        #  for that, then re-mask the result.
        # Mirrors BaseMaskedDtype
        from pandas.core.dtypes.cast import find_common_type

        null_dtype = type(self)(pa.null())

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, FakeArrowDtype) else dtype
                for dtype in dtypes
                if dtype != null_dtype
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            return None
        try:
            pa_dtype = pa.from_numpy_dtype(new_dtype)
            return type(self)(pa_dtype)
        except NotImplementedError:
            return None

    @property
    def type(self):
        """
        Returns associated scalar type.
        """
        pa_type = self.pyarrow_dtype
        if pa.types.is_integer(pa_type):
            return int
        elif pa.types.is_floating(pa_type):
            return float
        elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
            return str
        elif (
            pa.types.is_binary(pa_type)
            or pa.types.is_fixed_size_binary(pa_type)
            or pa.types.is_large_binary(pa_type)
        ):
            return bytes
        elif pa.types.is_boolean(pa_type):
            return bool
        elif pa.types.is_duration(pa_type):
            if pa_type.unit == "ns":
                return Timedelta
            else:
                return timedelta
        elif pa.types.is_timestamp(pa_type):
            # TODO: here we avoid returning Timestamp when ExtensionDtype
            #  is fixed on pyarrow. Remove `_pyarrow_fix_extension_block`
            #  condition once we can enforce pyarrow>=4.0
            if not _pyarrow_fix_extension_block and pa_type.unit == "ns":
                return Timestamp
            else:
                return datetime
        elif pa.types.is_date(pa_type):
            return date
        elif pa.types.is_time(pa_type):
            return time
        elif pa.types.is_decimal(pa_type):
            return Decimal
        elif pa.types.is_dictionary(pa_type):
            # TODO: Potentially change this & CategoricalDtype.type to
            #  something more representative of the scalar
            return FakeCategoricalDtypeType
        elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
            return list
        elif pa.types.is_fixed_size_list(pa_type):
            return list
        elif pa.types.is_map(pa_type):
            return list
        elif pa.types.is_struct(pa_type):
            return dict
        elif pa.types.is_null(pa_type):
            # TODO: None? pd.NA? pa.null?
            return type(pa_type)
        elif isinstance(pa_type, pa.ExtensionType):
            # Resolve through the storage type of the arrow extension type
            return type(self)(pa_type.storage_type).type
        raise NotImplementedError(pa_type)

    @property
    def name(self) -> str:
        """
        A string identifying the data type.
        """
        return f"{str(self.pyarrow_dtype)}[pyarrow]"

    @cached_property
    def numpy_dtype(self) -> np.dtype:
        """Return an instance of the related numpy dtype"""
        if pa.types.is_timestamp(self.pyarrow_dtype):
            # pa.timestamp(unit).to_pandas_dtype() returns ns units
            # regardless of the pyarrow timestamp units.
            # This can be removed if/when pyarrow addresses it:
            # https://github.com/apache/arrow/issues/34462
            return np.dtype(f"datetime64[{self.pyarrow_dtype.unit}]")
        if pa.types.is_duration(self.pyarrow_dtype):
            # pa.duration(unit).to_pandas_dtype() returns ns units
            # regardless of the pyarrow duration units
            # This can be removed if/when pyarrow addresses it:
            # https://github.com/apache/arrow/issues/34462
            return np.dtype(f"timedelta64[{self.pyarrow_dtype.unit}]")
        if pa.types.is_string(self.pyarrow_dtype) or pa.types.is_large_string(
            self.pyarrow_dtype
        ):
            # pa.string().to_pandas_dtype() = object which we don't want
            return np.dtype(str)
        try:
            return np.dtype(self.pyarrow_dtype.to_pandas_dtype())
        except (NotImplementedError, TypeError):
            return np.dtype(object)

    @cached_property
    def kind(self) -> str:
        """Return the numpy kind character; timestamps always report "M"."""
        if pa.types.is_timestamp(self.pyarrow_dtype):
            # To mirror DatetimeTZDtype
            return "M"
        return self.numpy_dtype.kind

    @cached_property
    def itemsize(self) -> int:
        """Return the number of bytes in this dtype"""
        return self.numpy_dtype.itemsize

    def __eq__(self, other: object) -> bool:
        # Objects of another class are compared by the base implementation;
        # same-class objects compare by their wrapped pyarrow type.
        if not isinstance(other, type(self)):
            return super().__eq__(other)
        return self.pyarrow_dtype == other.pyarrow_dtype

    def __hash__(self) -> int:
        # make myself hashable
        return hash(str(self))

    def __from_arrow__(self, array):
        """Cast ``array`` to this dtype's pyarrow type and wrap it."""
        array_class = self.construct_array_type()
        arr = array.cast(self.pyarrow_dtype, safe=True)
        return array_class(arr)
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
class FakeDatetimeTZExtensionArray(FakeArrowExtensionArray):
    """
    Workaround array class for DatetimeTZDtype in pandas when
    arrow type is timestamp[ns]

    Besides the normal 1-D form, the array can present itself as a 2-D
    array of shape ``(1, n)`` after ``reshape(1, -1)``; the dimensionality
    is tracked in ``_ndim``.
    """

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        # Arrays always start out one-dimensional
        self._ndim = 1

    @property
    def shape(self):
        """Return ``(n,)`` in 1-D form and ``(1, n)`` in 2-D form."""
        return (
            (self._pa_array.length(),)
            if self._ndim == 1
            else (1, self._pa_array.length())
        )

    @property
    def ndim(self) -> int:
        """Return the current number of dimensions (1 or 2)."""
        return self._ndim

    def __len__(self) -> int:
        # In 2-D form the length is the size of the first axis
        return len(self._pa_array) if self._ndim == 1 else self.shape[0]

    def __getitem__(self, item):
        # Selecting row 0 of the 2-D form yields an array rebuilt from the
        # same pyarrow data; everything else is delegated to the parent.
        # NOTE(review): `item == 0` is evaluated for every indexer type;
        # confirm array-like indexers never reach a 2-D instance.
        if self._ndim == 2 and item == 0:
            return self._from_pyarrow_array(self._pa_array)
        return super().__getitem__(item)

    def reshape(self, *args, **kwargs):
        """
        Return a 2-D view-like array of shape ``(1, -1)``.

        Both ``reshape(1, -1)`` and the tuple form ``reshape((1, -1))`` are
        accepted. Keyword arguments are accepted but not used.

        Raises
        ------
        ValueError
            If any other target shape is requested.
        """
        # Accept the shape passed as one sequence as well as unpacked
        if len(args) == 1 and isinstance(args[0], (tuple, list)):
            target = tuple(args[0])
        else:
            target = args
        if target != (1, -1):
            raise ValueError("Only support reshape to (1, -1)")
        new_arr = self._from_pyarrow_array(self._pa_array)
        new_arr._ndim = 2
        return new_arr
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
class FakeDatetimeTZArrowDtype(DatetimeTZDtype, FakeArrowDtype):
|
|
589
|
+
"""
|
|
590
|
+
Workaround dtype class for DatetimeTZDtype in pandas when
|
|
591
|
+
arrow type is timestamp[ns]
|
|
592
|
+
"""
|
|
593
|
+
|
|
594
|
+
def __init__(self, pyarrow_type):
    """
    Initialise both parent dtypes from a pyarrow timestamp type.

    The timezone comes from ``pyarrow_type.tz``; when that is falsy,
    ``options.local_timezone`` is used instead.
    """
    from ... import options

    FakeArrowDtype.__init__(self, pyarrow_type)

    timezone = pyarrow_type.tz or options.local_timezone
    DatetimeTZDtype.__init__(self, unit=pyarrow_type.unit, tz=timezone)
|
|
601
|
+
|
|
602
|
+
@classmethod
def construct_array_type(cls):
    """
    Return the array type associated with this dtype.

    Returns
    -------
    type
    """
    return FakeDatetimeTZExtensionArray
|