maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import abc
|
|
16
|
+
import contextlib
|
|
17
|
+
import io
|
|
18
|
+
from typing import Any, ContextManager, List, Optional, Sequence, Union
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import pyarrow as pa
|
|
23
|
+
from pandas.api.extensions import (
|
|
24
|
+
ExtensionArray,
|
|
25
|
+
ExtensionDtype,
|
|
26
|
+
register_extension_dtype,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
from ...utils import tokenize
|
|
30
|
+
from .dtypes import ArrowDtype
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ArrowBlobType(pa.ExtensionType):
    """Arrow extension type named ``maxframe.blob`` stored as plain binary."""

    def __init__(self):
        # Storage type is binary; the second argument is the extension name.
        super().__init__(pa.binary(), "maxframe.blob")

    def __arrow_ext_serialize__(self):
        """Return the serialized parameters of the type (there are none)."""
        return b""

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """Rebuild the type; ``storage_type`` and ``serialized`` are unused."""
        return ArrowBlobType()

    def __eq__(self, other):
        # Every instance of this type is considered equal to every other.
        return isinstance(other, ArrowBlobType)

    def __hash__(self):
        return hash(str(self))

    def to_pandas_dtype(self):
        """Return the pandas extension dtype paired with this Arrow type."""
        return ExternalBlobDtype()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class AbstractExternalBlob(metaclass=abc.ABCMeta):
    """
    Abstract base of blob objects identified by a ``reference`` value.

    Subclasses must implement :meth:`open` and :meth:`copy`, and are expected
    to override :attr:`reference` (the default implementation raises
    ``NotImplementedError``). Equality and hashing are both derived from
    ``reference`` only.
    """

    # Registry shared by all subclasses: lower-cased class name -> class.
    _blob_types = {}

    @property
    def reference(self) -> Any:
        """Value identifying the blob; must be provided by subclasses."""
        raise NotImplementedError

    def __eq__(self, other) -> bool:
        if not isinstance(other, AbstractExternalBlob):
            return False
        return self.reference == other.reference

    def __hash__(self):
        # __eq__ ignores the concrete class, so the hash must ignore it too;
        # otherwise equal blobs of different subclasses would hash differently.
        return hash(self.reference)

    def __repr__(self):
        return f"<{type(self).__name__} reference={self.reference}>"

    @abc.abstractmethod
    def open(self, mode: str = "r") -> ContextManager[io.IOBase]:
        """Return a context manager yielding a file-like object for the blob."""
        raise NotImplementedError

    @abc.abstractmethod
    def copy(self):
        """Return a copy of the blob object."""
        raise NotImplementedError

    def __copy__(self):
        # Make ``copy.copy(blob)`` delegate to the subclass-specific copy().
        return self.copy()

    @classmethod
    def register(cls):
        """Record ``cls`` in the shared registry under its lower-cased name."""
        cls._blob_types[cls.__name__.lower()] = cls

    @classmethod
    def get_cls_by_name(cls, cls_name: str):
        """
        Look up a registered blob class by name, ignoring case.

        Raises
        ------
        KeyError
            If no class was registered under ``cls_name``.
        """
        return cls._blob_types[cls_name.lower()]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class SolidBlob(AbstractExternalBlob):
    """Blob whose reference is the content itself, held in memory."""

    def __init__(self, content: Optional[bytes] = None):
        self._reference = content

    @property
    def reference(self) -> bytes:
        """The stored content."""
        return self._reference

    def __reduce__(self):
        # Pickle as "call the concrete class with the stored content".
        return type(self), (self._reference,)

    @contextlib.contextmanager
    def open(self, mode: str = "r"):
        """
        Yield an in-memory buffer for reading or writing the content.

        When ``mode`` contains ``"w"`` the buffer starts empty and its value
        replaces the stored content once the ``with`` body finishes without
        raising; otherwise the buffer is initialised from the stored content.
        """
        writing = "w" in mode
        buffer = io.BytesIO() if writing else io.BytesIO(self._reference)
        yield buffer
        # Only reached when the with-body did not raise, so a failed write
        # leaves the previous content untouched.
        if writing:
            self._reference = buffer.getvalue()

    def copy(self) -> "SolidBlob":
        """Return a new ``SolidBlob`` referring to the same content object."""
        return SolidBlob(self._reference)

    def __maxframe_tokenize__(self):
        return type(self), self._reference


SolidBlob.register()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def new_blob(content: Optional[bytes] = None):
    """
    Create a blob object holding ``content``.

    TODO: this currently always builds a ``SolidBlob``; it should provide an
    appropriate instance based on the running environment.
    """
    blob = SolidBlob(content)
    return blob
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def blob_from_reference(reference: Optional[bytes] = None):
    """
    Create a blob object from an existing ``reference`` value.

    TODO: this currently always builds a ``SolidBlob``; it should provide an
    appropriate instance based on the running environment.
    """
    blob = SolidBlob(reference)
    return blob
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@register_extension_dtype
class ExternalBlobDtype(ExtensionDtype):
    """
    pandas extension dtype for ExternalBlob data, registered as ``"blob"``.
    """

    name = "blob"

    @classmethod
    def construct_array_type(cls):
        """Return the extension array class that stores values of this dtype."""
        return ExternalBlobExtensionArray

    @classmethod
    def construct_from_string(cls, string):
        """
        Build the dtype from its string name.

        Raises
        ------
        TypeError
            If ``string`` does not equal the dtype name.
        """
        if string == cls.name:
            return cls()
        raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")

    @property
    def type(self):
        """Scalar type of the array elements."""
        return AbstractExternalBlob

    def __repr__(self) -> str:
        return self.name

    def __from_arrow__(self, array):
        """Convert an Arrow array into an ``ExternalBlobExtensionArray``."""
        blobs = []
        for ref in array.to_pylist():
            # Every element, including nulls, is wrapped into a blob object.
            blobs.append(blob_from_reference(ref))
        return ExternalBlobExtensionArray(blobs)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class ExternalBlobExtensionArray(ExtensionArray):
|
|
175
|
+
"""
|
|
176
|
+
Extension array for storing AbstractExternalBlob objects.
|
|
177
|
+
"""
|
|
178
|
+
|
|
179
|
+
_data: List[AbstractExternalBlob]
|
|
180
|
+
|
|
181
|
+
def __init__(self, values: Union[List[AbstractExternalBlob], np.ndarray]):
|
|
182
|
+
if not isinstance(values, (list, np.ndarray)):
|
|
183
|
+
raise TypeError("values must be a list or numpy array")
|
|
184
|
+
|
|
185
|
+
if isinstance(values, np.ndarray):
|
|
186
|
+
if values.ndim != 1:
|
|
187
|
+
raise ValueError("values must be a 1-dimensional array")
|
|
188
|
+
values = values.tolist()
|
|
189
|
+
|
|
190
|
+
new_values = [None] * len(values)
|
|
191
|
+
for idx, val in enumerate(values):
|
|
192
|
+
if isinstance(val, str):
|
|
193
|
+
val = val.encode()
|
|
194
|
+
if isinstance(val, (bytes, bytearray)):
|
|
195
|
+
val = new_blob(val)
|
|
196
|
+
|
|
197
|
+
if pd.isna(val):
|
|
198
|
+
val = None
|
|
199
|
+
if val is not None and not isinstance(val, AbstractExternalBlob):
|
|
200
|
+
raise TypeError(
|
|
201
|
+
"All values must be instances of AbstractExternalBlob or None"
|
|
202
|
+
)
|
|
203
|
+
else:
|
|
204
|
+
new_values[idx] = val
|
|
205
|
+
|
|
206
|
+
# Store with python list instead of numpy array
|
|
207
|
+
self._data = new_values
|
|
208
|
+
self._dtype = ExternalBlobDtype()
|
|
209
|
+
|
|
210
|
+
@classmethod
|
|
211
|
+
def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
|
|
212
|
+
"""
|
|
213
|
+
Construct a new ExtensionArray from a sequence of scalars.
|
|
214
|
+
"""
|
|
215
|
+
return cls(scalars)
|
|
216
|
+
|
|
217
|
+
@classmethod
|
|
218
|
+
def _from_factorized(cls, values, original):
|
|
219
|
+
"""
|
|
220
|
+
Reconstruct an ExtensionArray after factorization.
|
|
221
|
+
"""
|
|
222
|
+
return cls(values)
|
|
223
|
+
|
|
224
|
+
@property
|
|
225
|
+
def dtype(self) -> ExtensionDtype:
|
|
226
|
+
"""Return the dtype of the array."""
|
|
227
|
+
return self._dtype
|
|
228
|
+
|
|
229
|
+
def __len__(self) -> int:
|
|
230
|
+
"""Return length of the array."""
|
|
231
|
+
return len(self._data)
|
|
232
|
+
|
|
233
|
+
def __getitem__(self, item):
|
|
234
|
+
"""Select a subset of self."""
|
|
235
|
+
if isinstance(item, int):
|
|
236
|
+
return self._data[item]
|
|
237
|
+
else:
|
|
238
|
+
# For slices and other index types, return a new array with the selected items
|
|
239
|
+
return type(self)([self._data[i] for i in np.arange(len(self._data))[item]])
|
|
240
|
+
|
|
241
|
+
def __maxframe_tokenize__(self):
|
|
242
|
+
return type(self), tokenize(*self._data)
|
|
243
|
+
|
|
244
|
+
def isna(self):
|
|
245
|
+
"""A 1-D array indicating if each value is missing."""
|
|
246
|
+
return np.array([x is None for x in self._data])
|
|
247
|
+
|
|
248
|
+
def copy(self):
|
|
249
|
+
"""Return a copy of the array."""
|
|
250
|
+
return type(self)(self._data.copy())
|
|
251
|
+
|
|
252
|
+
@property
|
|
253
|
+
def nbytes(self) -> int:
|
|
254
|
+
"""The number of bytes needed to store this object in memory."""
|
|
255
|
+
# Since we're storing references, we'll return a minimal size estimate
|
|
256
|
+
return len(self._data) * 8
|
|
257
|
+
|
|
258
|
+
def take(self, indices, allow_fill=False, fill_value=None):
|
|
259
|
+
"""
|
|
260
|
+
Take elements from an array.
|
|
261
|
+
|
|
262
|
+
Parameters
|
|
263
|
+
----------
|
|
264
|
+
indices : sequence of int
|
|
265
|
+
Indices to be taken.
|
|
266
|
+
allow_fill : bool, default False
|
|
267
|
+
How to handle negative values in `indices`.
|
|
268
|
+
fill_value : any, optional
|
|
269
|
+
Fill value to use for NA-indices when `allow_fill` is True.
|
|
270
|
+
"""
|
|
271
|
+
if allow_fill:
|
|
272
|
+
# Handle negative indices
|
|
273
|
+
values = []
|
|
274
|
+
for i in indices:
|
|
275
|
+
if i < 0:
|
|
276
|
+
if not allow_fill:
|
|
277
|
+
raise IndexError("negative index not allowed")
|
|
278
|
+
values.append(fill_value)
|
|
279
|
+
else:
|
|
280
|
+
values.append(self._data[i])
|
|
281
|
+
return type(self)(values)
|
|
282
|
+
else:
|
|
283
|
+
return type(self)([self._data[i] for i in indices])
|
|
284
|
+
|
|
285
|
+
@classmethod
def _concat_same_type(
    cls, to_concat: Sequence["ExternalBlobExtensionArray"]
) -> "ExternalBlobExtensionArray":
    """Join several arrays of this type, in order, into one new array."""
    merged = []
    for part in to_concat:
        # Append this array's elements after those already collected.
        merged.extend(part._data)
    return cls(merged)
|
|
290
|
+
|
|
291
|
+
def __arrow_array__(self, type=None):
    """
    Convert this array into a pyarrow extension array.

    Parameters
    ----------
    type : optional
        Accepted but not used; the result always has ``ArrowBlobType``.

    Returns
    -------
    pyarrow.ExtensionArray
        Extension array of ``ArrowBlobType`` whose binary storage holds
        each element's ``reference``.
    """
    blob_type = ArrowBlobType()
    # Missing entries are stored as None and have no ``reference``;
    # pass them through so they become Arrow nulls instead of raising.
    references = [None if x is None else x.reference for x in self._data]
    return pa.ExtensionArray.from_storage(
        blob_type,
        pa.array(references, type=pa.binary()),
    )
|
|
296
|
+
|
|
297
|
+
def astype(self, dtype, copy=True):
    """
    Cast to another dtype.

    A cast to an ``ArrowDtype`` wrapping ``pa.binary()`` opens every blob
    for reading and collects its content. Any other target dtype is
    delegated to the parent class.

    Parameters
    ----------
    dtype : dtype
        Target dtype.
    copy : bool, default True
        Forwarded to the parent implementation. The binary conversion
        always builds a new array, so it does not consult this flag.

    Returns
    -------
    array
        ``pd.array`` of the blob contents for the binary target, otherwise
        whatever the parent ``astype`` returns.
    """
    if isinstance(dtype, ArrowDtype) and dtype.pyarrow_dtype == pa.binary():
        blobs = [None] * len(self)
        for i, blob in enumerate(self._data):
            # Missing entries are stored as None and cannot be opened;
            # keep the pre-filled None so they stay null in the result.
            if blob is None:
                continue
            with blob.open("rb") as reader:
                blobs[i] = reader.read()
        return pd.array(blobs, dtype=dtype)
    return super().astype(dtype, copy=copy)
|
|
@@ -46,6 +46,25 @@ def list_(value_type: Union[pa.DataType, pa.Field]):
|
|
|
46
46
|
return pd.ArrowDtype(pa.list_(value_type))
|
|
47
47
|
|
|
48
48
|
|
|
49
|
+
def struct_(fields: Union[pa.Schema, list]):
    """
    Build a ``pd.ArrowDtype`` that wraps a pyarrow struct type.

    Parameters
    ----------
    fields : pyarrow.Schema or list of pyarrow.Field
        Field definitions handed to ``pa.struct``.

    Returns
    -------
    ArrowDtype
        An ArrowDtype instance with a StructType.

    Raises
    ------
    ImportError
        If ``ArrowDtype`` is ``None`` in this module.
    """
    if ArrowDtype is not None:
        return pd.ArrowDtype(pa.struct(fields))
    raise ImportError("ArrowDtype is not supported in current environment")
|
|
66
|
+
|
|
67
|
+
|
|
49
68
|
def is_map_dtype(dtype: ArrowDtype) -> bool:
|
|
50
69
|
"""
|
|
51
70
|
Check whether the dtype is a map type.
|
|
@@ -64,3 +83,24 @@ def is_list_dtype(dtype: ArrowDtype) -> bool:
|
|
|
64
83
|
return isinstance(dtype, ArrowDtype) and isinstance(
|
|
65
84
|
dtype.pyarrow_dtype, pa.ListType
|
|
66
85
|
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def is_struct_dtype(dtype: ArrowDtype) -> bool:
    """
    Tell whether ``dtype`` is an ArrowDtype wrapping a pyarrow struct type.

    Parameters
    ----------
    dtype : ArrowDtype
        The dtype to check.

    Returns
    -------
    bool
        True if the dtype is a struct type, False otherwise.

    Raises
    ------
    ImportError
        If ``ArrowDtype`` is ``None`` in this module.
    """
    if ArrowDtype is None:
        raise ImportError("ArrowDtype is not supported in current environment")
    # Anything that is not an ArrowDtype instance cannot be a struct dtype.
    if not isinstance(dtype, ArrowDtype):
        return False
    return isinstance(dtype.pyarrow_dtype, pa.StructType)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import pickle
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pyarrow as pa
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from ....lib.version import parse as parse_version
|
|
10
|
+
from ....utils import deserialize_serializable, serialize_serializable, tokenize
|
|
11
|
+
from ...wrapped_pickle import switch_unpickle
|
|
12
|
+
from .. import ArrowDtype
|
|
13
|
+
from ..blob import ArrowBlobType, ExternalBlobDtype, SolidBlob
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@switch_unpickle
def test_blob_object():
    """Check equality, hashing, tokenizing, copying and I/O of ``SolidBlob``."""
    blob = SolidBlob(b"text_content")

    # Equality with a twin, a shallow copy and a serialization round trip.
    twin = SolidBlob(b"text_content")
    assert blob == twin
    shallow = copy.copy(blob)
    assert blob == shallow
    round_tripped = deserialize_serializable(serialize_serializable(blob))
    assert blob == round_tripped

    # Hash and token are stable across calls.
    assert hash(blob) == hash(blob)
    assert tokenize(blob) == tokenize(blob)
    # A blob never compares equal to raw bytes.
    assert blob != b"other_content"

    with blob.open("rb") as reader:
        content = reader.read()
        assert content == b"text_content"

    # Start over with an empty blob: an error raised inside the write
    # context must propagate to the caller.
    blob = SolidBlob()
    with pytest.raises(ValueError), blob.open("wb"):
        raise ValueError
    with blob.open("wb") as writer:
        writer.write(b"text_content")
    assert blob.reference == b"text_content"

    # Plain pickle is only permitted while the unpickle guard is lifted.
    with switch_unpickle(forbidden=False):
        restored = pickle.loads(pickle.dumps(blob))
        assert restored == blob
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@switch_unpickle
def test_blob_series():
    """Exercise a pandas Series with the ``"blob"`` dtype end to end."""
    # Two-dimensional input is rejected.
    nested = np.array([["a", "b"], ["c", "d"]])
    with pytest.raises(ValueError):
        pd.Series(nested, dtype="blob")

    pd_ser = pd.Series(["abcd", "efgh", "ijkl"], dtype="blob")
    assert tokenize(pd_ser) == tokenize(pd_ser)
    assert pd_ser[0].reference == b"abcd"

    # Deep copy and serialization round trip both preserve the series.
    deep_copy = pd_ser.copy(deep=True)
    pd.testing.assert_series_equal(pd_ser, deep_copy)
    round_tripped = deserialize_serializable(serialize_serializable(pd_ser))
    pd.testing.assert_series_equal(pd_ser, round_tripped)

    # Positional selection with an integer array.
    head = pd_ser.iloc[np.array([0, 1])]
    expected_head = pd.Series(np.array([b"abcd", b"efgh"], dtype="O"), dtype="blob")
    pd.testing.assert_series_equal(head, expected_head)

    # Reindexing onto a label that does not exist yields a missing value.
    ix = pd.Index([0, 2, 3])
    reindexed = pd_ser.reindex(ix)
    expected_reindexed = pd.Series([b"abcd", b"ijkl", None], index=ix, dtype="blob")
    pd.testing.assert_series_equal(reindexed, expected_reindexed)

    # Concatenating two blob series.
    pd_ser2 = pd.Series(["abcd"], dtype="blob")
    cat_ser = pd.concat([pd_ser, pd_ser2], ignore_index=True)
    expected_cat = pd.Series([b"abcd", b"efgh", b"ijkl", b"abcd"], dtype="blob")
    pd.testing.assert_series_equal(cat_ser, expected_cat)

    # Casting to Arrow binary.
    cat_ser_bin = cat_ser.astype(ArrowDtype(pa.binary()))
    expected_bin = pd.Series(
        [b"abcd", b"efgh", b"ijkl", b"abcd"], dtype=ArrowDtype(pa.binary())
    )
    pd.testing.assert_series_equal(cat_ser_bin, expected_bin)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_blob_arrow_conversion():
    """Convert a blob Series to a pyarrow array and, where supported, back."""
    source = pd.Series([SolidBlob(b"abcd"), SolidBlob(b"efgh")], dtype="blob")
    arrow_arr = pa.Array.from_pandas(source)
    assert arrow_arr.type == ArrowBlobType()

    expected_bytes = [b"abcd", b"efgh"]
    try:
        assert arrow_arr.tolist() == expected_bytes
    except NotImplementedError:
        # compatibility test for arrow 1.0
        assert arrow_arr.storage.tolist() == expected_bytes

    # The reverse conversion is only checked on pyarrow 3.0 and newer.
    if parse_version(pa.__version__) >= parse_version("3.0"):
        restored = arrow_arr.to_pandas()
        assert restored.dtype == ExternalBlobDtype()
        pd.testing.assert_series_equal(source, restored)
|
|
@@ -17,7 +17,7 @@ import pyarrow as pa
|
|
|
17
17
|
import pytest
|
|
18
18
|
|
|
19
19
|
from ....utils import ARROW_DTYPE_NOT_SUPPORTED
|
|
20
|
-
from ..dtypes import dict_, is_list_dtype, is_map_dtype, list_
|
|
20
|
+
from ..dtypes import dict_, is_list_dtype, is_map_dtype, is_struct_dtype, list_, struct_
|
|
21
21
|
|
|
22
22
|
try:
|
|
23
23
|
from pandas import ArrowDtype
|
|
@@ -46,3 +46,18 @@ def test_list_dtype():
|
|
|
46
46
|
dt = pd.ArrowDtype(pa.map_(pa.int64(), pa.string()))
|
|
47
47
|
assert not is_list_dtype(dt)
|
|
48
48
|
assert not is_list_dtype(pd.Int64Dtype)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_struct_dtype():
    """Check ``struct_`` construction and ``is_struct_dtype`` detection."""
    fields = [pa.field("a", pa.int64()), pa.field("b", pa.string())]

    # Built from a list of fields.
    from_fields = struct_(fields)
    assert is_struct_dtype(from_fields)

    # Test with schema
    from_schema = struct_(pa.schema(fields))
    assert is_struct_dtype(from_schema)

    # A map dtype and a non-Arrow dtype class are both rejected.
    map_dtype = pd.ArrowDtype(pa.map_(pa.int64(), pa.string()))
    assert not is_struct_dtype(map_dtype)
    assert not is_struct_dtype(pd.Int64Dtype)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import pyarrow as pa
|
|
4
|
+
import pytest
|
|
5
|
+
from pandas.api.types import pandas_dtype
|
|
6
|
+
|
|
7
|
+
from ....lib.version import parse as parse_version
|
|
8
|
+
from ....utils import deserialize_serializable, serialize_serializable, tokenize
|
|
9
|
+
from ...wrapped_pickle import switch_unpickle
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from pandas import ArrowDtype # noqa: F401
|
|
13
|
+
|
|
14
|
+
pytestmark = pytest.mark.skip("Only test when ArrowDtype not available in pandas")
|
|
15
|
+
except ImportError:
|
|
16
|
+
from .._fake_arrow_dtype import FakeArrowDtype, to_pyarrow_type
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_fake_arrow_dtype():
    """Check ``to_pyarrow_type`` conversions and dtype-string resolution."""
    # (input, expected pyarrow type) pairs.
    cases = [
        (np.dtype("int64"), pa.int64()),
        (pa.string(), pa.string()),
        (FakeArrowDtype(pa.string()), pa.string()),
        (FakeArrowDtype(pa.bool_()), pa.bool_()),
        (FakeArrowDtype(pa.int8()), pa.int8()),
    ]
    for given, expected in cases:
        assert to_pyarrow_type(given) == expected

    # The "binary[pyarrow]" string resolves to the fake dtype.
    resolved = pandas_dtype("binary[pyarrow]")
    assert isinstance(resolved, FakeArrowDtype)
    assert resolved.pyarrow_dtype == pa.binary()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@switch_unpickle
def test_arrow_series():
    """Exercise a pandas Series backed by ``FakeArrowDtype(pa.binary())``."""
    if parse_version(pa.__version__).major < 2:
        pytest.skip("pyarrow need to be >= 2.0 to run this case")

    def binary_dtype():
        # Build a new dtype instance on every use.
        return FakeArrowDtype(pa.binary())

    empty_ser = pd.Series(np.array([]), dtype=binary_dtype())
    assert len(empty_ser) == 0

    pd_ser = pd.Series([b"abcd", b"efgh", b"ijkl"], dtype=binary_dtype())
    assert tokenize(pd_ser) == tokenize(pd_ser)
    assert pd_ser[0] == b"abcd"

    # Deep copy and serialization round trip both preserve the series.
    deep_copy = pd_ser.copy(deep=True)
    pd.testing.assert_series_equal(pd_ser, deep_copy)
    round_tripped = deserialize_serializable(serialize_serializable(pd_ser))
    pd.testing.assert_series_equal(pd_ser, round_tripped)

    # Positional selection with an integer array.
    head = pd_ser.iloc[np.array([0, 1])]
    expected_head = pd.Series(
        np.array([b"abcd", b"efgh"], dtype="O"), dtype=binary_dtype()
    )
    pd.testing.assert_series_equal(head, expected_head)

    # Reindexing onto a label that does not exist yields a missing value.
    ix = pd.Index([0, 2, 3])
    reindexed = pd_ser.reindex(ix)
    expected_reindexed = pd.Series(
        [b"abcd", b"ijkl", None], index=ix, dtype=binary_dtype()
    )
    pd.testing.assert_series_equal(reindexed, expected_reindexed)

    # Concatenation of two series with the same dtype.
    pd_ser2 = pd.Series([b"abcd"], dtype=binary_dtype())
    cat_ser = pd.concat([pd_ser, pd_ser2], ignore_index=True)
    expected_cat = pd.Series(
        [b"abcd", b"efgh", b"ijkl", b"abcd"], dtype=binary_dtype()
    )
    pd.testing.assert_series_equal(cat_ser, expected_cat)

    # Casting to the dtype the series already has keeps the values.
    cat_ser_bin = cat_ser.astype(binary_dtype())
    expected_bin = pd.Series(
        [b"abcd", b"efgh", b"ijkl", b"abcd"], dtype=binary_dtype()
    )
    pd.testing.assert_series_equal(cat_ser_bin, expected_bin)
|