maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...core import ENTITY_TYPE
|
|
16
|
+
from ...utils import find_objects, no_default
|
|
17
|
+
from ..utils import validate_axis
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def dataframe_from_dict(data, orient="columns", dtype=None, columns=None):
    """
    Construct DataFrame from dict of array-like or dicts.

    Creates DataFrame object from dictionary by columns or by index
    allowing dtype specification.

    Parameters
    ----------
    data : dict
        Of the form {field : array-like} or {field : dict}.
    orient : {'columns', 'index', 'tight'}, default 'columns'
        The "orientation" of the data. If the keys of the passed dict
        should be the columns of the resulting DataFrame, pass 'columns'
        (default). Otherwise if the keys should be rows, pass 'index'.
        If 'tight', assume a dict with keys ['index', 'columns', 'data',
        'index_names', 'column_names'].

    dtype : dtype, default None
        Data type to force after DataFrame construction, otherwise infer.
    columns : list, default None
        Column labels to use when ``orient='index'``. Raises a ValueError
        if used with ``orient='columns'`` or ``orient='tight'``.

    Returns
    -------
    DataFrame

    Raises
    ------
    ValueError
        If ``columns`` is passed together with ``orient='tight'``, or,
        for data that holds no MaxFrame objects, whenever pandas rejects
        the ``orient`` / ``columns`` combination.

    See Also
    --------
    DataFrame.from_records : DataFrame from structured ndarray, sequence
        of tuples or dicts, or DataFrame.
    DataFrame : DataFrame object creation using constructor.
    DataFrame.to_dict : Convert the DataFrame to a dictionary.

    Examples
    --------
    By default the keys of the dict become the DataFrame columns:

    >>> import maxframe.dataframe as md
    >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']}
    >>> md.DataFrame.from_dict(data).execute()
       col_1 col_2
    0      3     a
    1      2     b
    2      1     c
    3      0     d

    Specify ``orient='index'`` to create the DataFrame using dictionary
    keys as rows:

    >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']}
    >>> md.DataFrame.from_dict(data, orient='index').execute()
           0  1  2  3
    row_1  3  2  1  0
    row_2  a  b  c  d

    When using the 'index' orientation, the column names can be
    specified manually:

    >>> md.DataFrame.from_dict(data, orient='index',
    ...                        columns=['A', 'B', 'C', 'D']).execute()
           A  B  C  D
    row_1  3  2  1  0
    row_2  a  b  c  d

    Specify ``orient='tight'`` to create the DataFrame using a 'tight'
    format:

    >>> data = {'index': [('a', 'b'), ('a', 'c')],
    ...         'columns': [('x', 1), ('y', 2)],
    ...         'data': [[1, 3], [2, 4]],
    ...         'index_names': ['n1', 'n2'],
    ...         'column_names': ['z1', 'z2']}
    >>> md.DataFrame.from_dict(data, orient='tight').execute()
    z1     x  y
    z2     1  2
    n1 n2
    a  b   1  3
       c   2  4
    """
    # Imported locally, like the project imports below, so that the module's
    # top-level import block stays untouched.
    import pandas as pd

    from ..initializer import DataFrame as DataFrameInit
    from .from_tensor import dataframe_from_1d_tileables

    if orient == "tight":
        if columns is not None:
            # The tight layout carries its own column labels; an explicit
            # ``columns`` used to be dropped silently, contradicting the
            # documented contract.
            raise ValueError(
                f"Cannot use columns parameter with orient='{orient}'"
            )
        # init directly
        init_kw = {
            "index": data.get("index"),
            "columns": data.get("columns"),
        }
        df = DataFrameInit(data["data"], **init_kw)
        # ``no_default`` is passed for absent axis names instead of None.
        rename_kw = {
            "index": data.get("index_names", no_default),
            "columns": data.get("column_names", no_default),
        }
        res = df.rename_axis(**rename_kw)
    elif not find_objects(data, ENTITY_TYPE):
        # Plain data without MaxFrame entities: let pandas apply ``orient``
        # and ``columns`` (and validate them) before wrapping the result.
        # Passing ``data`` straight to the initializer ignored both arguments.
        local_df = pd.DataFrame.from_dict(data, orient=orient, columns=columns)
        res = DataFrameInit(local_df)
    else:
        # At least one value is a MaxFrame entity: assemble from 1-d tileables
        # along the axis that ``orient`` names.
        axis = validate_axis(orient)
        res = dataframe_from_1d_tileables(data, columns=columns, axis=axis)

    if dtype is not None:
        res = res.astype(dtype)
    return res
|
|
@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
def series_from_index(ind, index=None, name=None):
|
|
54
|
-
name = name or ind.name
|
|
54
|
+
name = name or ind.name
|
|
55
55
|
if index is not None:
|
|
56
56
|
index = Index(index)
|
|
57
57
|
op = SeriesFromIndex(input_=ind, index=index, name=name)
|
|
@@ -77,6 +77,83 @@ def from_records(
|
|
|
77
77
|
sparse=False,
|
|
78
78
|
**kw
|
|
79
79
|
):
|
|
80
|
+
"""
|
|
81
|
+
Convert structured or record ndarray to DataFrame.
|
|
82
|
+
|
|
83
|
+
Creates a DataFrame object from a structured ndarray, sequence of
|
|
84
|
+
tuples or dicts, or DataFrame.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
data : structured ndarray, sequence of tuples or dicts, or DataFrame
|
|
89
|
+
Structured input data.
|
|
90
|
+
|
|
91
|
+
.. deprecated:: 2.1.0
|
|
92
|
+
Passing a DataFrame is deprecated.
|
|
93
|
+
index : str, list of fields, array-like
|
|
94
|
+
Field of array to use as the index, alternately a specific set of
|
|
95
|
+
input labels to use.
|
|
96
|
+
exclude : sequence, default None
|
|
97
|
+
Columns or fields to exclude.
|
|
98
|
+
columns : sequence, default None
|
|
99
|
+
Column names to use. If the passed data do not have names
|
|
100
|
+
associated with them, this argument provides names for the
|
|
101
|
+
columns. Otherwise this argument indicates the order of the columns
|
|
102
|
+
in the result (any names not found in the data will become all-NA
|
|
103
|
+
columns).
|
|
104
|
+
coerce_float : bool, default False
|
|
105
|
+
Attempt to convert values of non-string, non-numeric objects (like
|
|
106
|
+
decimal.Decimal) to floating point, useful for SQL result sets.
|
|
107
|
+
nrows : int, default None
|
|
108
|
+
Number of rows to read if data is an iterator.
|
|
109
|
+
|
|
110
|
+
Returns
|
|
111
|
+
-------
|
|
112
|
+
DataFrame
|
|
113
|
+
|
|
114
|
+
See Also
|
|
115
|
+
--------
|
|
116
|
+
DataFrame.from_dict : DataFrame from dict of array-like or dicts.
|
|
117
|
+
DataFrame : DataFrame object creation using constructor.
|
|
118
|
+
|
|
119
|
+
Examples
|
|
120
|
+
--------
|
|
121
|
+
Data can be provided as a structured ndarray:
|
|
122
|
+
|
|
123
|
+
>>> import maxframe.tensor as mt
|
|
124
|
+
>>> import maxframe.dataframe as md
|
|
125
|
+
>>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
|
|
126
|
+
... dtype=[('col_1', 'i4'), ('col_2', 'U1')])
|
|
127
|
+
>>> md.DataFrame.from_records(data).execute()
|
|
128
|
+
col_1 col_2
|
|
129
|
+
0 3 a
|
|
130
|
+
1 2 b
|
|
131
|
+
2 1 c
|
|
132
|
+
3 0 d
|
|
133
|
+
|
|
134
|
+
Data can be provided as a list of dicts:
|
|
135
|
+
|
|
136
|
+
>>> data = [{'col_1': 3, 'col_2': 'a'},
|
|
137
|
+
... {'col_1': 2, 'col_2': 'b'},
|
|
138
|
+
... {'col_1': 1, 'col_2': 'c'},
|
|
139
|
+
... {'col_1': 0, 'col_2': 'd'}]
|
|
140
|
+
>>> md.DataFrame.from_records(data).execute()
|
|
141
|
+
col_1 col_2
|
|
142
|
+
0 3 a
|
|
143
|
+
1 2 b
|
|
144
|
+
2 1 c
|
|
145
|
+
3 0 d
|
|
146
|
+
|
|
147
|
+
Data can be provided as a list of tuples with corresponding columns:
|
|
148
|
+
|
|
149
|
+
>>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
|
|
150
|
+
>>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
|
|
151
|
+
col_1 col_2
|
|
152
|
+
0 3 a
|
|
153
|
+
1 2 b
|
|
154
|
+
2 1 c
|
|
155
|
+
3 0 d
|
|
156
|
+
"""
|
|
80
157
|
if isinstance(data, np.ndarray):
|
|
81
158
|
from .dataframe import from_pandas
|
|
82
159
|
|
|
@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
39
39
|
input = AnyField("input")
|
|
40
40
|
index = AnyField("index")
|
|
41
41
|
columns = AnyField("columns")
|
|
42
|
+
axis = AnyField("axis")
|
|
42
43
|
|
|
43
44
|
def __init__(self, *args, **kwargs):
|
|
44
45
|
kwargs["_output_types"] = [OutputType.dataframe]
|
|
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
120
121
|
if isinstance(tileable, ENTITY_TYPE):
|
|
121
122
|
tileables.append(tileable)
|
|
122
123
|
|
|
123
|
-
if
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
index_size = index.shape[0]
|
|
124
|
+
if self.axis == 0:
|
|
125
|
+
if index is not None:
|
|
126
|
+
raise NotImplementedError("Cannot accept index when axis=0")
|
|
127
127
|
else:
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
128
|
+
index = pd.Index(list(input_1d_tileables.keys()))
|
|
129
|
+
index_value = parse_index(index, store_data=True)
|
|
130
|
+
self.index = index
|
|
131
|
+
|
|
132
|
+
if columns is not None:
|
|
133
|
+
tileable_size = tileables[0].shape[0] if tileables else 0
|
|
134
|
+
if not isinstance(columns, pd.Index):
|
|
135
|
+
columns = self.columns = pd.Index(columns)
|
|
136
|
+
column_size = columns.shape[0]
|
|
137
|
+
if (
|
|
138
|
+
not pd.isna(tileable_size)
|
|
139
|
+
and not pd.isna(column_size)
|
|
140
|
+
and tileable_size != column_size
|
|
141
|
+
):
|
|
142
|
+
raise ValueError(
|
|
143
|
+
f"columns {columns} should have the same shape "
|
|
144
|
+
f"with tensor: {tileable_size}"
|
|
145
|
+
)
|
|
146
|
+
columns_value = self._process_index(columns, tileables)
|
|
147
|
+
else:
|
|
148
|
+
if not tileables or np.isnan(tileables[0].shape[0]):
|
|
149
|
+
columns = columns_value = None
|
|
150
|
+
else:
|
|
151
|
+
columns = pd.RangeIndex(0, tileables[0].shape[0])
|
|
152
|
+
columns_value = parse_index(columns, store_data=True)
|
|
153
|
+
self.columns = columns
|
|
154
|
+
|
|
155
|
+
shape = (len(input_1d_tileables), shape[0] if shape else 0)
|
|
139
156
|
else:
|
|
140
|
-
if
|
|
141
|
-
|
|
157
|
+
if index is not None:
|
|
158
|
+
tileable_size = tileables[0].shape[0] if tileables else 0
|
|
159
|
+
if hasattr(index, "shape"):
|
|
160
|
+
index_size = index.shape[0]
|
|
161
|
+
else:
|
|
162
|
+
index_size = len(index)
|
|
163
|
+
if (
|
|
164
|
+
not pd.isna(tileable_size)
|
|
165
|
+
and not pd.isna(index_size)
|
|
166
|
+
and tileable_size != index_size
|
|
167
|
+
):
|
|
168
|
+
raise ValueError(
|
|
169
|
+
f"index {index} should have the same shape "
|
|
170
|
+
f"with tensor: {tileable_size}"
|
|
171
|
+
)
|
|
172
|
+
index_value = self._process_index(index, tileables)
|
|
142
173
|
else:
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
174
|
+
if not tileables or np.isnan(tileables[0].shape[0]):
|
|
175
|
+
index = pd.RangeIndex(0)
|
|
176
|
+
else:
|
|
177
|
+
index = pd.RangeIndex(0, tileables[0].shape[0])
|
|
178
|
+
self.index = index
|
|
179
|
+
index_value = parse_index(index)
|
|
146
180
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
181
|
+
if columns is not None:
|
|
182
|
+
if len(input_1d_tileables) != len(columns):
|
|
183
|
+
raise ValueError(
|
|
184
|
+
f"columns {columns} should have size {len(input_1d_tileables)}"
|
|
185
|
+
)
|
|
186
|
+
if not isinstance(columns, pd.Index):
|
|
187
|
+
if isinstance(columns, ENTITY_TYPE):
|
|
188
|
+
raise NotImplementedError(
|
|
189
|
+
"The columns value cannot be a tileable"
|
|
190
|
+
)
|
|
191
|
+
columns = pd.Index(columns)
|
|
192
|
+
columns_value = parse_index(columns, store_data=True)
|
|
193
|
+
else:
|
|
194
|
+
columns_value = parse_index(
|
|
195
|
+
pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
|
|
151
196
|
)
|
|
152
|
-
if not isinstance(columns, pd.Index):
|
|
153
|
-
if isinstance(columns, ENTITY_TYPE):
|
|
154
|
-
raise NotImplementedError("The columns value cannot be a tileable")
|
|
155
|
-
columns = pd.Index(columns)
|
|
156
|
-
columns_value = parse_index(columns, store_data=True)
|
|
157
|
-
else:
|
|
158
|
-
columns_value = parse_index(
|
|
159
|
-
pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
|
|
160
|
-
)
|
|
161
197
|
|
|
162
|
-
|
|
198
|
+
shape = (shape[0] if shape else 0, len(input_1d_tileables))
|
|
199
|
+
|
|
163
200
|
return self.new_dataframe(
|
|
164
201
|
tileables,
|
|
165
202
|
shape,
|
|
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
|
|
|
278
315
|
gpu: bool = None,
|
|
279
316
|
sparse: bool = False,
|
|
280
317
|
):
|
|
318
|
+
if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
|
|
319
|
+
columns = pd.MultiIndex.from_tuples(columns)
|
|
320
|
+
|
|
281
321
|
if tensor is not None:
|
|
282
322
|
if tensor.ndim > 2 or tensor.ndim <= 0:
|
|
283
323
|
raise TypeError(
|
|
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
|
|
|
299
339
|
dtypes = pd.Series([], index=pd.Index([], dtype=object))
|
|
300
340
|
if index is not None and not isinstance(index, ENTITY_TYPE):
|
|
301
341
|
index = pd.Index(index)
|
|
342
|
+
if isinstance(index[0], tuple):
|
|
343
|
+
index = pd.MultiIndex.from_tuples(index)
|
|
302
344
|
op = DataFrameFromTensor(
|
|
303
345
|
input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
|
|
304
346
|
)
|
|
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
|
|
|
311
353
|
columns: Union[pd.Index, list] = None,
|
|
312
354
|
gpu: bool = None,
|
|
313
355
|
sparse: bool = False,
|
|
356
|
+
axis: int = 1,
|
|
314
357
|
):
|
|
358
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
359
|
+
|
|
315
360
|
data = dict()
|
|
316
361
|
for k, v in d.items():
|
|
317
362
|
if isinstance(v, (list, tuple)) and any(
|
|
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
|
|
|
322
367
|
data[k] = v
|
|
323
368
|
d = data
|
|
324
369
|
if columns is not None:
|
|
325
|
-
tileables = [d.get(c) for c in columns]
|
|
370
|
+
tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
|
|
326
371
|
else:
|
|
327
|
-
columns = list(d.keys())
|
|
372
|
+
columns = list(d.keys()) if axis == 1 else None
|
|
328
373
|
tileables = list(d.values())
|
|
329
374
|
|
|
330
375
|
gpu = (
|
|
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
|
|
|
332
377
|
if gpu is None
|
|
333
378
|
else gpu
|
|
334
379
|
)
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
380
|
+
|
|
381
|
+
if axis == 0:
|
|
382
|
+
col_num = (
|
|
383
|
+
tileables[0].shape[0]
|
|
384
|
+
if hasattr(tileables[0], "shape")
|
|
385
|
+
else len(tileables[0])
|
|
386
|
+
)
|
|
387
|
+
if pd.isna(col_num):
|
|
388
|
+
dtypes = None
|
|
389
|
+
else:
|
|
390
|
+
common_dtype = find_common_type(
|
|
391
|
+
[
|
|
392
|
+
t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
|
|
393
|
+
for t in tileables
|
|
394
|
+
]
|
|
395
|
+
)
|
|
396
|
+
dtypes = pd.Series(
|
|
397
|
+
[common_dtype] * col_num,
|
|
398
|
+
index=columns if columns is not None else pd.RangeIndex(col_num),
|
|
399
|
+
)
|
|
400
|
+
else:
|
|
401
|
+
dtypes = pd.Series(
|
|
402
|
+
[t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
|
|
403
|
+
index=columns,
|
|
404
|
+
)
|
|
405
|
+
|
|
339
406
|
if index is not None and not isinstance(index, ENTITY_TYPE):
|
|
340
407
|
index = pd.Index(index)
|
|
408
|
+
|
|
341
409
|
op = DataFrameFromTensor(
|
|
342
|
-
input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
|
|
410
|
+
input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
|
|
343
411
|
)
|
|
344
412
|
return op(d, index, columns, dtypes)
|
|
345
413
|
|
|
@@ -38,8 +38,12 @@ from ...serialization.serializables import (
|
|
|
38
38
|
StringField,
|
|
39
39
|
)
|
|
40
40
|
from ...utils import lazy_import, parse_readable_size
|
|
41
|
-
from ..utils import parse_index, to_arrow_dtypes
|
|
42
|
-
from .core import
|
|
41
|
+
from ..utils import parse_index, to_arrow_dtypes, validate_dtype_backend
|
|
42
|
+
from .core import (
|
|
43
|
+
ColumnPruneSupportedDataSourceMixin,
|
|
44
|
+
DtypeBackendCompatibleMixin,
|
|
45
|
+
IncrementalIndexDatasource,
|
|
46
|
+
)
|
|
43
47
|
|
|
44
48
|
cudf = lazy_import("cudf")
|
|
45
49
|
|
|
@@ -88,6 +92,7 @@ def _find_chunk_start_end(f, offset, size):
|
|
|
88
92
|
class DataFrameReadCSV(
|
|
89
93
|
IncrementalIndexDatasource,
|
|
90
94
|
ColumnPruneSupportedDataSourceMixin,
|
|
95
|
+
DtypeBackendCompatibleMixin,
|
|
91
96
|
):
|
|
92
97
|
_op_type_ = opcodes.READ_CSV
|
|
93
98
|
|
|
@@ -101,7 +106,7 @@ class DataFrameReadCSV(
|
|
|
101
106
|
offset = Int64Field("offset")
|
|
102
107
|
size = Int64Field("size")
|
|
103
108
|
incremental_index = BoolField("incremental_index")
|
|
104
|
-
|
|
109
|
+
dtype_backend = StringField("dtype_backend", default=None)
|
|
105
110
|
keep_usecols_order = BoolField("keep_usecols_order", default=None)
|
|
106
111
|
storage_options = DictField("storage_options")
|
|
107
112
|
merge_small_files = BoolField("merge_small_files")
|
|
@@ -151,7 +156,7 @@ def read_csv(
|
|
|
151
156
|
head_bytes="100k",
|
|
152
157
|
head_lines=None,
|
|
153
158
|
incremental_index: bool = True,
|
|
154
|
-
|
|
159
|
+
dtype_backend: str = None,
|
|
155
160
|
storage_options: dict = None,
|
|
156
161
|
memory_scale: int = None,
|
|
157
162
|
merge_small_files: bool = True,
|
|
@@ -419,8 +424,8 @@ def read_csv(
|
|
|
419
424
|
incremental_index: bool, default True
|
|
420
425
|
If index_col is not specified, ensure the range index is incremental;
|
|
421
426
|
setting this to False gives slightly better performance.
|
|
422
|
-
|
|
423
|
-
|
|
427
|
+
dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
|
|
428
|
+
Back-end data type applied to the resultant DataFrame (still experimental).
|
|
424
429
|
storage_options: dict, optional
|
|
425
430
|
Options for storage connection.
|
|
426
431
|
merge_small_files: bool, default True
|
|
@@ -441,13 +446,12 @@ def read_csv(
|
|
|
441
446
|
Examples
|
|
442
447
|
--------
|
|
443
448
|
>>> import maxframe.dataframe as md
|
|
444
|
-
>>> from maxframe.lib.filesystem.oss import build_oss_path
|
|
445
449
|
>>> md.read_csv('data.csv') # doctest: +SKIP
|
|
446
450
|
>>> # read from HDFS
|
|
447
451
|
>>> md.read_csv('hdfs://localhost:8020/test.csv') # doctest: +SKIP
|
|
448
452
|
>>> # read from OSS
|
|
449
|
-
>>>
|
|
450
|
-
>>>
|
|
453
|
+
>>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
|
|
454
|
+
... storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'}) # doctest: +SKIP
|
|
451
455
|
"""
|
|
452
456
|
# infer dtypes and columns
|
|
453
457
|
if isinstance(path, (list, tuple)):
|
|
@@ -510,7 +514,7 @@ def read_csv(
|
|
|
510
514
|
compression=compression,
|
|
511
515
|
gpu=gpu,
|
|
512
516
|
incremental_index=incremental_index,
|
|
513
|
-
|
|
517
|
+
dtype_backend=dtype_backend,
|
|
514
518
|
storage_options=storage_options,
|
|
515
519
|
memory_scale=memory_scale,
|
|
516
520
|
merge_small_files=merge_small_files,
|
|
@@ -519,10 +523,13 @@ def read_csv(
|
|
|
519
523
|
)
|
|
520
524
|
chunk_bytes = chunk_bytes or options.chunk_store_limit
|
|
521
525
|
dtypes = mini_df.dtypes
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
+
|
|
527
|
+
dtype_backend = validate_dtype_backend(
|
|
528
|
+
dtype_backend or options.dataframe.dtype_backend
|
|
529
|
+
)
|
|
530
|
+
|
|
531
|
+
if not gpu and dtype_backend == "pyarrow":
|
|
532
|
+
dtypes = to_arrow_dtypes(dtypes)
|
|
526
533
|
ret = op(
|
|
527
534
|
index_value=index_value,
|
|
528
535
|
columns_value=columns_value,
|
|
@@ -29,7 +29,7 @@ from odps.types import Column, OdpsSchema, validate_data_type
|
|
|
29
29
|
from odps.utils import split_sql_by_semicolon
|
|
30
30
|
|
|
31
31
|
from ... import opcodes
|
|
32
|
-
from ...config import options
|
|
32
|
+
from ...config import option_context, options
|
|
33
33
|
from ...core import OutputType
|
|
34
34
|
from ...core.graph import DAG
|
|
35
35
|
from ...io.odpsio import odps_schema_to_pandas_dtypes
|
|
@@ -44,8 +44,12 @@ from ...serialization.serializables import (
|
|
|
44
44
|
StringField,
|
|
45
45
|
)
|
|
46
46
|
from ...utils import is_empty
|
|
47
|
-
from ..utils import parse_index
|
|
48
|
-
from .core import
|
|
47
|
+
from ..utils import parse_index, validate_dtype_backend
|
|
48
|
+
from .core import (
|
|
49
|
+
ColumnPruneSupportedDataSourceMixin,
|
|
50
|
+
DtypeBackendCompatibleMixin,
|
|
51
|
+
IncrementalIndexDatasource,
|
|
52
|
+
)
|
|
49
53
|
|
|
50
54
|
logger = logging.getLogger(__name__)
|
|
51
55
|
|
|
@@ -266,6 +270,7 @@ def _build_explain_sql(
|
|
|
266
270
|
class DataFrameReadODPSQuery(
|
|
267
271
|
IncrementalIndexDatasource,
|
|
268
272
|
ColumnPruneSupportedDataSourceMixin,
|
|
273
|
+
DtypeBackendCompatibleMixin,
|
|
269
274
|
):
|
|
270
275
|
_op_type_ = opcodes.READ_ODPS_QUERY
|
|
271
276
|
|
|
@@ -273,12 +278,16 @@ class DataFrameReadODPSQuery(
|
|
|
273
278
|
dtypes = SeriesField("dtypes", default=None)
|
|
274
279
|
columns = AnyField("columns", default=None)
|
|
275
280
|
nrows = Int64Field("nrows", default=None)
|
|
276
|
-
|
|
281
|
+
dtype_backend = StringField("dtype_backend", default=None)
|
|
277
282
|
string_as_binary = BoolField("string_as_binary", default=None)
|
|
278
283
|
index_columns = ListField("index_columns", FieldTypes.string, default=None)
|
|
279
284
|
index_dtypes = SeriesField("index_dtypes", default=None)
|
|
280
285
|
column_renames = DictField("column_renames", default=None)
|
|
281
286
|
|
|
287
|
+
def __init__(self, dtype_backend=None, **kw):
|
|
288
|
+
dtype_backend = validate_dtype_backend(dtype_backend)
|
|
289
|
+
super().__init__(dtype_backend=dtype_backend, **kw)
|
|
290
|
+
|
|
282
291
|
def get_columns(self):
|
|
283
292
|
return self.columns or list(self.dtypes.index)
|
|
284
293
|
|
|
@@ -404,6 +413,7 @@ def read_odps_query(
|
|
|
404
413
|
sql_hints: Dict[str, str] = None,
|
|
405
414
|
anonymous_col_prefix: str = _DEFAULT_ANONYMOUS_COL_PREFIX,
|
|
406
415
|
skip_schema: bool = False,
|
|
416
|
+
dtype_backend: str = None,
|
|
407
417
|
**kw,
|
|
408
418
|
):
|
|
409
419
|
"""
|
|
@@ -428,6 +438,8 @@ def read_odps_query(
|
|
|
428
438
|
Skip resolving output schema before execution. Once this is configured,
|
|
429
439
|
the output DataFrame cannot be used as input to other DataFrame operators
|
|
430
440
|
before execution.
|
|
441
|
+
dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
|
|
442
|
+
Back-end data type applied to the resultant DataFrame (still experimental).
|
|
431
443
|
|
|
432
444
|
Returns
|
|
433
445
|
-------
|
|
@@ -459,6 +471,14 @@ def read_odps_query(
|
|
|
459
471
|
if odps_entry is None:
|
|
460
472
|
raise ValueError("Missing odps_entry parameter")
|
|
461
473
|
|
|
474
|
+
if "use_arrow_dtype" in kw:
|
|
475
|
+
dtype_backend = dtype_backend or validate_dtype_backend(
|
|
476
|
+
kw.pop("use_arrow_dtype")
|
|
477
|
+
)
|
|
478
|
+
dtype_backend = validate_dtype_backend(
|
|
479
|
+
dtype_backend or options.dataframe.dtype_backend
|
|
480
|
+
)
|
|
481
|
+
|
|
462
482
|
col_renames = {}
|
|
463
483
|
if not skip_schema:
|
|
464
484
|
odps_schema = _resolve_query_schema(
|
|
@@ -479,7 +499,9 @@ def read_odps_query(
|
|
|
479
499
|
else:
|
|
480
500
|
new_columns.append(col)
|
|
481
501
|
|
|
482
|
-
|
|
502
|
+
with option_context():
|
|
503
|
+
options.dataframe.dtype_backend = dtype_backend
|
|
504
|
+
dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
|
|
483
505
|
else:
|
|
484
506
|
dtypes = None
|
|
485
507
|
|
|
@@ -500,10 +522,11 @@ def read_odps_query(
|
|
|
500
522
|
|
|
501
523
|
chunk_bytes = kw.pop("chunk_bytes", None)
|
|
502
524
|
chunk_size = kw.pop("chunk_size", None)
|
|
525
|
+
|
|
503
526
|
op = DataFrameReadODPSQuery(
|
|
504
527
|
query=query,
|
|
505
528
|
dtypes=dtypes,
|
|
506
|
-
|
|
529
|
+
dtype_backend=dtype_backend,
|
|
507
530
|
string_as_binary=string_as_binary,
|
|
508
531
|
index_columns=index_col,
|
|
509
532
|
index_dtypes=index_dtypes,
|