maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp37-win32.pyd +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +9 -8
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +38 -1
- maxframe/codegen/spe/dataframe/misc.py +11 -33
- maxframe/codegen/spe/dataframe/reduction.py +32 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +39 -18
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +73 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +54 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/base.py +2 -1
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/graph/core.cp37-win32.pyd +0 -0
- maxframe/core/graph/entity.py +8 -3
- maxframe/core/mode.py +6 -1
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +12 -5
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +18 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +161 -224
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +21 -14
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
- maxframe/dataframe/datastore/__init__.py +11 -1
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +36 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +17 -2
- maxframe/dataframe/groupby/aggregation.py +86 -49
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +19 -5
- maxframe/dataframe/groupby/core.py +116 -16
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +22 -2
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +46 -18
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +15 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +28 -11
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +82 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +125 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +48 -3
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +14 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +35 -16
- maxframe/dataframe/reduction/aggregation.py +43 -14
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +103 -0
- maxframe/dataframe/reduction/argmin.py +103 -0
- maxframe/dataframe/reduction/core.py +80 -24
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +19 -11
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +16 -1
- maxframe/dataframe/sort/argsort.py +68 -0
- maxframe/dataframe/sort/core.py +2 -1
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +125 -52
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +18 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +113 -4
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +7 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +2 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +79 -9
- maxframe/learn/utils/odpsio.py +262 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +124 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cp37-win32.pyd +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +41 -15
- maxframe/protocol.py +12 -0
- maxframe/remote/core.py +4 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cp37-win32.pyd +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +31 -4
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
- maxframe/tensor/core.py +6 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +51 -6
- maxframe/tests/utils.py +0 -2
- maxframe/typing_.py +2 -0
- maxframe/udf.py +130 -9
- maxframe/utils.py +254 -27
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +28 -1
- maxframe/dataframe/arrays.py +0 -864
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -22,7 +22,7 @@ from odps.models import Table
|
|
|
22
22
|
from odps.utils import to_timestamp
|
|
23
23
|
|
|
24
24
|
from ... import opcodes
|
|
25
|
-
from ...config import options
|
|
25
|
+
from ...config import option_context, options
|
|
26
26
|
from ...core import OutputType
|
|
27
27
|
from ...io.odpsio import odps_schema_to_pandas_dtypes
|
|
28
28
|
from ...serialization.serializables import (
|
|
@@ -36,8 +36,12 @@ from ...serialization.serializables import (
|
|
|
36
36
|
)
|
|
37
37
|
from ...utils import estimate_table_size, is_empty
|
|
38
38
|
from ..core import DataFrame # noqa: F401
|
|
39
|
-
from ..utils import parse_index
|
|
40
|
-
from .core import
|
|
39
|
+
from ..utils import parse_index, validate_dtype_backend
|
|
40
|
+
from .core import (
|
|
41
|
+
ColumnPruneSupportedDataSourceMixin,
|
|
42
|
+
DtypeBackendCompatibleMixin,
|
|
43
|
+
IncrementalIndexDatasource,
|
|
44
|
+
)
|
|
41
45
|
|
|
42
46
|
logger = logging.getLogger(__name__)
|
|
43
47
|
|
|
@@ -45,6 +49,7 @@ logger = logging.getLogger(__name__)
|
|
|
45
49
|
class DataFrameReadODPSTable(
|
|
46
50
|
IncrementalIndexDatasource,
|
|
47
51
|
ColumnPruneSupportedDataSourceMixin,
|
|
52
|
+
DtypeBackendCompatibleMixin,
|
|
48
53
|
):
|
|
49
54
|
__slots__ = ("_odps_entry",)
|
|
50
55
|
_op_type_ = opcodes.READ_ODPS_TABLE
|
|
@@ -54,18 +59,22 @@ class DataFrameReadODPSTable(
|
|
|
54
59
|
dtypes = SeriesField("dtypes", default=None)
|
|
55
60
|
columns = AnyField("columns", default=None)
|
|
56
61
|
nrows = Int64Field("nrows", default=None)
|
|
57
|
-
|
|
62
|
+
dtype_backend = StringField("dtype_backend", default=None)
|
|
58
63
|
string_as_binary = BoolField("string_as_binary", default=None)
|
|
59
64
|
append_partitions = BoolField("append_partitions", default=None)
|
|
60
65
|
last_modified_time = Int64Field("last_modified_time", default=None)
|
|
61
66
|
index_columns = ListField("index_columns", FieldTypes.string, default=None)
|
|
62
67
|
index_dtypes = SeriesField("index_dtypes", default=None)
|
|
63
68
|
|
|
64
|
-
def __init__(self, memory_scale=None, **kw):
|
|
69
|
+
def __init__(self, memory_scale=None, dtype_backend=None, **kw):
|
|
65
70
|
output_type = kw.pop("output_type", OutputType.dataframe)
|
|
66
71
|
self._odps_entry = kw.pop("odps_entry", None)
|
|
72
|
+
dtype_backend = validate_dtype_backend(dtype_backend)
|
|
67
73
|
super(DataFrameReadODPSTable, self).__init__(
|
|
68
|
-
memory_scale=memory_scale,
|
|
74
|
+
memory_scale=memory_scale,
|
|
75
|
+
dtype_backend=dtype_backend,
|
|
76
|
+
_output_types=[output_type],
|
|
77
|
+
**kw,
|
|
69
78
|
)
|
|
70
79
|
|
|
71
80
|
@property
|
|
@@ -153,6 +162,7 @@ def read_odps_table(
|
|
|
153
162
|
odps_entry: ODPS = None,
|
|
154
163
|
string_as_binary: bool = None,
|
|
155
164
|
append_partitions: bool = False,
|
|
165
|
+
dtype_backend: str = None,
|
|
156
166
|
**kw,
|
|
157
167
|
):
|
|
158
168
|
"""
|
|
@@ -176,6 +186,8 @@ def read_odps_table(
|
|
|
176
186
|
append_partitions: bool
|
|
177
187
|
If True, will add all partition columns as selected columns when
|
|
178
188
|
`columns` is not specified,
|
|
189
|
+
dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
|
|
190
|
+
Back-end data type applied to the resultant DataFrame (still experimental).
|
|
179
191
|
|
|
180
192
|
Returns
|
|
181
193
|
-------
|
|
@@ -202,9 +214,20 @@ def read_odps_table(
|
|
|
202
214
|
else table.table_schema.simple_columns
|
|
203
215
|
)
|
|
204
216
|
table_columns = [c.name.lower() for c in cols]
|
|
205
|
-
|
|
206
|
-
|
|
217
|
+
|
|
218
|
+
if "use_arrow_dtype" in kw:
|
|
219
|
+
dtype_backend = dtype_backend or validate_dtype_backend(
|
|
220
|
+
kw.pop("use_arrow_dtype")
|
|
221
|
+
)
|
|
222
|
+
dtype_backend = validate_dtype_backend(
|
|
223
|
+
dtype_backend or options.dataframe.dtype_backend
|
|
207
224
|
)
|
|
225
|
+
|
|
226
|
+
with option_context():
|
|
227
|
+
options.dataframe.dtype_backend = dtype_backend
|
|
228
|
+
table_dtypes = odps_schema_to_pandas_dtypes(
|
|
229
|
+
table.table_schema, with_partitions=True
|
|
230
|
+
)
|
|
208
231
|
df_types = [table_dtypes[c] for c in table_columns]
|
|
209
232
|
|
|
210
233
|
if isinstance(index_col, str):
|
|
@@ -246,7 +269,6 @@ def read_odps_table(
|
|
|
246
269
|
dtypes = pd.Series(df_types, index=table_columns)
|
|
247
270
|
chunk_bytes = kw.pop("chunk_bytes", None)
|
|
248
271
|
chunk_size = kw.pop("chunk_size", None)
|
|
249
|
-
use_arrow_dtype = kw.pop("use_arrow_dtype", True)
|
|
250
272
|
|
|
251
273
|
partitions = partitions or kw.get("partition")
|
|
252
274
|
if isinstance(partitions, str):
|
|
@@ -261,7 +283,7 @@ def read_odps_table(
|
|
|
261
283
|
partitions=partitions,
|
|
262
284
|
dtypes=dtypes,
|
|
263
285
|
columns=columns,
|
|
264
|
-
|
|
286
|
+
dtype_backend=dtype_backend,
|
|
265
287
|
string_as_binary=string_as_binary,
|
|
266
288
|
append_partitions=append_partitions,
|
|
267
289
|
last_modified_time=to_timestamp(table.last_data_modified_time),
|
|
@@ -32,6 +32,7 @@ except ImportError:
|
|
|
32
32
|
|
|
33
33
|
from ... import opcodes
|
|
34
34
|
from ...config import options
|
|
35
|
+
from ...lib.dtypes_extension import ArrowDtype
|
|
35
36
|
from ...lib.filesystem import FileSystem, get_fs, glob, open_file
|
|
36
37
|
from ...serialization.serializables import (
|
|
37
38
|
AnyField,
|
|
@@ -43,10 +44,13 @@ from ...serialization.serializables import (
|
|
|
43
44
|
StringField,
|
|
44
45
|
)
|
|
45
46
|
from ...utils import lazy_import
|
|
46
|
-
from ..arrays import ArrowStringDtype
|
|
47
47
|
from ..operators import OutputType
|
|
48
48
|
from ..utils import parse_index, to_arrow_dtypes
|
|
49
|
-
from .core import
|
|
49
|
+
from .core import (
|
|
50
|
+
ColumnPruneSupportedDataSourceMixin,
|
|
51
|
+
DtypeBackendCompatibleMixin,
|
|
52
|
+
IncrementalIndexDatasource,
|
|
53
|
+
)
|
|
50
54
|
|
|
51
55
|
PARQUET_MEMORY_SCALE = 15
|
|
52
56
|
STRING_FIELD_OVERHEAD = 50
|
|
@@ -89,13 +93,11 @@ class ParquetEngine:
|
|
|
89
93
|
def read_dtypes(self, f, **kwargs):
|
|
90
94
|
raise NotImplementedError
|
|
91
95
|
|
|
92
|
-
def read_to_pandas(
|
|
93
|
-
self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
|
|
94
|
-
):
|
|
96
|
+
def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
|
|
95
97
|
raise NotImplementedError
|
|
96
98
|
|
|
97
99
|
def read_group_to_pandas(
|
|
98
|
-
self, f, group_index, columns=None, nrows=None,
|
|
100
|
+
self, f, group_index, columns=None, nrows=None, dtype_backend=None, **kwargs
|
|
99
101
|
):
|
|
100
102
|
raise NotImplementedError
|
|
101
103
|
|
|
@@ -106,11 +108,11 @@ class ParquetEngine:
|
|
|
106
108
|
partition_keys: Dict,
|
|
107
109
|
columns=None,
|
|
108
110
|
nrows=None,
|
|
109
|
-
|
|
111
|
+
dtype_backend=None,
|
|
110
112
|
**kwargs,
|
|
111
113
|
):
|
|
112
114
|
raw_df = self.read_to_pandas(
|
|
113
|
-
f, columns=columns, nrows=nrows,
|
|
115
|
+
f, columns=columns, nrows=nrows, dtype_backend=dtype_backend, **kwargs
|
|
114
116
|
)
|
|
115
117
|
for col, value in partition_keys.items():
|
|
116
118
|
dictionary = partitions[col]
|
|
@@ -169,28 +171,26 @@ class ArrowEngine(ParquetEngine):
|
|
|
169
171
|
return file.schema_arrow.empty_table().to_pandas().dtypes
|
|
170
172
|
|
|
171
173
|
@classmethod
|
|
172
|
-
def _table_to_pandas(cls, t, nrows=None,
|
|
174
|
+
def _table_to_pandas(cls, t, nrows=None, dtype_backend=None):
|
|
173
175
|
if nrows is not None:
|
|
174
176
|
t = t.slice(0, nrows)
|
|
175
|
-
if
|
|
176
|
-
df = t.to_pandas(types_mapper={pa.string():
|
|
177
|
+
if dtype_backend == "pyarrow":
|
|
178
|
+
df = t.to_pandas(types_mapper={pa.string(): ArrowDtype(pa.string())}.get)
|
|
177
179
|
else:
|
|
178
180
|
df = t.to_pandas()
|
|
179
181
|
return df
|
|
180
182
|
|
|
181
|
-
def read_to_pandas(
|
|
182
|
-
self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
|
|
183
|
-
):
|
|
183
|
+
def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
|
|
184
184
|
file = pq.ParquetFile(f)
|
|
185
185
|
t = file.read(columns=columns, **kwargs)
|
|
186
|
-
return self._table_to_pandas(t, nrows=nrows,
|
|
186
|
+
return self._table_to_pandas(t, nrows=nrows, dtype_backend=dtype_backend)
|
|
187
187
|
|
|
188
188
|
def read_group_to_pandas(
|
|
189
|
-
self, f, group_index, columns=None, nrows=None,
|
|
189
|
+
self, f, group_index, columns=None, nrows=None, dtype_backend=None, **kwargs
|
|
190
190
|
):
|
|
191
191
|
file = pq.ParquetFile(f)
|
|
192
192
|
t = file.read_row_group(group_index, columns=columns, **kwargs)
|
|
193
|
-
return self._table_to_pandas(t, nrows=nrows,
|
|
193
|
+
return self._table_to_pandas(t, nrows=nrows, dtype_backend=dtype_backend)
|
|
194
194
|
|
|
195
195
|
|
|
196
196
|
class FastpaquetEngine(ParquetEngine):
|
|
@@ -203,14 +203,12 @@ class FastpaquetEngine(ParquetEngine):
|
|
|
203
203
|
dtypes_dict = file._dtypes()
|
|
204
204
|
return pd.Series(dict((c, dtypes_dict[c]) for c in file.columns))
|
|
205
205
|
|
|
206
|
-
def read_to_pandas(
|
|
207
|
-
self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
|
|
208
|
-
):
|
|
206
|
+
def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
|
|
209
207
|
file = fastparquet.ParquetFile(f)
|
|
210
208
|
df = file.to_pandas(columns, **kwargs)
|
|
211
209
|
if nrows is not None:
|
|
212
210
|
df = df.head(nrows)
|
|
213
|
-
if
|
|
211
|
+
if dtype_backend == "pyarrow":
|
|
214
212
|
df = df.astype(to_arrow_dtypes(df.dtypes).to_dict())
|
|
215
213
|
return df
|
|
216
214
|
|
|
@@ -265,29 +263,30 @@ class CudfEngine:
|
|
|
265
263
|
class DataFrameReadParquet(
|
|
266
264
|
IncrementalIndexDatasource,
|
|
267
265
|
ColumnPruneSupportedDataSourceMixin,
|
|
266
|
+
DtypeBackendCompatibleMixin,
|
|
268
267
|
):
|
|
269
268
|
_op_type_ = opcodes.READ_PARQUET
|
|
270
269
|
|
|
271
270
|
path = AnyField("path")
|
|
272
271
|
engine = StringField("engine")
|
|
273
272
|
columns = ListField("columns")
|
|
274
|
-
|
|
275
|
-
groups_as_chunks = BoolField("groups_as_chunks")
|
|
276
|
-
group_index = Int32Field("group_index")
|
|
277
|
-
read_kwargs = DictField("read_kwargs")
|
|
278
|
-
incremental_index = BoolField("incremental_index")
|
|
279
|
-
storage_options = DictField("storage_options")
|
|
280
|
-
is_partitioned = BoolField("is_partitioned")
|
|
281
|
-
merge_small_files = BoolField("merge_small_files")
|
|
282
|
-
merge_small_file_options = DictField("merge_small_file_options")
|
|
273
|
+
dtype_backend = StringField("dtype_backend", default=None)
|
|
274
|
+
groups_as_chunks = BoolField("groups_as_chunks", default=None)
|
|
275
|
+
group_index = Int32Field("group_index", default=None)
|
|
276
|
+
read_kwargs = DictField("read_kwargs", default=None)
|
|
277
|
+
incremental_index = BoolField("incremental_index", default=None)
|
|
278
|
+
storage_options = DictField("storage_options", default=None)
|
|
279
|
+
is_partitioned = BoolField("is_partitioned", default=None)
|
|
280
|
+
merge_small_files = BoolField("merge_small_files", default=None)
|
|
281
|
+
merge_small_file_options = DictField("merge_small_file_options", default=None)
|
|
283
282
|
# for chunk
|
|
284
283
|
partitions = DictField("partitions", default=None)
|
|
285
284
|
partition_keys = DictField("partition_keys", default=None)
|
|
286
285
|
num_group_rows = Int64Field("num_group_rows", default=None)
|
|
287
286
|
# as read meta may be too time-consuming when number of files is large,
|
|
288
287
|
# thus we only read first file to get row number and raw file size
|
|
289
|
-
first_chunk_row_num = Int64Field("first_chunk_row_num")
|
|
290
|
-
first_chunk_raw_bytes = Int64Field("first_chunk_raw_bytes")
|
|
288
|
+
first_chunk_row_num = Int64Field("first_chunk_row_num", default=None)
|
|
289
|
+
first_chunk_raw_bytes = Int64Field("first_chunk_raw_bytes", default=None)
|
|
291
290
|
|
|
292
291
|
def get_columns(self):
|
|
293
292
|
return self.columns
|
|
@@ -319,7 +318,7 @@ def read_parquet(
|
|
|
319
318
|
engine: str = "auto",
|
|
320
319
|
columns: list = None,
|
|
321
320
|
groups_as_chunks: bool = False,
|
|
322
|
-
|
|
321
|
+
dtype_backend: str = None,
|
|
323
322
|
incremental_index: bool = False,
|
|
324
323
|
storage_options: dict = None,
|
|
325
324
|
memory_scale: int = None,
|
|
@@ -356,8 +355,8 @@ def read_parquet(
|
|
|
356
355
|
incremental_index: bool, default False
|
|
357
356
|
If index_col not specified, ensure range index incremental,
|
|
358
357
|
gain a slightly better performance if setting False.
|
|
359
|
-
|
|
360
|
-
|
|
358
|
+
dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
|
|
359
|
+
Back-end data type applied to the resultant DataFrame (still experimental).
|
|
361
360
|
storage_options: dict, optional
|
|
362
361
|
Options for storage connection.
|
|
363
362
|
memory_scale: int, optional
|
|
@@ -401,9 +400,9 @@ def read_parquet(
|
|
|
401
400
|
if columns:
|
|
402
401
|
dtypes = dtypes[columns]
|
|
403
402
|
|
|
404
|
-
if
|
|
405
|
-
|
|
406
|
-
if
|
|
403
|
+
if dtype_backend is None:
|
|
404
|
+
dtype_backend = options.dataframe.dtype_backend
|
|
405
|
+
if dtype_backend == "pyarrow":
|
|
407
406
|
dtypes = to_arrow_dtypes(dtypes)
|
|
408
407
|
|
|
409
408
|
index_value = parse_index(pd.RangeIndex(-1))
|
|
@@ -413,7 +412,7 @@ def read_parquet(
|
|
|
413
412
|
engine=engine_type,
|
|
414
413
|
columns=columns,
|
|
415
414
|
groups_as_chunks=groups_as_chunks,
|
|
416
|
-
|
|
415
|
+
dtype_backend=dtype_backend,
|
|
417
416
|
read_kwargs=kwargs,
|
|
418
417
|
incremental_index=incremental_index,
|
|
419
418
|
storage_options=storage_options,
|
|
@@ -184,6 +184,23 @@ def test_from_tensor():
|
|
|
184
184
|
df = dataframe_from_1d_tileables(d)
|
|
185
185
|
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
|
|
186
186
|
|
|
187
|
+
# test axis parameter for dataframe_from_1d_tileables
|
|
188
|
+
d = OrderedDict(
|
|
189
|
+
[("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# axis=1 (default behavior) - keys become columns
|
|
193
|
+
df = dataframe_from_1d_tileables(d, axis=1)
|
|
194
|
+
assert df.shape == (4, 2)
|
|
195
|
+
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
|
|
196
|
+
pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
|
|
197
|
+
|
|
198
|
+
# axis=0 - keys become index (rows)
|
|
199
|
+
df = dataframe_from_1d_tileables(d, axis=0)
|
|
200
|
+
assert df.shape == (2, 4)
|
|
201
|
+
pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
|
|
202
|
+
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
|
|
203
|
+
|
|
187
204
|
series = series_from_tensor(mt.random.rand(4))
|
|
188
205
|
pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
|
|
189
206
|
|
|
@@ -207,6 +224,26 @@ def test_from_tensor():
|
|
|
207
224
|
with pytest.raises(ValueError):
|
|
208
225
|
dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
|
|
209
226
|
|
|
227
|
+
# 1-d tensors should have same shape
|
|
228
|
+
with pytest.raises(ValueError):
|
|
229
|
+
dataframe_from_1d_tileables(
|
|
230
|
+
OrderedDict(
|
|
231
|
+
[(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
|
|
232
|
+
)
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# index has wrong shape
|
|
236
|
+
with pytest.raises(ValueError):
|
|
237
|
+
dataframe_from_1d_tileables(
|
|
238
|
+
{0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# columns have wrong shape
|
|
242
|
+
with pytest.raises(ValueError):
|
|
243
|
+
dataframe_from_1d_tileables(
|
|
244
|
+
{0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
|
|
245
|
+
)
|
|
246
|
+
|
|
210
247
|
# index should be 1-d
|
|
211
248
|
with pytest.raises(ValueError):
|
|
212
249
|
dataframe_from_tensor(
|
|
@@ -12,14 +12,24 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from .direct import df_to_dict, series_to_dict, series_to_list, to_clipboard
|
|
16
|
+
from .to_csv import to_csv
|
|
15
17
|
from .to_odps import to_odps_table
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
def _install():
|
|
19
|
-
from ..core import DATAFRAME_TYPE
|
|
21
|
+
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
20
22
|
|
|
21
23
|
for t in DATAFRAME_TYPE:
|
|
24
|
+
t.to_clipboard = to_clipboard
|
|
25
|
+
t.to_csv = to_csv
|
|
26
|
+
t.to_dict = df_to_dict
|
|
22
27
|
t.to_odps_table = to_odps_table
|
|
28
|
+
for t in SERIES_TYPE:
|
|
29
|
+
t.to_clipboard = to_clipboard
|
|
30
|
+
t.to_csv = to_csv
|
|
31
|
+
t.to_dict = series_to_dict
|
|
32
|
+
t.to_list = series_to_list
|
|
23
33
|
|
|
24
34
|
|
|
25
35
|
_install()
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from ...utils import pd_release_version
|
|
16
|
+
|
|
17
|
+
_to_dict_has_index = pd_release_version[0] >= 2
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def df_to_dict(
|
|
21
|
+
df, orient="dict", into=dict, index=True, batch_size=10000, session=None
|
|
22
|
+
):
|
|
23
|
+
"""
|
|
24
|
+
Convert the DataFrame to a dictionary.
|
|
25
|
+
|
|
26
|
+
The type of the key-value pairs can be customized with the parameters
|
|
27
|
+
(see below).
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
|
|
32
|
+
Determines the type of the values of the dictionary.
|
|
33
|
+
|
|
34
|
+
- 'dict' (default) : dict like {column -> {index -> value}}
|
|
35
|
+
- 'list' : dict like {column -> [values]}
|
|
36
|
+
- 'series' : dict like {column -> Series(values)}
|
|
37
|
+
- 'split' : dict like
|
|
38
|
+
{'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
|
|
39
|
+
- 'tight' : dict like
|
|
40
|
+
{'index' -> [index], 'columns' -> [columns], 'data' -> [values],
|
|
41
|
+
'index_names' -> [index.names], 'column_names' -> [column.names]}
|
|
42
|
+
- 'records' : list like
|
|
43
|
+
[{column -> value}, ... , {column -> value}]
|
|
44
|
+
- 'index' : dict like {index -> {column -> value}}
|
|
45
|
+
|
|
46
|
+
into : class, default dict
|
|
47
|
+
The collections.abc.MutableMapping subclass used for all Mappings
|
|
48
|
+
in the return value. Can be the actual class or an empty
|
|
49
|
+
instance of the mapping type you want. If you want a
|
|
50
|
+
collections.defaultdict, you must pass it initialized.
|
|
51
|
+
|
|
52
|
+
index : bool, default True
|
|
53
|
+
Whether to include the index item (and index_names item if `orient`
|
|
54
|
+
is 'tight') in the returned dictionary. Can only be ``False``
|
|
55
|
+
when `orient` is 'split' or 'tight'.
|
|
56
|
+
|
|
57
|
+
Returns
|
|
58
|
+
-------
|
|
59
|
+
dict, list or collections.abc.MutableMapping
|
|
60
|
+
Return a collections.abc.MutableMapping object representing the
|
|
61
|
+
DataFrame. The resulting transformation depends on the `orient`
|
|
62
|
+
parameter.
|
|
63
|
+
|
|
64
|
+
See Also
|
|
65
|
+
--------
|
|
66
|
+
DataFrame.from_dict: Create a DataFrame from a dictionary.
|
|
67
|
+
DataFrame.to_json: Convert a DataFrame to JSON format.
|
|
68
|
+
|
|
69
|
+
Examples
|
|
70
|
+
--------
|
|
71
|
+
>>> import maxframe.dataframe as md
|
|
72
|
+
>>> df = md.DataFrame({'col1': [1, 2],
|
|
73
|
+
... 'col2': [0.5, 0.75]},
|
|
74
|
+
... index=['row1', 'row2'])
|
|
75
|
+
>>> df.execute()
|
|
76
|
+
col1 col2
|
|
77
|
+
row1 1 0.50
|
|
78
|
+
row2 2 0.75
|
|
79
|
+
>>> df.to_dict()
|
|
80
|
+
{'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
|
|
81
|
+
|
|
82
|
+
You can specify the return orientation.
|
|
83
|
+
|
|
84
|
+
>>> df.to_dict('series')
|
|
85
|
+
{'col1': row1 1
|
|
86
|
+
row2 2
|
|
87
|
+
Name: col1, dtype: int64,
|
|
88
|
+
'col2': row1 0.50
|
|
89
|
+
row2 0.75
|
|
90
|
+
Name: col2, dtype: float64}
|
|
91
|
+
|
|
92
|
+
>>> df.to_dict('split')
|
|
93
|
+
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
|
|
94
|
+
'data': [[1, 0.5], [2, 0.75]]}
|
|
95
|
+
|
|
96
|
+
>>> df.to_dict('records')
|
|
97
|
+
[{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]
|
|
98
|
+
|
|
99
|
+
>>> df.to_dict('index')
|
|
100
|
+
{'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
|
|
101
|
+
|
|
102
|
+
>>> df.to_dict('tight')
|
|
103
|
+
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
|
|
104
|
+
'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
|
|
105
|
+
|
|
106
|
+
You can also specify the mapping type.
|
|
107
|
+
|
|
108
|
+
>>> from collections import OrderedDict, defaultdict
|
|
109
|
+
>>> df.to_dict(into=OrderedDict)
|
|
110
|
+
OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
|
|
111
|
+
('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])
|
|
112
|
+
|
|
113
|
+
If you want a `defaultdict`, you need to initialize it:
|
|
114
|
+
|
|
115
|
+
>>> dd = defaultdict(list)
|
|
116
|
+
>>> df.to_dict('records', into=dd)
|
|
117
|
+
[defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
|
|
118
|
+
defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
|
|
119
|
+
"""
|
|
120
|
+
fetch_kwargs = dict(batch_size=batch_size)
|
|
121
|
+
to_dict_kw = dict(orient=orient, into=into)
|
|
122
|
+
if _to_dict_has_index:
|
|
123
|
+
to_dict_kw["index"] = index
|
|
124
|
+
return df.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
|
|
125
|
+
**to_dict_kw
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def series_to_dict(series, into=dict, batch_size=10000, session=None):
|
|
130
|
+
"""
|
|
131
|
+
Convert Series to {label -> value} dict or dict-like object.
|
|
132
|
+
|
|
133
|
+
Parameters
|
|
134
|
+
----------
|
|
135
|
+
into : class, default dict
|
|
136
|
+
The collections.abc.Mapping subclass to use as the return
|
|
137
|
+
object. Can be the actual class or an empty
|
|
138
|
+
instance of the mapping type you want. If you want a
|
|
139
|
+
collections.defaultdict, you must pass it initialized.
|
|
140
|
+
|
|
141
|
+
Returns
|
|
142
|
+
-------
|
|
143
|
+
collections.abc.Mapping
|
|
144
|
+
Key-value representation of Series.
|
|
145
|
+
|
|
146
|
+
Examples
|
|
147
|
+
--------
|
|
148
|
+
>>> import maxframe.dataframe as md
|
|
149
|
+
>>> s = md.Series([1, 2, 3, 4])
|
|
150
|
+
>>> s.to_dict()
|
|
151
|
+
{0: 1, 1: 2, 2: 3, 3: 4}
|
|
152
|
+
>>> from collections import OrderedDict, defaultdict
|
|
153
|
+
>>> s.to_dict(OrderedDict)
|
|
154
|
+
OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
|
|
155
|
+
>>> dd = defaultdict(list)
|
|
156
|
+
>>> s.to_dict(dd)
|
|
157
|
+
defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
|
|
158
|
+
"""
|
|
159
|
+
fetch_kwargs = dict(batch_size=batch_size)
|
|
160
|
+
return series.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
|
|
161
|
+
into=into
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def series_to_list(series, batch_size=10000, session=None):
|
|
166
|
+
"""
|
|
167
|
+
Return a list of the values.
|
|
168
|
+
|
|
169
|
+
These are each a scalar type, which is a Python scalar
|
|
170
|
+
(for str, int, float) or a pandas scalar
|
|
171
|
+
(for Timestamp/Timedelta/Interval/Period)
|
|
172
|
+
|
|
173
|
+
Returns
|
|
174
|
+
-------
|
|
175
|
+
list
|
|
176
|
+
|
|
177
|
+
See Also
|
|
178
|
+
--------
|
|
179
|
+
numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
|
|
180
|
+
nested list of Python scalars.
|
|
181
|
+
|
|
182
|
+
Examples
|
|
183
|
+
--------
|
|
184
|
+
For Series
|
|
185
|
+
|
|
186
|
+
>>> import maxframe.dataframe as md
|
|
187
|
+
>>> s = md.Series([1, 2, 3])
|
|
188
|
+
>>> s.to_list()
|
|
189
|
+
[1, 2, 3]
|
|
190
|
+
|
|
191
|
+
For Index:
|
|
192
|
+
|
|
193
|
+
>>> idx = md.Index([1, 2, 3])
|
|
194
|
+
>>> idx.execute()
|
|
195
|
+
Index([1, 2, 3], dtype='int64')
|
|
196
|
+
|
|
197
|
+
>>> idx.to_list()
|
|
198
|
+
[1, 2, 3]
|
|
199
|
+
"""
|
|
200
|
+
fetch_kwargs = dict(batch_size=batch_size)
|
|
201
|
+
return series.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_list()
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def to_clipboard(
|
|
205
|
+
obj, *, excel=True, sep=None, batch_size=10000, session=None, **kwargs
|
|
206
|
+
):
|
|
207
|
+
"""
|
|
208
|
+
Copy object to the system clipboard.
|
|
209
|
+
|
|
210
|
+
Write a text representation of object to the system clipboard.
|
|
211
|
+
This can be pasted into Excel, for example.
|
|
212
|
+
|
|
213
|
+
Parameters
|
|
214
|
+
----------
|
|
215
|
+
excel : bool, default True
|
|
216
|
+
Produce output in a csv format for easy pasting into excel.
|
|
217
|
+
|
|
218
|
+
- True, use the provided separator for csv pasting.
|
|
219
|
+
- False, write a string representation of the object to the clipboard.
|
|
220
|
+
|
|
221
|
+
sep : str, default ``'\t'``
|
|
222
|
+
Field delimiter.
|
|
223
|
+
**kwargs
|
|
224
|
+
These parameters will be passed to DataFrame.to_csv.
|
|
225
|
+
|
|
226
|
+
See Also
|
|
227
|
+
--------
|
|
228
|
+
DataFrame.to_csv : Write a DataFrame to a comma-separated values
|
|
229
|
+
(csv) file.
|
|
230
|
+
read_clipboard : Read text from clipboard and pass to read_csv.
|
|
231
|
+
|
|
232
|
+
Notes
|
|
233
|
+
-----
|
|
234
|
+
Requirements for your platform.
|
|
235
|
+
|
|
236
|
+
- Linux : `xclip`, or `xsel` (with `PyQt4` modules)
|
|
237
|
+
- Windows : none
|
|
238
|
+
- macOS : none
|
|
239
|
+
|
|
240
|
+
This method uses the processes developed for the package `pyperclip`. A
|
|
241
|
+
solution to render any output string format is given in the examples.
|
|
242
|
+
|
|
243
|
+
Examples
|
|
244
|
+
--------
|
|
245
|
+
Copy the contents of a DataFrame to the clipboard.
|
|
246
|
+
|
|
247
|
+
>>> import maxframe.dataframe as md
|
|
248
|
+
>>> df = md.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
|
|
249
|
+
|
|
250
|
+
>>> df.to_clipboard(sep=',') # doctest: +SKIP
|
|
251
|
+
... # Wrote the following to the system clipboard:
|
|
252
|
+
... # ,A,B,C
|
|
253
|
+
... # 0,1,2,3
|
|
254
|
+
... # 1,4,5,6
|
|
255
|
+
|
|
256
|
+
We can omit the index by passing the keyword `index` and setting
|
|
257
|
+
it to false.
|
|
258
|
+
|
|
259
|
+
>>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
|
|
260
|
+
... # Wrote the following to the system clipboard:
|
|
261
|
+
... # A,B,C
|
|
262
|
+
... # 1,2,3
|
|
263
|
+
... # 4,5,6
|
|
264
|
+
"""
|
|
265
|
+
fetch_kwargs = dict(batch_size=batch_size)
|
|
266
|
+
return obj.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_clipboard(
|
|
267
|
+
excel=excel, sep=sep, **kwargs
|
|
268
|
+
)
|