maxframe 2.0.0b1__cp38-cp38-macosx_10_9_universal2.whl → 2.2.0__cp38-cp38-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/_utils.pyx +14 -1
- maxframe/codegen/core.py +6 -6
- maxframe/codegen/spe/core.py +1 -1
- maxframe/codegen/spe/dataframe/__init__.py +1 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
- maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
- maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
- maxframe/codegen/spe/dataframe/groupby.py +88 -0
- maxframe/codegen/spe/dataframe/indexing.py +99 -4
- maxframe/codegen/spe/dataframe/merge.py +34 -1
- maxframe/codegen/spe/dataframe/misc.py +9 -33
- maxframe/codegen/spe/dataframe/reduction.py +14 -9
- maxframe/codegen/spe/dataframe/reshape.py +46 -0
- maxframe/codegen/spe/dataframe/sort.py +30 -17
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
- maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
- maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
- maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
- maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
- maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
- maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
- maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
- maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
- maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
- maxframe/codegen/spe/tensor/__init__.py +3 -0
- maxframe/codegen/spe/tensor/fft.py +74 -0
- maxframe/codegen/spe/tensor/linalg.py +29 -2
- maxframe/codegen/spe/tensor/misc.py +79 -25
- maxframe/codegen/spe/tensor/spatial.py +45 -0
- maxframe/codegen/spe/tensor/statistics.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
- maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
- maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
- maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
- maxframe/codegen/spe/utils.py +2 -0
- maxframe/config/config.py +70 -9
- maxframe/config/tests/test_validators.py +13 -1
- maxframe/config/validators.py +49 -0
- maxframe/conftest.py +44 -17
- maxframe/core/accessor.py +2 -2
- maxframe/core/entity/core.py +5 -0
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/graph/entity.py +1 -2
- maxframe/core/operator/base.py +9 -2
- maxframe/core/operator/core.py +10 -2
- maxframe/core/operator/utils.py +13 -0
- maxframe/dataframe/__init__.py +10 -3
- maxframe/dataframe/accessors/__init__.py +1 -1
- maxframe/dataframe/accessors/compat.py +45 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
- maxframe/dataframe/accessors/dict_/contains.py +7 -16
- maxframe/dataframe/accessors/dict_/core.py +48 -0
- maxframe/dataframe/accessors/dict_/getitem.py +17 -21
- maxframe/dataframe/accessors/dict_/length.py +7 -16
- maxframe/dataframe/accessors/dict_/remove.py +6 -18
- maxframe/dataframe/accessors/dict_/setitem.py +8 -18
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
- maxframe/dataframe/accessors/list_/__init__.py +2 -2
- maxframe/dataframe/accessors/list_/core.py +48 -0
- maxframe/dataframe/accessors/list_/getitem.py +12 -19
- maxframe/dataframe/accessors/list_/length.py +7 -16
- maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
- maxframe/dataframe/accessors/string_/__init__.py +4 -1
- maxframe/dataframe/accessors/struct_/__init__.py +37 -0
- maxframe/dataframe/accessors/struct_/accessor.py +39 -0
- maxframe/dataframe/accessors/struct_/core.py +43 -0
- maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
- maxframe/dataframe/accessors/struct_/field.py +123 -0
- maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
- maxframe/dataframe/arithmetic/__init__.py +14 -4
- maxframe/dataframe/arithmetic/between.py +106 -0
- maxframe/dataframe/arithmetic/dot.py +237 -0
- maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
- maxframe/dataframe/core.py +63 -118
- maxframe/dataframe/datasource/__init__.py +18 -0
- maxframe/dataframe/datasource/from_dict.py +124 -0
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +77 -0
- maxframe/dataframe/datasource/from_tensor.py +109 -41
- maxframe/dataframe/datasource/read_csv.py +2 -3
- maxframe/dataframe/datasource/read_odps_query.py +76 -16
- maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
- maxframe/dataframe/datastore/__init__.py +5 -1
- maxframe/dataframe/datastore/to_csv.py +29 -41
- maxframe/dataframe/datastore/to_odps.py +30 -4
- maxframe/dataframe/extensions/__init__.py +20 -4
- maxframe/dataframe/extensions/apply_chunk.py +32 -6
- maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
- maxframe/dataframe/extensions/collect_kv.py +126 -0
- maxframe/dataframe/extensions/extract_kv.py +177 -0
- maxframe/dataframe/extensions/map_reduce.py +263 -0
- maxframe/dataframe/extensions/rebalance.py +62 -0
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
- maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
- maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
- maxframe/dataframe/groupby/__init__.py +12 -1
- maxframe/dataframe/groupby/aggregation.py +78 -45
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/apply_chunk.py +18 -2
- maxframe/dataframe/groupby/core.py +96 -12
- maxframe/dataframe/groupby/cum.py +4 -25
- maxframe/dataframe/groupby/expanding.py +264 -0
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +12 -5
- maxframe/dataframe/groupby/head.py +11 -1
- maxframe/dataframe/groupby/rank.py +136 -0
- maxframe/dataframe/groupby/rolling.py +206 -0
- maxframe/dataframe/groupby/shift.py +114 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
- maxframe/dataframe/indexing/__init__.py +20 -1
- maxframe/dataframe/indexing/droplevel.py +195 -0
- maxframe/dataframe/indexing/filter.py +169 -0
- maxframe/dataframe/indexing/get_level_values.py +76 -0
- maxframe/dataframe/indexing/iat.py +45 -0
- maxframe/dataframe/indexing/iloc.py +152 -12
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +287 -7
- maxframe/dataframe/indexing/reindex.py +14 -5
- maxframe/dataframe/indexing/rename.py +6 -0
- maxframe/dataframe/indexing/rename_axis.py +2 -2
- maxframe/dataframe/indexing/reorder_levels.py +143 -0
- maxframe/dataframe/indexing/reset_index.py +33 -6
- maxframe/dataframe/indexing/sample.py +8 -0
- maxframe/dataframe/indexing/setitem.py +3 -3
- maxframe/dataframe/indexing/swaplevel.py +185 -0
- maxframe/dataframe/indexing/take.py +99 -0
- maxframe/dataframe/indexing/truncate.py +140 -0
- maxframe/dataframe/indexing/where.py +0 -11
- maxframe/dataframe/indexing/xs.py +148 -0
- maxframe/dataframe/merge/__init__.py +12 -1
- maxframe/dataframe/merge/append.py +97 -98
- maxframe/dataframe/merge/combine_first.py +120 -0
- maxframe/dataframe/merge/compare.py +387 -0
- maxframe/dataframe/merge/concat.py +183 -0
- maxframe/dataframe/merge/update.py +271 -0
- maxframe/dataframe/misc/__init__.py +16 -10
- maxframe/dataframe/misc/_duplicate.py +10 -4
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/check_unique.py +51 -0
- maxframe/dataframe/misc/clip.py +145 -0
- maxframe/dataframe/misc/describe.py +175 -9
- maxframe/dataframe/misc/drop_duplicates.py +2 -2
- maxframe/dataframe/misc/duplicated.py +2 -2
- maxframe/dataframe/misc/get_dummies.py +5 -1
- maxframe/dataframe/misc/isin.py +2 -2
- maxframe/dataframe/misc/map.py +94 -0
- maxframe/dataframe/misc/tests/test_misc.py +13 -2
- maxframe/dataframe/misc/to_numeric.py +3 -0
- maxframe/dataframe/misc/transform.py +12 -5
- maxframe/dataframe/misc/transpose.py +13 -1
- maxframe/dataframe/misc/valid_index.py +115 -0
- maxframe/dataframe/misc/value_counts.py +38 -4
- maxframe/dataframe/missing/checkna.py +13 -6
- maxframe/dataframe/missing/dropna.py +5 -0
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +7 -4
- maxframe/dataframe/reduction/__init__.py +29 -15
- maxframe/dataframe/reduction/aggregation.py +38 -9
- maxframe/dataframe/reduction/all.py +2 -2
- maxframe/dataframe/reduction/any.py +2 -2
- maxframe/dataframe/reduction/argmax.py +100 -0
- maxframe/dataframe/reduction/argmin.py +100 -0
- maxframe/dataframe/reduction/core.py +65 -18
- maxframe/dataframe/reduction/count.py +13 -9
- maxframe/dataframe/reduction/cov.py +166 -0
- maxframe/dataframe/reduction/cummax.py +2 -2
- maxframe/dataframe/reduction/cummin.py +2 -2
- maxframe/dataframe/reduction/cumprod.py +2 -2
- maxframe/dataframe/reduction/cumsum.py +2 -2
- maxframe/dataframe/reduction/custom_reduction.py +2 -2
- maxframe/dataframe/reduction/idxmax.py +185 -0
- maxframe/dataframe/reduction/idxmin.py +185 -0
- maxframe/dataframe/reduction/kurtosis.py +37 -30
- maxframe/dataframe/reduction/max.py +2 -2
- maxframe/dataframe/reduction/mean.py +9 -7
- maxframe/dataframe/reduction/median.py +2 -2
- maxframe/dataframe/reduction/min.py +2 -2
- maxframe/dataframe/reduction/nunique.py +9 -8
- maxframe/dataframe/reduction/prod.py +18 -13
- maxframe/dataframe/reduction/reduction_size.py +2 -2
- maxframe/dataframe/reduction/sem.py +13 -9
- maxframe/dataframe/reduction/skew.py +31 -27
- maxframe/dataframe/reduction/str_concat.py +10 -7
- maxframe/dataframe/reduction/sum.py +18 -14
- maxframe/dataframe/reduction/unique.py +20 -3
- maxframe/dataframe/reduction/var.py +16 -12
- maxframe/dataframe/reshape/__init__.py +38 -0
- maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
- maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
- maxframe/dataframe/reshape/unstack.py +114 -0
- maxframe/dataframe/sort/__init__.py +8 -0
- maxframe/dataframe/sort/argsort.py +62 -0
- maxframe/dataframe/sort/core.py +1 -0
- maxframe/dataframe/sort/nlargest.py +238 -0
- maxframe/dataframe/sort/nsmallest.py +228 -0
- maxframe/dataframe/statistics/__init__.py +3 -3
- maxframe/dataframe/statistics/corr.py +1 -0
- maxframe/dataframe/statistics/quantile.py +2 -2
- maxframe/dataframe/tests/test_typing.py +104 -0
- maxframe/dataframe/tests/test_utils.py +66 -2
- maxframe/dataframe/typing_.py +185 -0
- maxframe/dataframe/utils.py +95 -26
- maxframe/dataframe/window/aggregation.py +8 -4
- maxframe/dataframe/window/core.py +14 -1
- maxframe/dataframe/window/ewm.py +1 -3
- maxframe/dataframe/window/expanding.py +37 -35
- maxframe/dataframe/window/rolling.py +49 -39
- maxframe/dataframe/window/tests/test_expanding.py +1 -7
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +7 -4
- maxframe/errors.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +4 -2
- maxframe/io/odpsio/schema.py +9 -3
- maxframe/io/odpsio/tableio.py +7 -2
- maxframe/io/odpsio/tests/test_schema.py +198 -83
- maxframe/io/odpsio/tests/test_volumeio.py +4 -15
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +10 -2
- maxframe/learn/cluster/__init__.py +15 -0
- maxframe/learn/cluster/_kmeans.py +782 -0
- maxframe/learn/contrib/llm/core.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +87 -1
- maxframe/learn/contrib/xgboost/train.py +5 -2
- maxframe/learn/core.py +66 -0
- maxframe/learn/linear_model/_base.py +58 -1
- maxframe/learn/linear_model/_lin_reg.py +1 -1
- maxframe/learn/metrics/__init__.py +6 -0
- maxframe/learn/metrics/_classification.py +145 -0
- maxframe/learn/metrics/_ranking.py +477 -0
- maxframe/learn/metrics/_scorer.py +60 -0
- maxframe/learn/metrics/pairwise/__init__.py +21 -0
- maxframe/learn/metrics/pairwise/core.py +77 -0
- maxframe/learn/metrics/pairwise/cosine.py +115 -0
- maxframe/learn/metrics/pairwise/euclidean.py +176 -0
- maxframe/learn/metrics/pairwise/haversine.py +96 -0
- maxframe/learn/metrics/pairwise/manhattan.py +80 -0
- maxframe/learn/metrics/pairwise/pairwise.py +127 -0
- maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
- maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
- maxframe/learn/metrics/tests/__init__.py +13 -0
- maxframe/learn/metrics/tests/test_scorer.py +26 -0
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/checks.py +1 -2
- maxframe/learn/utils/core.py +59 -0
- maxframe/learn/utils/extmath.py +37 -0
- maxframe/learn/utils/odpsio.py +193 -0
- maxframe/learn/utils/validation.py +2 -2
- maxframe/lib/compat.py +40 -0
- maxframe/lib/dtypes_extension/__init__.py +16 -1
- maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
- maxframe/lib/dtypes_extension/blob.py +304 -0
- maxframe/lib/dtypes_extension/dtypes.py +40 -0
- maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
- maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
- maxframe/lib/filesystem/_oss_lib/common.py +122 -50
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/oss.py +115 -46
- maxframe/lib/filesystem/tests/test_oss.py +74 -36
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/lib/wrapped_pickle.py +10 -0
- maxframe/opcodes.py +33 -15
- maxframe/protocol.py +12 -0
- maxframe/serialization/__init__.py +11 -2
- maxframe/serialization/arrow.py +38 -13
- maxframe/serialization/blob.py +32 -0
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/core.pyx +39 -1
- maxframe/serialization/exception.py +2 -4
- maxframe/serialization/numpy.py +11 -0
- maxframe/serialization/pandas.py +46 -9
- maxframe/serialization/serializables/core.py +2 -2
- maxframe/serialization/tests/test_serial.py +29 -2
- maxframe/tensor/__init__.py +38 -8
- maxframe/tensor/arithmetic/__init__.py +19 -10
- maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/tensor/core.py +3 -2
- maxframe/tensor/datasource/tests/test_datasource.py +2 -1
- maxframe/tensor/extensions/__init__.py +2 -0
- maxframe/tensor/extensions/apply_chunk.py +3 -3
- maxframe/tensor/extensions/rebalance.py +65 -0
- maxframe/tensor/fft/__init__.py +32 -0
- maxframe/tensor/fft/core.py +168 -0
- maxframe/tensor/fft/fft.py +112 -0
- maxframe/tensor/fft/fft2.py +118 -0
- maxframe/tensor/fft/fftfreq.py +80 -0
- maxframe/tensor/fft/fftn.py +123 -0
- maxframe/tensor/fft/fftshift.py +79 -0
- maxframe/tensor/fft/hfft.py +112 -0
- maxframe/tensor/fft/ifft.py +114 -0
- maxframe/tensor/fft/ifft2.py +115 -0
- maxframe/tensor/fft/ifftn.py +123 -0
- maxframe/tensor/fft/ifftshift.py +73 -0
- maxframe/tensor/fft/ihfft.py +93 -0
- maxframe/tensor/fft/irfft.py +118 -0
- maxframe/tensor/fft/irfft2.py +62 -0
- maxframe/tensor/fft/irfftn.py +114 -0
- maxframe/tensor/fft/rfft.py +116 -0
- maxframe/tensor/fft/rfft2.py +63 -0
- maxframe/tensor/fft/rfftfreq.py +87 -0
- maxframe/tensor/fft/rfftn.py +113 -0
- maxframe/tensor/indexing/fill_diagonal.py +1 -7
- maxframe/tensor/linalg/__init__.py +7 -0
- maxframe/tensor/linalg/_einsumfunc.py +1025 -0
- maxframe/tensor/linalg/cholesky.py +117 -0
- maxframe/tensor/linalg/einsum.py +339 -0
- maxframe/tensor/linalg/lstsq.py +100 -0
- maxframe/tensor/linalg/matrix_norm.py +75 -0
- maxframe/tensor/linalg/norm.py +249 -0
- maxframe/tensor/linalg/solve.py +72 -0
- maxframe/tensor/linalg/solve_triangular.py +2 -2
- maxframe/tensor/linalg/vector_norm.py +113 -0
- maxframe/tensor/misc/__init__.py +24 -1
- maxframe/tensor/misc/argwhere.py +72 -0
- maxframe/tensor/misc/array_split.py +46 -0
- maxframe/tensor/misc/broadcast_arrays.py +57 -0
- maxframe/tensor/misc/copyto.py +130 -0
- maxframe/tensor/misc/delete.py +104 -0
- maxframe/tensor/misc/dsplit.py +68 -0
- maxframe/tensor/misc/ediff1d.py +74 -0
- maxframe/tensor/misc/expand_dims.py +85 -0
- maxframe/tensor/misc/flip.py +90 -0
- maxframe/tensor/misc/fliplr.py +64 -0
- maxframe/tensor/misc/flipud.py +68 -0
- maxframe/tensor/misc/hsplit.py +85 -0
- maxframe/tensor/misc/insert.py +139 -0
- maxframe/tensor/misc/moveaxis.py +83 -0
- maxframe/tensor/misc/result_type.py +88 -0
- maxframe/tensor/misc/roll.py +124 -0
- maxframe/tensor/misc/rollaxis.py +77 -0
- maxframe/tensor/misc/shape.py +89 -0
- maxframe/tensor/misc/split.py +190 -0
- maxframe/tensor/misc/tile.py +109 -0
- maxframe/tensor/misc/vsplit.py +74 -0
- maxframe/tensor/reduction/array_equal.py +2 -1
- maxframe/tensor/sort/__init__.py +2 -0
- maxframe/tensor/sort/argpartition.py +98 -0
- maxframe/tensor/sort/partition.py +228 -0
- maxframe/tensor/spatial/__init__.py +15 -0
- maxframe/tensor/spatial/distance/__init__.py +17 -0
- maxframe/tensor/spatial/distance/cdist.py +421 -0
- maxframe/tensor/spatial/distance/pdist.py +398 -0
- maxframe/tensor/spatial/distance/squareform.py +153 -0
- maxframe/tensor/special/__init__.py +159 -21
- maxframe/tensor/special/airy.py +55 -0
- maxframe/tensor/special/bessel.py +199 -0
- maxframe/tensor/special/core.py +65 -4
- maxframe/tensor/special/ellip_func_integrals.py +155 -0
- maxframe/tensor/special/ellip_harm.py +55 -0
- maxframe/tensor/special/err_fresnel.py +223 -0
- maxframe/tensor/special/gamma_funcs.py +303 -0
- maxframe/tensor/special/hypergeometric_funcs.py +69 -0
- maxframe/tensor/special/info_theory.py +189 -0
- maxframe/tensor/special/misc.py +21 -0
- maxframe/tensor/statistics/__init__.py +6 -0
- maxframe/tensor/statistics/corrcoef.py +77 -0
- maxframe/tensor/statistics/cov.py +222 -0
- maxframe/tensor/statistics/digitize.py +126 -0
- maxframe/tensor/statistics/histogram.py +520 -0
- maxframe/tensor/statistics/median.py +85 -0
- maxframe/tensor/statistics/ptp.py +89 -0
- maxframe/tensor/utils.py +3 -3
- maxframe/tests/test_utils.py +43 -1
- maxframe/tests/utils.py +3 -13
- maxframe/typing_.py +2 -0
- maxframe/udf.py +27 -2
- maxframe/utils.py +193 -19
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
- maxframe_client/fetcher.py +35 -4
- maxframe_client/session/odps.py +7 -2
- maxframe_client/tests/test_fetcher.py +76 -3
- maxframe_client/tests/test_session.py +4 -1
- /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
- /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
- {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
def series_from_index(ind, index=None, name=None):
|
|
54
|
-
name = name or ind.name
|
|
54
|
+
name = name or ind.name
|
|
55
55
|
if index is not None:
|
|
56
56
|
index = Index(index)
|
|
57
57
|
op = SeriesFromIndex(input_=ind, index=index, name=name)
|
|
@@ -77,6 +77,83 @@ def from_records(
|
|
|
77
77
|
sparse=False,
|
|
78
78
|
**kw
|
|
79
79
|
):
|
|
80
|
+
"""
|
|
81
|
+
Convert structured or record ndarray to DataFrame.
|
|
82
|
+
|
|
83
|
+
Creates a DataFrame object from a structured ndarray, sequence of
|
|
84
|
+
tuples or dicts, or DataFrame.
|
|
85
|
+
|
|
86
|
+
Parameters
|
|
87
|
+
----------
|
|
88
|
+
data : structured ndarray, sequence of tuples or dicts, or DataFrame
|
|
89
|
+
Structured input data.
|
|
90
|
+
|
|
91
|
+
.. deprecated:: 2.1.0
|
|
92
|
+
Passing a DataFrame is deprecated.
|
|
93
|
+
index : str, list of fields, array-like
|
|
94
|
+
Field of array to use as the index, alternately a specific set of
|
|
95
|
+
input labels to use.
|
|
96
|
+
exclude : sequence, default None
|
|
97
|
+
Columns or fields to exclude.
|
|
98
|
+
columns : sequence, default None
|
|
99
|
+
Column names to use. If the passed data do not have names
|
|
100
|
+
associated with them, this argument provides names for the
|
|
101
|
+
columns. Otherwise this argument indicates the order of the columns
|
|
102
|
+
in the result (any names not found in the data will become all-NA
|
|
103
|
+
columns).
|
|
104
|
+
coerce_float : bool, default False
|
|
105
|
+
Attempt to convert values of non-string, non-numeric objects (like
|
|
106
|
+
decimal.Decimal) to floating point, useful for SQL result sets.
|
|
107
|
+
nrows : int, default None
|
|
108
|
+
Number of rows to read if data is an iterator.
|
|
109
|
+
|
|
110
|
+
Returns
|
|
111
|
+
-------
|
|
112
|
+
DataFrame
|
|
113
|
+
|
|
114
|
+
See Also
|
|
115
|
+
--------
|
|
116
|
+
DataFrame.from_dict : DataFrame from dict of array-like or dicts.
|
|
117
|
+
DataFrame : DataFrame object creation using constructor.
|
|
118
|
+
|
|
119
|
+
Examples
|
|
120
|
+
--------
|
|
121
|
+
Data can be provided as a structured ndarray:
|
|
122
|
+
|
|
123
|
+
>>> import maxframe.tensor as mt
|
|
124
|
+
>>> import maxframe.dataframe as md
|
|
125
|
+
>>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
|
|
126
|
+
... dtype=[('col_1', 'i4'), ('col_2', 'U1')])
|
|
127
|
+
>>> md.DataFrame.from_records(data).execute()
|
|
128
|
+
col_1 col_2
|
|
129
|
+
0 3 a
|
|
130
|
+
1 2 b
|
|
131
|
+
2 1 c
|
|
132
|
+
3 0 d
|
|
133
|
+
|
|
134
|
+
Data can be provided as a list of dicts:
|
|
135
|
+
|
|
136
|
+
>>> data = [{'col_1': 3, 'col_2': 'a'},
|
|
137
|
+
... {'col_1': 2, 'col_2': 'b'},
|
|
138
|
+
... {'col_1': 1, 'col_2': 'c'},
|
|
139
|
+
... {'col_1': 0, 'col_2': 'd'}]
|
|
140
|
+
>>> md.DataFrame.from_records(data).execute()
|
|
141
|
+
col_1 col_2
|
|
142
|
+
0 3 a
|
|
143
|
+
1 2 b
|
|
144
|
+
2 1 c
|
|
145
|
+
3 0 d
|
|
146
|
+
|
|
147
|
+
Data can be provided as a list of tuples with corresponding columns:
|
|
148
|
+
|
|
149
|
+
>>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
|
|
150
|
+
>>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
|
|
151
|
+
col_1 col_2
|
|
152
|
+
0 3 a
|
|
153
|
+
1 2 b
|
|
154
|
+
2 1 c
|
|
155
|
+
3 0 d
|
|
156
|
+
"""
|
|
80
157
|
if isinstance(data, np.ndarray):
|
|
81
158
|
from .dataframe import from_pandas
|
|
82
159
|
|
|
@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
39
39
|
input = AnyField("input")
|
|
40
40
|
index = AnyField("index")
|
|
41
41
|
columns = AnyField("columns")
|
|
42
|
+
axis = AnyField("axis")
|
|
42
43
|
|
|
43
44
|
def __init__(self, *args, **kwargs):
|
|
44
45
|
kwargs["_output_types"] = [OutputType.dataframe]
|
|
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
120
121
|
if isinstance(tileable, ENTITY_TYPE):
|
|
121
122
|
tileables.append(tileable)
|
|
122
123
|
|
|
123
|
-
if
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
index_size = index.shape[0]
|
|
124
|
+
if self.axis == 0:
|
|
125
|
+
if index is not None:
|
|
126
|
+
raise NotImplementedError("Cannot accept index when axis=0")
|
|
127
127
|
else:
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
128
|
+
index = pd.Index(list(input_1d_tileables.keys()))
|
|
129
|
+
index_value = parse_index(index, store_data=True)
|
|
130
|
+
self.index = index
|
|
131
|
+
|
|
132
|
+
if columns is not None:
|
|
133
|
+
tileable_size = tileables[0].shape[0] if tileables else 0
|
|
134
|
+
if not isinstance(columns, pd.Index):
|
|
135
|
+
columns = self.columns = pd.Index(columns)
|
|
136
|
+
column_size = columns.shape[0]
|
|
137
|
+
if (
|
|
138
|
+
not pd.isna(tileable_size)
|
|
139
|
+
and not pd.isna(column_size)
|
|
140
|
+
and tileable_size != column_size
|
|
141
|
+
):
|
|
142
|
+
raise ValueError(
|
|
143
|
+
f"columns {columns} should have the same shape "
|
|
144
|
+
f"with tensor: {tileable_size}"
|
|
145
|
+
)
|
|
146
|
+
columns_value = self._process_index(columns, tileables)
|
|
147
|
+
else:
|
|
148
|
+
if not tileables or np.isnan(tileables[0].shape[0]):
|
|
149
|
+
columns = columns_value = None
|
|
150
|
+
else:
|
|
151
|
+
columns = pd.RangeIndex(0, tileables[0].shape[0])
|
|
152
|
+
columns_value = parse_index(columns, store_data=True)
|
|
153
|
+
self.columns = columns
|
|
154
|
+
|
|
155
|
+
shape = (len(input_1d_tileables), shape[0] if shape else 0)
|
|
139
156
|
else:
|
|
140
|
-
if
|
|
141
|
-
|
|
157
|
+
if index is not None:
|
|
158
|
+
tileable_size = tileables[0].shape[0] if tileables else 0
|
|
159
|
+
if hasattr(index, "shape"):
|
|
160
|
+
index_size = index.shape[0]
|
|
161
|
+
else:
|
|
162
|
+
index_size = len(index)
|
|
163
|
+
if (
|
|
164
|
+
not pd.isna(tileable_size)
|
|
165
|
+
and not pd.isna(index_size)
|
|
166
|
+
and tileable_size != index_size
|
|
167
|
+
):
|
|
168
|
+
raise ValueError(
|
|
169
|
+
f"index {index} should have the same shape "
|
|
170
|
+
f"with tensor: {tileable_size}"
|
|
171
|
+
)
|
|
172
|
+
index_value = self._process_index(index, tileables)
|
|
142
173
|
else:
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
174
|
+
if not tileables or np.isnan(tileables[0].shape[0]):
|
|
175
|
+
index = pd.RangeIndex(0)
|
|
176
|
+
else:
|
|
177
|
+
index = pd.RangeIndex(0, tileables[0].shape[0])
|
|
178
|
+
self.index = index
|
|
179
|
+
index_value = parse_index(index)
|
|
146
180
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
181
|
+
if columns is not None:
|
|
182
|
+
if len(input_1d_tileables) != len(columns):
|
|
183
|
+
raise ValueError(
|
|
184
|
+
f"columns {columns} should have size {len(input_1d_tileables)}"
|
|
185
|
+
)
|
|
186
|
+
if not isinstance(columns, pd.Index):
|
|
187
|
+
if isinstance(columns, ENTITY_TYPE):
|
|
188
|
+
raise NotImplementedError(
|
|
189
|
+
"The columns value cannot be a tileable"
|
|
190
|
+
)
|
|
191
|
+
columns = pd.Index(columns)
|
|
192
|
+
columns_value = parse_index(columns, store_data=True)
|
|
193
|
+
else:
|
|
194
|
+
columns_value = parse_index(
|
|
195
|
+
pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
|
|
151
196
|
)
|
|
152
|
-
if not isinstance(columns, pd.Index):
|
|
153
|
-
if isinstance(columns, ENTITY_TYPE):
|
|
154
|
-
raise NotImplementedError("The columns value cannot be a tileable")
|
|
155
|
-
columns = pd.Index(columns)
|
|
156
|
-
columns_value = parse_index(columns, store_data=True)
|
|
157
|
-
else:
|
|
158
|
-
columns_value = parse_index(
|
|
159
|
-
pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
|
|
160
|
-
)
|
|
161
197
|
|
|
162
|
-
|
|
198
|
+
shape = (shape[0] if shape else 0, len(input_1d_tileables))
|
|
199
|
+
|
|
163
200
|
return self.new_dataframe(
|
|
164
201
|
tileables,
|
|
165
202
|
shape,
|
|
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
|
|
|
278
315
|
gpu: bool = None,
|
|
279
316
|
sparse: bool = False,
|
|
280
317
|
):
|
|
318
|
+
if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
|
|
319
|
+
columns = pd.MultiIndex.from_tuples(columns)
|
|
320
|
+
|
|
281
321
|
if tensor is not None:
|
|
282
322
|
if tensor.ndim > 2 or tensor.ndim <= 0:
|
|
283
323
|
raise TypeError(
|
|
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
|
|
|
299
339
|
dtypes = pd.Series([], index=pd.Index([], dtype=object))
|
|
300
340
|
if index is not None and not isinstance(index, ENTITY_TYPE):
|
|
301
341
|
index = pd.Index(index)
|
|
342
|
+
if isinstance(index[0], tuple):
|
|
343
|
+
index = pd.MultiIndex.from_tuples(index)
|
|
302
344
|
op = DataFrameFromTensor(
|
|
303
345
|
input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
|
|
304
346
|
)
|
|
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
|
|
|
311
353
|
columns: Union[pd.Index, list] = None,
|
|
312
354
|
gpu: bool = None,
|
|
313
355
|
sparse: bool = False,
|
|
356
|
+
axis: int = 1,
|
|
314
357
|
):
|
|
358
|
+
from pandas.core.dtypes.cast import find_common_type
|
|
359
|
+
|
|
315
360
|
data = dict()
|
|
316
361
|
for k, v in d.items():
|
|
317
362
|
if isinstance(v, (list, tuple)) and any(
|
|
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
|
|
|
322
367
|
data[k] = v
|
|
323
368
|
d = data
|
|
324
369
|
if columns is not None:
|
|
325
|
-
tileables = [d.get(c) for c in columns]
|
|
370
|
+
tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
|
|
326
371
|
else:
|
|
327
|
-
columns = list(d.keys())
|
|
372
|
+
columns = list(d.keys()) if axis == 1 else None
|
|
328
373
|
tileables = list(d.values())
|
|
329
374
|
|
|
330
375
|
gpu = (
|
|
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
|
|
|
332
377
|
if gpu is None
|
|
333
378
|
else gpu
|
|
334
379
|
)
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
380
|
+
|
|
381
|
+
if axis == 0:
|
|
382
|
+
col_num = (
|
|
383
|
+
tileables[0].shape[0]
|
|
384
|
+
if hasattr(tileables[0], "shape")
|
|
385
|
+
else len(tileables[0])
|
|
386
|
+
)
|
|
387
|
+
if pd.isna(col_num):
|
|
388
|
+
dtypes = None
|
|
389
|
+
else:
|
|
390
|
+
common_dtype = find_common_type(
|
|
391
|
+
[
|
|
392
|
+
t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
|
|
393
|
+
for t in tileables
|
|
394
|
+
]
|
|
395
|
+
)
|
|
396
|
+
dtypes = pd.Series(
|
|
397
|
+
[common_dtype] * col_num,
|
|
398
|
+
index=columns if columns is not None else pd.RangeIndex(col_num),
|
|
399
|
+
)
|
|
400
|
+
else:
|
|
401
|
+
dtypes = pd.Series(
|
|
402
|
+
[t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
|
|
403
|
+
index=columns,
|
|
404
|
+
)
|
|
405
|
+
|
|
339
406
|
if index is not None and not isinstance(index, ENTITY_TYPE):
|
|
340
407
|
index = pd.Index(index)
|
|
408
|
+
|
|
341
409
|
op = DataFrameFromTensor(
|
|
342
|
-
input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
|
|
410
|
+
input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
|
|
343
411
|
)
|
|
344
412
|
return op(d, index, columns, dtypes)
|
|
345
413
|
|
|
@@ -441,13 +441,12 @@ def read_csv(
|
|
|
441
441
|
Examples
|
|
442
442
|
--------
|
|
443
443
|
>>> import maxframe.dataframe as md
|
|
444
|
-
>>> from maxframe.lib.filesystem.oss import build_oss_path
|
|
445
444
|
>>> md.read_csv('data.csv') # doctest: +SKIP
|
|
446
445
|
>>> # read from HDFS
|
|
447
446
|
>>> md.read_csv('hdfs://localhost:8020/test.csv') # doctest: +SKIP
|
|
448
447
|
>>> # read from OSS
|
|
449
|
-
>>>
|
|
450
|
-
>>>
|
|
448
|
+
>>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
|
|
449
|
+
>>> storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'})
|
|
451
450
|
"""
|
|
452
451
|
# infer dtypes and columns
|
|
453
452
|
if isinstance(path, (list, tuple)):
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import dataclasses
|
|
16
|
+
import functools
|
|
16
17
|
import io
|
|
17
18
|
import logging
|
|
18
19
|
import re
|
|
@@ -22,6 +23,8 @@ from typing import Dict, List, MutableMapping, Optional, Tuple, Union
|
|
|
22
23
|
import numpy as np
|
|
23
24
|
import pandas as pd
|
|
24
25
|
from odps import ODPS
|
|
26
|
+
from odps.errors import ODPSError
|
|
27
|
+
from odps.models import TableSchema
|
|
25
28
|
from odps.types import Column, OdpsSchema, validate_data_type
|
|
26
29
|
from odps.utils import split_sql_by_semicolon
|
|
27
30
|
|
|
@@ -245,13 +248,18 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
|
|
|
245
248
|
return _parse_full_explain(explain_string)
|
|
246
249
|
|
|
247
250
|
|
|
248
|
-
def _build_explain_sql(
|
|
251
|
+
def _build_explain_sql(
|
|
252
|
+
sql_stmt: str, no_split: bool = False, use_output: bool = False
|
|
253
|
+
) -> str:
|
|
254
|
+
clause = "EXPLAIN "
|
|
255
|
+
if use_output:
|
|
256
|
+
clause += "OUTPUT "
|
|
249
257
|
if no_split:
|
|
250
|
-
return
|
|
258
|
+
return clause + sql_stmt
|
|
251
259
|
sql_parts = split_sql_by_semicolon(sql_stmt)
|
|
252
260
|
if not sql_parts:
|
|
253
261
|
raise ValueError(f"Cannot explain SQL statement {sql_stmt}")
|
|
254
|
-
sql_parts[-1] =
|
|
262
|
+
sql_parts[-1] = clause + sql_parts[-1]
|
|
255
263
|
return "\n".join(sql_parts)
|
|
256
264
|
|
|
257
265
|
|
|
@@ -332,6 +340,62 @@ def _check_token_in_sql(token: str, sql: str) -> bool:
|
|
|
332
340
|
return False
|
|
333
341
|
|
|
334
342
|
|
|
343
|
+
def _resolve_schema_by_explain(
    odps_entry: ODPS,
    query: str,
    no_split_sql: bool = False,
    hints: Optional[Dict[str, str]] = None,
    use_explain_output: bool = True,
) -> OdpsSchema:
    """
    Run an explain statement for ``query`` and parse the result into a schema.

    Parameters
    ----------
    odps_entry : ODPS
        Entry object used to execute the explain statement.
    query : str
        SQL text whose result schema is wanted.
    no_split_sql : bool
        Forwarded to ``_build_explain_sql`` as ``no_split``.
    hints : dict, optional
        SQL hints; copied before ``odps.sql.select.output.format`` is set to
        ``json``, so the caller's mapping is not modified.
    use_explain_output : bool
        When True, ``EXPLAIN OUTPUT`` is issued and the result is parsed with
        ``TableSchema.parse``; otherwise a plain ``EXPLAIN`` is issued and the
        result goes through ``_parse_explained_schema``.

    Returns
    -------
    OdpsSchema
        Schema parsed from the explain result.

    Raises
    ------
    ValueError
        If the instance returns no task result, or if the ``EXPLAIN OUTPUT``
        result is empty or reports "nothing to explain".
    """
    hints = dict(hints) if hints else {}
    hints["odps.sql.select.output.format"] = "json"
    explain_stmt = _build_explain_sql(
        query, no_split=no_split_sql, use_output=use_explain_output
    )
    inst = odps_entry.execute_sql(explain_stmt, hints=hints)
    logger.debug("Explain output instance ID: %s", inst.id)

    task_results = inst.get_task_results()
    if not task_results:
        # Surface a descriptive error instead of a bare IndexError from
        # indexing into an empty result list.
        raise ValueError(
            f"No result returned by SQL explain. Explain instance ID: {inst.id}"
        )
    # The first task result is used as the explain text.
    explain_str = next(iter(task_results.values()))

    if not use_explain_output:
        return _parse_explained_schema(explain_str)
    if not explain_str or "nothing to explain" in explain_str:
        raise ValueError("The SQL statement should be an instant query")
    return TableSchema.parse(None, explain_str)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _resolve_query_schema(
    odps_entry: ODPS,
    query: str,
    no_split_sql: bool = False,
    hints: Dict[str, str] = None,
    use_explain_output: Optional[bool] = None,
) -> OdpsSchema:
    """
    Resolve the result schema of ``query`` with one or two explain strategies.

    ``use_explain_output=True`` uses only ``EXPLAIN OUTPUT``, ``False`` uses
    only plain ``EXPLAIN``, and ``None`` tries ``EXPLAIN OUTPUT`` first and
    then plain ``EXPLAIN``.

    Raises
    ------
    ValueError
        When an ``ODPSError`` is raised by the last strategy, or by any
        strategy with a message that does not contain ``ODPS-0130161``.
    """
    resolvers = []
    if use_explain_output is not False:
        # requested explicitly (True) or auto mode (None)
        resolvers.append(_resolve_schema_by_explain)
    if use_explain_output is not True:
        # requested explicitly (False) or auto mode (None)
        legacy_resolver = functools.partial(
            _resolve_schema_by_explain, use_explain_output=False
        )
        resolvers.append(legacy_resolver)

    last_pos = len(resolvers) - 1
    for pos, resolver in enumerate(resolvers):
        try:
            return resolver(odps_entry, query, no_split_sql=no_split_sql, hints=hints)
        except ODPSError as ex:
            msg = (
                f"Failed to obtain schema from SQL explain: {ex!r}\n"
                f"Explain instance ID: {ex.instance_id}"
            )
            # Move on to the next strategy only when one remains and the
            # error message carries the ODPS-0130161 code.
            may_fall_back = pos < last_pos and "ODPS-0130161" in str(ex)
            if not may_fall_back:
                error = ValueError(msg)
                raise error.with_traceback(ex.__traceback__) from None
    # Reached only if no strategy returned or raised.
    raise ValueError("Failed to obtain schema from SQL explain")  # pragma: no cover
|
|
397
|
+
|
|
398
|
+
|
|
335
399
|
def read_odps_query(
|
|
336
400
|
query: str,
|
|
337
401
|
odps_entry: ODPS = None,
|
|
@@ -371,6 +435,8 @@ def read_odps_query(
|
|
|
371
435
|
DataFrame read from MaxCompute (ODPS) table
|
|
372
436
|
"""
|
|
373
437
|
no_split_sql = kw.pop("no_split_sql", False)
|
|
438
|
+
# if use_explain_output is None, will try two methods.
|
|
439
|
+
use_explain_output = kw.pop("use_explain_output", None)
|
|
374
440
|
|
|
375
441
|
hints = options.sql.settings.copy() or {}
|
|
376
442
|
if sql_hints:
|
|
@@ -395,19 +461,13 @@ def read_odps_query(
|
|
|
395
461
|
|
|
396
462
|
col_renames = {}
|
|
397
463
|
if not skip_schema:
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
except BaseException as ex:
|
|
406
|
-
exc = ValueError(
|
|
407
|
-
f"Failed to obtain schema from SQL explain: {ex!r}"
|
|
408
|
-
f"\nExplain instance ID: {inst.id}"
|
|
409
|
-
)
|
|
410
|
-
raise exc.with_traceback(ex.__traceback__) from None
|
|
464
|
+
odps_schema = _resolve_query_schema(
|
|
465
|
+
odps_entry,
|
|
466
|
+
query,
|
|
467
|
+
no_split_sql=no_split_sql,
|
|
468
|
+
hints=hints,
|
|
469
|
+
use_explain_output=use_explain_output,
|
|
470
|
+
)
|
|
411
471
|
|
|
412
472
|
new_columns = []
|
|
413
473
|
for col in odps_schema.columns:
|
|
@@ -17,11 +17,13 @@ import uuid
|
|
|
17
17
|
from collections import OrderedDict
|
|
18
18
|
from math import isinf
|
|
19
19
|
|
|
20
|
+
import mock
|
|
20
21
|
import numpy as np
|
|
21
22
|
import pandas as pd
|
|
22
23
|
import pytest
|
|
23
24
|
from odps import ODPS
|
|
24
25
|
from odps import types as odps_types
|
|
26
|
+
from odps.errors import ODPSError
|
|
25
27
|
|
|
26
28
|
from .... import tensor as mt
|
|
27
29
|
from ....core import OutputType
|
|
@@ -50,6 +52,7 @@ from ..read_odps_query import (
|
|
|
50
52
|
ColumnSchema,
|
|
51
53
|
_parse_full_explain,
|
|
52
54
|
_parse_simple_explain,
|
|
55
|
+
_resolve_query_schema,
|
|
53
56
|
_resolve_task_sector,
|
|
54
57
|
)
|
|
55
58
|
from ..series import from_pandas as from_pandas_series
|
|
@@ -181,6 +184,23 @@ def test_from_tensor():
|
|
|
181
184
|
df = dataframe_from_1d_tileables(d)
|
|
182
185
|
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
|
|
183
186
|
|
|
187
|
+
# test axis parameter for dataframe_from_1d_tileables
|
|
188
|
+
d = OrderedDict(
|
|
189
|
+
[("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# axis=1 (default behavior) - keys become columns
|
|
193
|
+
df = dataframe_from_1d_tileables(d, axis=1)
|
|
194
|
+
assert df.shape == (4, 2)
|
|
195
|
+
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
|
|
196
|
+
pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
|
|
197
|
+
|
|
198
|
+
# axis=0 - keys become index (rows)
|
|
199
|
+
df = dataframe_from_1d_tileables(d, axis=0)
|
|
200
|
+
assert df.shape == (2, 4)
|
|
201
|
+
pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
|
|
202
|
+
pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
|
|
203
|
+
|
|
184
204
|
series = series_from_tensor(mt.random.rand(4))
|
|
185
205
|
pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
|
|
186
206
|
|
|
@@ -204,6 +224,26 @@ def test_from_tensor():
|
|
|
204
224
|
with pytest.raises(ValueError):
|
|
205
225
|
dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
|
|
206
226
|
|
|
227
|
+
# 1-d tensors should have same shape
|
|
228
|
+
with pytest.raises(ValueError):
|
|
229
|
+
dataframe_from_1d_tileables(
|
|
230
|
+
OrderedDict(
|
|
231
|
+
[(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
|
|
232
|
+
)
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# index has wrong shape
|
|
236
|
+
with pytest.raises(ValueError):
|
|
237
|
+
dataframe_from_1d_tileables(
|
|
238
|
+
{0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# columns have wrong shape
|
|
242
|
+
with pytest.raises(ValueError):
|
|
243
|
+
dataframe_from_1d_tileables(
|
|
244
|
+
{0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
|
|
245
|
+
)
|
|
246
|
+
|
|
207
247
|
# index should be 1-d
|
|
208
248
|
with pytest.raises(ValueError):
|
|
209
249
|
dataframe_from_tensor(
|
|
@@ -360,7 +400,7 @@ def test_from_odps_query():
|
|
|
360
400
|
|
|
361
401
|
with pytest.raises(ValueError) as err_info:
|
|
362
402
|
read_odps_query(
|
|
363
|
-
f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
|
|
403
|
+
f"CREATE TABLE dummy_table_{uuid.uuid4().hex} LIFECYCLE 1 "
|
|
364
404
|
f"AS SELECT * FROM {table1_name}"
|
|
365
405
|
)
|
|
366
406
|
assert "instant query" in err_info.value.args[0]
|
|
@@ -578,3 +618,46 @@ def test_resolve_break_lines():
|
|
|
578
618
|
for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
|
|
579
619
|
assert col.name == exp_nm
|
|
580
620
|
assert col.type == odps_types.validate_data_type(exp_tp)
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
@pytest.mark.parametrize("use_explain_output", [None, False, True])
def test_explain_use_explain_output(use_explain_output):
    """Count the statements issued for each ``use_explain_output`` mode."""

    class MockInstance:
        # Canned instance returned for intercepted ``EXPLAIN OUTPUT`` calls.
        id = "mock_id"

        def get_task_results(self):
            return {"pot": """{"columns":[{"name":"a_bigint","type":"BIGINT"}]}"""}

    real_execute_sql = ODPS.execute_sql
    exec_count = 0
    # Auto mode (None) is forced to fail once and retry, hence two calls.
    expected_calls = 2 if use_explain_output is None else 1

    def counting_execute_sql(self, sql, *args, **kw):
        nonlocal exec_count
        exec_count += 1

        if use_explain_output and sql.lower().startswith("explain output select"):
            return MockInstance()
        if use_explain_output is None and sql.lower().startswith("explain output"):
            raise ODPSError("ODPS-0130161: mock error")
        return real_execute_sql(self, sql, *args, **kw)

    odps_entry = ODPS.from_environments()

    with mock.patch("odps.core.ODPS.execute_sql", new=counting_execute_sql):
        # An invalid statement must end up as ValueError in every mode.
        with pytest.raises(ValueError):
            _resolve_query_schema(
                odps_entry, "not_a_sql", use_explain_output=use_explain_output
            )
        assert exec_count == expected_calls

        exec_count = 0
        schema = _resolve_query_schema(
            odps_entry,
            "select cast(1 as bigint) as a_bigint",
            use_explain_output=use_explain_output,
        )
        first_col = schema.columns[0]
        assert first_col.name == "a_bigint"
        assert first_col.type == odps_types.bigint
        assert exec_count == expected_calls
|
|
@@ -12,14 +12,18 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from .to_csv import to_csv
|
|
15
16
|
from .to_odps import to_odps_table
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
def _install():
    """
    Attach the output helpers to the DataFrame and Series types.

    ``to_csv`` is set on every type in ``DATAFRAME_TYPE`` and ``SERIES_TYPE``;
    ``to_odps_table`` is set on the ``DATAFRAME_TYPE`` members only.
    """
    from ..core import DATAFRAME_TYPE, SERIES_TYPE

    for df_cls in DATAFRAME_TYPE:
        setattr(df_cls, "to_csv", to_csv)
        setattr(df_cls, "to_odps_table", to_odps_table)
    for series_cls in SERIES_TYPE:
        setattr(series_cls, "to_csv", to_csv)


_install()
|