maxframe 1.3.1__cp39-cp39-macosx_10_9_universal2.whl → 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-39-darwin.so +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cpython-39-darwin.so +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +109 -19
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +10 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +21 -58
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +54 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cpython-39-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +59 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/METADATA +4 -1
- maxframe-2.0.0b2.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b2.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
1
|
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
4
2
|
#
|
|
5
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -14,6 +12,8 @@
|
|
|
14
12
|
# See the License for the specific language governing permissions and
|
|
15
13
|
# limitations under the License.
|
|
16
14
|
|
|
15
|
+
from typing import MutableMapping, Union
|
|
16
|
+
|
|
17
17
|
import numpy as np
|
|
18
18
|
import pandas as pd
|
|
19
19
|
|
|
@@ -58,6 +58,13 @@ class DataFrameFromRecords(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
58
58
|
columns_value=columns_value,
|
|
59
59
|
)
|
|
60
60
|
|
|
61
|
+
@classmethod
|
|
62
|
+
def estimate_size(
|
|
63
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromRecords"
|
|
64
|
+
): # pragma: no cover
|
|
65
|
+
# todo implement this to facilitate local computation
|
|
66
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
67
|
+
|
|
61
68
|
|
|
62
69
|
def from_records(
|
|
63
70
|
data,
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
1
|
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
4
2
|
#
|
|
5
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -15,13 +13,13 @@
|
|
|
15
13
|
# limitations under the License.
|
|
16
14
|
|
|
17
15
|
from collections import OrderedDict
|
|
18
|
-
from typing import Any, Dict, List, Union
|
|
16
|
+
from typing import Any, Dict, List, MutableMapping, Union
|
|
19
17
|
|
|
20
18
|
import numpy as np
|
|
21
19
|
import pandas as pd
|
|
22
20
|
|
|
23
21
|
from ... import opcodes
|
|
24
|
-
from ...core import ENTITY_TYPE, OutputType
|
|
22
|
+
from ...core import ENTITY_TYPE, EntityData, OutputType
|
|
25
23
|
from ...serialization.serializables import AnyField, KeyField
|
|
26
24
|
from ...tensor.core import Tensor
|
|
27
25
|
from ...tensor.datasource import tensor as astensor
|
|
@@ -46,24 +44,25 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
46
44
|
kwargs["_output_types"] = [OutputType.dataframe]
|
|
47
45
|
super().__init__(*args, **kwargs)
|
|
48
46
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
47
|
+
@classmethod
|
|
48
|
+
def _set_inputs(cls, op: "DataFrameFromTensor", inputs: List[EntityData]):
|
|
49
|
+
super()._set_inputs(op, inputs)
|
|
50
|
+
inputs_iter = iter(op._inputs)
|
|
51
|
+
if op.input is not None:
|
|
52
|
+
if not isinstance(op.input, dict):
|
|
53
|
+
op.input = next(inputs_iter)
|
|
55
54
|
else:
|
|
56
55
|
# check each value for input
|
|
57
56
|
new_input = OrderedDict()
|
|
58
|
-
for k, v in
|
|
57
|
+
for k, v in op.input.items():
|
|
59
58
|
if isinstance(v, ENTITY_TYPE):
|
|
60
59
|
new_input[k] = next(inputs_iter)
|
|
61
60
|
else:
|
|
62
61
|
new_input[k] = v
|
|
63
|
-
|
|
62
|
+
op.input = new_input
|
|
64
63
|
|
|
65
|
-
if isinstance(
|
|
66
|
-
|
|
64
|
+
if isinstance(op.index, ENTITY_TYPE):
|
|
65
|
+
op.index = next(inputs_iter)
|
|
67
66
|
|
|
68
67
|
def __call__(
|
|
69
68
|
self,
|
|
@@ -138,7 +137,11 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
138
137
|
)
|
|
139
138
|
index_value = self._process_index(index, tileables)
|
|
140
139
|
else:
|
|
141
|
-
|
|
140
|
+
if np.isnan(tileables[0].shape[0]):
|
|
141
|
+
index = pd.RangeIndex(0)
|
|
142
|
+
else:
|
|
143
|
+
index = pd.RangeIndex(0, tileables[0].shape[0])
|
|
144
|
+
self.index = index
|
|
142
145
|
index_value = parse_index(index)
|
|
143
146
|
|
|
144
147
|
if columns is not None:
|
|
@@ -260,6 +263,13 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
260
263
|
columns_value=columns_value,
|
|
261
264
|
)
|
|
262
265
|
|
|
266
|
+
@classmethod
|
|
267
|
+
def estimate_size(
|
|
268
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromTensor"
|
|
269
|
+
): # pragma: no cover
|
|
270
|
+
# todo implement this to facilitate local computation
|
|
271
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
272
|
+
|
|
263
273
|
|
|
264
274
|
def dataframe_from_tensor(
|
|
265
275
|
tensor: Tensor,
|
|
@@ -340,12 +350,13 @@ class SeriesFromTensor(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
340
350
|
input = KeyField("input")
|
|
341
351
|
index = AnyField("index")
|
|
342
352
|
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
353
|
+
@classmethod
|
|
354
|
+
def _set_inputs(cls, op: "SeriesFromTensor", inputs: List[EntityData]):
|
|
355
|
+
super()._set_inputs(op, inputs)
|
|
356
|
+
if op.input is not None:
|
|
357
|
+
op.input = op.inputs[0]
|
|
358
|
+
if op.index is not None and hasattr(op.index, "key"):
|
|
359
|
+
op.index = op.inputs[-1]
|
|
349
360
|
|
|
350
361
|
def __call__(
|
|
351
362
|
self,
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
1
|
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
4
2
|
#
|
|
5
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -15,6 +13,7 @@
|
|
|
15
13
|
# limitations under the License.
|
|
16
14
|
|
|
17
15
|
from io import BytesIO
|
|
16
|
+
from typing import MutableMapping, Union
|
|
18
17
|
from urllib.parse import urlparse
|
|
19
18
|
|
|
20
19
|
import numpy as np
|
|
@@ -129,6 +128,13 @@ class DataFrameReadCSV(
|
|
|
129
128
|
chunk_bytes=chunk_bytes,
|
|
130
129
|
)
|
|
131
130
|
|
|
131
|
+
@classmethod
|
|
132
|
+
def estimate_size(
|
|
133
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadCSV"
|
|
134
|
+
): # pragma: no cover
|
|
135
|
+
# todo implement this to facilitate local computation
|
|
136
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
137
|
+
|
|
132
138
|
|
|
133
139
|
def read_csv(
|
|
134
140
|
path,
|
|
@@ -13,13 +13,18 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import dataclasses
|
|
16
|
+
import functools
|
|
17
|
+
import io
|
|
16
18
|
import logging
|
|
17
19
|
import re
|
|
18
|
-
|
|
20
|
+
import tokenize
|
|
21
|
+
from typing import Dict, List, MutableMapping, Optional, Tuple, Union
|
|
19
22
|
|
|
20
23
|
import numpy as np
|
|
21
24
|
import pandas as pd
|
|
22
25
|
from odps import ODPS
|
|
26
|
+
from odps.errors import ODPSError
|
|
27
|
+
from odps.models import TableSchema
|
|
23
28
|
from odps.types import Column, OdpsSchema, validate_data_type
|
|
24
29
|
from odps.utils import split_sql_by_semicolon
|
|
25
30
|
|
|
@@ -110,7 +115,15 @@ def _split_explain_string(explain_string: str) -> List[str]:
|
|
|
110
115
|
grouped = []
|
|
111
116
|
for part in parts:
|
|
112
117
|
part = part.strip("\n")
|
|
113
|
-
|
|
118
|
+
part_line1 = part.split("\n", 1)[0]
|
|
119
|
+
# initial line of part should not start with spaces (Statistics row)
|
|
120
|
+
# or with quote marks
|
|
121
|
+
if (
|
|
122
|
+
grouped
|
|
123
|
+
and not part.startswith(" ")
|
|
124
|
+
and "'" not in part_line1
|
|
125
|
+
and '"' not in part_line1
|
|
126
|
+
):
|
|
114
127
|
final_parts.append("\n\n".join(grouped).strip())
|
|
115
128
|
grouped = []
|
|
116
129
|
grouped.append(part)
|
|
@@ -235,13 +248,18 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
|
|
|
235
248
|
return _parse_full_explain(explain_string)
|
|
236
249
|
|
|
237
250
|
|
|
238
|
-
def _build_explain_sql(
|
|
251
|
+
def _build_explain_sql(
|
|
252
|
+
sql_stmt: str, no_split: bool = False, use_output: bool = False
|
|
253
|
+
) -> str:
|
|
254
|
+
clause = "EXPLAIN "
|
|
255
|
+
if use_output:
|
|
256
|
+
clause += "OUTPUT "
|
|
239
257
|
if no_split:
|
|
240
|
-
return
|
|
258
|
+
return clause + sql_stmt
|
|
241
259
|
sql_parts = split_sql_by_semicolon(sql_stmt)
|
|
242
260
|
if not sql_parts:
|
|
243
261
|
raise ValueError(f"Cannot explain SQL statement {sql_stmt}")
|
|
244
|
-
sql_parts[-1] =
|
|
262
|
+
sql_parts[-1] = clause + sql_parts[-1]
|
|
245
263
|
return "\n".join(sql_parts)
|
|
246
264
|
|
|
247
265
|
|
|
@@ -267,6 +285,15 @@ class DataFrameReadODPSQuery(
|
|
|
267
285
|
def set_pruned_columns(self, columns, *, keep_order=None): # pragma: no cover
|
|
268
286
|
self.columns = columns
|
|
269
287
|
|
|
288
|
+
@classmethod
|
|
289
|
+
def estimate_size(
|
|
290
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSQuery"
|
|
291
|
+
): # pragma: no cover
|
|
292
|
+
# use infinity to show that the size cannot be inferred
|
|
293
|
+
# todo when local catalyst is implemented,
|
|
294
|
+
# a more precise estimation here can be useful then.
|
|
295
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
296
|
+
|
|
270
297
|
def __call__(self, chunk_bytes=None, chunk_size=None):
|
|
271
298
|
if is_empty(self.index_columns):
|
|
272
299
|
index_value = parse_index(pd.RangeIndex(0))
|
|
@@ -302,6 +329,73 @@ class DataFrameReadODPSQuery(
|
|
|
302
329
|
)
|
|
303
330
|
|
|
304
331
|
|
|
332
|
+
def _check_token_in_sql(token: str, sql: str) -> bool:
|
|
333
|
+
try:
|
|
334
|
+
names = set()
|
|
335
|
+
for tk_info in tokenize.tokenize(io.BytesIO(sql.encode()).readline):
|
|
336
|
+
if tk_info.type == tokenize.NAME:
|
|
337
|
+
names.add(tk_info.string)
|
|
338
|
+
return token in names
|
|
339
|
+
except: # pragma: no cover
|
|
340
|
+
return False
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _resolve_schema_by_explain(
|
|
344
|
+
odps_entry: ODPS,
|
|
345
|
+
query: str,
|
|
346
|
+
no_split_sql: bool = False,
|
|
347
|
+
hints: Dict[str, str] = None,
|
|
348
|
+
use_explain_output: bool = True,
|
|
349
|
+
) -> OdpsSchema:
|
|
350
|
+
hints = (hints or dict()).copy()
|
|
351
|
+
hints["odps.sql.select.output.format"] = "json"
|
|
352
|
+
explain_stmt = _build_explain_sql(
|
|
353
|
+
query, no_split=no_split_sql, use_output=use_explain_output
|
|
354
|
+
)
|
|
355
|
+
inst = odps_entry.execute_sql(explain_stmt, hints=hints)
|
|
356
|
+
logger.debug("Explain output instance ID: %s", inst.id)
|
|
357
|
+
explain_str = list(inst.get_task_results().values())[0]
|
|
358
|
+
if use_explain_output:
|
|
359
|
+
if not explain_str or "nothing to explain" in explain_str:
|
|
360
|
+
raise ValueError("The SQL statement should be an instant query")
|
|
361
|
+
return TableSchema.parse(None, explain_str)
|
|
362
|
+
else:
|
|
363
|
+
return _parse_explained_schema(explain_str)
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _resolve_query_schema(
|
|
367
|
+
odps_entry: ODPS,
|
|
368
|
+
query: str,
|
|
369
|
+
no_split_sql: bool = False,
|
|
370
|
+
hints: Dict[str, str] = None,
|
|
371
|
+
use_explain_output: Optional[bool] = None,
|
|
372
|
+
) -> OdpsSchema:
|
|
373
|
+
methods = []
|
|
374
|
+
if use_explain_output is not False:
|
|
375
|
+
# None or True
|
|
376
|
+
methods.append(_resolve_schema_by_explain)
|
|
377
|
+
if use_explain_output is not True:
|
|
378
|
+
# None or False
|
|
379
|
+
methods.append(
|
|
380
|
+
functools.partial(_resolve_schema_by_explain, use_explain_output=False)
|
|
381
|
+
)
|
|
382
|
+
for idx, resolve_method in enumerate(methods):
|
|
383
|
+
try:
|
|
384
|
+
return resolve_method(
|
|
385
|
+
odps_entry, query, no_split_sql=no_split_sql, hints=hints
|
|
386
|
+
)
|
|
387
|
+
except ODPSError as ex:
|
|
388
|
+
msg = (
|
|
389
|
+
f"Failed to obtain schema from SQL explain: {ex!r}\n"
|
|
390
|
+
f"Explain instance ID: {ex.instance_id}"
|
|
391
|
+
)
|
|
392
|
+
if idx + 1 == len(methods) or "ODPS-0130161" not in str(ex):
|
|
393
|
+
exc = ValueError(msg)
|
|
394
|
+
raise exc.with_traceback(ex.__traceback__) from None
|
|
395
|
+
# will this happen?
|
|
396
|
+
raise ValueError("Failed to obtain schema from SQL explain") # pragma: no cover
|
|
397
|
+
|
|
398
|
+
|
|
305
399
|
def read_odps_query(
|
|
306
400
|
query: str,
|
|
307
401
|
odps_entry: ODPS = None,
|
|
@@ -341,6 +435,8 @@ def read_odps_query(
|
|
|
341
435
|
DataFrame read from MaxCompute (ODPS) table
|
|
342
436
|
"""
|
|
343
437
|
no_split_sql = kw.pop("no_split_sql", False)
|
|
438
|
+
# if use_explain_output is None, will try two methods.
|
|
439
|
+
use_explain_output = kw.pop("use_explain_output", None)
|
|
344
440
|
|
|
345
441
|
hints = options.sql.settings.copy() or {}
|
|
346
442
|
if sql_hints:
|
|
@@ -365,24 +461,18 @@ def read_odps_query(
|
|
|
365
461
|
|
|
366
462
|
col_renames = {}
|
|
367
463
|
if not skip_schema:
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
except BaseException as ex:
|
|
376
|
-
exc = ValueError(
|
|
377
|
-
f"Failed to obtain schema from SQL explain: {ex!r}"
|
|
378
|
-
f"\nExplain instance ID: {inst.id}"
|
|
379
|
-
)
|
|
380
|
-
raise exc.with_traceback(ex.__traceback__) from None
|
|
464
|
+
odps_schema = _resolve_query_schema(
|
|
465
|
+
odps_entry,
|
|
466
|
+
query,
|
|
467
|
+
no_split_sql=no_split_sql,
|
|
468
|
+
hints=hints,
|
|
469
|
+
use_explain_output=use_explain_output,
|
|
470
|
+
)
|
|
381
471
|
|
|
382
472
|
new_columns = []
|
|
383
473
|
for col in odps_schema.columns:
|
|
384
474
|
anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
|
|
385
|
-
if anon_match and col.name
|
|
475
|
+
if anon_match and not _check_token_in_sql(col.name, query):
|
|
386
476
|
new_name = anonymous_col_prefix + anon_match.group(1)
|
|
387
477
|
col_renames[col.name] = new_name
|
|
388
478
|
new_columns.append(Column(new_name, col.type))
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
|
-
from typing import List, Optional, Union
|
|
16
|
+
from typing import List, MutableMapping, Optional, Union
|
|
17
17
|
|
|
18
18
|
import numpy as np
|
|
19
19
|
import pandas as pd
|
|
@@ -34,7 +34,7 @@ from ...serialization.serializables import (
|
|
|
34
34
|
SeriesField,
|
|
35
35
|
StringField,
|
|
36
36
|
)
|
|
37
|
-
from ...utils import is_empty
|
|
37
|
+
from ...utils import estimate_table_size, is_empty
|
|
38
38
|
from ..core import DataFrame # noqa: F401
|
|
39
39
|
from ..utils import parse_index
|
|
40
40
|
from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
|
|
@@ -46,6 +46,7 @@ class DataFrameReadODPSTable(
|
|
|
46
46
|
IncrementalIndexDatasource,
|
|
47
47
|
ColumnPruneSupportedDataSourceMixin,
|
|
48
48
|
):
|
|
49
|
+
__slots__ = ("_odps_entry",)
|
|
49
50
|
_op_type_ = opcodes.READ_ODPS_TABLE
|
|
50
51
|
|
|
51
52
|
table_name = StringField("table_name")
|
|
@@ -61,7 +62,8 @@ class DataFrameReadODPSTable(
|
|
|
61
62
|
index_dtypes = SeriesField("index_dtypes", default=None)
|
|
62
63
|
|
|
63
64
|
def __init__(self, memory_scale=None, **kw):
|
|
64
|
-
output_type = kw.
|
|
65
|
+
output_type = kw.pop("output_type", OutputType.dataframe)
|
|
66
|
+
self._odps_entry = kw.pop("odps_entry", None)
|
|
65
67
|
super(DataFrameReadODPSTable, self).__init__(
|
|
66
68
|
memory_scale=memory_scale, _output_types=[output_type], **kw
|
|
67
69
|
)
|
|
@@ -130,6 +132,18 @@ class DataFrameReadODPSTable(
|
|
|
130
132
|
chunk_size=chunk_size,
|
|
131
133
|
)
|
|
132
134
|
|
|
135
|
+
@classmethod
|
|
136
|
+
def estimate_size(
|
|
137
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSTable"
|
|
138
|
+
) -> None:
|
|
139
|
+
odps_entry = op._odps_entry or ODPS.from_global() or ODPS.from_environments()
|
|
140
|
+
if not odps_entry: # pragma: no cover
|
|
141
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
142
|
+
return
|
|
143
|
+
ctx[op.outputs[0].key] = estimate_table_size(
|
|
144
|
+
odps_entry, op.table_name, op.partitions
|
|
145
|
+
)
|
|
146
|
+
|
|
133
147
|
|
|
134
148
|
def read_odps_table(
|
|
135
149
|
table_name: Union[str, Table],
|
|
@@ -212,7 +226,8 @@ def read_odps_table(
|
|
|
212
226
|
index_dtypes = pd.Series(table_index_types, index=index_col)
|
|
213
227
|
|
|
214
228
|
if columns is not None:
|
|
215
|
-
|
|
229
|
+
new_columns = [c.lower() for c in columns]
|
|
230
|
+
table_col_set = set(new_columns)
|
|
216
231
|
col_diff = sorted(table_col_set - set(table_columns))
|
|
217
232
|
if col_diff:
|
|
218
233
|
raise ValueError(
|
|
@@ -223,7 +238,6 @@ def read_odps_table(
|
|
|
223
238
|
raise ValueError("Index columns and columns shall not overlap.")
|
|
224
239
|
|
|
225
240
|
# reorder columns
|
|
226
|
-
new_columns = [c for c in table_columns if c in table_col_set]
|
|
227
241
|
df_types = [df_types[table_columns.index(col)] for col in new_columns]
|
|
228
242
|
table_columns = new_columns
|
|
229
243
|
columns = new_columns
|
|
@@ -253,6 +267,7 @@ def read_odps_table(
|
|
|
253
267
|
last_modified_time=to_timestamp(table.last_data_modified_time),
|
|
254
268
|
index_columns=index_col,
|
|
255
269
|
index_dtypes=index_dtypes,
|
|
270
|
+
odps_entry=odps_entry,
|
|
256
271
|
**kw,
|
|
257
272
|
)
|
|
258
273
|
return op(shape, chunk_bytes=chunk_bytes, chunk_size=chunk_size)
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
1
|
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
4
2
|
#
|
|
5
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -15,7 +13,7 @@
|
|
|
15
13
|
# limitations under the License.
|
|
16
14
|
|
|
17
15
|
import os
|
|
18
|
-
from typing import Dict
|
|
16
|
+
from typing import Dict, MutableMapping, Union
|
|
19
17
|
from urllib.parse import urlparse
|
|
20
18
|
|
|
21
19
|
import numpy as np
|
|
@@ -308,6 +306,13 @@ class DataFrameReadParquet(
|
|
|
308
306
|
columns_value=columns_value,
|
|
309
307
|
)
|
|
310
308
|
|
|
309
|
+
@classmethod
|
|
310
|
+
def estimate_size(
|
|
311
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadParquet"
|
|
312
|
+
): # pragma: no cover
|
|
313
|
+
# todo implement this to facilitate local computation
|
|
314
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
315
|
+
|
|
311
316
|
|
|
312
317
|
def read_parquet(
|
|
313
318
|
path,
|
|
@@ -15,15 +15,19 @@
|
|
|
15
15
|
import os
|
|
16
16
|
import uuid
|
|
17
17
|
from collections import OrderedDict
|
|
18
|
+
from math import isinf
|
|
18
19
|
|
|
20
|
+
import mock
|
|
19
21
|
import numpy as np
|
|
20
22
|
import pandas as pd
|
|
21
23
|
import pytest
|
|
22
24
|
from odps import ODPS
|
|
23
25
|
from odps import types as odps_types
|
|
26
|
+
from odps.errors import ODPSError
|
|
24
27
|
|
|
25
28
|
from .... import tensor as mt
|
|
26
29
|
from ....core import OutputType
|
|
30
|
+
from ....core.operator import estimate_size
|
|
27
31
|
from ....tests.utils import tn
|
|
28
32
|
from ....utils import lazy_import
|
|
29
33
|
from ... import read_odps_query, read_odps_table
|
|
@@ -48,6 +52,7 @@ from ..read_odps_query import (
|
|
|
48
52
|
ColumnSchema,
|
|
49
53
|
_parse_full_explain,
|
|
50
54
|
_parse_simple_explain,
|
|
55
|
+
_resolve_query_schema,
|
|
51
56
|
_resolve_task_sector,
|
|
52
57
|
)
|
|
53
58
|
from ..series import from_pandas as from_pandas_series
|
|
@@ -71,6 +76,10 @@ def test_from_pandas_dataframe():
|
|
|
71
76
|
assert df.index_value.max_val == 9
|
|
72
77
|
np.testing.assert_equal(df.columns_value._index_value._data, data.columns.values)
|
|
73
78
|
|
|
79
|
+
result_ctx = dict()
|
|
80
|
+
estimate_size(result_ctx, df.op)
|
|
81
|
+
assert result_ctx[df.key] > 0 and not isinf(result_ctx[df.key])
|
|
82
|
+
|
|
74
83
|
data2 = data[::2]
|
|
75
84
|
df2 = from_pandas_df(data2, chunk_size=4)
|
|
76
85
|
|
|
@@ -258,6 +267,10 @@ def test_from_odps_table():
|
|
|
258
267
|
),
|
|
259
268
|
)
|
|
260
269
|
|
|
270
|
+
result_ctx = dict()
|
|
271
|
+
estimate_size(result_ctx, df.op)
|
|
272
|
+
assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
|
|
273
|
+
|
|
261
274
|
with pytest.raises(ValueError):
|
|
262
275
|
read_odps_table(test_table, columns=["col3", "col4"])
|
|
263
276
|
with pytest.raises(ValueError):
|
|
@@ -300,6 +313,7 @@ def test_from_odps_table():
|
|
|
300
313
|
),
|
|
301
314
|
)
|
|
302
315
|
|
|
316
|
+
test_parted_table.create_partition("pt=20240103")
|
|
303
317
|
df = read_odps_table(
|
|
304
318
|
test_parted_table, columns=["col1", "col2", "pt"], partitions="pt=20240103"
|
|
305
319
|
)
|
|
@@ -314,6 +328,10 @@ def test_from_odps_table():
|
|
|
314
328
|
),
|
|
315
329
|
)
|
|
316
330
|
|
|
331
|
+
result_ctx = dict()
|
|
332
|
+
estimate_size(result_ctx, df.op)
|
|
333
|
+
assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
|
|
334
|
+
|
|
317
335
|
out_idx = read_odps_table(
|
|
318
336
|
test_table,
|
|
319
337
|
columns=[],
|
|
@@ -345,7 +363,7 @@ def test_from_odps_query():
|
|
|
345
363
|
|
|
346
364
|
with pytest.raises(ValueError) as err_info:
|
|
347
365
|
read_odps_query(
|
|
348
|
-
f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
|
|
366
|
+
f"CREATE TABLE dummy_table_{uuid.uuid4().hex} LIFECYCLE 1 "
|
|
349
367
|
f"AS SELECT * FROM {table1_name}"
|
|
350
368
|
)
|
|
351
369
|
assert "instant query" in err_info.value.args[0]
|
|
@@ -545,3 +563,64 @@ def test_resolve_multi_join():
|
|
|
545
563
|
for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
|
|
546
564
|
assert col.name == exp_nm
|
|
547
565
|
assert col.type == odps_types.validate_data_type(exp_tp)
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
def test_resolve_break_lines():
|
|
569
|
+
input_path = os.path.join(
|
|
570
|
+
os.path.dirname(__file__), "test-data", "task-input-with-break-line.txt"
|
|
571
|
+
)
|
|
572
|
+
with open(input_path, "r") as f:
|
|
573
|
+
sector = f.read()
|
|
574
|
+
|
|
575
|
+
expected_col_types = {
|
|
576
|
+
"key": "string",
|
|
577
|
+
"value": "string",
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
schema = _parse_full_explain(sector)
|
|
581
|
+
for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
|
|
582
|
+
assert col.name == exp_nm
|
|
583
|
+
assert col.type == odps_types.validate_data_type(exp_tp)
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
@pytest.mark.parametrize("use_explain_output", [None, False, True])
|
|
587
|
+
def test_explain_use_explain_output(use_explain_output):
|
|
588
|
+
class MockInstance:
|
|
589
|
+
@property
|
|
590
|
+
def id(self):
|
|
591
|
+
return "mock_id"
|
|
592
|
+
|
|
593
|
+
def get_task_results(self):
|
|
594
|
+
return {"pot": """{"columns":[{"name":"a_bigint","type":"BIGINT"}]}"""}
|
|
595
|
+
|
|
596
|
+
old_execute_sql = ODPS.execute_sql
|
|
597
|
+
exec_count = 0
|
|
598
|
+
|
|
599
|
+
def new_execute_sql(self, sql, *args, **kw):
|
|
600
|
+
nonlocal exec_count
|
|
601
|
+
exec_count += 1
|
|
602
|
+
|
|
603
|
+
if use_explain_output and sql.lower().startswith("explain output select"):
|
|
604
|
+
return MockInstance()
|
|
605
|
+
elif use_explain_output is None and sql.lower().startswith("explain output"):
|
|
606
|
+
raise ODPSError("ODPS-0130161: mock error")
|
|
607
|
+
return old_execute_sql(self, sql, *args, **kw)
|
|
608
|
+
|
|
609
|
+
odps_entry = ODPS.from_environments()
|
|
610
|
+
|
|
611
|
+
with mock.patch("odps.core.ODPS.execute_sql", new=new_execute_sql):
|
|
612
|
+
with pytest.raises(ValueError):
|
|
613
|
+
_resolve_query_schema(
|
|
614
|
+
odps_entry, "not_a_sql", use_explain_output=use_explain_output
|
|
615
|
+
)
|
|
616
|
+
assert exec_count == (2 if use_explain_output is None else 1)
|
|
617
|
+
|
|
618
|
+
exec_count = 0
|
|
619
|
+
schema = _resolve_query_schema(
|
|
620
|
+
odps_entry,
|
|
621
|
+
"select cast(1 as bigint) as a_bigint",
|
|
622
|
+
use_explain_output=use_explain_output,
|
|
623
|
+
)
|
|
624
|
+
assert schema.columns[0].name == "a_bigint"
|
|
625
|
+
assert schema.columns[0].type == odps_types.bigint
|
|
626
|
+
assert exec_count == (2 if use_explain_output is None else 1)
|
|
@@ -12,9 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import pandas as pd
|
|
15
16
|
import pytest
|
|
16
17
|
|
|
17
|
-
from ... import DataFrame
|
|
18
|
+
from ... import DataFrame, Index
|
|
18
19
|
from ..to_odps import to_odps_table
|
|
19
20
|
|
|
20
21
|
|
|
@@ -23,6 +24,25 @@ def df():
|
|
|
23
24
|
return DataFrame({"A": [1, 2], "B": [3, 4]})
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
@pytest.fixture
|
|
28
|
+
def df_with_named_index():
|
|
29
|
+
return DataFrame({"A": [1, 2], "B": [3, 4]}, index=Index([1, 2], name="A"))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@pytest.fixture
|
|
33
|
+
def df_with_named_multi_indexes():
|
|
34
|
+
arrays = [
|
|
35
|
+
["c1", "c2"],
|
|
36
|
+
["d1", "d2"],
|
|
37
|
+
["e1", "e2"],
|
|
38
|
+
]
|
|
39
|
+
multi_index = pd.MultiIndex.from_arrays(arrays, names=("C", "D", "E"))
|
|
40
|
+
return DataFrame(
|
|
41
|
+
{"A": [1, 2], "B": [3, 4]},
|
|
42
|
+
index=multi_index,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
26
46
|
@pytest.mark.parametrize(
|
|
27
47
|
"kwargs",
|
|
28
48
|
[
|
|
@@ -46,3 +66,34 @@ def test_to_odps_table_validation(df, kwargs):
|
|
|
46
66
|
)
|
|
47
67
|
def test_to_odps_table_vaild(df, kwargs):
|
|
48
68
|
to_odps_table(df, "test_table", **kwargs)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_to_odps_table_column_conflicts(
|
|
72
|
+
df, df_with_named_index, df_with_named_multi_indexes
|
|
73
|
+
):
|
|
74
|
+
to_odps_table(df.reset_index(), "test_table", index=False)
|
|
75
|
+
to_odps_table(df.reset_index(), "test_table", index_label="C")
|
|
76
|
+
with pytest.raises(ValueError):
|
|
77
|
+
to_odps_table(df.reset_index(), "test_table")
|
|
78
|
+
|
|
79
|
+
to_odps_table(df_with_named_index, "test_table", index=False)
|
|
80
|
+
to_odps_table(df_with_named_index, "test_table", index_label="C")
|
|
81
|
+
with pytest.raises(ValueError):
|
|
82
|
+
to_odps_table(df_with_named_index, "test_table")
|
|
83
|
+
|
|
84
|
+
to_odps_table(df, "test_table", partition="C='1'")
|
|
85
|
+
with pytest.raises(ValueError):
|
|
86
|
+
to_odps_table(df, "test_table", partition="A='1'")
|
|
87
|
+
|
|
88
|
+
with pytest.raises(ValueError):
|
|
89
|
+
to_odps_table(df, "test_table", partition="A='1'")
|
|
90
|
+
|
|
91
|
+
to_odps_table(df_with_named_multi_indexes, "test_table")
|
|
92
|
+
to_odps_table(
|
|
93
|
+
df_with_named_multi_indexes, "test_table", partition="C='1'", index=False
|
|
94
|
+
)
|
|
95
|
+
with pytest.raises(ValueError):
|
|
96
|
+
to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")
|
|
97
|
+
|
|
98
|
+
df_with_named_multi_indexes.index.names = ["C1", "D1", "E1"]
|
|
99
|
+
to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")
|