maxframe 1.3.1__cp310-cp310-win_amd64.whl → 2.0.0b1__cp310-cp310-win_amd64.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp310-win_amd64.pyd +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +33 -3
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +8 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +22 -48
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +53 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +67 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
- maxframe-2.0.0b1.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
maxframe/io/objects/tensor.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import itertools
|
|
15
16
|
import struct
|
|
16
17
|
from io import BytesIO
|
|
17
18
|
from typing import Any, Dict
|
|
@@ -19,36 +20,65 @@ from typing import Any, Dict
|
|
|
19
20
|
import msgpack
|
|
20
21
|
import numpy as np
|
|
21
22
|
|
|
23
|
+
from ...config import options
|
|
24
|
+
from ...core import OutputType
|
|
22
25
|
from ...lib import wrapped_pickle as pickle
|
|
23
|
-
from ...
|
|
26
|
+
from ...lib.version import parse as parse_version
|
|
24
27
|
from ...typing_ import SlicesType, TileableType
|
|
25
28
|
from ..odpsio import ODPSVolumeReader, ODPSVolumeWriter
|
|
26
29
|
from .core import AbstractObjectIOHandler, register_object_io_handler
|
|
27
30
|
|
|
31
|
+
try:
|
|
32
|
+
from maxframe import __version__ as mf_version
|
|
33
|
+
except ImportError:
|
|
34
|
+
mf_version = None
|
|
28
35
|
|
|
29
|
-
|
|
36
|
+
|
|
37
|
+
@register_object_io_handler(OutputType.tensor)
|
|
30
38
|
class TensorIOHandler(AbstractObjectIOHandler):
|
|
39
|
+
def _prepare_meta_for_serial(self, tileable: TileableType) -> Dict[str, Any]:
|
|
40
|
+
meta = super()._prepare_meta_for_serial(tileable)
|
|
41
|
+
meta["nsplits"] = (
|
|
42
|
+
getattr(tileable, "nsplits", None) or (np.nan,) * tileable.ndim
|
|
43
|
+
)
|
|
44
|
+
return meta
|
|
45
|
+
|
|
31
46
|
def write_object_meta(
|
|
32
47
|
self,
|
|
33
48
|
writer: ODPSVolumeWriter,
|
|
34
49
|
tileable: TileableType,
|
|
35
50
|
extra_meta: Dict[str, Any] = None,
|
|
36
51
|
):
|
|
37
|
-
# fixme upload in real slices when tensors are supported in DPE
|
|
38
52
|
extra_meta = extra_meta or dict()
|
|
39
|
-
extra_meta["nsplits"] = (
|
|
40
|
-
|
|
53
|
+
extra_meta["nsplits"] = (
|
|
54
|
+
getattr(tileable, "nsplits", None) or ((np.nan,),) * tileable.ndim
|
|
55
|
+
)
|
|
41
56
|
super().write_object_meta(writer, tileable, extra_meta=extra_meta)
|
|
42
57
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
58
|
+
@staticmethod
|
|
59
|
+
def _get_chunk_file_name(params: Dict[str, Any]) -> str:
|
|
60
|
+
# fixme remove this when all v1.0 clients removed
|
|
61
|
+
mf_ver = options.session.client_version or mf_version
|
|
62
|
+
if mf_ver:
|
|
63
|
+
client_major = parse_version(
|
|
64
|
+
options.session.client_version or mf_version
|
|
65
|
+
).major
|
|
66
|
+
else:
|
|
67
|
+
client_major = None
|
|
68
|
+
|
|
69
|
+
if client_major == 1:
|
|
70
|
+
# returns v1.0 file name as we do not support tiled tensors
|
|
71
|
+
return "0,0.dat"
|
|
72
|
+
|
|
73
|
+
ndim = len(params.get("shape") or (0,))
|
|
74
|
+
data_index = params.get("index") or (0,) * ndim
|
|
75
|
+
return ",".join(str(ix) for ix in data_index) + ".dat"
|
|
76
|
+
|
|
77
|
+
def _read_single_chunk(
|
|
78
|
+
self, reader: ODPSVolumeReader, params: Dict[str, Any]
|
|
49
79
|
) -> Any:
|
|
50
|
-
|
|
51
|
-
body = reader.read_file(
|
|
80
|
+
file_name = self._get_chunk_file_name(params)
|
|
81
|
+
body = reader.read_file(file_name)
|
|
52
82
|
bio = BytesIO(body)
|
|
53
83
|
(header_len,) = struct.unpack("<I", bio.read(4))
|
|
54
84
|
header_data = msgpack.loads(bio.read(header_len))
|
|
@@ -57,10 +87,35 @@ class TensorIOHandler(AbstractObjectIOHandler):
|
|
|
57
87
|
bufs = [bio.read(size) for size in header_data[1:]]
|
|
58
88
|
return pickle.loads(pickled, buffers=bufs)
|
|
59
89
|
|
|
60
|
-
def
|
|
61
|
-
self,
|
|
90
|
+
def read_object_body(
|
|
91
|
+
self,
|
|
92
|
+
reader: ODPSVolumeReader,
|
|
93
|
+
params: Dict[str, Any],
|
|
94
|
+
extra_params: Dict[str, Any] = None,
|
|
95
|
+
slices: SlicesType = None,
|
|
96
|
+
) -> Any:
|
|
97
|
+
if "index" in params:
|
|
98
|
+
return self._read_single_chunk(reader, params)
|
|
99
|
+
|
|
100
|
+
tileable_params = params.copy()
|
|
101
|
+
tileable_params.update(extra_params or {})
|
|
102
|
+
|
|
103
|
+
# todo implements slices argument for head and tail fetching
|
|
104
|
+
assert slices is None
|
|
105
|
+
|
|
106
|
+
chunk_shape = tuple(len(x) for x in params["nsplits"])
|
|
107
|
+
block_array = np.empty(shape=chunk_shape, dtype="O")
|
|
108
|
+
for idx in itertools.product(*(range(sp) for sp in chunk_shape)):
|
|
109
|
+
chunk_params = tileable_params.copy()
|
|
110
|
+
chunk_params.pop("nsplits")
|
|
111
|
+
chunk_params["index"] = idx
|
|
112
|
+
block_array[idx] = self._read_single_chunk(reader, chunk_params)
|
|
113
|
+
|
|
114
|
+
return np.block(block_array.tolist())
|
|
115
|
+
|
|
116
|
+
def write_object_body(
|
|
117
|
+
self, writer: ODPSVolumeWriter, params: Dict[str, Any], value: Any
|
|
62
118
|
):
|
|
63
|
-
# fixme upload in real slices when tensors are supported in DPE
|
|
64
119
|
def data_gen():
|
|
65
120
|
bufs = []
|
|
66
121
|
pickled = pickle.dumps(value, buffer_callback=bufs.append)
|
|
@@ -73,4 +128,5 @@ class TensorIOHandler(AbstractObjectIOHandler):
|
|
|
73
128
|
for buf in bufs:
|
|
74
129
|
yield buf
|
|
75
130
|
|
|
76
|
-
|
|
131
|
+
file_name = self._get_chunk_file_name(params)
|
|
132
|
+
writer.write_file(file_name, data_gen())
|
|
@@ -19,7 +19,7 @@ from odps import ODPS
|
|
|
19
19
|
from ....core import OutputType
|
|
20
20
|
from ....core.operator import ObjectOperatorMixin, Operator
|
|
21
21
|
from ....tensor.datasource import ArrayDataSource
|
|
22
|
-
from ....tests.utils import tn
|
|
22
|
+
from ....tests.utils import create_test_volume, tn
|
|
23
23
|
from ...odpsio import ODPSVolumeReader, ODPSVolumeWriter
|
|
24
24
|
from ..core import get_object_io_handler
|
|
25
25
|
|
|
@@ -31,61 +31,9 @@ class TestObjectOp(Operator, ObjectOperatorMixin):
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
@pytest.fixture(scope="module")
|
|
34
|
-
def create_volume(
|
|
35
|
-
|
|
36
|
-
odps_entry = ODPS.from_environments()
|
|
37
|
-
|
|
38
|
-
try:
|
|
39
|
-
odps_entry.delete_volume(test_vol_name, auto_remove_dir=True, recursive=True)
|
|
40
|
-
except:
|
|
41
|
-
pass
|
|
42
|
-
|
|
43
|
-
oss_test_dir_name = tn("test_oss_directory")
|
|
44
|
-
if oss_config is None:
|
|
45
|
-
pytest.skip("Need oss and its config to run this test")
|
|
46
|
-
(
|
|
47
|
-
oss_access_id,
|
|
48
|
-
oss_secret_access_key,
|
|
49
|
-
oss_bucket_name,
|
|
50
|
-
oss_endpoint,
|
|
51
|
-
) = oss_config.oss_config
|
|
52
|
-
|
|
53
|
-
if "test" in oss_endpoint:
|
|
54
|
-
# offline config
|
|
55
|
-
test_location = "oss://%s:%s@%s/%s/%s" % (
|
|
56
|
-
oss_access_id,
|
|
57
|
-
oss_secret_access_key,
|
|
58
|
-
oss_endpoint,
|
|
59
|
-
oss_bucket_name,
|
|
60
|
-
oss_test_dir_name,
|
|
61
|
-
)
|
|
62
|
-
rolearn = None
|
|
63
|
-
else:
|
|
64
|
-
# online config
|
|
65
|
-
endpoint_parts = oss_endpoint.split(".", 1)
|
|
66
|
-
if "-internal" not in endpoint_parts[0]:
|
|
67
|
-
endpoint_parts[0] += "-internal"
|
|
68
|
-
test_location = "oss://%s/%s/%s" % (
|
|
69
|
-
".".join(endpoint_parts),
|
|
70
|
-
oss_bucket_name,
|
|
71
|
-
oss_test_dir_name,
|
|
72
|
-
)
|
|
73
|
-
rolearn = oss_config.oss_rolearn
|
|
74
|
-
|
|
75
|
-
oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
|
|
76
|
-
odps_entry.create_external_volume(
|
|
77
|
-
test_vol_name, location=test_location, rolearn=rolearn
|
|
78
|
-
)
|
|
79
|
-
|
|
80
|
-
try:
|
|
34
|
+
def create_volume(oss_config):
|
|
35
|
+
with create_test_volume(tn("test_object_io_vol"), oss_config) as test_vol_name:
|
|
81
36
|
yield test_vol_name
|
|
82
|
-
finally:
|
|
83
|
-
try:
|
|
84
|
-
odps_entry.delete_volume(
|
|
85
|
-
test_vol_name, auto_remove_dir=True, recursive=True
|
|
86
|
-
)
|
|
87
|
-
except:
|
|
88
|
-
pass
|
|
89
37
|
|
|
90
38
|
|
|
91
39
|
def test_simple_object_io(create_volume):
|
|
@@ -119,6 +67,11 @@ def test_tensor_object_io(create_volume):
|
|
|
119
67
|
odps_entry, create_volume, obj.key, replace_internal_host=True
|
|
120
68
|
)
|
|
121
69
|
|
|
70
|
+
# test write and read full object
|
|
122
71
|
handler = get_object_io_handler(obj)()
|
|
123
72
|
handler.write_object(writer, obj, data)
|
|
124
73
|
np.testing.assert_equal(data, handler.read_object(reader, obj))
|
|
74
|
+
|
|
75
|
+
# test read single chunk
|
|
76
|
+
params = {"index": (0, 0)}
|
|
77
|
+
np.testing.assert_equal(data, handler.read_object_body(reader, params))
|
maxframe/io/odpsio/arrow.py
CHANGED
|
@@ -68,6 +68,8 @@ def arrow_to_pandas(
|
|
|
68
68
|
return _rebuild_dataframe(df, table_meta)
|
|
69
69
|
elif table_meta.type == OutputType.index:
|
|
70
70
|
return _rebuild_index(df, table_meta)
|
|
71
|
+
elif table_meta.type == OutputType.tensor:
|
|
72
|
+
return _rebuild_index(df, table_meta).to_numpy()
|
|
71
73
|
elif table_meta.type == OutputType.scalar:
|
|
72
74
|
return _rebuild_index(df, table_meta)[0]
|
|
73
75
|
else: # this could never happen # pragma: no cover
|
|
@@ -107,9 +109,9 @@ def pandas_to_arrow(
|
|
|
107
109
|
else:
|
|
108
110
|
table_datetime_cols = {"_idx_0"}
|
|
109
111
|
df = df.to_frame(name=names[0] if len(names) == 1 else names)
|
|
110
|
-
elif table_meta.type
|
|
112
|
+
elif table_meta.type in (OutputType.scalar, OutputType.tensor):
|
|
111
113
|
names = ["_idx_0"]
|
|
112
|
-
if isinstance(df, TENSOR_TYPE):
|
|
114
|
+
if isinstance(df, (TENSOR_TYPE, np.ndarray)):
|
|
113
115
|
df = pd.DataFrame([], columns=names).astype({names[0]: df.dtype})
|
|
114
116
|
else:
|
|
115
117
|
df = pd.DataFrame([[df]], columns=names)
|
|
@@ -145,4 +147,15 @@ def pandas_to_arrow(
|
|
|
145
147
|
col_data = pa_table.column(idx).cast(pa.timestamp("ms"))
|
|
146
148
|
col_datas.append(col_data)
|
|
147
149
|
pa_table = pa.Table.from_arrays(col_datas, names=col_names)
|
|
150
|
+
|
|
151
|
+
new_names, new_dtypes = [], []
|
|
152
|
+
for table_col, (pd_col, pd_dtype) in zip(
|
|
153
|
+
table_meta.table_column_names, table_meta.pd_column_dtypes.items()
|
|
154
|
+
):
|
|
155
|
+
new_names.append(pd_col)
|
|
156
|
+
if table_col not in table_datetime_cols:
|
|
157
|
+
new_dtypes.append(pd_dtype)
|
|
158
|
+
else:
|
|
159
|
+
new_dtypes.append(np.dtype("datetime64[ms]"))
|
|
160
|
+
table_meta.pd_column_dtypes = pd.Series(new_dtypes, index=new_names)
|
|
148
161
|
return pa_table, table_meta
|
maxframe/io/odpsio/schema.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import string
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Any, Dict, Tuple
|
|
17
|
+
from typing import Any, Dict, Tuple
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import pandas as pd
|
|
@@ -23,9 +23,11 @@ from odps import types as odps_types
|
|
|
23
23
|
from pandas.api import types as pd_types
|
|
24
24
|
|
|
25
25
|
from ...core import TILEABLE_TYPE, OutputType
|
|
26
|
+
from ...dataframe.core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
|
|
26
27
|
from ...lib.dtypes_extension import ArrowDtype
|
|
27
28
|
from ...protocol import DataFrameTableMeta
|
|
28
29
|
from ...tensor.core import TENSOR_TYPE
|
|
30
|
+
from ...utils import build_temp_table_name
|
|
29
31
|
|
|
30
32
|
_TEMP_TABLE_PREFIX = "tmp_mf_"
|
|
31
33
|
DEFAULT_SINGLE_INDEX_NAME = "_idx_0"
|
|
@@ -63,15 +65,15 @@ _odps_type_to_arrow = {
|
|
|
63
65
|
odps_types.timestamp_ntz: pa.timestamp("ns"),
|
|
64
66
|
}
|
|
65
67
|
|
|
66
|
-
|
|
68
|
+
_based_for_pandas_pa_types = (pa.ListType, pa.MapType)
|
|
67
69
|
|
|
68
70
|
|
|
69
|
-
def is_based_for_pandas_dtype(
|
|
71
|
+
def is_based_for_pandas_dtype(arrow_type: pa.DataType) -> bool:
|
|
70
72
|
"""
|
|
71
73
|
Check whether the arrow type is based for one pandas data type.
|
|
72
74
|
If true, we should make sure the environment support ArrowDtype.
|
|
73
75
|
"""
|
|
74
|
-
if not isinstance(
|
|
76
|
+
if not isinstance(arrow_type, _based_for_pandas_pa_types):
|
|
75
77
|
return False
|
|
76
78
|
|
|
77
79
|
if ArrowDtype is None:
|
|
@@ -243,12 +245,37 @@ def pandas_dataframe_to_arrow_table(df: pd.DataFrame, nthreads=1) -> pa.Table:
|
|
|
243
245
|
)
|
|
244
246
|
|
|
245
247
|
|
|
248
|
+
def pandas_dtypes_to_arrow_schema(dtypes, unknown_as_string: bool = False) -> pa.Schema:
|
|
249
|
+
if unknown_as_string:
|
|
250
|
+
dt_list = [dt if dt != np.dtype("O") else pd.StringDtype() for dt in dtypes]
|
|
251
|
+
dtypes = pd.Series(dt_list, index=dtypes.index)
|
|
252
|
+
schema = pandas_types_to_arrow_schema(
|
|
253
|
+
pd.DataFrame([], columns=dtypes.index).astype(dtypes)
|
|
254
|
+
)
|
|
255
|
+
return schema
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def pandas_dtype_to_arrow_type(dtype, unknown_as_string: bool = False) -> pa.DataType:
|
|
259
|
+
if unknown_as_string and dtype == np.dtype("O"):
|
|
260
|
+
dtype = pd.StringDtype()
|
|
261
|
+
schema = pandas_types_to_arrow_schema(
|
|
262
|
+
pd.DataFrame([], columns=["a"]).astype({"a": dtype})
|
|
263
|
+
)
|
|
264
|
+
return schema.types[0]
|
|
265
|
+
|
|
266
|
+
|
|
246
267
|
def is_scalar_object(df_obj: Any) -> bool:
|
|
247
268
|
return (
|
|
248
269
|
isinstance(df_obj, TENSOR_TYPE) and df_obj.shape == ()
|
|
249
270
|
) or pd_types.is_scalar(df_obj)
|
|
250
271
|
|
|
251
272
|
|
|
273
|
+
def is_tensor_object(df_obj: Any) -> bool:
|
|
274
|
+
return (
|
|
275
|
+
isinstance(df_obj, TENSOR_TYPE) or isinstance(df_obj, np.ndarray)
|
|
276
|
+
) and df_obj.ndim <= 1
|
|
277
|
+
|
|
278
|
+
|
|
252
279
|
def _scalar_as_index(df_obj: Any) -> pd.Index:
|
|
253
280
|
if isinstance(df_obj, TILEABLE_TYPE):
|
|
254
281
|
return pd.Index([], dtype=df_obj.dtype)
|
|
@@ -264,7 +291,7 @@ def pandas_to_odps_schema(
|
|
|
264
291
|
from ... import dataframe as md
|
|
265
292
|
from .arrow import pandas_to_arrow
|
|
266
293
|
|
|
267
|
-
if is_scalar_object(df_obj):
|
|
294
|
+
if is_scalar_object(df_obj) or is_tensor_object(df_obj):
|
|
268
295
|
empty_index = None
|
|
269
296
|
elif hasattr(df_obj, "index_value"):
|
|
270
297
|
empty_index = df_obj.index_value.to_pandas()[:0]
|
|
@@ -368,25 +395,25 @@ def build_table_column_name(
|
|
|
368
395
|
|
|
369
396
|
|
|
370
397
|
def build_dataframe_table_meta(
|
|
371
|
-
df_obj: Any, ignore_index: bool = False
|
|
398
|
+
df_obj: Any, ignore_index: bool = False, session_id: str = None
|
|
372
399
|
) -> DataFrameTableMeta:
|
|
373
|
-
from ... import dataframe as md
|
|
374
|
-
|
|
375
400
|
col_to_count = defaultdict(lambda: 0)
|
|
376
401
|
col_to_idx = defaultdict(lambda: 0)
|
|
377
402
|
pd_col_to_col_name = dict()
|
|
378
|
-
if isinstance(df_obj, (
|
|
403
|
+
if isinstance(df_obj, (DATAFRAME_TYPE, pd.DataFrame)):
|
|
379
404
|
obj_type = OutputType.dataframe
|
|
380
|
-
elif isinstance(df_obj, (
|
|
405
|
+
elif isinstance(df_obj, (SERIES_TYPE, pd.Series)):
|
|
381
406
|
obj_type = OutputType.series
|
|
382
|
-
elif isinstance(df_obj, (
|
|
407
|
+
elif isinstance(df_obj, (INDEX_TYPE, pd.Index)):
|
|
383
408
|
obj_type = OutputType.index
|
|
409
|
+
elif is_tensor_object(df_obj) and df_obj.ndim == 1:
|
|
410
|
+
obj_type = OutputType.tensor
|
|
384
411
|
elif is_scalar_object(df_obj):
|
|
385
412
|
obj_type = OutputType.scalar
|
|
386
413
|
else: # pragma: no cover
|
|
387
414
|
raise TypeError(f"Cannot accept type {type(df_obj)}")
|
|
388
415
|
|
|
389
|
-
if obj_type
|
|
416
|
+
if obj_type in (OutputType.scalar, OutputType.tensor):
|
|
390
417
|
pd_dtypes = pd.Series([])
|
|
391
418
|
column_index_names = []
|
|
392
419
|
index_obj = _scalar_as_index(df_obj)
|
|
@@ -404,7 +431,10 @@ def build_dataframe_table_meta(
|
|
|
404
431
|
index_obj = df_obj.index
|
|
405
432
|
|
|
406
433
|
if isinstance(df_obj, TILEABLE_TYPE):
|
|
407
|
-
|
|
434
|
+
if not session_id:
|
|
435
|
+
table_name = _TEMP_TABLE_PREFIX + str(df_obj.key)
|
|
436
|
+
else:
|
|
437
|
+
table_name = build_temp_table_name(session_id, df_obj.key)
|
|
408
438
|
else:
|
|
409
439
|
table_name = None
|
|
410
440
|
|
maxframe/io/odpsio/tableio.py
CHANGED
|
@@ -26,9 +26,9 @@ from odps.apis.storage_api import (
|
|
|
26
26
|
TableBatchScanResponse,
|
|
27
27
|
TableBatchWriteResponse,
|
|
28
28
|
)
|
|
29
|
+
from odps.errors import TableModified
|
|
29
30
|
from odps.tunnel import TableDownloadSession, TableDownloadStatus, TableTunnel
|
|
30
31
|
from odps.types import OdpsSchema, PartitionSpec, timestamp_ntz
|
|
31
|
-
from odps.utils import call_with_retry
|
|
32
32
|
|
|
33
33
|
try:
|
|
34
34
|
import pyarrow.compute as pac
|
|
@@ -37,7 +37,7 @@ except ImportError:
|
|
|
37
37
|
|
|
38
38
|
from ...config import options
|
|
39
39
|
from ...env import ODPS_STORAGE_API_ENDPOINT
|
|
40
|
-
from ...utils import is_empty, sync_pyodps_options
|
|
40
|
+
from ...utils import call_with_retry, is_empty, sync_pyodps_options
|
|
41
41
|
from .schema import odps_schema_to_arrow_schema
|
|
42
42
|
|
|
43
43
|
PartitionsType = Union[List[str], str, None]
|
|
@@ -154,6 +154,32 @@ class TunnelMultiPartitionReader:
|
|
|
154
154
|
return None
|
|
155
155
|
return self._count
|
|
156
156
|
|
|
157
|
+
def _open_table_reader(self, partition: Optional[str], columns: List[str]):
|
|
158
|
+
attempts = 2
|
|
159
|
+
for trial in range(attempts):
|
|
160
|
+
try:
|
|
161
|
+
return self._table.open_reader(
|
|
162
|
+
partition,
|
|
163
|
+
columns=columns,
|
|
164
|
+
arrow=True,
|
|
165
|
+
download_id=self._partition_to_download_ids.get(partition),
|
|
166
|
+
append_partitions=True,
|
|
167
|
+
)
|
|
168
|
+
except TableModified:
|
|
169
|
+
if trial == attempts - 1:
|
|
170
|
+
raise
|
|
171
|
+
pt_to_session = TunnelTableIO.create_download_sessions(
|
|
172
|
+
self._odps_entry,
|
|
173
|
+
self._table.full_table_name,
|
|
174
|
+
partition,
|
|
175
|
+
reopen=True,
|
|
176
|
+
)
|
|
177
|
+
assert partition in pt_to_session
|
|
178
|
+
self._partition_to_download_ids[partition] = pt_to_session[partition].id
|
|
179
|
+
raise RuntimeError(
|
|
180
|
+
"Unexpected condition: all trial of open reader done and not raised"
|
|
181
|
+
)
|
|
182
|
+
|
|
157
183
|
def _open_next_reader(self):
|
|
158
184
|
if self._cur_reader is not None:
|
|
159
185
|
self._reader_start_pos += self._cur_reader.count
|
|
@@ -170,12 +196,8 @@ class TunnelMultiPartitionReader:
|
|
|
170
196
|
part_str = self._partitions[self._cur_partition_id]
|
|
171
197
|
req_columns = self._schema.names
|
|
172
198
|
with sync_pyodps_options():
|
|
173
|
-
self._cur_reader = self.
|
|
174
|
-
part_str,
|
|
175
|
-
columns=req_columns,
|
|
176
|
-
arrow=True,
|
|
177
|
-
download_id=self._partition_to_download_ids.get(part_str),
|
|
178
|
-
append_partitions=True,
|
|
199
|
+
self._cur_reader = self._open_table_reader(
|
|
200
|
+
part_str, columns=req_columns
|
|
179
201
|
)
|
|
180
202
|
if self._cur_reader.count + self._reader_start_pos > self._start:
|
|
181
203
|
start = self._start - self._reader_start_pos
|
|
@@ -193,13 +215,27 @@ class TunnelMultiPartitionReader:
|
|
|
193
215
|
|
|
194
216
|
def read(self):
|
|
195
217
|
with sync_pyodps_options():
|
|
218
|
+
is_first_batch = False
|
|
196
219
|
if self._cur_reader is None:
|
|
220
|
+
is_first_batch = True
|
|
197
221
|
self._open_next_reader()
|
|
198
222
|
if self._cur_reader is None:
|
|
199
223
|
return None
|
|
200
224
|
while self._cur_reader is not None:
|
|
201
225
|
try:
|
|
202
|
-
|
|
226
|
+
try:
|
|
227
|
+
batch = next(self._reader_iter)
|
|
228
|
+
except TableModified:
|
|
229
|
+
if not is_first_batch:
|
|
230
|
+
raise
|
|
231
|
+
# clear download id cache to create new sessions
|
|
232
|
+
self._partition_to_download_ids = dict()
|
|
233
|
+
self._cur_reader = None
|
|
234
|
+
self._open_next_reader()
|
|
235
|
+
if self._cur_reader is None:
|
|
236
|
+
return None
|
|
237
|
+
batch = next(self._reader_iter)
|
|
238
|
+
|
|
203
239
|
if batch is not None:
|
|
204
240
|
if self._row_left is not None:
|
|
205
241
|
self._row_left -= batch.num_rows
|
|
@@ -222,6 +258,14 @@ class TunnelMultiPartitionReader:
|
|
|
222
258
|
|
|
223
259
|
class TunnelTableIO(ODPSTableIO):
|
|
224
260
|
_down_session_ids = OrderedDict()
|
|
261
|
+
_down_modified_time = dict()
|
|
262
|
+
|
|
263
|
+
@classmethod
|
|
264
|
+
def _get_modified_time(cls, odps_entry: ODPS, full_table_name, partition):
|
|
265
|
+
data_src = odps_entry.get_table(full_table_name)
|
|
266
|
+
if partition is not None:
|
|
267
|
+
data_src = data_src.partitions[partition]
|
|
268
|
+
return data_src.last_data_modified_time
|
|
225
269
|
|
|
226
270
|
@classmethod
|
|
227
271
|
def create_download_sessions(
|
|
@@ -229,6 +273,7 @@ class TunnelTableIO(ODPSTableIO):
|
|
|
229
273
|
odps_entry: ODPS,
|
|
230
274
|
full_table_name: str,
|
|
231
275
|
partitions: List[Optional[str]] = None,
|
|
276
|
+
reopen: bool = False,
|
|
232
277
|
) -> Dict[Optional[str], TableDownloadSession]:
|
|
233
278
|
table = odps_entry.get_table(full_table_name)
|
|
234
279
|
tunnel = TableTunnel(odps_entry, quota_name=options.tunnel_quota_name)
|
|
@@ -240,9 +285,14 @@ class TunnelTableIO(ODPSTableIO):
|
|
|
240
285
|
part_to_session = dict()
|
|
241
286
|
for part in parts:
|
|
242
287
|
part_key = (full_table_name, part)
|
|
288
|
+
modified_time = cls._get_modified_time(odps_entry, full_table_name, part)
|
|
243
289
|
down_session = None
|
|
244
290
|
|
|
245
|
-
if
|
|
291
|
+
if (
|
|
292
|
+
not reopen
|
|
293
|
+
and part_key in cls._down_session_ids
|
|
294
|
+
and cls._down_modified_time.get(part_key) == modified_time
|
|
295
|
+
):
|
|
246
296
|
down_id = cls._down_session_ids[part_key]
|
|
247
297
|
down_session = tunnel.create_download_session(
|
|
248
298
|
table, async_mode=True, partition_spec=part, download_id=down_id
|
|
@@ -256,8 +306,10 @@ class TunnelTableIO(ODPSTableIO):
|
|
|
256
306
|
)
|
|
257
307
|
|
|
258
308
|
while len(cls._down_session_ids) >= _DOWNLOAD_ID_CACHE_SIZE:
|
|
259
|
-
cls._down_session_ids.popitem(False)
|
|
309
|
+
k, _ = cls._down_session_ids.popitem(False)
|
|
310
|
+
cls._down_modified_time.pop(k)
|
|
260
311
|
cls._down_session_ids[part_key] = down_session.id
|
|
312
|
+
cls._down_modified_time[part_key] = modified_time
|
|
261
313
|
part_to_session[part] = down_session
|
|
262
314
|
return part_to_session
|
|
263
315
|
|