maxframe 1.3.1__cp311-cp311-win_amd64.whl → 2.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp311-win_amd64.pyd +0 -0
- maxframe/_utils.pyi +21 -0
- maxframe/_utils.pyx +4 -3
- maxframe/codegen/__init__.py +27 -0
- maxframe/{codegen.py → codegen/core.py} +49 -43
- maxframe/codegen/spe/__init__.py +16 -0
- maxframe/codegen/spe/core.py +307 -0
- maxframe/codegen/spe/dataframe/__init__.py +37 -0
- maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
- maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
- maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
- maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
- maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
- maxframe/codegen/spe/dataframe/datasource.py +181 -0
- maxframe/codegen/spe/dataframe/datastore.py +204 -0
- maxframe/codegen/spe/dataframe/extensions.py +63 -0
- maxframe/codegen/spe/dataframe/fetch.py +26 -0
- maxframe/codegen/spe/dataframe/groupby.py +224 -0
- maxframe/codegen/spe/dataframe/indexing.py +238 -0
- maxframe/codegen/spe/dataframe/merge.py +73 -0
- maxframe/codegen/spe/dataframe/misc.py +286 -0
- maxframe/codegen/spe/dataframe/missing.py +64 -0
- maxframe/codegen/spe/dataframe/reduction.py +160 -0
- maxframe/codegen/spe/dataframe/sort.py +83 -0
- maxframe/codegen/spe/dataframe/statistics.py +46 -0
- maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
- maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
- maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
- maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
- maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
- maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
- maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
- maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
- maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
- maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
- maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
- maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
- maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
- maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
- maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
- maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
- maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
- maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
- maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
- maxframe/codegen/spe/dataframe/tseries.py +46 -0
- maxframe/codegen/spe/dataframe/udf.py +62 -0
- maxframe/codegen/spe/dataframe/value_counts.py +31 -0
- maxframe/codegen/spe/dataframe/window.py +65 -0
- maxframe/codegen/spe/learn/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
- maxframe/codegen/spe/learn/contrib/models.py +41 -0
- maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
- maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
- maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
- maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
- maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
- maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
- maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
- maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
- maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
- maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
- maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
- maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
- maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
- maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
- maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
- maxframe/codegen/spe/learn/utils/__init__.py +15 -0
- maxframe/codegen/spe/learn/utils/checks.py +55 -0
- maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
- maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
- maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
- maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
- maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
- maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
- maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
- maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
- maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
- maxframe/codegen/spe/learn/utils/validation.py +35 -0
- maxframe/codegen/spe/objects.py +26 -0
- maxframe/codegen/spe/remote.py +29 -0
- maxframe/codegen/spe/tensor/__init__.py +28 -0
- maxframe/codegen/spe/tensor/arithmetic.py +95 -0
- maxframe/codegen/spe/tensor/core.py +41 -0
- maxframe/codegen/spe/tensor/datasource.py +165 -0
- maxframe/codegen/spe/tensor/extensions.py +35 -0
- maxframe/codegen/spe/tensor/fetch.py +26 -0
- maxframe/codegen/spe/tensor/indexing.py +63 -0
- maxframe/codegen/spe/tensor/linalg.py +63 -0
- maxframe/codegen/spe/tensor/merge.py +31 -0
- maxframe/codegen/spe/tensor/misc.py +121 -0
- maxframe/codegen/spe/tensor/random.py +29 -0
- maxframe/codegen/spe/tensor/reduction.py +39 -0
- maxframe/codegen/spe/tensor/reshape.py +26 -0
- maxframe/codegen/spe/tensor/sort.py +42 -0
- maxframe/codegen/spe/tensor/special.py +35 -0
- maxframe/codegen/spe/tensor/statistics.py +24 -0
- maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
- maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
- maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
- maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
- maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
- maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
- maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
- maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
- maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
- maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
- maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
- maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
- maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
- maxframe/codegen/spe/tests/__init__.py +13 -0
- maxframe/codegen/spe/tests/test_remote.py +29 -0
- maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
- maxframe/codegen/spe/utils.py +54 -0
- maxframe/codegen/tests/__init__.py +13 -0
- maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +50 -23
- maxframe/config/tests/test_config.py +4 -12
- maxframe/config/validators.py +5 -0
- maxframe/conftest.py +38 -10
- maxframe/core/__init__.py +1 -0
- maxframe/core/context.py +110 -0
- maxframe/core/entity/__init__.py +1 -0
- maxframe/core/entity/core.py +0 -7
- maxframe/core/entity/objects.py +19 -5
- maxframe/core/entity/output_types.py +11 -0
- maxframe/core/entity/tests/test_objects.py +11 -12
- maxframe/core/entity/tileables.py +3 -1
- maxframe/core/entity/utils.py +15 -0
- maxframe/core/graph/__init__.py +6 -1
- maxframe/core/graph/builder/base.py +5 -1
- maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +17 -6
- maxframe/core/graph/entity.py +18 -6
- maxframe/core/operator/__init__.py +8 -3
- maxframe/core/operator/base.py +35 -12
- maxframe/core/operator/core.py +37 -14
- maxframe/core/operator/fetch.py +5 -18
- maxframe/core/operator/objects.py +0 -20
- maxframe/core/operator/shuffle.py +6 -72
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/accessors/datetime_/core.py +7 -4
- maxframe/dataframe/accessors/string_/core.py +9 -6
- maxframe/dataframe/arithmetic/core.py +31 -20
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
- maxframe/dataframe/core.py +98 -91
- maxframe/dataframe/datasource/core.py +8 -1
- maxframe/dataframe/datasource/date_range.py +8 -0
- maxframe/dataframe/datasource/from_index.py +9 -5
- maxframe/dataframe/datasource/from_records.py +9 -2
- maxframe/dataframe/datasource/from_tensor.py +32 -21
- maxframe/dataframe/datasource/read_csv.py +8 -2
- maxframe/dataframe/datasource/read_odps_query.py +109 -19
- maxframe/dataframe/datasource/read_odps_table.py +20 -5
- maxframe/dataframe/datasource/read_parquet.py +8 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
- maxframe/dataframe/datastore/to_csv.py +7 -3
- maxframe/dataframe/datastore/to_odps.py +42 -6
- maxframe/dataframe/extensions/__init__.py +6 -1
- maxframe/dataframe/extensions/apply_chunk.py +96 -136
- maxframe/dataframe/extensions/flatjson.py +3 -2
- maxframe/dataframe/extensions/flatmap.py +15 -7
- maxframe/dataframe/fetch/core.py +12 -1
- maxframe/dataframe/groupby/__init__.py +7 -0
- maxframe/dataframe/groupby/aggregation.py +9 -8
- maxframe/dataframe/groupby/apply.py +50 -74
- maxframe/dataframe/groupby/apply_chunk.py +393 -0
- maxframe/dataframe/groupby/core.py +80 -17
- maxframe/dataframe/groupby/extensions.py +26 -0
- maxframe/dataframe/groupby/fill.py +9 -4
- maxframe/dataframe/groupby/sample.py +7 -7
- maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
- maxframe/dataframe/groupby/transform.py +57 -54
- maxframe/dataframe/indexing/align.py +7 -6
- maxframe/dataframe/indexing/getitem.py +9 -8
- maxframe/dataframe/indexing/iloc.py +28 -23
- maxframe/dataframe/indexing/insert.py +7 -3
- maxframe/dataframe/indexing/loc.py +9 -8
- maxframe/dataframe/indexing/reindex.py +36 -30
- maxframe/dataframe/indexing/rename_axis.py +18 -10
- maxframe/dataframe/indexing/reset_index.py +0 -2
- maxframe/dataframe/indexing/sample.py +13 -9
- maxframe/dataframe/indexing/set_axis.py +9 -6
- maxframe/dataframe/indexing/setitem.py +8 -5
- maxframe/dataframe/indexing/where.py +12 -9
- maxframe/dataframe/merge/__init__.py +0 -1
- maxframe/dataframe/merge/concat.py +10 -31
- maxframe/dataframe/merge/merge.py +2 -24
- maxframe/dataframe/misc/__init__.py +6 -0
- maxframe/dataframe/misc/_duplicate.py +7 -3
- maxframe/dataframe/misc/apply.py +106 -139
- maxframe/dataframe/misc/astype.py +3 -2
- maxframe/dataframe/misc/case_when.py +11 -7
- maxframe/dataframe/misc/cut.py +11 -10
- maxframe/dataframe/misc/describe.py +7 -3
- maxframe/dataframe/misc/drop.py +13 -11
- maxframe/dataframe/misc/eval.py +0 -2
- maxframe/dataframe/misc/get_dummies.py +78 -49
- maxframe/dataframe/misc/isin.py +13 -10
- maxframe/dataframe/misc/map.py +21 -6
- maxframe/dataframe/misc/melt.py +8 -1
- maxframe/dataframe/misc/pivot.py +232 -0
- maxframe/dataframe/misc/pivot_table.py +52 -40
- maxframe/dataframe/misc/rechunk.py +59 -0
- maxframe/dataframe/misc/shift.py +7 -4
- maxframe/dataframe/misc/stack.py +5 -3
- maxframe/dataframe/misc/tests/test_misc.py +167 -1
- maxframe/dataframe/misc/transform.py +63 -65
- maxframe/dataframe/misc/value_counts.py +7 -4
- maxframe/dataframe/missing/dropna.py +16 -7
- maxframe/dataframe/missing/fillna.py +18 -10
- maxframe/dataframe/missing/replace.py +10 -6
- maxframe/dataframe/missing/tests/test_missing.py +2 -2
- maxframe/dataframe/operators.py +1 -27
- maxframe/dataframe/reduction/aggregation.py +65 -3
- maxframe/dataframe/reduction/core.py +3 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
- maxframe/dataframe/reduction/unique.py +53 -7
- maxframe/dataframe/statistics/corr.py +9 -6
- maxframe/dataframe/statistics/quantile.py +9 -6
- maxframe/dataframe/tseries/to_datetime.py +6 -4
- maxframe/dataframe/utils.py +219 -31
- maxframe/dataframe/window/rolling.py +7 -4
- maxframe/env.py +1 -0
- maxframe/errors.py +9 -0
- maxframe/extension.py +13 -2
- maxframe/io/objects/core.py +67 -51
- maxframe/io/objects/tensor.py +73 -17
- maxframe/io/objects/tests/test_object_io.py +10 -55
- maxframe/io/odpsio/arrow.py +15 -2
- maxframe/io/odpsio/schema.py +43 -13
- maxframe/io/odpsio/tableio.py +63 -11
- maxframe/io/odpsio/tests/test_arrow.py +1 -2
- maxframe/io/odpsio/tests/test_schema.py +114 -1
- maxframe/io/odpsio/tests/test_tableio.py +42 -0
- maxframe/io/odpsio/tests/test_volumeio.py +21 -58
- maxframe/io/odpsio/volumeio.py +23 -8
- maxframe/learn/__init__.py +2 -2
- maxframe/learn/contrib/__init__.py +2 -2
- maxframe/learn/contrib/graph/connected_components.py +2 -1
- maxframe/learn/contrib/lightgbm/__init__.py +33 -0
- maxframe/learn/contrib/lightgbm/_predict.py +138 -0
- maxframe/learn/contrib/lightgbm/_train.py +163 -0
- maxframe/learn/contrib/lightgbm/callback.py +114 -0
- maxframe/learn/contrib/lightgbm/classifier.py +199 -0
- maxframe/learn/contrib/lightgbm/core.py +372 -0
- maxframe/learn/contrib/lightgbm/dataset.py +153 -0
- maxframe/learn/contrib/lightgbm/regressor.py +29 -0
- maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
- maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
- maxframe/learn/contrib/models.py +38 -9
- maxframe/learn/contrib/utils.py +55 -0
- maxframe/learn/contrib/xgboost/callback.py +86 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -30
- maxframe/learn/contrib/xgboost/core.py +54 -42
- maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
- maxframe/learn/contrib/xgboost/predict.py +16 -9
- maxframe/learn/contrib/xgboost/regressor.py +28 -27
- maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
- maxframe/learn/contrib/xgboost/train.py +59 -16
- maxframe/learn/core.py +252 -0
- maxframe/learn/datasets/__init__.py +20 -0
- maxframe/learn/datasets/samples_generator.py +628 -0
- maxframe/learn/linear_model/__init__.py +15 -0
- maxframe/learn/linear_model/_base.py +163 -0
- maxframe/learn/linear_model/_lin_reg.py +175 -0
- maxframe/learn/metrics/__init__.py +25 -0
- maxframe/learn/metrics/_check_targets.py +95 -0
- maxframe/learn/metrics/_classification.py +1121 -0
- maxframe/learn/metrics/_regression.py +256 -0
- maxframe/learn/model_selection/__init__.py +15 -0
- maxframe/learn/model_selection/_split.py +451 -0
- maxframe/learn/model_selection/tests/__init__.py +13 -0
- maxframe/learn/model_selection/tests/test_split.py +156 -0
- maxframe/learn/preprocessing/__init__.py +16 -0
- maxframe/learn/preprocessing/_data/__init__.py +17 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
- maxframe/learn/preprocessing/_data/normalize.py +127 -0
- maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
- maxframe/learn/preprocessing/_data/utils.py +79 -0
- maxframe/learn/preprocessing/_label/__init__.py +16 -0
- maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
- maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
- maxframe/learn/utils/__init__.py +4 -0
- maxframe/learn/utils/_encode.py +314 -0
- maxframe/learn/utils/checks.py +161 -0
- maxframe/learn/utils/core.py +33 -0
- maxframe/learn/utils/extmath.py +176 -0
- maxframe/learn/utils/multiclass.py +292 -0
- maxframe/learn/utils/shuffle.py +114 -0
- maxframe/learn/utils/sparsefuncs.py +87 -0
- maxframe/learn/utils/validation.py +775 -0
- maxframe/lib/__init__.py +0 -2
- maxframe/lib/compat.py +145 -0
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
- maxframe/lib/sparse/__init__.py +10 -15
- maxframe/lib/sparse/array.py +45 -33
- maxframe/lib/sparse/core.py +0 -2
- maxframe/lib/sparse/linalg.py +31 -0
- maxframe/lib/sparse/matrix.py +5 -2
- maxframe/lib/sparse/tests/__init__.py +0 -2
- maxframe/lib/sparse/tests/test_sparse.py +53 -53
- maxframe/lib/sparse/vector.py +0 -2
- maxframe/mixin.py +59 -2
- maxframe/opcodes.py +13 -5
- maxframe/protocol.py +67 -14
- maxframe/remote/core.py +16 -14
- maxframe/remote/run_script.py +6 -3
- maxframe/serialization/__init__.py +2 -0
- maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -1
- maxframe/serialization/core.pyx +82 -4
- maxframe/serialization/pandas.py +5 -1
- maxframe/serialization/serializables/core.py +6 -5
- maxframe/serialization/serializables/field.py +2 -2
- maxframe/serialization/serializables/tests/test_field_type.py +3 -5
- maxframe/serialization/tests/test_serial.py +27 -0
- maxframe/session.py +4 -71
- maxframe/sperunner.py +165 -0
- maxframe/tensor/__init__.py +35 -2
- maxframe/tensor/arithmetic/__init__.py +2 -4
- maxframe/tensor/arithmetic/abs.py +0 -2
- maxframe/tensor/arithmetic/absolute.py +0 -2
- maxframe/tensor/arithmetic/add.py +34 -4
- maxframe/tensor/arithmetic/angle.py +0 -2
- maxframe/tensor/arithmetic/arccos.py +1 -4
- maxframe/tensor/arithmetic/arccosh.py +1 -3
- maxframe/tensor/arithmetic/arcsin.py +0 -2
- maxframe/tensor/arithmetic/arcsinh.py +0 -2
- maxframe/tensor/arithmetic/arctan.py +0 -2
- maxframe/tensor/arithmetic/arctan2.py +0 -2
- maxframe/tensor/arithmetic/arctanh.py +0 -2
- maxframe/tensor/arithmetic/around.py +0 -2
- maxframe/tensor/arithmetic/bitand.py +0 -2
- maxframe/tensor/arithmetic/bitor.py +1 -3
- maxframe/tensor/arithmetic/bitxor.py +1 -3
- maxframe/tensor/arithmetic/cbrt.py +0 -2
- maxframe/tensor/arithmetic/ceil.py +0 -2
- maxframe/tensor/arithmetic/clip.py +13 -13
- maxframe/tensor/arithmetic/conj.py +0 -2
- maxframe/tensor/arithmetic/copysign.py +0 -2
- maxframe/tensor/arithmetic/core.py +47 -39
- maxframe/tensor/arithmetic/cos.py +1 -3
- maxframe/tensor/arithmetic/cosh.py +0 -2
- maxframe/tensor/arithmetic/deg2rad.py +0 -2
- maxframe/tensor/arithmetic/degrees.py +0 -2
- maxframe/tensor/arithmetic/divide.py +0 -2
- maxframe/tensor/arithmetic/equal.py +0 -2
- maxframe/tensor/arithmetic/exp.py +1 -3
- maxframe/tensor/arithmetic/exp2.py +0 -2
- maxframe/tensor/arithmetic/expm1.py +0 -2
- maxframe/tensor/arithmetic/fabs.py +0 -2
- maxframe/tensor/arithmetic/fix.py +0 -2
- maxframe/tensor/arithmetic/float_power.py +0 -2
- maxframe/tensor/arithmetic/floor.py +0 -2
- maxframe/tensor/arithmetic/floordiv.py +0 -2
- maxframe/tensor/arithmetic/fmax.py +0 -2
- maxframe/tensor/arithmetic/fmin.py +0 -2
- maxframe/tensor/arithmetic/fmod.py +0 -2
- maxframe/tensor/arithmetic/frexp.py +6 -2
- maxframe/tensor/arithmetic/greater.py +0 -2
- maxframe/tensor/arithmetic/greater_equal.py +0 -2
- maxframe/tensor/arithmetic/hypot.py +0 -2
- maxframe/tensor/arithmetic/i0.py +1 -3
- maxframe/tensor/arithmetic/imag.py +0 -2
- maxframe/tensor/arithmetic/invert.py +1 -3
- maxframe/tensor/arithmetic/isclose.py +0 -2
- maxframe/tensor/arithmetic/iscomplex.py +0 -2
- maxframe/tensor/arithmetic/isfinite.py +1 -3
- maxframe/tensor/arithmetic/isinf.py +0 -2
- maxframe/tensor/arithmetic/isnan.py +0 -2
- maxframe/tensor/arithmetic/isreal.py +0 -2
- maxframe/tensor/arithmetic/ldexp.py +0 -2
- maxframe/tensor/arithmetic/less.py +0 -2
- maxframe/tensor/arithmetic/less_equal.py +0 -2
- maxframe/tensor/arithmetic/log.py +1 -3
- maxframe/tensor/arithmetic/log10.py +1 -3
- maxframe/tensor/arithmetic/log1p.py +1 -3
- maxframe/tensor/arithmetic/log2.py +1 -3
- maxframe/tensor/arithmetic/logaddexp.py +0 -2
- maxframe/tensor/arithmetic/logaddexp2.py +0 -2
- maxframe/tensor/arithmetic/logical_and.py +0 -2
- maxframe/tensor/arithmetic/logical_not.py +1 -3
- maxframe/tensor/arithmetic/logical_or.py +0 -2
- maxframe/tensor/arithmetic/logical_xor.py +0 -2
- maxframe/tensor/arithmetic/lshift.py +0 -2
- maxframe/tensor/arithmetic/maximum.py +0 -2
- maxframe/tensor/arithmetic/minimum.py +0 -2
- maxframe/tensor/arithmetic/mod.py +0 -2
- maxframe/tensor/arithmetic/modf.py +6 -2
- maxframe/tensor/arithmetic/multiply.py +37 -4
- maxframe/tensor/arithmetic/nan_to_num.py +0 -2
- maxframe/tensor/arithmetic/negative.py +0 -2
- maxframe/tensor/arithmetic/nextafter.py +0 -2
- maxframe/tensor/arithmetic/not_equal.py +0 -2
- maxframe/tensor/arithmetic/positive.py +0 -2
- maxframe/tensor/arithmetic/power.py +0 -2
- maxframe/tensor/arithmetic/rad2deg.py +0 -2
- maxframe/tensor/arithmetic/radians.py +0 -2
- maxframe/tensor/arithmetic/real.py +0 -2
- maxframe/tensor/arithmetic/reciprocal.py +5 -3
- maxframe/tensor/arithmetic/rint.py +1 -3
- maxframe/tensor/arithmetic/rshift.py +0 -2
- maxframe/tensor/arithmetic/setimag.py +0 -2
- maxframe/tensor/arithmetic/setreal.py +0 -2
- maxframe/tensor/arithmetic/sign.py +0 -2
- maxframe/tensor/arithmetic/signbit.py +0 -2
- maxframe/tensor/arithmetic/sin.py +0 -2
- maxframe/tensor/arithmetic/sinc.py +1 -3
- maxframe/tensor/arithmetic/sinh.py +0 -2
- maxframe/tensor/arithmetic/spacing.py +0 -2
- maxframe/tensor/arithmetic/sqrt.py +0 -2
- maxframe/tensor/arithmetic/square.py +0 -2
- maxframe/tensor/arithmetic/subtract.py +4 -2
- maxframe/tensor/arithmetic/tan.py +0 -2
- maxframe/tensor/arithmetic/tanh.py +0 -2
- maxframe/tensor/arithmetic/tests/__init__.py +0 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
- maxframe/tensor/arithmetic/truediv.py +0 -2
- maxframe/tensor/arithmetic/trunc.py +0 -2
- maxframe/tensor/arithmetic/utils.py +32 -6
- maxframe/tensor/array_utils.py +3 -25
- maxframe/tensor/core.py +6 -6
- maxframe/tensor/datasource/__init__.py +10 -2
- maxframe/tensor/datasource/arange.py +0 -2
- maxframe/tensor/datasource/array.py +3 -22
- maxframe/tensor/datasource/core.py +15 -10
- maxframe/tensor/datasource/diag.py +140 -0
- maxframe/tensor/datasource/diagflat.py +69 -0
- maxframe/tensor/datasource/empty.py +0 -2
- maxframe/tensor/datasource/eye.py +95 -0
- maxframe/tensor/datasource/from_dataframe.py +0 -2
- maxframe/tensor/datasource/from_dense.py +0 -17
- maxframe/tensor/datasource/from_sparse.py +0 -2
- maxframe/tensor/datasource/full.py +0 -2
- maxframe/tensor/datasource/identity.py +54 -0
- maxframe/tensor/datasource/indices.py +115 -0
- maxframe/tensor/datasource/linspace.py +140 -0
- maxframe/tensor/datasource/meshgrid.py +135 -0
- maxframe/tensor/datasource/ones.py +8 -3
- maxframe/tensor/datasource/tests/test_datasource.py +32 -1
- maxframe/tensor/datasource/tri_array.py +107 -0
- maxframe/tensor/datasource/zeros.py +7 -3
- maxframe/tensor/extensions/__init__.py +31 -0
- maxframe/tensor/extensions/accessor.py +25 -0
- maxframe/tensor/extensions/apply_chunk.py +137 -0
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +8 -6
- maxframe/tensor/indexing/compress.py +0 -2
- maxframe/tensor/indexing/extract.py +0 -2
- maxframe/tensor/indexing/fill_diagonal.py +9 -6
- maxframe/tensor/indexing/flatnonzero.py +1 -3
- maxframe/tensor/indexing/getitem.py +10 -43
- maxframe/tensor/indexing/nonzero.py +2 -4
- maxframe/tensor/indexing/setitem.py +19 -9
- maxframe/tensor/indexing/slice.py +6 -3
- maxframe/tensor/indexing/take.py +0 -2
- maxframe/tensor/indexing/tests/__init__.py +0 -2
- maxframe/tensor/indexing/tests/test_indexing.py +0 -2
- maxframe/tensor/indexing/unravel_index.py +6 -6
- maxframe/tensor/lib/__init__.py +16 -0
- maxframe/tensor/lib/index_tricks.py +404 -0
- maxframe/tensor/linalg/__init__.py +36 -0
- maxframe/tensor/linalg/dot.py +145 -0
- maxframe/tensor/linalg/inner.py +36 -0
- maxframe/tensor/linalg/inv.py +83 -0
- maxframe/tensor/linalg/lu.py +115 -0
- maxframe/tensor/linalg/matmul.py +225 -0
- maxframe/tensor/linalg/qr.py +124 -0
- maxframe/tensor/linalg/solve_triangular.py +103 -0
- maxframe/tensor/linalg/svd.py +167 -0
- maxframe/tensor/linalg/tensordot.py +213 -0
- maxframe/tensor/linalg/vdot.py +73 -0
- maxframe/tensor/merge/__init__.py +4 -0
- maxframe/tensor/merge/append.py +74 -0
- maxframe/tensor/merge/column_stack.py +63 -0
- maxframe/tensor/merge/concatenate.py +3 -2
- maxframe/tensor/merge/dstack.py +71 -0
- maxframe/tensor/merge/hstack.py +70 -0
- maxframe/tensor/merge/stack.py +0 -2
- maxframe/tensor/merge/tests/test_merge.py +0 -2
- maxframe/tensor/misc/__init__.py +18 -5
- maxframe/tensor/misc/astype.py +10 -8
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/copy.py +64 -0
- maxframe/tensor/misc/diff.py +115 -0
- maxframe/tensor/misc/flatten.py +63 -0
- maxframe/tensor/misc/in1d.py +94 -0
- maxframe/tensor/misc/isin.py +130 -0
- maxframe/tensor/misc/ndim.py +53 -0
- maxframe/tensor/misc/ravel.py +0 -2
- maxframe/tensor/misc/repeat.py +129 -0
- maxframe/tensor/misc/searchsorted.py +147 -0
- maxframe/tensor/misc/setdiff1d.py +58 -0
- maxframe/tensor/misc/squeeze.py +117 -0
- maxframe/tensor/misc/swapaxes.py +113 -0
- maxframe/tensor/misc/tests/test_misc.py +0 -2
- maxframe/tensor/misc/transpose.py +8 -4
- maxframe/tensor/misc/trapezoid.py +123 -0
- maxframe/tensor/misc/unique.py +0 -1
- maxframe/tensor/misc/where.py +10 -8
- maxframe/tensor/operators.py +0 -34
- maxframe/tensor/random/__init__.py +3 -5
- maxframe/tensor/random/binomial.py +0 -2
- maxframe/tensor/random/bytes.py +0 -2
- maxframe/tensor/random/chisquare.py +0 -2
- maxframe/tensor/random/choice.py +9 -8
- maxframe/tensor/random/core.py +20 -5
- maxframe/tensor/random/dirichlet.py +0 -2
- maxframe/tensor/random/exponential.py +0 -2
- maxframe/tensor/random/f.py +2 -4
- maxframe/tensor/random/gamma.py +0 -2
- maxframe/tensor/random/geometric.py +0 -2
- maxframe/tensor/random/gumbel.py +0 -2
- maxframe/tensor/random/hypergeometric.py +0 -2
- maxframe/tensor/random/laplace.py +2 -4
- maxframe/tensor/random/logistic.py +0 -2
- maxframe/tensor/random/lognormal.py +0 -2
- maxframe/tensor/random/logseries.py +0 -2
- maxframe/tensor/random/multinomial.py +0 -2
- maxframe/tensor/random/multivariate_normal.py +0 -2
- maxframe/tensor/random/negative_binomial.py +0 -2
- maxframe/tensor/random/noncentral_chisquare.py +0 -2
- maxframe/tensor/random/noncentral_f.py +1 -3
- maxframe/tensor/random/normal.py +0 -2
- maxframe/tensor/random/pareto.py +0 -2
- maxframe/tensor/random/permutation.py +6 -3
- maxframe/tensor/random/poisson.py +0 -2
- maxframe/tensor/random/power.py +0 -2
- maxframe/tensor/random/rand.py +0 -2
- maxframe/tensor/random/randint.py +0 -2
- maxframe/tensor/random/randn.py +0 -2
- maxframe/tensor/random/random_integers.py +0 -2
- maxframe/tensor/random/random_sample.py +0 -2
- maxframe/tensor/random/rayleigh.py +0 -2
- maxframe/tensor/random/standard_cauchy.py +0 -2
- maxframe/tensor/random/standard_exponential.py +0 -2
- maxframe/tensor/random/standard_gamma.py +0 -2
- maxframe/tensor/random/standard_normal.py +0 -2
- maxframe/tensor/random/standard_t.py +0 -2
- maxframe/tensor/random/tests/__init__.py +0 -2
- maxframe/tensor/random/tests/test_random.py +0 -2
- maxframe/tensor/random/triangular.py +0 -2
- maxframe/tensor/random/uniform.py +0 -2
- maxframe/tensor/random/vonmises.py +0 -2
- maxframe/tensor/random/wald.py +0 -2
- maxframe/tensor/random/weibull.py +0 -2
- maxframe/tensor/random/zipf.py +0 -2
- maxframe/tensor/reduction/__init__.py +0 -2
- maxframe/tensor/reduction/all.py +0 -2
- maxframe/tensor/reduction/allclose.py +0 -2
- maxframe/tensor/reduction/any.py +0 -2
- maxframe/tensor/reduction/argmax.py +1 -3
- maxframe/tensor/reduction/argmin.py +1 -3
- maxframe/tensor/reduction/array_equal.py +0 -2
- maxframe/tensor/reduction/core.py +0 -2
- maxframe/tensor/reduction/count_nonzero.py +0 -2
- maxframe/tensor/reduction/cumprod.py +0 -2
- maxframe/tensor/reduction/cumsum.py +0 -2
- maxframe/tensor/reduction/max.py +0 -2
- maxframe/tensor/reduction/mean.py +0 -2
- maxframe/tensor/reduction/min.py +0 -2
- maxframe/tensor/reduction/nanargmax.py +0 -2
- maxframe/tensor/reduction/nanargmin.py +0 -2
- maxframe/tensor/reduction/nancumprod.py +0 -2
- maxframe/tensor/reduction/nancumsum.py +0 -2
- maxframe/tensor/reduction/nanmax.py +0 -2
- maxframe/tensor/reduction/nanmean.py +0 -2
- maxframe/tensor/reduction/nanmin.py +0 -2
- maxframe/tensor/reduction/nanprod.py +0 -2
- maxframe/tensor/reduction/nanstd.py +0 -2
- maxframe/tensor/reduction/nansum.py +0 -2
- maxframe/tensor/reduction/nanvar.py +0 -2
- maxframe/tensor/reduction/prod.py +0 -2
- maxframe/tensor/reduction/std.py +0 -2
- maxframe/tensor/reduction/sum.py +0 -2
- maxframe/tensor/reduction/tests/test_reduction.py +1 -4
- maxframe/tensor/reduction/var.py +0 -2
- maxframe/tensor/reshape/__init__.py +0 -2
- maxframe/tensor/reshape/reshape.py +6 -5
- maxframe/tensor/reshape/tests/__init__.py +0 -2
- maxframe/tensor/reshape/tests/test_reshape.py +0 -2
- maxframe/tensor/sort/__init__.py +16 -0
- maxframe/tensor/sort/argsort.py +150 -0
- maxframe/tensor/sort/sort.py +295 -0
- maxframe/tensor/special/__init__.py +37 -0
- maxframe/tensor/special/core.py +38 -0
- maxframe/tensor/special/misc.py +142 -0
- maxframe/tensor/special/statistical.py +56 -0
- maxframe/tensor/statistics/__init__.py +5 -0
- maxframe/tensor/statistics/average.py +143 -0
- maxframe/tensor/statistics/bincount.py +133 -0
- maxframe/tensor/statistics/quantile.py +10 -8
- maxframe/tensor/ufunc/__init__.py +0 -2
- maxframe/tensor/ufunc/ufunc.py +0 -2
- maxframe/tensor/utils.py +21 -3
- maxframe/tests/test_protocol.py +3 -3
- maxframe/tests/test_utils.py +210 -1
- maxframe/tests/utils.py +59 -1
- maxframe/udf.py +76 -6
- maxframe/utils.py +418 -17
- {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/METADATA +4 -1
- maxframe-2.0.0.dist-info/RECORD +939 -0
- {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +19 -3
- maxframe_client/fetcher.py +113 -6
- maxframe_client/session/odps.py +173 -38
- maxframe_client/session/task.py +3 -1
- maxframe_client/tests/test_session.py +41 -5
- maxframe-1.3.1.dist-info/RECORD +0 -705
- {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -13,20 +13,14 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from ... import opcodes
|
|
16
|
-
from ...serialization.serializables import
|
|
17
|
-
FieldTypes,
|
|
18
|
-
Int32Field,
|
|
19
|
-
StringField,
|
|
20
|
-
TupleField,
|
|
21
|
-
)
|
|
22
|
-
from . import FetchShuffle, ShuffleFetchType
|
|
16
|
+
from ...serialization.serializables import Int32Field, StringField
|
|
23
17
|
from .base import Operator, OperatorStage, VirtualOperator
|
|
18
|
+
from .core import TileableOperatorMixin
|
|
24
19
|
|
|
25
20
|
|
|
26
|
-
class ShuffleProxy(VirtualOperator):
|
|
21
|
+
class ShuffleProxy(VirtualOperator, TileableOperatorMixin):
|
|
27
22
|
_op_type_ = opcodes.SHUFFLE_PROXY
|
|
28
23
|
n_mappers = Int32Field("n_mappers", default=0)
|
|
29
|
-
# `n_reducers` will be updated in `MapReduceOperator._new_chunks`
|
|
30
24
|
n_reducers = Int32Field("n_reducers", default=0)
|
|
31
25
|
|
|
32
26
|
|
|
@@ -37,13 +31,11 @@ class MapReduceOperator(Operator):
|
|
|
37
31
|
"""
|
|
38
32
|
|
|
39
33
|
# for reducer
|
|
40
|
-
|
|
34
|
+
reducer_id = Int32Field("reducer_id")
|
|
41
35
|
# Total reducer nums, which also be shuffle blocks for single mapper.
|
|
42
36
|
n_reducers = Int32Field("n_reducers")
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
# `reducer_ordinal` will be set in `_new_chunks`.
|
|
46
|
-
reducer_ordinal = Int32Field("reducer_ordinal")
|
|
37
|
+
# preserved field to keep serialization compatible
|
|
38
|
+
_mr_preserved = Int32Field("mr_preserved")
|
|
47
39
|
reducer_phase = StringField("reducer_phase", default=None)
|
|
48
40
|
|
|
49
41
|
def __init__(self, *args, **kwargs):
|
|
@@ -51,61 +43,3 @@ class MapReduceOperator(Operator):
|
|
|
51
43
|
if self.stage == OperatorStage.reduce:
|
|
52
44
|
# for reducer, we assign worker at first
|
|
53
45
|
self.scheduling_hint.reassign_worker = True
|
|
54
|
-
|
|
55
|
-
def get_dependent_data_keys(self):
|
|
56
|
-
from .fetch import FetchShuffle
|
|
57
|
-
|
|
58
|
-
if self.stage == OperatorStage.reduce:
|
|
59
|
-
inputs = self.inputs or ()
|
|
60
|
-
deps = []
|
|
61
|
-
for inp in inputs:
|
|
62
|
-
if isinstance(inp.op, ShuffleProxy):
|
|
63
|
-
deps.extend(
|
|
64
|
-
[(chunk.key, self.reducer_index) for chunk in inp.inputs or ()]
|
|
65
|
-
)
|
|
66
|
-
elif isinstance(inp.op, FetchShuffle):
|
|
67
|
-
# fetch shuffle by index doesn't store data keys, so it won't run into this function.
|
|
68
|
-
assert inp.op.shuffle_fetch_type == ShuffleFetchType.FETCH_BY_KEY
|
|
69
|
-
deps.extend([(k, self.reducer_index) for k in inp.op.source_keys])
|
|
70
|
-
else:
|
|
71
|
-
deps.append(inp.key)
|
|
72
|
-
return deps
|
|
73
|
-
return super().get_dependent_data_keys()
|
|
74
|
-
|
|
75
|
-
def iter_mapper_keys(self, input_id=0):
|
|
76
|
-
# key is mapper chunk key, index is mapper chunk index.
|
|
77
|
-
input_chunk = self.inputs[input_id]
|
|
78
|
-
if isinstance(input_chunk.op, ShuffleProxy):
|
|
79
|
-
keys = [inp.key for inp in input_chunk.inputs]
|
|
80
|
-
else:
|
|
81
|
-
assert isinstance(input_chunk.op, FetchShuffle), input_chunk.op
|
|
82
|
-
if input_chunk.op.shuffle_fetch_type == ShuffleFetchType.FETCH_BY_INDEX:
|
|
83
|
-
# For fetch shuffle by index, all shuffle block of same reducers are
|
|
84
|
-
# identified by their index. chunk key are not needed any more.
|
|
85
|
-
# so just mock key here.
|
|
86
|
-
# keep this in sync with ray executor `execute_subtask`.
|
|
87
|
-
return list(range(input_chunk.op.n_mappers))
|
|
88
|
-
keys = input_chunk.op.source_keys
|
|
89
|
-
return keys
|
|
90
|
-
|
|
91
|
-
def iter_mapper_data(self, ctx, input_id=0, pop=False, skip_none=False):
|
|
92
|
-
for key in self.iter_mapper_keys(input_id):
|
|
93
|
-
try:
|
|
94
|
-
if pop:
|
|
95
|
-
yield ctx.pop((key, self.reducer_index))
|
|
96
|
-
else:
|
|
97
|
-
yield ctx[key, self.reducer_index]
|
|
98
|
-
except KeyError:
|
|
99
|
-
if not skip_none: # pragma: no cover
|
|
100
|
-
raise
|
|
101
|
-
if not pop:
|
|
102
|
-
ctx[key, self.reducer_index] = None
|
|
103
|
-
|
|
104
|
-
def execute(self, ctx, op):
|
|
105
|
-
"""The mapper stage must ensure all mapper blocks are inserted into ctx
|
|
106
|
-
and no blocks for some reducers are missing. This is needed by shuffle
|
|
107
|
-
fetch by index, which shuffle block are identified by the index instead
|
|
108
|
-
of data keys. For operators implementation simplicity, we can sort the
|
|
109
|
-
`ctx` by key which are (chunk key, reducer index) tuple and relax the
|
|
110
|
-
insert order requirements.
|
|
111
|
-
"""
|
maxframe/dataframe/__init__.py
CHANGED
|
@@ -46,6 +46,7 @@ from .misc.cut import cut
|
|
|
46
46
|
from .misc.eval import maxframe_eval as eval # pylint: disable=redefined-builtin
|
|
47
47
|
from .misc.get_dummies import get_dummies
|
|
48
48
|
from .misc.melt import melt
|
|
49
|
+
from .misc.pivot import pivot
|
|
49
50
|
from .misc.pivot_table import pivot_table
|
|
50
51
|
from .misc.qcut import qcut
|
|
51
52
|
from .misc.to_numeric import to_numeric
|
|
@@ -12,10 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import pandas as pd
|
|
16
18
|
|
|
17
19
|
from .... import opcodes
|
|
18
|
-
from ....core import OutputType
|
|
20
|
+
from ....core import EntityData, OutputType
|
|
19
21
|
from ....serialization.serializables import (
|
|
20
22
|
BoolField,
|
|
21
23
|
DictField,
|
|
@@ -45,9 +47,10 @@ class SeriesDatetimeMethod(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
45
47
|
def input(self):
|
|
46
48
|
return self._input
|
|
47
49
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
50
|
+
@classmethod
|
|
51
|
+
def _set_inputs(cls, op: "SeriesDatetimeMethod", inputs: List[EntityData]):
|
|
52
|
+
super()._set_inputs(op, inputs)
|
|
53
|
+
op._input = op._inputs[0]
|
|
51
54
|
|
|
52
55
|
def __call__(self, inp):
|
|
53
56
|
return datetime_method_to_handlers[self.method].call(self, inp)
|
|
@@ -12,11 +12,13 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import pandas as pd
|
|
17
19
|
|
|
18
20
|
from .... import opcodes
|
|
19
|
-
from ....core import OutputType
|
|
21
|
+
from ....core import EntityData, OutputType
|
|
20
22
|
from ....serialization.serializables import DictField, KeyField, StringField, TupleField
|
|
21
23
|
from ....tensor import tensor as astensor
|
|
22
24
|
from ....tensor.core import TENSOR_TYPE
|
|
@@ -43,12 +45,13 @@ class SeriesStringMethod(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
43
45
|
def input(self):
|
|
44
46
|
return self._input
|
|
45
47
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
@classmethod
|
|
49
|
+
def _set_inputs(cls, op: "SeriesStringMethod", inputs: List[EntityData]):
|
|
50
|
+
super()._set_inputs(op, inputs)
|
|
51
|
+
op._input = op._inputs[0]
|
|
52
|
+
if len(op._inputs) == 2:
|
|
50
53
|
# for method cat
|
|
51
|
-
|
|
54
|
+
op.method_kwargs["others"] = op._inputs[1]
|
|
52
55
|
|
|
53
56
|
def __call__(self, inp):
|
|
54
57
|
return string_method_to_handlers[self.method].call(self, inp)
|
|
@@ -13,14 +13,15 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import copy
|
|
16
|
+
from typing import List, MutableMapping, Union
|
|
16
17
|
|
|
17
18
|
import numpy as np
|
|
18
19
|
import pandas as pd
|
|
19
20
|
|
|
20
|
-
from ...core import ENTITY_TYPE
|
|
21
|
+
from ...core import ENTITY_TYPE, EntityData
|
|
21
22
|
from ...serialization.serializables import AnyField
|
|
22
23
|
from ...tensor.core import TENSOR_TYPE
|
|
23
|
-
from ...utils import classproperty,
|
|
24
|
+
from ...utils import classproperty, make_dtype
|
|
24
25
|
from ..core import DATAFRAME_TYPE, SERIES_TYPE
|
|
25
26
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
26
27
|
from ..ufunc.tensor import TensorUfuncMixin
|
|
@@ -30,6 +31,7 @@ from ..utils import (
|
|
|
30
31
|
infer_dtypes,
|
|
31
32
|
infer_index_value,
|
|
32
33
|
parse_index,
|
|
34
|
+
validate_axis,
|
|
33
35
|
)
|
|
34
36
|
|
|
35
37
|
|
|
@@ -63,7 +65,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
63
65
|
x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
|
|
64
66
|
):
|
|
65
67
|
x2_dtype = x2.dtype if hasattr(x2, "dtype") else type(x2)
|
|
66
|
-
x2_dtype =
|
|
68
|
+
x2_dtype = make_dtype(x2_dtype)
|
|
67
69
|
if hasattr(cls, "return_dtype"):
|
|
68
70
|
dtype = cls.return_dtype
|
|
69
71
|
else:
|
|
@@ -153,7 +155,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
153
155
|
columns = x1.columns_value
|
|
154
156
|
dtypes = x1.dtypes
|
|
155
157
|
index_shape, index = np.nan, None
|
|
156
|
-
if x1.index_value is not None and
|
|
158
|
+
if x1.index_value is not None and x2.index_value is not None:
|
|
157
159
|
if x1.index_value.key == x2.index_value.key:
|
|
158
160
|
dtypes = pd.Series(
|
|
159
161
|
[
|
|
@@ -277,34 +279,42 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
277
279
|
raise NotImplementedError
|
|
278
280
|
return self._call(x2, x1)
|
|
279
281
|
|
|
282
|
+
@classmethod
|
|
283
|
+
def estimate_size(
|
|
284
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameOperator"
|
|
285
|
+
):
|
|
286
|
+
ctx[op.outputs[0].key] = max(ctx[inp.key] for inp in (op.inputs or ()))
|
|
287
|
+
|
|
280
288
|
|
|
281
|
-
class DataFrameBinOp(
|
|
289
|
+
class DataFrameBinOp(DataFrameBinOpMixin, DataFrameOperator):
|
|
282
290
|
axis = AnyField("axis", default=None)
|
|
283
291
|
level = AnyField("level", default=None)
|
|
284
292
|
fill_value = AnyField("fill_value", default=None)
|
|
285
293
|
lhs = AnyField("lhs")
|
|
286
294
|
rhs = AnyField("rhs")
|
|
287
295
|
|
|
288
|
-
def __init__(self, output_types=None, **kw):
|
|
289
|
-
|
|
296
|
+
def __init__(self, output_types=None, axis=0, **kw):
|
|
297
|
+
axis = validate_axis(axis)
|
|
298
|
+
super().__init__(_output_types=output_types, axis=axis, **kw)
|
|
290
299
|
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
300
|
+
@classmethod
|
|
301
|
+
def _set_inputs(cls, op: "DataFrameBinOp", inputs: List[EntityData]):
|
|
302
|
+
super()._set_inputs(op, inputs)
|
|
303
|
+
if len(op._inputs) == 2:
|
|
304
|
+
op.lhs = op._inputs[0]
|
|
305
|
+
op.rhs = op._inputs[1]
|
|
296
306
|
else:
|
|
297
|
-
if isinstance(
|
|
298
|
-
|
|
299
|
-
elif
|
|
300
|
-
|
|
307
|
+
if isinstance(op.lhs, ENTITY_TYPE):
|
|
308
|
+
op.lhs = op._inputs[0]
|
|
309
|
+
elif isinstance(op.rhs, ENTITY_TYPE):
|
|
310
|
+
op.rhs = op._inputs[0]
|
|
301
311
|
|
|
302
312
|
|
|
303
313
|
class DataFrameUnaryOpMixin(DataFrameOperatorMixin):
|
|
304
314
|
__slots__ = ()
|
|
305
315
|
|
|
306
316
|
|
|
307
|
-
class DataFrameUnaryOp(
|
|
317
|
+
class DataFrameUnaryOp(DataFrameUnaryOpMixin, DataFrameOperator):
|
|
308
318
|
def __init__(self, output_types=None, **kw):
|
|
309
319
|
super().__init__(_output_types=output_types, **kw)
|
|
310
320
|
|
|
@@ -337,9 +347,10 @@ class DataFrameUnaryOp(DataFrameOperator, DataFrameUnaryOpMixin):
|
|
|
337
347
|
|
|
338
348
|
|
|
339
349
|
class DataFrameArithmeticTreeMixin:
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
350
|
+
@classmethod
|
|
351
|
+
def _set_inputs(cls, op: "DataFrameOperator", inputs: List[EntityData]):
|
|
352
|
+
inputs = op._get_inputs_data(inputs)
|
|
353
|
+
setattr(op, "_inputs", inputs)
|
|
343
354
|
|
|
344
355
|
|
|
345
356
|
class DataFrameUnaryUfunc(DataFrameUnaryOp, TensorUfuncMixin):
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import datetime
|
|
16
16
|
import operator
|
|
17
17
|
from dataclasses import dataclass
|
|
18
|
+
from math import isinf
|
|
18
19
|
from typing import Callable
|
|
19
20
|
|
|
20
21
|
import numpy as np
|
|
@@ -22,6 +23,7 @@ import pandas as pd
|
|
|
22
23
|
import pytest
|
|
23
24
|
|
|
24
25
|
from ....core import OperatorType
|
|
26
|
+
from ....core.operator import estimate_size
|
|
25
27
|
from ....tests.utils import assert_mf_index_dtype
|
|
26
28
|
from ....utils import dataslots
|
|
27
29
|
from ...core import IndexValue
|
|
@@ -185,6 +187,10 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
185
187
|
assert df3.index_value.key != df2.index_value.key
|
|
186
188
|
assert df3.shape[1] == 11 # columns is recorded, so we can get it
|
|
187
189
|
|
|
190
|
+
result_ctx = {inp.key: 10 for inp in df3.op.inputs}
|
|
191
|
+
estimate_size(result_ctx, df3.op)
|
|
192
|
+
assert result_ctx[df3.key] >= 0 and not isinf(result_ctx[df3.key])
|
|
193
|
+
|
|
188
194
|
|
|
189
195
|
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
|
|
190
196
|
def test_dataframe_and_series_with_align_map(func_name, func_opts):
|
maxframe/dataframe/core.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
1
|
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
4
2
|
#
|
|
5
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
@@ -34,7 +32,7 @@ from ..core import (
|
|
|
34
32
|
is_build_mode,
|
|
35
33
|
register_output_types,
|
|
36
34
|
)
|
|
37
|
-
from ..core.entity.utils import refresh_tileable_shape
|
|
35
|
+
from ..core.entity.utils import fill_chunk_slices, refresh_tileable_shape
|
|
38
36
|
from ..protocol import DataFrameTableMeta
|
|
39
37
|
from ..serialization.serializables import (
|
|
40
38
|
AnyField,
|
|
@@ -303,6 +301,10 @@ class IndexValue(Serializable):
|
|
|
303
301
|
def names(self) -> list:
|
|
304
302
|
return self._names
|
|
305
303
|
|
|
304
|
+
@property
|
|
305
|
+
def dtypes(self) -> pd.Series:
|
|
306
|
+
return pd.Series(self._dtypes, index=self._names)
|
|
307
|
+
|
|
306
308
|
def to_pandas(self):
|
|
307
309
|
data = getattr(self, "_data", None)
|
|
308
310
|
sortorder = getattr(self, "_sortorder", None)
|
|
@@ -445,9 +447,7 @@ class DtypesValue(Serializable):
|
|
|
445
447
|
def refresh_index_value(tileable: ENTITY_TYPE):
|
|
446
448
|
index_to_index_values = dict()
|
|
447
449
|
for chunk in tileable.chunks:
|
|
448
|
-
if chunk.ndim == 1:
|
|
449
|
-
index_to_index_values[chunk.index] = chunk.index_value
|
|
450
|
-
elif chunk.index[1] == 0:
|
|
450
|
+
if chunk.ndim == 1 or chunk.index[1] == 0:
|
|
451
451
|
index_to_index_values[chunk.index] = chunk.index_value
|
|
452
452
|
index_value = merge_index_value(index_to_index_values, store_data=False)
|
|
453
453
|
# keep key as original index_value's
|
|
@@ -637,11 +637,12 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
637
637
|
def refresh_params(self):
|
|
638
638
|
# refresh params when chunks updated
|
|
639
639
|
refresh_tileable_shape(self)
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
640
|
+
fill_chunk_slices(self)
|
|
641
|
+
# refresh_index_value(self)
|
|
642
|
+
# if self._dtype is None:
|
|
643
|
+
# self._dtype = self.chunks[0].dtype
|
|
644
|
+
# if self._name is None:
|
|
645
|
+
# self._name = self.chunks[0].name
|
|
645
646
|
|
|
646
647
|
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
647
648
|
pass
|
|
@@ -696,73 +697,6 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
696
697
|
|
|
697
698
|
return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)
|
|
698
699
|
|
|
699
|
-
|
|
700
|
-
class Index(HasShapeTileable, _ToPandasMixin):
|
|
701
|
-
__slots__ = "_df_or_series", "_parent_key", "_axis"
|
|
702
|
-
_allow_data_type_ = (IndexData,)
|
|
703
|
-
type_name = "Index"
|
|
704
|
-
|
|
705
|
-
def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
|
|
706
|
-
if data is not None and not isinstance(data, pd.Index):
|
|
707
|
-
# create corresponding Index class
|
|
708
|
-
# according to type of index_value
|
|
709
|
-
clz = globals()[type(data.index_value.value).__name__]
|
|
710
|
-
else:
|
|
711
|
-
clz = cls
|
|
712
|
-
return object.__new__(clz)
|
|
713
|
-
|
|
714
|
-
def __len__(self):
|
|
715
|
-
return len(self._data)
|
|
716
|
-
|
|
717
|
-
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
718
|
-
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
719
|
-
|
|
720
|
-
def _get_df_or_series(self):
|
|
721
|
-
obj = getattr(self, "_df_or_series", None)
|
|
722
|
-
if obj is not None:
|
|
723
|
-
return obj()
|
|
724
|
-
return None
|
|
725
|
-
|
|
726
|
-
def _set_df_or_series(self, df_or_series, axis):
|
|
727
|
-
self._df_or_series = weakref.ref(df_or_series)
|
|
728
|
-
self._parent_key = df_or_series.key
|
|
729
|
-
self._axis = axis
|
|
730
|
-
|
|
731
|
-
@property
|
|
732
|
-
def T(self):
|
|
733
|
-
"""Return the transpose, which is by definition self."""
|
|
734
|
-
return self
|
|
735
|
-
|
|
736
|
-
@property
|
|
737
|
-
def name(self):
|
|
738
|
-
return self._data.name
|
|
739
|
-
|
|
740
|
-
@name.setter
|
|
741
|
-
def name(self, value):
|
|
742
|
-
df_or_series = self._get_df_or_series()
|
|
743
|
-
if df_or_series is not None and df_or_series.key == self._parent_key:
|
|
744
|
-
df_or_series.rename_axis(value, axis=self._axis, inplace=True)
|
|
745
|
-
self.data = df_or_series.axes[self._axis].data
|
|
746
|
-
else:
|
|
747
|
-
self.rename(value, inplace=True)
|
|
748
|
-
|
|
749
|
-
@property
|
|
750
|
-
def names(self):
|
|
751
|
-
return self._data.names
|
|
752
|
-
|
|
753
|
-
@names.setter
|
|
754
|
-
def names(self, value):
|
|
755
|
-
df_or_series = self._get_df_or_series()
|
|
756
|
-
if df_or_series is not None:
|
|
757
|
-
df_or_series.rename_axis(value, axis=self._axis, inplace=True)
|
|
758
|
-
self.data = df_or_series.axes[self._axis].data
|
|
759
|
-
else:
|
|
760
|
-
self.rename(value, inplace=True)
|
|
761
|
-
|
|
762
|
-
@property
|
|
763
|
-
def values(self):
|
|
764
|
-
return self.to_tensor()
|
|
765
|
-
|
|
766
700
|
def to_frame(self, index: bool = True, name=None):
|
|
767
701
|
"""
|
|
768
702
|
Create a DataFrame with a column containing the Index.
|
|
@@ -838,7 +772,7 @@ class Index(HasShapeTileable, _ToPandasMixin):
|
|
|
838
772
|
columns = [name or self.name or 0]
|
|
839
773
|
index_ = self if index else None
|
|
840
774
|
return dataframe_from_tensor(
|
|
841
|
-
self.
|
|
775
|
+
self._to_maxframe_tensor(self, extract_multi_index=True),
|
|
842
776
|
index=index_,
|
|
843
777
|
columns=columns,
|
|
844
778
|
)
|
|
@@ -867,6 +801,73 @@ class Index(HasShapeTileable, _ToPandasMixin):
|
|
|
867
801
|
return series_from_index(self, index=index, name=name)
|
|
868
802
|
|
|
869
803
|
|
|
804
|
+
class Index(HasShapeTileable, _ToPandasMixin):
|
|
805
|
+
__slots__ = "_df_or_series", "_parent_key", "_axis"
|
|
806
|
+
_allow_data_type_ = (IndexData,)
|
|
807
|
+
type_name = "Index"
|
|
808
|
+
|
|
809
|
+
def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
|
|
810
|
+
if data is not None and not isinstance(data, pd.Index):
|
|
811
|
+
# create corresponding Index class
|
|
812
|
+
# according to type of index_value
|
|
813
|
+
clz = globals()[type(data.index_value.value).__name__]
|
|
814
|
+
else:
|
|
815
|
+
clz = cls
|
|
816
|
+
return object.__new__(clz)
|
|
817
|
+
|
|
818
|
+
def __len__(self):
|
|
819
|
+
return len(self._data)
|
|
820
|
+
|
|
821
|
+
def __maxframe_tensor__(self, dtype=None, order="K"):
|
|
822
|
+
return self._data.__maxframe_tensor__(dtype=dtype, order=order)
|
|
823
|
+
|
|
824
|
+
def _get_df_or_series(self):
|
|
825
|
+
obj = getattr(self, "_df_or_series", None)
|
|
826
|
+
if obj is not None:
|
|
827
|
+
return obj()
|
|
828
|
+
return None
|
|
829
|
+
|
|
830
|
+
def _set_df_or_series(self, df_or_series, axis):
|
|
831
|
+
self._df_or_series = weakref.ref(df_or_series)
|
|
832
|
+
self._parent_key = df_or_series.key
|
|
833
|
+
self._axis = axis
|
|
834
|
+
|
|
835
|
+
@property
|
|
836
|
+
def T(self):
|
|
837
|
+
"""Return the transpose, which is by definition self."""
|
|
838
|
+
return self
|
|
839
|
+
|
|
840
|
+
@property
|
|
841
|
+
def name(self):
|
|
842
|
+
return self._data.name
|
|
843
|
+
|
|
844
|
+
@name.setter
|
|
845
|
+
def name(self, value):
|
|
846
|
+
df_or_series = self._get_df_or_series()
|
|
847
|
+
if df_or_series is not None and df_or_series.key == self._parent_key:
|
|
848
|
+
df_or_series.rename_axis(value, axis=self._axis, inplace=True)
|
|
849
|
+
self.data = df_or_series.axes[self._axis].data
|
|
850
|
+
else:
|
|
851
|
+
self.rename(value, inplace=True)
|
|
852
|
+
|
|
853
|
+
@property
|
|
854
|
+
def names(self):
|
|
855
|
+
return self._data.names
|
|
856
|
+
|
|
857
|
+
@names.setter
|
|
858
|
+
def names(self, value):
|
|
859
|
+
df_or_series = self._get_df_or_series()
|
|
860
|
+
if df_or_series is not None:
|
|
861
|
+
df_or_series.rename_axis(value, axis=self._axis, inplace=True)
|
|
862
|
+
self.data = df_or_series.axes[self._axis].data
|
|
863
|
+
else:
|
|
864
|
+
self.rename(value, inplace=True)
|
|
865
|
+
|
|
866
|
+
@property
|
|
867
|
+
def values(self):
|
|
868
|
+
return self.to_tensor()
|
|
869
|
+
|
|
870
|
+
|
|
870
871
|
class RangeIndex(Index):
|
|
871
872
|
__slots__ = ()
|
|
872
873
|
|
|
@@ -969,11 +970,12 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
|
|
|
969
970
|
def refresh_params(self):
|
|
970
971
|
# refresh params when chunks updated
|
|
971
972
|
refresh_tileable_shape(self)
|
|
972
|
-
|
|
973
|
+
fill_chunk_slices(self)
|
|
974
|
+
# refresh_index_value(self)
|
|
973
975
|
if self._dtype is None:
|
|
974
|
-
self._dtype = self.chunks[0]
|
|
975
|
-
if self._name is None:
|
|
976
|
-
|
|
976
|
+
self._dtype = getattr(self.chunks[0], "dtype", None)
|
|
977
|
+
# if self._name is None:
|
|
978
|
+
# self._name = self.chunks[0].name
|
|
977
979
|
|
|
978
980
|
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
979
981
|
pass
|
|
@@ -1074,6 +1076,12 @@ class SeriesData(_BatchedFetcher, BaseSeriesData):
|
|
|
1074
1076
|
into=into
|
|
1075
1077
|
)
|
|
1076
1078
|
|
|
1079
|
+
def to_frame(self, name=None):
|
|
1080
|
+
from . import dataframe_from_tensor
|
|
1081
|
+
|
|
1082
|
+
name = name or self.name or 0
|
|
1083
|
+
return dataframe_from_tensor(self, columns=[name])
|
|
1084
|
+
|
|
1077
1085
|
|
|
1078
1086
|
class Series(HasShapeTileable, _ToPandasMixin):
|
|
1079
1087
|
__slots__ = ("_cache",)
|
|
@@ -1287,10 +1295,7 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1287
1295
|
1 b
|
|
1288
1296
|
2 c
|
|
1289
1297
|
"""
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
name = name or self.name or 0
|
|
1293
|
-
return dataframe_from_tensor(self, columns=[name])
|
|
1298
|
+
return self._data.to_frame(name=name)
|
|
1294
1299
|
|
|
1295
1300
|
def between(self, left, right, inclusive="both"):
|
|
1296
1301
|
"""
|
|
@@ -1498,8 +1503,8 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1498
1503
|
"shape": self.shape,
|
|
1499
1504
|
"dtypes": self.dtypes,
|
|
1500
1505
|
"index_value": self.index_value,
|
|
1501
|
-
"columns_value": self
|
|
1502
|
-
"dtypes_value": self
|
|
1506
|
+
"columns_value": getattr(self, "columns_value", None),
|
|
1507
|
+
"dtypes_value": getattr(self, "dtypes_value", None),
|
|
1503
1508
|
}
|
|
1504
1509
|
|
|
1505
1510
|
def _set_params(self, new_params: Dict[str, Any]):
|
|
@@ -1531,8 +1536,9 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1531
1536
|
def refresh_params(self):
|
|
1532
1537
|
# refresh params when chunks updated
|
|
1533
1538
|
refresh_tileable_shape(self)
|
|
1534
|
-
|
|
1535
|
-
|
|
1539
|
+
fill_chunk_slices(self)
|
|
1540
|
+
# refresh_index_value(self)
|
|
1541
|
+
# refresh_dtypes(self)
|
|
1536
1542
|
|
|
1537
1543
|
def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
|
|
1538
1544
|
self._dtypes = dtypes
|
|
@@ -2227,6 +2233,7 @@ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
|
|
|
2227
2233
|
def refresh_params(self):
|
|
2228
2234
|
# refresh params when chunks updated
|
|
2229
2235
|
refresh_tileable_shape(self)
|
|
2236
|
+
fill_chunk_slices(self)
|
|
2230
2237
|
if self._dtype is None:
|
|
2231
2238
|
self._dtype = self.chunks[0].dtype
|
|
2232
2239
|
if self._categories_value is None:
|
|
@@ -13,9 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
|
-
from typing import List, Optional
|
|
16
|
+
from typing import List, MutableMapping, Optional, Union
|
|
17
17
|
|
|
18
18
|
from ...serialization.serializables import Int64Field, StringField
|
|
19
|
+
from ...utils import estimate_pandas_size
|
|
19
20
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
20
21
|
|
|
21
22
|
|
|
@@ -79,3 +80,9 @@ class IncrementalIndexDatasource(HeadOptimizedDataSource):
|
|
|
79
80
|
class PandasDataSourceOperator(DataFrameOperator):
|
|
80
81
|
def get_data(self):
|
|
81
82
|
return getattr(self, "data", None)
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def estimate_size(
|
|
86
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "PandasDataSourceOperator"
|
|
87
|
+
):
|
|
88
|
+
ctx[op.outputs[0].key] = estimate_pandas_size(op.get_data())
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import warnings
|
|
16
16
|
from datetime import date, datetime, time
|
|
17
|
+
from typing import MutableMapping, Union
|
|
17
18
|
|
|
18
19
|
import numpy as np
|
|
19
20
|
import pandas as pd
|
|
@@ -162,6 +163,13 @@ class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
162
163
|
freq=self.freq,
|
|
163
164
|
)
|
|
164
165
|
|
|
166
|
+
@classmethod
|
|
167
|
+
def estimate_size(
|
|
168
|
+
cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameDateRange"
|
|
169
|
+
): # pragma: no cover
|
|
170
|
+
# todo implement this to facilitate local computation
|
|
171
|
+
ctx[op.outputs[0].key] = float("inf")
|
|
172
|
+
|
|
165
173
|
|
|
166
174
|
_midnight = time(0, 0)
|
|
167
175
|
|
|
@@ -12,7 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
15
17
|
from ... import opcodes
|
|
18
|
+
from ...core import EntityData
|
|
16
19
|
from ...serialization.serializables import AnyField, KeyField
|
|
17
20
|
from ..initializer import Index
|
|
18
21
|
from ..operators import DataFrameOperator, DataFrameOperatorMixin
|
|
@@ -25,11 +28,12 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
|
|
|
25
28
|
index = KeyField("index")
|
|
26
29
|
name = AnyField("name", default=None)
|
|
27
30
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
@classmethod
|
|
32
|
+
def _set_inputs(cls, op: "SeriesFromIndex", inputs: List[EntityData]):
|
|
33
|
+
super()._set_inputs(op, inputs)
|
|
34
|
+
op.input_ = op._inputs[0]
|
|
35
|
+
if len(op._inputs) > 1:
|
|
36
|
+
op.index = op._inputs[1]
|
|
33
37
|
|
|
34
38
|
def __call__(self, index, new_index=None, name=None):
|
|
35
39
|
inputs = [index]
|