maxframe 1.1.0__cp38-cp38-win_amd64.whl → 1.2.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -1
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/_utils.pxd +1 -1
- maxframe/_utils.pyx +1 -1
- maxframe/codegen.py +14 -7
- maxframe/config/__init__.py +2 -2
- maxframe/config/config.py +33 -1
- maxframe/config/tests/__init__.py +1 -1
- maxframe/config/tests/test_config.py +21 -2
- maxframe/config/tests/test_validators.py +1 -1
- maxframe/config/validators.py +1 -1
- maxframe/conftest.py +9 -3
- maxframe/core/__init__.py +2 -1
- maxframe/core/accessor.py +44 -0
- maxframe/core/base.py +1 -1
- maxframe/core/entity/__init__.py +1 -1
- maxframe/core/entity/core.py +1 -1
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +1 -1
- maxframe/core/entity/output_types.py +1 -1
- maxframe/core/entity/tests/__init__.py +1 -1
- maxframe/core/entity/tests/test_objects.py +1 -1
- maxframe/core/entity/tileables.py +1 -1
- maxframe/core/entity/utils.py +1 -1
- maxframe/core/graph/__init__.py +1 -1
- maxframe/core/graph/builder/__init__.py +1 -1
- maxframe/core/graph/builder/base.py +1 -1
- maxframe/core/graph/builder/tileable.py +1 -1
- maxframe/core/graph/builder/utils.py +1 -1
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +1 -1
- maxframe/core/graph/entity.py +1 -1
- maxframe/core/graph/tests/__init__.py +1 -1
- maxframe/core/graph/tests/test_graph.py +1 -1
- maxframe/core/mode.py +1 -1
- maxframe/core/operator/__init__.py +1 -1
- maxframe/core/operator/base.py +1 -1
- maxframe/core/operator/core.py +1 -1
- maxframe/core/operator/fetch.py +1 -1
- maxframe/core/operator/objects.py +1 -1
- maxframe/core/operator/shuffle.py +1 -1
- maxframe/core/operator/tests/__init__.py +1 -1
- maxframe/core/operator/tests/test_core.py +1 -1
- maxframe/core/operator/utils.py +1 -1
- maxframe/core/tests/__init__.py +1 -1
- maxframe/core/tests/test_mode.py +1 -1
- maxframe/dataframe/__init__.py +3 -3
- maxframe/dataframe/accessors/__init__.py +15 -0
- maxframe/dataframe/accessors/datetime_/__init__.py +32 -0
- maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
- maxframe/dataframe/{misc/datetimes.py → accessors/datetime_/core.py} +9 -9
- maxframe/dataframe/{plotting → accessors/datetime_}/tests/__init__.py +1 -1
- maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
- maxframe/dataframe/accessors/dict_/__init__.py +43 -0
- maxframe/dataframe/accessors/dict_/accessor.py +38 -0
- maxframe/dataframe/accessors/dict_/contains.py +81 -0
- maxframe/dataframe/accessors/dict_/getitem.py +144 -0
- maxframe/dataframe/accessors/dict_/length.py +72 -0
- maxframe/dataframe/accessors/dict_/remove.py +87 -0
- maxframe/dataframe/accessors/dict_/setitem.py +88 -0
- maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +130 -0
- maxframe/dataframe/{plotting → accessors/plotting}/__init__.py +4 -6
- maxframe/dataframe/{plotting → accessors/plotting}/core.py +3 -3
- maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
- maxframe/dataframe/{plotting/tests/test_plotting.py → accessors/plotting/tests/test_plotting_accessor.py} +6 -6
- maxframe/dataframe/accessors/string_/__init__.py +32 -0
- maxframe/dataframe/{misc → accessors/string_}/accessor.py +4 -65
- maxframe/dataframe/{misc/string_.py → accessors/string_/core.py} +20 -20
- maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
- maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
- maxframe/dataframe/arithmetic/__init__.py +1 -1
- maxframe/dataframe/arithmetic/abs.py +1 -1
- maxframe/dataframe/arithmetic/add.py +1 -1
- maxframe/dataframe/arithmetic/arccos.py +1 -1
- maxframe/dataframe/arithmetic/arccosh.py +1 -1
- maxframe/dataframe/arithmetic/arcsin.py +1 -1
- maxframe/dataframe/arithmetic/arcsinh.py +1 -1
- maxframe/dataframe/arithmetic/arctan.py +1 -1
- maxframe/dataframe/arithmetic/arctanh.py +1 -1
- maxframe/dataframe/arithmetic/around.py +1 -1
- maxframe/dataframe/arithmetic/bitwise_and.py +1 -1
- maxframe/dataframe/arithmetic/bitwise_or.py +1 -1
- maxframe/dataframe/arithmetic/bitwise_xor.py +1 -1
- maxframe/dataframe/arithmetic/ceil.py +1 -1
- maxframe/dataframe/arithmetic/core.py +1 -1
- maxframe/dataframe/arithmetic/cos.py +1 -1
- maxframe/dataframe/arithmetic/cosh.py +1 -1
- maxframe/dataframe/arithmetic/degrees.py +1 -1
- maxframe/dataframe/arithmetic/docstring.py +1 -1
- maxframe/dataframe/arithmetic/equal.py +1 -1
- maxframe/dataframe/arithmetic/exp.py +1 -1
- maxframe/dataframe/arithmetic/exp2.py +1 -1
- maxframe/dataframe/arithmetic/expm1.py +1 -1
- maxframe/dataframe/arithmetic/floor.py +1 -1
- maxframe/dataframe/arithmetic/floordiv.py +1 -1
- maxframe/dataframe/arithmetic/greater.py +1 -1
- maxframe/dataframe/arithmetic/greater_equal.py +1 -1
- maxframe/dataframe/arithmetic/invert.py +1 -1
- maxframe/dataframe/arithmetic/is_ufuncs.py +1 -1
- maxframe/dataframe/arithmetic/less.py +1 -1
- maxframe/dataframe/arithmetic/less_equal.py +1 -1
- maxframe/dataframe/arithmetic/log.py +1 -1
- maxframe/dataframe/arithmetic/log10.py +1 -1
- maxframe/dataframe/arithmetic/log2.py +1 -1
- maxframe/dataframe/arithmetic/mod.py +1 -1
- maxframe/dataframe/arithmetic/multiply.py +1 -1
- maxframe/dataframe/arithmetic/negative.py +1 -1
- maxframe/dataframe/arithmetic/not_equal.py +1 -1
- maxframe/dataframe/arithmetic/power.py +1 -1
- maxframe/dataframe/arithmetic/radians.py +1 -1
- maxframe/dataframe/arithmetic/sin.py +1 -1
- maxframe/dataframe/arithmetic/sinh.py +1 -1
- maxframe/dataframe/arithmetic/sqrt.py +1 -1
- maxframe/dataframe/arithmetic/subtract.py +1 -1
- maxframe/dataframe/arithmetic/tan.py +1 -1
- maxframe/dataframe/arithmetic/tanh.py +1 -1
- maxframe/dataframe/arithmetic/tests/__init__.py +1 -1
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/dataframe/arithmetic/truediv.py +1 -1
- maxframe/dataframe/arithmetic/trunc.py +1 -1
- maxframe/dataframe/arrays.py +1 -1
- maxframe/dataframe/core.py +1 -1
- maxframe/dataframe/datasource/__init__.py +1 -1
- maxframe/dataframe/datasource/core.py +1 -1
- maxframe/dataframe/datasource/dataframe.py +1 -1
- maxframe/dataframe/datasource/date_range.py +1 -1
- maxframe/dataframe/datasource/from_index.py +1 -1
- maxframe/dataframe/datasource/from_records.py +1 -1
- maxframe/dataframe/datasource/from_tensor.py +1 -1
- maxframe/dataframe/datasource/index.py +1 -1
- maxframe/dataframe/datasource/read_csv.py +1 -1
- maxframe/dataframe/datasource/read_odps_query.py +47 -17
- maxframe/dataframe/datasource/read_odps_table.py +5 -3
- maxframe/dataframe/datasource/read_parquet.py +1 -1
- maxframe/dataframe/datasource/series.py +1 -1
- maxframe/dataframe/datasource/tests/__init__.py +1 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +55 -8
- maxframe/dataframe/datastore/__init__.py +1 -1
- maxframe/dataframe/datastore/core.py +1 -1
- maxframe/dataframe/datastore/tests/__init__.py +1 -1
- maxframe/dataframe/datastore/tests/test_to_odps.py +1 -1
- maxframe/dataframe/datastore/to_csv.py +1 -1
- maxframe/dataframe/datastore/to_odps.py +1 -1
- maxframe/dataframe/extensions/__init__.py +2 -2
- maxframe/dataframe/extensions/accessor.py +6 -22
- maxframe/dataframe/extensions/apply_chunk.py +86 -1
- maxframe/dataframe/extensions/flatjson.py +1 -1
- maxframe/dataframe/extensions/flatmap.py +1 -1
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/__init__.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +1 -1
- maxframe/dataframe/fetch/__init__.py +1 -1
- maxframe/dataframe/fetch/core.py +1 -1
- maxframe/dataframe/groupby/__init__.py +1 -1
- maxframe/dataframe/groupby/aggregation.py +1 -1
- maxframe/dataframe/groupby/apply.py +1 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +1 -1
- maxframe/dataframe/groupby/fill.py +1 -1
- maxframe/dataframe/groupby/getitem.py +1 -1
- maxframe/dataframe/groupby/head.py +1 -1
- maxframe/dataframe/groupby/sample.py +1 -1
- maxframe/dataframe/groupby/tests/__init__.py +1 -1
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +1 -1
- maxframe/dataframe/indexing/__init__.py +1 -1
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/at.py +1 -1
- maxframe/dataframe/indexing/getitem.py +1 -1
- maxframe/dataframe/indexing/iat.py +1 -1
- maxframe/dataframe/indexing/iloc.py +1 -1
- maxframe/dataframe/indexing/insert.py +1 -1
- maxframe/dataframe/indexing/loc.py +1 -1
- maxframe/dataframe/indexing/reindex.py +1 -1
- maxframe/dataframe/indexing/rename.py +1 -1
- maxframe/dataframe/indexing/rename_axis.py +1 -1
- maxframe/dataframe/indexing/reset_index.py +1 -1
- maxframe/dataframe/indexing/sample.py +1 -1
- maxframe/dataframe/indexing/set_axis.py +1 -1
- maxframe/dataframe/indexing/set_index.py +1 -1
- maxframe/dataframe/indexing/setitem.py +1 -1
- maxframe/dataframe/indexing/tests/__init__.py +1 -1
- maxframe/dataframe/indexing/tests/test_indexing.py +1 -1
- maxframe/dataframe/indexing/where.py +1 -1
- maxframe/dataframe/initializer.py +1 -1
- maxframe/dataframe/merge/__init__.py +1 -1
- maxframe/dataframe/merge/append.py +1 -1
- maxframe/dataframe/merge/concat.py +1 -1
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/__init__.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +1 -1
- maxframe/dataframe/misc/__init__.py +1 -16
- maxframe/dataframe/misc/_duplicate.py +1 -1
- maxframe/dataframe/misc/apply.py +74 -1
- maxframe/dataframe/misc/astype.py +1 -1
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/check_monotonic.py +1 -1
- maxframe/dataframe/misc/cut.py +6 -4
- maxframe/dataframe/misc/describe.py +1 -1
- maxframe/dataframe/misc/diff.py +1 -1
- maxframe/dataframe/misc/drop.py +1 -1
- maxframe/dataframe/misc/drop_duplicates.py +7 -4
- maxframe/dataframe/misc/duplicated.py +1 -1
- maxframe/dataframe/misc/eval.py +1 -1
- maxframe/dataframe/misc/explode.py +1 -1
- maxframe/dataframe/misc/get_dummies.py +1 -1
- maxframe/dataframe/misc/isin.py +1 -1
- maxframe/dataframe/misc/map.py +1 -1
- maxframe/dataframe/misc/melt.py +1 -1
- maxframe/dataframe/misc/memory_usage.py +1 -1
- maxframe/dataframe/misc/pct_change.py +1 -1
- maxframe/dataframe/misc/pivot_table.py +1 -1
- maxframe/dataframe/misc/qcut.py +1 -1
- maxframe/dataframe/misc/select_dtypes.py +1 -1
- maxframe/dataframe/misc/shift.py +1 -1
- maxframe/dataframe/misc/stack.py +1 -1
- maxframe/dataframe/misc/tests/__init__.py +1 -1
- maxframe/dataframe/misc/tests/test_misc.py +1 -76
- maxframe/dataframe/misc/to_numeric.py +1 -1
- maxframe/dataframe/misc/transform.py +1 -1
- maxframe/dataframe/misc/transpose.py +1 -1
- maxframe/dataframe/misc/value_counts.py +1 -1
- maxframe/dataframe/missing/__init__.py +1 -1
- maxframe/dataframe/missing/checkna.py +1 -1
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +1 -1
- maxframe/dataframe/missing/replace.py +1 -1
- maxframe/dataframe/missing/tests/__init__.py +1 -1
- maxframe/dataframe/missing/tests/test_missing.py +1 -1
- maxframe/dataframe/operators.py +1 -1
- maxframe/dataframe/reduction/__init__.py +1 -1
- maxframe/dataframe/reduction/aggregation.py +1 -1
- maxframe/dataframe/reduction/all.py +1 -1
- maxframe/dataframe/reduction/any.py +1 -1
- maxframe/dataframe/reduction/core.py +1 -1
- maxframe/dataframe/reduction/count.py +1 -1
- maxframe/dataframe/reduction/cummax.py +1 -1
- maxframe/dataframe/reduction/cummin.py +1 -1
- maxframe/dataframe/reduction/cumprod.py +1 -1
- maxframe/dataframe/reduction/cumsum.py +1 -1
- maxframe/dataframe/reduction/custom_reduction.py +1 -1
- maxframe/dataframe/reduction/kurtosis.py +1 -1
- maxframe/dataframe/reduction/max.py +1 -1
- maxframe/dataframe/reduction/mean.py +1 -1
- maxframe/dataframe/reduction/median.py +1 -1
- maxframe/dataframe/reduction/min.py +1 -1
- maxframe/dataframe/reduction/nunique.py +1 -1
- maxframe/dataframe/reduction/prod.py +1 -1
- maxframe/dataframe/reduction/reduction_size.py +1 -1
- maxframe/dataframe/reduction/sem.py +1 -1
- maxframe/dataframe/reduction/skew.py +1 -1
- maxframe/dataframe/reduction/std.py +1 -1
- maxframe/dataframe/reduction/str_concat.py +1 -1
- maxframe/dataframe/reduction/sum.py +1 -1
- maxframe/dataframe/reduction/tests/__init__.py +1 -1
- maxframe/dataframe/reduction/tests/test_reduction.py +1 -1
- maxframe/dataframe/reduction/unique.py +1 -1
- maxframe/dataframe/reduction/var.py +1 -1
- maxframe/dataframe/sort/__init__.py +1 -1
- maxframe/dataframe/sort/core.py +1 -1
- maxframe/dataframe/sort/sort_index.py +1 -1
- maxframe/dataframe/sort/sort_values.py +1 -1
- maxframe/dataframe/sort/tests/__init__.py +1 -1
- maxframe/dataframe/sort/tests/test_sort.py +1 -1
- maxframe/dataframe/statistics/__init__.py +1 -1
- maxframe/dataframe/statistics/corr.py +1 -1
- maxframe/dataframe/statistics/quantile.py +1 -1
- maxframe/dataframe/statistics/tests/__init__.py +1 -1
- maxframe/dataframe/statistics/tests/test_statistics.py +1 -1
- maxframe/dataframe/tests/__init__.py +1 -1
- maxframe/dataframe/tests/test_initializer.py +1 -1
- maxframe/dataframe/tests/test_utils.py +36 -2
- maxframe/dataframe/tseries/__init__.py +1 -1
- maxframe/dataframe/tseries/tests/__init__.py +1 -1
- maxframe/dataframe/tseries/tests/test_tseries.py +1 -1
- maxframe/dataframe/tseries/to_datetime.py +1 -1
- maxframe/dataframe/ufunc/__init__.py +1 -1
- maxframe/dataframe/ufunc/tensor.py +1 -1
- maxframe/dataframe/ufunc/ufunc.py +1 -1
- maxframe/dataframe/utils.py +22 -2
- maxframe/dataframe/window/__init__.py +1 -1
- maxframe/dataframe/window/aggregation.py +1 -1
- maxframe/dataframe/window/core.py +1 -1
- maxframe/dataframe/window/ewm.py +1 -1
- maxframe/dataframe/window/expanding.py +1 -1
- maxframe/dataframe/window/rolling.py +1 -1
- maxframe/dataframe/window/tests/__init__.py +1 -1
- maxframe/dataframe/window/tests/test_ewm.py +1 -1
- maxframe/dataframe/window/tests/test_expanding.py +1 -1
- maxframe/dataframe/window/tests/test_rolling.py +1 -1
- maxframe/env.py +1 -1
- maxframe/errors.py +5 -1
- maxframe/extension.py +5 -2
- maxframe/io/__init__.py +1 -1
- maxframe/io/objects/__init__.py +1 -1
- maxframe/io/objects/core.py +1 -1
- maxframe/io/objects/tensor.py +1 -1
- maxframe/io/objects/tests/__init__.py +1 -1
- maxframe/io/objects/tests/test_object_io.py +1 -1
- maxframe/io/odpsio/__init__.py +2 -2
- maxframe/io/odpsio/arrow.py +29 -4
- maxframe/io/odpsio/schema.py +75 -3
- maxframe/io/odpsio/tableio.py +66 -20
- maxframe/io/odpsio/tests/__init__.py +1 -1
- maxframe/io/odpsio/tests/test_arrow.py +46 -1
- maxframe/io/odpsio/tests/test_schema.py +19 -1
- maxframe/io/odpsio/tests/test_tableio.py +1 -1
- maxframe/io/odpsio/tests/test_volumeio.py +1 -1
- maxframe/io/odpsio/volumeio.py +1 -1
- maxframe/learn/__init__.py +1 -1
- maxframe/learn/contrib/__init__.py +1 -1
- maxframe/learn/contrib/graph/__init__.py +1 -1
- maxframe/learn/contrib/graph/connected_components.py +1 -1
- maxframe/learn/contrib/graph/tests/__init__.py +1 -1
- maxframe/learn/contrib/graph/tests/test_connected_components.py +1 -1
- maxframe/learn/contrib/llm/__init__.py +1 -1
- maxframe/learn/contrib/llm/core.py +1 -1
- maxframe/learn/contrib/llm/models/__init__.py +1 -1
- maxframe/learn/contrib/llm/models/dashscope.py +1 -1
- maxframe/learn/contrib/llm/multi_modal.py +1 -1
- maxframe/learn/contrib/llm/text.py +1 -1
- maxframe/learn/contrib/pytorch/__init__.py +1 -1
- maxframe/learn/contrib/pytorch/run_function.py +1 -1
- maxframe/learn/contrib/pytorch/run_script.py +1 -1
- maxframe/learn/contrib/pytorch/tests/__init__.py +1 -1
- maxframe/learn/contrib/pytorch/tests/test_pytorch.py +1 -1
- maxframe/learn/contrib/utils.py +1 -1
- maxframe/learn/contrib/xgboost/__init__.py +1 -1
- maxframe/learn/contrib/xgboost/classifier.py +1 -1
- maxframe/learn/contrib/xgboost/core.py +1 -1
- maxframe/learn/contrib/xgboost/dmatrix.py +5 -2
- maxframe/learn/contrib/xgboost/predict.py +1 -1
- maxframe/learn/contrib/xgboost/regressor.py +1 -1
- maxframe/learn/contrib/xgboost/tests/__init__.py +1 -1
- maxframe/learn/contrib/xgboost/tests/test_core.py +1 -1
- maxframe/learn/contrib/xgboost/train.py +1 -1
- maxframe/learn/core.py +1 -1
- maxframe/learn/utils/__init__.py +1 -1
- maxframe/learn/utils/core.py +1 -1
- maxframe/lib/__init__.py +1 -1
- maxframe/lib/aio/__init__.py +1 -1
- maxframe/lib/aio/_runners.py +1 -1
- maxframe/lib/aio/_threads.py +1 -1
- maxframe/lib/aio/base.py +1 -1
- maxframe/lib/aio/file.py +1 -1
- maxframe/lib/aio/isolation.py +1 -1
- maxframe/lib/aio/lru.py +1 -1
- maxframe/lib/aio/parallelism.py +1 -1
- maxframe/lib/aio/tests/__init__.py +1 -1
- maxframe/lib/aio/tests/test_aio_file.py +1 -1
- maxframe/lib/compression.py +1 -1
- maxframe/lib/cython/__init__.py +1 -1
- maxframe/lib/cython/libcpp.pxd +1 -1
- maxframe/lib/dtypes_extension/__init__.py +14 -0
- maxframe/lib/dtypes_extension/dtypes.py +91 -0
- maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
- maxframe/lib/dtypes_extension/tests/test_dtypes.py +68 -0
- maxframe/lib/filesystem/__init__.py +1 -1
- maxframe/lib/filesystem/_glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/__init__.py +1 -1
- maxframe/lib/filesystem/_oss_lib/common.py +1 -1
- maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
- maxframe/lib/filesystem/_oss_lib/handle.py +1 -1
- maxframe/lib/filesystem/arrow.py +1 -1
- maxframe/lib/filesystem/base.py +1 -1
- maxframe/lib/filesystem/core.py +1 -1
- maxframe/lib/filesystem/fsmap.py +1 -1
- maxframe/lib/filesystem/hdfs.py +1 -1
- maxframe/lib/filesystem/local.py +1 -1
- maxframe/lib/filesystem/oss.py +1 -1
- maxframe/lib/filesystem/tests/__init__.py +1 -1
- maxframe/lib/filesystem/tests/test_filesystem.py +6 -4
- maxframe/lib/filesystem/tests/test_oss.py +1 -1
- maxframe/lib/functools_compat.py +1 -1
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +1 -1
- maxframe/lib/sparse/__init__.py +1 -1
- maxframe/lib/sparse/array.py +1 -1
- maxframe/lib/sparse/core.py +1 -1
- maxframe/lib/sparse/matrix.py +1 -1
- maxframe/lib/sparse/tests/__init__.py +1 -1
- maxframe/lib/sparse/tests/test_sparse.py +1 -1
- maxframe/lib/sparse/vector.py +1 -1
- maxframe/lib/tests/__init__.py +1 -1
- maxframe/lib/tests/test_wrapped_pickle.py +1 -1
- maxframe/lib/version.py +1 -1
- maxframe/lib/wrapped_pickle.py +1 -1
- maxframe/mixin.py +1 -1
- maxframe/opcodes.py +6 -1
- maxframe/protocol.py +1 -1
- maxframe/remote/__init__.py +1 -1
- maxframe/remote/core.py +1 -1
- maxframe/remote/run_script.py +1 -1
- maxframe/serialization/__init__.py +1 -1
- maxframe/serialization/arrow.py +1 -1
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +1 -1
- maxframe/serialization/core.pyi +1 -1
- maxframe/serialization/core.pyx +9 -6
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/maxframe_objects.py +1 -1
- maxframe/serialization/numpy.py +1 -1
- maxframe/serialization/pandas.py +1 -1
- maxframe/serialization/scipy.py +1 -1
- maxframe/serialization/serializables/__init__.py +1 -1
- maxframe/serialization/serializables/core.py +128 -87
- maxframe/serialization/serializables/field.py +1 -1
- maxframe/serialization/serializables/field_type.py +1 -1
- maxframe/serialization/serializables/tests/__init__.py +1 -1
- maxframe/serialization/serializables/tests/test_field_type.py +1 -1
- maxframe/serialization/serializables/tests/test_serializable.py +5 -2
- maxframe/serialization/tests/__init__.py +1 -1
- maxframe/serialization/tests/test_serial.py +21 -3
- maxframe/session.py +1 -1
- maxframe/tensor/__init__.py +1 -1
- maxframe/tensor/arithmetic/__init__.py +1 -1
- maxframe/tensor/arithmetic/abs.py +1 -1
- maxframe/tensor/arithmetic/absolute.py +1 -1
- maxframe/tensor/arithmetic/add.py +1 -1
- maxframe/tensor/arithmetic/angle.py +1 -1
- maxframe/tensor/arithmetic/arccos.py +1 -1
- maxframe/tensor/arithmetic/arccosh.py +1 -1
- maxframe/tensor/arithmetic/arcsin.py +1 -1
- maxframe/tensor/arithmetic/arcsinh.py +1 -1
- maxframe/tensor/arithmetic/arctan.py +1 -1
- maxframe/tensor/arithmetic/arctan2.py +1 -1
- maxframe/tensor/arithmetic/arctanh.py +1 -1
- maxframe/tensor/arithmetic/around.py +1 -1
- maxframe/tensor/arithmetic/bitand.py +1 -1
- maxframe/tensor/arithmetic/bitor.py +1 -1
- maxframe/tensor/arithmetic/bitxor.py +1 -1
- maxframe/tensor/arithmetic/cbrt.py +1 -1
- maxframe/tensor/arithmetic/ceil.py +1 -1
- maxframe/tensor/arithmetic/clip.py +1 -1
- maxframe/tensor/arithmetic/conj.py +1 -1
- maxframe/tensor/arithmetic/copysign.py +1 -1
- maxframe/tensor/arithmetic/core.py +1 -1
- maxframe/tensor/arithmetic/cos.py +1 -1
- maxframe/tensor/arithmetic/cosh.py +1 -1
- maxframe/tensor/arithmetic/deg2rad.py +1 -1
- maxframe/tensor/arithmetic/degrees.py +1 -1
- maxframe/tensor/arithmetic/divide.py +1 -1
- maxframe/tensor/arithmetic/equal.py +1 -1
- maxframe/tensor/arithmetic/exp.py +1 -1
- maxframe/tensor/arithmetic/exp2.py +1 -1
- maxframe/tensor/arithmetic/expm1.py +1 -1
- maxframe/tensor/arithmetic/fabs.py +1 -1
- maxframe/tensor/arithmetic/fix.py +1 -1
- maxframe/tensor/arithmetic/float_power.py +1 -1
- maxframe/tensor/arithmetic/floor.py +1 -1
- maxframe/tensor/arithmetic/floordiv.py +1 -1
- maxframe/tensor/arithmetic/fmax.py +1 -1
- maxframe/tensor/arithmetic/fmin.py +1 -1
- maxframe/tensor/arithmetic/fmod.py +1 -1
- maxframe/tensor/arithmetic/frexp.py +1 -1
- maxframe/tensor/arithmetic/greater.py +1 -1
- maxframe/tensor/arithmetic/greater_equal.py +1 -1
- maxframe/tensor/arithmetic/hypot.py +1 -1
- maxframe/tensor/arithmetic/i0.py +1 -1
- maxframe/tensor/arithmetic/imag.py +1 -1
- maxframe/tensor/arithmetic/invert.py +1 -1
- maxframe/tensor/arithmetic/isclose.py +1 -1
- maxframe/tensor/arithmetic/iscomplex.py +1 -1
- maxframe/tensor/arithmetic/isfinite.py +1 -1
- maxframe/tensor/arithmetic/isinf.py +1 -1
- maxframe/tensor/arithmetic/isnan.py +1 -1
- maxframe/tensor/arithmetic/isreal.py +1 -1
- maxframe/tensor/arithmetic/ldexp.py +1 -1
- maxframe/tensor/arithmetic/less.py +1 -1
- maxframe/tensor/arithmetic/less_equal.py +1 -1
- maxframe/tensor/arithmetic/log.py +1 -1
- maxframe/tensor/arithmetic/log10.py +1 -1
- maxframe/tensor/arithmetic/log1p.py +1 -1
- maxframe/tensor/arithmetic/log2.py +1 -1
- maxframe/tensor/arithmetic/logaddexp.py +1 -1
- maxframe/tensor/arithmetic/logaddexp2.py +1 -1
- maxframe/tensor/arithmetic/logical_and.py +1 -1
- maxframe/tensor/arithmetic/logical_not.py +1 -1
- maxframe/tensor/arithmetic/logical_or.py +1 -1
- maxframe/tensor/arithmetic/logical_xor.py +1 -1
- maxframe/tensor/arithmetic/lshift.py +1 -1
- maxframe/tensor/arithmetic/maximum.py +1 -1
- maxframe/tensor/arithmetic/minimum.py +1 -1
- maxframe/tensor/arithmetic/mod.py +1 -1
- maxframe/tensor/arithmetic/modf.py +1 -1
- maxframe/tensor/arithmetic/multiply.py +1 -1
- maxframe/tensor/arithmetic/nan_to_num.py +1 -1
- maxframe/tensor/arithmetic/negative.py +1 -1
- maxframe/tensor/arithmetic/nextafter.py +1 -1
- maxframe/tensor/arithmetic/not_equal.py +1 -1
- maxframe/tensor/arithmetic/positive.py +1 -1
- maxframe/tensor/arithmetic/power.py +1 -1
- maxframe/tensor/arithmetic/rad2deg.py +1 -1
- maxframe/tensor/arithmetic/radians.py +1 -1
- maxframe/tensor/arithmetic/real.py +1 -1
- maxframe/tensor/arithmetic/reciprocal.py +1 -1
- maxframe/tensor/arithmetic/rint.py +1 -1
- maxframe/tensor/arithmetic/rshift.py +1 -1
- maxframe/tensor/arithmetic/setimag.py +1 -1
- maxframe/tensor/arithmetic/setreal.py +1 -1
- maxframe/tensor/arithmetic/sign.py +1 -1
- maxframe/tensor/arithmetic/signbit.py +1 -1
- maxframe/tensor/arithmetic/sin.py +1 -1
- maxframe/tensor/arithmetic/sinc.py +1 -1
- maxframe/tensor/arithmetic/sinh.py +1 -1
- maxframe/tensor/arithmetic/spacing.py +1 -1
- maxframe/tensor/arithmetic/sqrt.py +1 -1
- maxframe/tensor/arithmetic/square.py +1 -1
- maxframe/tensor/arithmetic/subtract.py +1 -1
- maxframe/tensor/arithmetic/tan.py +1 -1
- maxframe/tensor/arithmetic/tanh.py +1 -1
- maxframe/tensor/arithmetic/tests/__init__.py +1 -1
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
- maxframe/tensor/arithmetic/truediv.py +1 -1
- maxframe/tensor/arithmetic/trunc.py +1 -1
- maxframe/tensor/arithmetic/utils.py +1 -1
- maxframe/tensor/array_utils.py +1 -1
- maxframe/tensor/core.py +1 -1
- maxframe/tensor/datasource/__init__.py +1 -1
- maxframe/tensor/datasource/arange.py +1 -1
- maxframe/tensor/datasource/array.py +1 -1
- maxframe/tensor/datasource/core.py +1 -1
- maxframe/tensor/datasource/empty.py +1 -1
- maxframe/tensor/datasource/from_dataframe.py +1 -1
- maxframe/tensor/datasource/from_dense.py +1 -1
- maxframe/tensor/datasource/from_sparse.py +1 -1
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/ones.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/__init__.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/datasource/zeros.py +1 -1
- maxframe/tensor/fetch/__init__.py +1 -1
- maxframe/tensor/fetch/core.py +1 -1
- maxframe/tensor/indexing/__init__.py +1 -1
- maxframe/tensor/indexing/choose.py +1 -1
- maxframe/tensor/indexing/compress.py +1 -1
- maxframe/tensor/indexing/core.py +1 -1
- maxframe/tensor/indexing/extract.py +1 -1
- maxframe/tensor/indexing/fill_diagonal.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +1 -1
- maxframe/tensor/indexing/nonzero.py +1 -1
- maxframe/tensor/indexing/setitem.py +1 -1
- maxframe/tensor/indexing/slice.py +1 -1
- maxframe/tensor/indexing/take.py +1 -1
- maxframe/tensor/indexing/tests/__init__.py +1 -1
- maxframe/tensor/indexing/tests/test_indexing.py +1 -1
- maxframe/tensor/indexing/unravel_index.py +1 -1
- maxframe/tensor/merge/__init__.py +1 -1
- maxframe/tensor/merge/concatenate.py +1 -1
- maxframe/tensor/merge/stack.py +1 -1
- maxframe/tensor/merge/tests/__init__.py +1 -1
- maxframe/tensor/merge/tests/test_merge.py +1 -1
- maxframe/tensor/merge/vstack.py +2 -2
- maxframe/tensor/misc/__init__.py +1 -1
- maxframe/tensor/misc/astype.py +1 -1
- maxframe/tensor/misc/atleast_1d.py +1 -1
- maxframe/tensor/misc/atleast_2d.py +1 -1
- maxframe/tensor/misc/atleast_3d.py +1 -1
- maxframe/tensor/misc/broadcast_to.py +1 -1
- maxframe/tensor/misc/ravel.py +1 -1
- maxframe/tensor/misc/tests/__init__.py +1 -1
- maxframe/tensor/misc/tests/test_misc.py +1 -1
- maxframe/tensor/misc/transpose.py +1 -1
- maxframe/tensor/misc/unique.py +1 -1
- maxframe/tensor/misc/where.py +1 -1
- maxframe/tensor/operators.py +1 -1
- maxframe/tensor/random/__init__.py +1 -1
- maxframe/tensor/random/beta.py +1 -1
- maxframe/tensor/random/binomial.py +1 -1
- maxframe/tensor/random/bytes.py +1 -1
- maxframe/tensor/random/chisquare.py +1 -1
- maxframe/tensor/random/choice.py +1 -1
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/random/dirichlet.py +1 -1
- maxframe/tensor/random/exponential.py +1 -1
- maxframe/tensor/random/f.py +1 -1
- maxframe/tensor/random/gamma.py +1 -1
- maxframe/tensor/random/geometric.py +1 -1
- maxframe/tensor/random/gumbel.py +1 -1
- maxframe/tensor/random/hypergeometric.py +1 -1
- maxframe/tensor/random/laplace.py +1 -1
- maxframe/tensor/random/logistic.py +1 -1
- maxframe/tensor/random/lognormal.py +1 -1
- maxframe/tensor/random/logseries.py +1 -1
- maxframe/tensor/random/multinomial.py +1 -1
- maxframe/tensor/random/multivariate_normal.py +1 -1
- maxframe/tensor/random/negative_binomial.py +1 -1
- maxframe/tensor/random/noncentral_chisquare.py +1 -1
- maxframe/tensor/random/noncentral_f.py +1 -1
- maxframe/tensor/random/normal.py +1 -1
- maxframe/tensor/random/pareto.py +1 -1
- maxframe/tensor/random/permutation.py +1 -1
- maxframe/tensor/random/poisson.py +1 -1
- maxframe/tensor/random/power.py +1 -1
- maxframe/tensor/random/rand.py +1 -1
- maxframe/tensor/random/randint.py +1 -1
- maxframe/tensor/random/randn.py +1 -1
- maxframe/tensor/random/random_integers.py +1 -1
- maxframe/tensor/random/random_sample.py +1 -1
- maxframe/tensor/random/rayleigh.py +1 -1
- maxframe/tensor/random/shuffle.py +1 -1
- maxframe/tensor/random/standard_cauchy.py +1 -1
- maxframe/tensor/random/standard_exponential.py +1 -1
- maxframe/tensor/random/standard_gamma.py +1 -1
- maxframe/tensor/random/standard_normal.py +1 -1
- maxframe/tensor/random/standard_t.py +1 -1
- maxframe/tensor/random/tests/__init__.py +1 -1
- maxframe/tensor/random/tests/test_random.py +1 -1
- maxframe/tensor/random/triangular.py +1 -1
- maxframe/tensor/random/uniform.py +1 -1
- maxframe/tensor/random/vonmises.py +1 -1
- maxframe/tensor/random/wald.py +1 -1
- maxframe/tensor/random/weibull.py +1 -1
- maxframe/tensor/random/zipf.py +1 -1
- maxframe/tensor/rechunk/__init__.py +1 -1
- maxframe/tensor/rechunk/rechunk.py +1 -1
- maxframe/tensor/reduction/__init__.py +1 -1
- maxframe/tensor/reduction/all.py +1 -1
- maxframe/tensor/reduction/allclose.py +1 -1
- maxframe/tensor/reduction/any.py +1 -1
- maxframe/tensor/reduction/argmax.py +1 -1
- maxframe/tensor/reduction/argmin.py +1 -1
- maxframe/tensor/reduction/array_equal.py +1 -1
- maxframe/tensor/reduction/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +1 -1
- maxframe/tensor/reduction/cumprod.py +1 -1
- maxframe/tensor/reduction/cumsum.py +1 -1
- maxframe/tensor/reduction/max.py +1 -1
- maxframe/tensor/reduction/mean.py +1 -1
- maxframe/tensor/reduction/min.py +1 -1
- maxframe/tensor/reduction/nanargmax.py +1 -1
- maxframe/tensor/reduction/nanargmin.py +1 -1
- maxframe/tensor/reduction/nancumprod.py +1 -1
- maxframe/tensor/reduction/nancumsum.py +1 -1
- maxframe/tensor/reduction/nanmax.py +1 -1
- maxframe/tensor/reduction/nanmean.py +1 -1
- maxframe/tensor/reduction/nanmin.py +1 -1
- maxframe/tensor/reduction/nanprod.py +1 -1
- maxframe/tensor/reduction/nanstd.py +1 -1
- maxframe/tensor/reduction/nansum.py +1 -1
- maxframe/tensor/reduction/nanvar.py +1 -1
- maxframe/tensor/reduction/prod.py +1 -1
- maxframe/tensor/reduction/std.py +1 -1
- maxframe/tensor/reduction/sum.py +1 -1
- maxframe/tensor/reduction/tests/__init__.py +1 -1
- maxframe/tensor/reduction/tests/test_reduction.py +1 -1
- maxframe/tensor/reduction/var.py +1 -1
- maxframe/tensor/reshape/__init__.py +1 -1
- maxframe/tensor/reshape/reshape.py +1 -1
- maxframe/tensor/reshape/tests/__init__.py +1 -1
- maxframe/tensor/reshape/tests/test_reshape.py +1 -1
- maxframe/tensor/statistics/__init__.py +1 -1
- maxframe/tensor/statistics/percentile.py +1 -1
- maxframe/tensor/statistics/quantile.py +1 -1
- maxframe/tensor/ufunc/__init__.py +1 -1
- maxframe/tensor/ufunc/ufunc.py +1 -1
- maxframe/tensor/utils.py +1 -1
- maxframe/tests/__init__.py +1 -1
- maxframe/tests/test_codegen.py +1 -1
- maxframe/tests/test_protocol.py +1 -1
- maxframe/tests/test_utils.py +1 -1
- maxframe/tests/utils.py +1 -1
- maxframe/typing_.py +1 -1
- maxframe/udf.py +6 -1
- maxframe/utils.py +14 -1
- {maxframe-1.1.0.dist-info → maxframe-1.2.0.dist-info}/METADATA +1 -1
- maxframe-1.2.0.dist-info/RECORD +697 -0
- {maxframe-1.1.0.dist-info → maxframe-1.2.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +1 -1
- maxframe_client/clients/__init__.py +1 -1
- maxframe_client/clients/framedriver.py +1 -1
- maxframe_client/conftest.py +1 -1
- maxframe_client/fetcher.py +6 -7
- maxframe_client/session/__init__.py +1 -1
- maxframe_client/session/consts.py +1 -1
- maxframe_client/session/graph.py +1 -1
- maxframe_client/session/odps.py +19 -2
- maxframe_client/session/task.py +5 -2
- maxframe_client/session/tests/__init__.py +1 -1
- maxframe_client/session/tests/test_task.py +36 -3
- maxframe_client/tests/__init__.py +1 -1
- maxframe_client/tests/test_fetcher.py +1 -1
- maxframe_client/tests/test_session.py +1 -1
- maxframe-1.1.0.dist-info/RECORD +0 -675
- {maxframe-1.1.0.dist-info → maxframe-1.2.0.dist-info}/top_level.txt +0 -0
maxframe/io/odpsio/arrow.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 1999-
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -14,14 +14,20 @@
|
|
|
14
14
|
|
|
15
15
|
from typing import Any, Tuple, Union
|
|
16
16
|
|
|
17
|
+
import numpy as np
|
|
17
18
|
import pandas as pd
|
|
18
19
|
import pyarrow as pa
|
|
19
20
|
|
|
20
21
|
from ...core import OutputType
|
|
22
|
+
from ...lib.version import parse as parse_version
|
|
21
23
|
from ...protocol import DataFrameTableMeta
|
|
22
24
|
from ...tensor.core import TENSOR_TYPE
|
|
23
25
|
from ...typing_ import ArrowTableType, PandasObjectTypes
|
|
24
|
-
from .schema import
|
|
26
|
+
from .schema import (
|
|
27
|
+
arrow_table_to_pandas_dataframe,
|
|
28
|
+
build_dataframe_table_meta,
|
|
29
|
+
pandas_dataframe_to_arrow_table,
|
|
30
|
+
)
|
|
25
31
|
|
|
26
32
|
|
|
27
33
|
def _rebuild_dataframe(
|
|
@@ -57,7 +63,7 @@ def _rebuild_index(df: pd.DataFrame, table_meta: DataFrameTableMeta) -> pd.Index
|
|
|
57
63
|
def arrow_to_pandas(
|
|
58
64
|
arrow_table: ArrowTableType, table_meta: DataFrameTableMeta
|
|
59
65
|
) -> PandasObjectTypes:
|
|
60
|
-
df = arrow_table
|
|
66
|
+
df = arrow_table_to_pandas_dataframe(arrow_table, table_meta)
|
|
61
67
|
if table_meta.type in (OutputType.dataframe, OutputType.series):
|
|
62
68
|
return _rebuild_dataframe(df, table_meta)
|
|
63
69
|
elif table_meta.type == OutputType.index:
|
|
@@ -109,7 +115,26 @@ def pandas_to_arrow(
|
|
|
109
115
|
df = pd.DataFrame([[df]], columns=names)
|
|
110
116
|
else: # this could never happen # pragma: no cover
|
|
111
117
|
raise ValueError(f"Does not support meta type {table_meta.type!r}")
|
|
112
|
-
|
|
118
|
+
|
|
119
|
+
try:
|
|
120
|
+
pa_table = pandas_dataframe_to_arrow_table(df, nthreads=nthreads)
|
|
121
|
+
except pa.ArrowTypeError as ex: # pragma: no cover
|
|
122
|
+
late_np_version = parse_version(np.__version__) >= parse_version("1.20")
|
|
123
|
+
early_pa_version = parse_version(pa.__version__) <= parse_version("4.0")
|
|
124
|
+
if (
|
|
125
|
+
late_np_version
|
|
126
|
+
and early_pa_version
|
|
127
|
+
and "Did not pass numpy.dtype object" in str(ex)
|
|
128
|
+
):
|
|
129
|
+
raise TypeError(
|
|
130
|
+
"Potential dependency conflict. Try update to pyarrow>4.0 "
|
|
131
|
+
"or downgrade to numpy<1.20. Details can be seen at "
|
|
132
|
+
"https://github.com/numpy/numpy/issues/17913. "
|
|
133
|
+
f"Raw error message: {ex!r}"
|
|
134
|
+
).with_traceback(ex.__traceback__) from None
|
|
135
|
+
else:
|
|
136
|
+
raise
|
|
137
|
+
|
|
113
138
|
if table_datetime_cols:
|
|
114
139
|
col_names = pa_table.schema.names
|
|
115
140
|
col_datas = []
|
maxframe/io/odpsio/schema.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 1999-
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import string
|
|
16
16
|
from collections import defaultdict
|
|
17
|
-
from typing import Any, Dict, Tuple
|
|
17
|
+
from typing import Any, Dict, Tuple, Union
|
|
18
18
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import pandas as pd
|
|
@@ -23,6 +23,7 @@ from odps import types as odps_types
|
|
|
23
23
|
from pandas.api import types as pd_types
|
|
24
24
|
|
|
25
25
|
from ...core import TILEABLE_TYPE, OutputType
|
|
26
|
+
from ...lib.dtypes_extension import ArrowDtype
|
|
26
27
|
from ...protocol import DataFrameTableMeta
|
|
27
28
|
from ...tensor.core import TENSOR_TYPE
|
|
28
29
|
|
|
@@ -61,6 +62,33 @@ _odps_type_to_arrow = {
|
|
|
61
62
|
odps_types.timestamp_ntz: pa.timestamp("ns"),
|
|
62
63
|
}
|
|
63
64
|
|
|
65
|
+
_based_for_pandas_pa_dtypes = Union[pa.MapType]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def is_based_for_pandas_dtype(dtype: pa.DataType) -> bool:
|
|
69
|
+
"""
|
|
70
|
+
Check whether the arrow type is based for one pandas data type.
|
|
71
|
+
If true, we should make sure the environment support ArrowDtype.
|
|
72
|
+
"""
|
|
73
|
+
if not isinstance(dtype, _based_for_pandas_pa_dtypes):
|
|
74
|
+
return False
|
|
75
|
+
|
|
76
|
+
if ArrowDtype is None:
|
|
77
|
+
raise ImportError("ArrowDtype is not supported in current environment")
|
|
78
|
+
return True
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def pandas_types_to_arrow_schema(df_obj: pd.DataFrame) -> pa.Schema:
|
|
82
|
+
"""
|
|
83
|
+
This one is only called when a pandas DataFrame is written to ODPS. So we can check
|
|
84
|
+
whether the ArrowDtype is supported.
|
|
85
|
+
"""
|
|
86
|
+
schema = pa.Schema.from_pandas(df_obj, preserve_index=False)
|
|
87
|
+
for idx, col_dtype in enumerate(df_obj.dtypes.items()):
|
|
88
|
+
if ArrowDtype is not None and isinstance(col_dtype[1], ArrowDtype):
|
|
89
|
+
schema.set(idx, pa.field(col_dtype[0], col_dtype[1].pyarrow_dtype))
|
|
90
|
+
return schema
|
|
91
|
+
|
|
64
92
|
|
|
65
93
|
def arrow_type_to_odps_type(
|
|
66
94
|
arrow_type: pa.DataType, col_name: str, unknown_as_string: bool = False
|
|
@@ -167,7 +195,51 @@ def odps_schema_to_pandas_dtypes(
|
|
|
167
195
|
arrow_schema = odps_schema_to_arrow_schema(
|
|
168
196
|
odps_schema, with_partitions=with_partitions
|
|
169
197
|
)
|
|
170
|
-
return arrow_schema.empty_table()
|
|
198
|
+
return arrow_table_to_pandas_dataframe(arrow_schema.empty_table()).dtypes
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def arrow_table_to_pandas_dataframe(
|
|
202
|
+
table: pa.Table, meta: DataFrameTableMeta = None
|
|
203
|
+
) -> pd.DataFrame:
|
|
204
|
+
df = table.to_pandas(
|
|
205
|
+
types_mapper=lambda x: (
|
|
206
|
+
ArrowDtype(x) if is_based_for_pandas_dtype(x) else None
|
|
207
|
+
),
|
|
208
|
+
ignore_metadata=True,
|
|
209
|
+
)
|
|
210
|
+
if not meta:
|
|
211
|
+
return df
|
|
212
|
+
|
|
213
|
+
# If meta is passed, we should convert the dtypes to match the ones in the meta
|
|
214
|
+
converted_column_dtypes = dict()
|
|
215
|
+
for source_dtype, target_col, target_dtype in zip(
|
|
216
|
+
df.dtypes.values,
|
|
217
|
+
df.columns,
|
|
218
|
+
list(meta.pd_index_dtypes.values) + list(meta.pd_column_dtypes.values),
|
|
219
|
+
):
|
|
220
|
+
if source_dtype != target_dtype:
|
|
221
|
+
# Converting tz-aware dtype to tz-native dtype is a special case.
|
|
222
|
+
# In numpy1.19, we can't use numpy.dtype.DateTime64Dtype
|
|
223
|
+
if (
|
|
224
|
+
isinstance(source_dtype, pd.DatetimeTZDtype)
|
|
225
|
+
and isinstance(target_dtype, np.dtype)
|
|
226
|
+
and target_dtype.name.startswith("datetime64")
|
|
227
|
+
):
|
|
228
|
+
df[target_col] = df[target_col].dt.tz_localize(None)
|
|
229
|
+
else:
|
|
230
|
+
converted_column_dtypes[target_col] = target_dtype
|
|
231
|
+
|
|
232
|
+
if converted_column_dtypes:
|
|
233
|
+
df = df.astype(converted_column_dtypes)
|
|
234
|
+
|
|
235
|
+
return df
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def pandas_dataframe_to_arrow_table(df: pd.DataFrame, nthreads=1) -> pa.Table:
|
|
239
|
+
schema = pandas_types_to_arrow_schema(df)
|
|
240
|
+
return pa.Table.from_pandas(
|
|
241
|
+
df, schema=schema, nthreads=nthreads, preserve_index=False
|
|
242
|
+
)
|
|
171
243
|
|
|
172
244
|
|
|
173
245
|
def is_scalar_object(df_obj: Any) -> bool:
|
maxframe/io/odpsio/tableio.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 1999-
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import os
|
|
16
16
|
import time
|
|
17
17
|
from abc import ABC, abstractmethod
|
|
18
|
+
from collections import OrderedDict
|
|
18
19
|
from contextlib import contextmanager
|
|
19
20
|
from typing import Dict, List, Optional, Union
|
|
20
21
|
|
|
@@ -25,7 +26,7 @@ from odps.apis.storage_api import (
|
|
|
25
26
|
TableBatchScanResponse,
|
|
26
27
|
TableBatchWriteResponse,
|
|
27
28
|
)
|
|
28
|
-
from odps.tunnel import TableTunnel
|
|
29
|
+
from odps.tunnel import TableDownloadSession, TableDownloadStatus, TableTunnel
|
|
29
30
|
from odps.types import OdpsSchema, PartitionSpec, timestamp_ntz
|
|
30
31
|
from odps.utils import call_with_retry
|
|
31
32
|
|
|
@@ -36,12 +37,13 @@ except ImportError:
|
|
|
36
37
|
|
|
37
38
|
from ...config import options
|
|
38
39
|
from ...env import ODPS_STORAGE_API_ENDPOINT
|
|
39
|
-
from ...utils import sync_pyodps_options
|
|
40
|
+
from ...utils import is_empty, sync_pyodps_options
|
|
40
41
|
from .schema import odps_schema_to_arrow_schema
|
|
41
42
|
|
|
42
43
|
PartitionsType = Union[List[str], str, None]
|
|
43
44
|
|
|
44
45
|
_DEFAULT_ROW_BATCH_SIZE = 4096
|
|
46
|
+
_DOWNLOAD_ID_CACHE_SIZE = 100
|
|
45
47
|
|
|
46
48
|
|
|
47
49
|
class ODPSTableIO(ABC):
|
|
@@ -65,7 +67,11 @@ class ODPSTableIO(ABC):
|
|
|
65
67
|
) -> OdpsSchema:
|
|
66
68
|
final_cols = []
|
|
67
69
|
|
|
68
|
-
columns =
|
|
70
|
+
columns = (
|
|
71
|
+
columns
|
|
72
|
+
if not is_empty(columns)
|
|
73
|
+
else [col.name for col in table_schema.simple_columns]
|
|
74
|
+
)
|
|
69
75
|
if partition_columns is True:
|
|
70
76
|
partition_columns = [c.name for c in table_schema.partitions]
|
|
71
77
|
else:
|
|
@@ -215,6 +221,46 @@ class TunnelMultiPartitionReader:
|
|
|
215
221
|
|
|
216
222
|
|
|
217
223
|
class TunnelTableIO(ODPSTableIO):
|
|
224
|
+
_down_session_ids = OrderedDict()
|
|
225
|
+
|
|
226
|
+
@classmethod
|
|
227
|
+
def create_download_sessions(
|
|
228
|
+
cls,
|
|
229
|
+
odps_entry: ODPS,
|
|
230
|
+
full_table_name: str,
|
|
231
|
+
partitions: List[Optional[str]] = None,
|
|
232
|
+
) -> Dict[Optional[str], TableDownloadSession]:
|
|
233
|
+
table = odps_entry.get_table(full_table_name)
|
|
234
|
+
tunnel = TableTunnel(odps_entry, quota_name=options.tunnel_quota_name)
|
|
235
|
+
parts = (
|
|
236
|
+
[partitions]
|
|
237
|
+
if partitions is None or isinstance(partitions, str)
|
|
238
|
+
else partitions
|
|
239
|
+
)
|
|
240
|
+
part_to_session = dict()
|
|
241
|
+
for part in parts:
|
|
242
|
+
part_key = (full_table_name, part)
|
|
243
|
+
down_session = None
|
|
244
|
+
|
|
245
|
+
if part_key in cls._down_session_ids:
|
|
246
|
+
down_id = cls._down_session_ids[part_key]
|
|
247
|
+
down_session = tunnel.create_download_session(
|
|
248
|
+
table, async_mode=True, partition_spec=part, download_id=down_id
|
|
249
|
+
)
|
|
250
|
+
if down_session.status != TableDownloadStatus.Normal:
|
|
251
|
+
down_session = None
|
|
252
|
+
|
|
253
|
+
if down_session is None:
|
|
254
|
+
down_session = tunnel.create_download_session(
|
|
255
|
+
table, async_mode=True, partition_spec=part
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
while len(cls._down_session_ids) >= _DOWNLOAD_ID_CACHE_SIZE:
|
|
259
|
+
cls._down_session_ids.popitem(False)
|
|
260
|
+
cls._down_session_ids[part_key] = down_session.id
|
|
261
|
+
part_to_session[part] = down_session
|
|
262
|
+
return part_to_session
|
|
263
|
+
|
|
218
264
|
@contextmanager
|
|
219
265
|
def open_reader(
|
|
220
266
|
self,
|
|
@@ -241,21 +287,15 @@ class TunnelTableIO(ODPSTableIO):
|
|
|
241
287
|
or (reverse_range and start is None)
|
|
242
288
|
):
|
|
243
289
|
with sync_pyodps_options():
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
290
|
+
tunnel_sessions = self.create_download_sessions(
|
|
291
|
+
self._odps, full_table_name, partitions
|
|
292
|
+
)
|
|
293
|
+
part_to_down_id = {
|
|
294
|
+
pt: session.id for (pt, session) in tunnel_sessions.items()
|
|
295
|
+
}
|
|
296
|
+
total_records = sum(
|
|
297
|
+
session.count for session in tunnel_sessions.values()
|
|
250
298
|
)
|
|
251
|
-
part_to_down_id = dict()
|
|
252
|
-
total_records = 0
|
|
253
|
-
for part in parts:
|
|
254
|
-
down_session = tunnel.create_download_session(
|
|
255
|
-
table, async_mode=True, partition_spec=part
|
|
256
|
-
)
|
|
257
|
-
part_to_down_id[part] = down_session.id
|
|
258
|
-
total_records += down_session.count
|
|
259
299
|
|
|
260
300
|
count = None
|
|
261
301
|
if start is not None or stop is not None:
|
|
@@ -521,7 +561,10 @@ class HaloTableIO(ODPSTableIO):
|
|
|
521
561
|
|
|
522
562
|
table = self._odps.get_table(full_table_name)
|
|
523
563
|
client = StorageApiArrowClient(
|
|
524
|
-
self._odps,
|
|
564
|
+
self._odps,
|
|
565
|
+
table,
|
|
566
|
+
rest_endpoint=self._storage_api_endpoint,
|
|
567
|
+
quota_name=options.tunnel_quota_name,
|
|
525
568
|
)
|
|
526
569
|
|
|
527
570
|
split_option = SplitOptions.SplitMode.SIZE
|
|
@@ -597,7 +640,10 @@ class HaloTableIO(ODPSTableIO):
|
|
|
597
640
|
|
|
598
641
|
table = self._odps.get_table(full_table_name)
|
|
599
642
|
client = StorageApiArrowClient(
|
|
600
|
-
self._odps,
|
|
643
|
+
self._odps,
|
|
644
|
+
table,
|
|
645
|
+
rest_endpoint=self._storage_api_endpoint,
|
|
646
|
+
quota_name=options.tunnel_quota_name,
|
|
601
647
|
)
|
|
602
648
|
|
|
603
649
|
part_strs = self._convert_partitions(partition)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 1999-
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -14,6 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
import numpy as np
|
|
16
16
|
import pandas as pd
|
|
17
|
+
import pyarrow as pa
|
|
18
|
+
import pytest
|
|
19
|
+
|
|
20
|
+
from maxframe.lib.dtypes_extension import dict_
|
|
17
21
|
|
|
18
22
|
from ..arrow import arrow_to_pandas, pandas_to_arrow
|
|
19
23
|
|
|
@@ -86,3 +90,44 @@ def test_scalar_convert():
|
|
|
86
90
|
|
|
87
91
|
scalar_res = arrow_to_pandas(arrow_data, meta)
|
|
88
92
|
assert scalar_data == scalar_res
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@pytest.mark.skipif(
|
|
96
|
+
pa is None or not hasattr(pd, "ArrowDtype"),
|
|
97
|
+
reason="pandas doesn't support ArrowDtype",
|
|
98
|
+
)
|
|
99
|
+
def test_map_convert():
|
|
100
|
+
pd_data = pd.DataFrame(
|
|
101
|
+
{
|
|
102
|
+
"A": pd.Series(
|
|
103
|
+
[(("k1", "v1"), ("k2", "v2"))], dtype=dict_(pa.string(), pa.string())
|
|
104
|
+
),
|
|
105
|
+
"B": pd.Series([{"k1": 1, "k2": 2}], dtype=dict_(pa.string(), pa.int64())),
|
|
106
|
+
},
|
|
107
|
+
)
|
|
108
|
+
arrow_data, meta = pandas_to_arrow(pd_data)
|
|
109
|
+
assert arrow_data.column_names == ["_idx_0", "a", "b"]
|
|
110
|
+
pd.testing.assert_series_equal(
|
|
111
|
+
meta.pd_column_dtypes,
|
|
112
|
+
pd.Series(
|
|
113
|
+
[dict_(pa.string(), pa.string()), dict_(pa.string(), pa.int64())],
|
|
114
|
+
index=["A", "B"],
|
|
115
|
+
),
|
|
116
|
+
)
|
|
117
|
+
pd_result = arrow_to_pandas(arrow_data, meta)
|
|
118
|
+
pd.testing.assert_frame_equal(pd_data, pd_result)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_datetime_with_tz_convert():
|
|
122
|
+
pd_data = pd.DataFrame(
|
|
123
|
+
{
|
|
124
|
+
"a": pd.to_datetime(
|
|
125
|
+
pd.Series([1609459200, 1609545600], index=[0, 1]), unit="s"
|
|
126
|
+
),
|
|
127
|
+
},
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
arrow_data, meta = pandas_to_arrow(pd_data)
|
|
131
|
+
assert arrow_data.column_names == ["_idx_0", "a"]
|
|
132
|
+
pd_result = arrow_to_pandas(arrow_data, meta)
|
|
133
|
+
pd.testing.assert_frame_equal(pd_data, pd_result)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 1999-
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -21,6 +21,7 @@ from odps import types as odps_types
|
|
|
21
21
|
from .... import dataframe as md
|
|
22
22
|
from .... import tensor as mt
|
|
23
23
|
from ....core import OutputType
|
|
24
|
+
from ....lib.dtypes_extension import dict_
|
|
24
25
|
from ....utils import pd_release_version
|
|
25
26
|
from ..schema import (
|
|
26
27
|
arrow_schema_to_odps_schema,
|
|
@@ -28,6 +29,7 @@ from ..schema import (
|
|
|
28
29
|
build_table_column_name,
|
|
29
30
|
odps_schema_to_arrow_schema,
|
|
30
31
|
pandas_to_odps_schema,
|
|
32
|
+
pandas_types_to_arrow_schema,
|
|
31
33
|
)
|
|
32
34
|
|
|
33
35
|
|
|
@@ -332,3 +334,19 @@ def test_table_meta_with_datetime():
|
|
|
332
334
|
multiidx = md.Index(raw_multiindex)
|
|
333
335
|
schema, _ = pandas_to_odps_schema(multiidx, unknown_as_string=True)
|
|
334
336
|
assert schema.columns[1].type == odps_types.datetime
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
@pytest.mark.skipif(
|
|
340
|
+
pa is None or not hasattr(pd, "ArrowDtype"),
|
|
341
|
+
reason="pandas doesn't support ArrowDtype",
|
|
342
|
+
)
|
|
343
|
+
def test_pandas_types_to_arrow_schema():
|
|
344
|
+
pd_data = pd.DataFrame(
|
|
345
|
+
{
|
|
346
|
+
"int8": pd.Series([], dtype=np.int8),
|
|
347
|
+
"map": pd.Series([], dtype=dict_(pa.string(), pa.string())),
|
|
348
|
+
},
|
|
349
|
+
)
|
|
350
|
+
schema = pandas_types_to_arrow_schema(pd_data)
|
|
351
|
+
assert schema.field("int8").type == pa.int8()
|
|
352
|
+
assert schema.field("map").type == pa.map_(pa.string(), pa.string())
|
maxframe/io/odpsio/volumeio.py
CHANGED
maxframe/learn/__init__.py
CHANGED
maxframe/learn/contrib/utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 1999-
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -19,7 +19,7 @@ from ....core.operator.base import Operator
|
|
|
19
19
|
from ....core.operator.core import TileableOperatorMixin
|
|
20
20
|
from ....dataframe.core import DATAFRAME_TYPE
|
|
21
21
|
from ....serialization.serializables import Float64Field, KeyField, ListField
|
|
22
|
-
from ....serialization.serializables.field import AnyField, Int64Field
|
|
22
|
+
from ....serialization.serializables.field import AnyField, BoolField, Int64Field
|
|
23
23
|
from ....tensor import tensor as astensor
|
|
24
24
|
from ....tensor.core import TENSOR_TYPE
|
|
25
25
|
from ....typing_ import TileableType
|
|
@@ -42,6 +42,7 @@ class ToDMatrix(Operator, TileableOperatorMixin):
|
|
|
42
42
|
qid = AnyField("qid", default=None)
|
|
43
43
|
label_lower_bound = AnyField("label_lower_bound", default=None)
|
|
44
44
|
label_upper_bound = AnyField("label_upper_bound", default=None)
|
|
45
|
+
enable_categorical = BoolField("enable_categorical", default=None)
|
|
45
46
|
|
|
46
47
|
@property
|
|
47
48
|
def output_limit(self):
|
|
@@ -116,6 +117,7 @@ def to_dmatrix(
|
|
|
116
117
|
qid=None,
|
|
117
118
|
label_lower_bound=None,
|
|
118
119
|
label_upper_bound=None,
|
|
120
|
+
enable_categorical=None,
|
|
119
121
|
):
|
|
120
122
|
data = check_data(data)
|
|
121
123
|
label = check_array_like(label, "label")
|
|
@@ -139,6 +141,7 @@ def to_dmatrix(
|
|
|
139
141
|
label_lower_bound=label_lower_bound,
|
|
140
142
|
label_upper_bound=label_upper_bound,
|
|
141
143
|
gpu=data.op.gpu,
|
|
144
|
+
enable_categorical=enable_categorical,
|
|
142
145
|
_output_types=get_output_types(data),
|
|
143
146
|
)
|
|
144
147
|
return op()
|