maxframe 1.0.0rc1__cp38-cp38-macosx_10_9_universal2.whl → 1.0.0rc3__cp38-cp38-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (138) hide show
  1. maxframe/_utils.cpython-38-darwin.so +0 -0
  2. maxframe/codegen.py +3 -6
  3. maxframe/config/config.py +49 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +15 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +46 -3
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cpython-38-darwin.so +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/__init__.py +1 -1
  24. maxframe/dataframe/arithmetic/around.py +5 -17
  25. maxframe/dataframe/arithmetic/core.py +15 -7
  26. maxframe/dataframe/arithmetic/docstring.py +5 -55
  27. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  28. maxframe/dataframe/core.py +5 -5
  29. maxframe/dataframe/datasource/date_range.py +2 -2
  30. maxframe/dataframe/datasource/read_odps_query.py +7 -1
  31. maxframe/dataframe/datasource/read_odps_table.py +3 -2
  32. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  33. maxframe/dataframe/datastore/to_odps.py +1 -1
  34. maxframe/dataframe/groupby/cum.py +0 -1
  35. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/rename.py +3 -37
  38. maxframe/dataframe/indexing/sample.py +0 -1
  39. maxframe/dataframe/indexing/set_index.py +68 -1
  40. maxframe/dataframe/merge/merge.py +236 -2
  41. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  42. maxframe/dataframe/misc/apply.py +3 -10
  43. maxframe/dataframe/misc/case_when.py +1 -1
  44. maxframe/dataframe/misc/describe.py +2 -2
  45. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  46. maxframe/dataframe/misc/eval.py +4 -0
  47. maxframe/dataframe/misc/pct_change.py +1 -83
  48. maxframe/dataframe/misc/transform.py +1 -30
  49. maxframe/dataframe/misc/value_counts.py +4 -17
  50. maxframe/dataframe/missing/dropna.py +1 -1
  51. maxframe/dataframe/missing/fillna.py +5 -5
  52. maxframe/dataframe/operators.py +1 -17
  53. maxframe/dataframe/reduction/core.py +2 -2
  54. maxframe/dataframe/sort/sort_values.py +1 -11
  55. maxframe/dataframe/statistics/quantile.py +5 -17
  56. maxframe/dataframe/utils.py +4 -7
  57. maxframe/io/objects/__init__.py +24 -0
  58. maxframe/io/objects/core.py +140 -0
  59. maxframe/io/objects/tensor.py +76 -0
  60. maxframe/io/objects/tests/__init__.py +13 -0
  61. maxframe/io/objects/tests/test_object_io.py +97 -0
  62. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  63. maxframe/{odpsio → io/odpsio}/arrow.py +12 -8
  64. maxframe/{odpsio → io/odpsio}/schema.py +15 -12
  65. maxframe/io/odpsio/tableio.py +702 -0
  66. maxframe/io/odpsio/tests/__init__.py +13 -0
  67. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +19 -18
  68. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  69. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  70. maxframe/io/odpsio/volumeio.py +57 -0
  71. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  72. maxframe/learn/contrib/xgboost/core.py +87 -2
  73. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  74. maxframe/learn/contrib/xgboost/predict.py +21 -7
  75. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  76. maxframe/learn/contrib/xgboost/train.py +27 -17
  77. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  78. maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
  79. maxframe/protocol.py +41 -17
  80. maxframe/remote/core.py +4 -8
  81. maxframe/serialization/__init__.py +1 -0
  82. maxframe/serialization/core.cpython-38-darwin.so +0 -0
  83. maxframe/serialization/serializables/core.py +48 -9
  84. maxframe/tensor/__init__.py +69 -2
  85. maxframe/tensor/arithmetic/isclose.py +1 -0
  86. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  87. maxframe/tensor/core.py +5 -136
  88. maxframe/tensor/datasource/array.py +3 -0
  89. maxframe/tensor/datasource/full.py +1 -1
  90. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  91. maxframe/tensor/indexing/flatnonzero.py +1 -1
  92. maxframe/tensor/merge/__init__.py +2 -0
  93. maxframe/tensor/merge/concatenate.py +98 -0
  94. maxframe/tensor/merge/tests/test_merge.py +30 -1
  95. maxframe/tensor/merge/vstack.py +70 -0
  96. maxframe/tensor/{base → misc}/__init__.py +2 -0
  97. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  98. maxframe/tensor/misc/atleast_2d.py +70 -0
  99. maxframe/tensor/misc/atleast_3d.py +85 -0
  100. maxframe/tensor/misc/tests/__init__.py +13 -0
  101. maxframe/tensor/{base → misc}/transpose.py +22 -18
  102. maxframe/tensor/{base → misc}/unique.py +2 -2
  103. maxframe/tensor/operators.py +1 -7
  104. maxframe/tensor/random/core.py +1 -1
  105. maxframe/tensor/reduction/count_nonzero.py +1 -0
  106. maxframe/tensor/reduction/mean.py +1 -0
  107. maxframe/tensor/reduction/nanmean.py +1 -0
  108. maxframe/tensor/reduction/nanvar.py +2 -0
  109. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  110. maxframe/tensor/reduction/var.py +2 -0
  111. maxframe/tensor/statistics/quantile.py +2 -2
  112. maxframe/tensor/utils.py +2 -22
  113. maxframe/tests/utils.py +11 -2
  114. maxframe/typing_.py +4 -1
  115. maxframe/udf.py +8 -9
  116. maxframe/utils.py +32 -70
  117. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/METADATA +25 -25
  118. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +133 -123
  119. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
  120. maxframe_client/fetcher.py +60 -68
  121. maxframe_client/session/graph.py +8 -2
  122. maxframe_client/session/odps.py +58 -22
  123. maxframe_client/tests/test_fetcher.py +21 -3
  124. maxframe_client/tests/test_session.py +27 -4
  125. maxframe/core/entity/chunks.py +0 -68
  126. maxframe/core/entity/fuse.py +0 -73
  127. maxframe/core/graph/builder/chunk.py +0 -430
  128. maxframe/odpsio/tableio.py +0 -322
  129. maxframe/odpsio/volumeio.py +0 -95
  130. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  131. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  132. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  133. /maxframe/tensor/{base → misc}/astype.py +0 -0
  134. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  135. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  136. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  137. /maxframe/tensor/{base → misc}/where.py +0 -0
  138. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,140 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABCMeta, abstractmethod
16
+ from typing import Any, Dict, Type, Union
17
+
18
+ import msgpack
19
+
20
+ from ...core import Entity, EntityData
21
+ from ...core.entity import ObjectData, TileableData
22
+ from ...lib import wrapped_pickle as pickle
23
+ from ...typing_ import SlicesType, TileableType
24
+ from ...utils import TypeDispatcher
25
+ from ..odpsio.volumeio import ODPSVolumeReader, ODPSVolumeWriter
26
+
27
+ _MetaType = Dict[str, Any]
28
+
29
+ _META_FILE_NAME = ".meta"
30
+ _META_PICKLED_KEYS_KEY = ".pickled_keys"
31
+
32
+
33
+ _io_handler_dispatcher = TypeDispatcher()
34
+
35
+
36
def register_object_io_handler(tileable_data_type: Type[TileableData]):
    """Return a class decorator that registers its target as the IO
    handler for ``tileable_data_type`` in the module-level dispatcher."""

    def _decorator(handler_cls):
        _io_handler_dispatcher.register(tileable_data_type, handler_cls)
        return handler_cls

    return _decorator
42
+
43
+
44
def get_object_io_handler(
    tileable_data_type: Union[Entity, EntityData, Type[EntityData]]
) -> Type["AbstractObjectIOHandler"]:
    """Look up the registered IO handler class for a tileable, its data
    object, or its data type."""
    lookup_type = tileable_data_type
    if not isinstance(lookup_type, type):
        # instances: unwrap Entity wrappers to their data, then take the type
        if isinstance(lookup_type, Entity):
            lookup_type = lookup_type.data
        lookup_type = type(lookup_type)
    return _io_handler_dispatcher.get_handler(lookup_type)
52
+
53
+
54
class AbstractObjectIOHandler(metaclass=ABCMeta):
    """Base class for reading / writing tileable objects via ODPS volumes.

    Shared logic here handles the meta file (msgpack-encoded dict, with
    non-primitive values pickled and their keys recorded). Subclasses
    implement ``_read_object_body`` / ``_write_object_body`` for the
    actual payload.
    """

    def _prepare_meta_for_serial(
        self, tileable: TileableType, meta: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Pickle non-primitive meta values and record their keys.

        Uses the shared ``_META_PICKLED_KEYS_KEY`` constant (instead of a
        repeated literal) so serialization and deserialization stay in sync.
        """
        to_pack = meta.copy()
        pickled_keys = []
        for k, v in meta.items():
            if not isinstance(v, (str, bytes, int, float, bool)):
                to_pack[k] = pickle.dumps(v)
                pickled_keys.append(k)
        to_pack[_META_PICKLED_KEYS_KEY] = pickled_keys
        return to_pack

    def _prepare_meta_for_deserial(
        self, tileable: TileableType, meta: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Reverse of :meth:`_prepare_meta_for_serial`: unpickle marked values."""
        pickled_keys = meta.pop(_META_PICKLED_KEYS_KEY, None) or []
        for k in pickled_keys:
            meta[k] = pickle.loads(meta[k])
        return meta

    def read_object_meta(
        self, reader: ODPSVolumeReader, tileable: TileableType
    ) -> Dict[str, Any]:
        """Load and decode the meta file of ``tileable`` from the volume."""
        meta_obj = msgpack.loads(reader.read_file(_META_FILE_NAME))
        return self._prepare_meta_for_deserial(tileable, meta_obj)

    @abstractmethod
    def _read_object_body(
        self,
        reader: ODPSVolumeReader,
        tileable: TileableType,
        meta: Dict[str, Any],
        slices: SlicesType = None,
    ) -> Any:
        """Read the object payload; implemented by subclasses."""
        raise NotImplementedError

    def read_object(
        self,
        reader: ODPSVolumeReader,
        tileable: TileableType,
        slices: SlicesType = None,
    ) -> Any:
        """Read the full object: meta first, then the body."""
        meta = self.read_object_meta(reader, tileable)
        return self._read_object_body(reader, tileable, meta, slices)

    @abstractmethod
    def _write_object_body(
        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
    ):
        """Write the object payload; implemented by subclasses."""
        raise NotImplementedError

    def write_object_meta(
        self,
        writer: ODPSVolumeWriter,
        tileable: TileableType,
        extra_meta: Dict[str, Any] = None,
    ):
        """Write ``tileable.params`` (merged with ``extra_meta``) as the
        volume meta file."""
        meta_obj = tileable.params.copy()
        if extra_meta:
            meta_obj.update(extra_meta)
        meta_obj = self._prepare_meta_for_serial(tileable, meta_obj)
        packed = msgpack.dumps(meta_obj)
        writer.write_file(_META_FILE_NAME, packed)

    def write_object(
        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
    ):
        """Write the full object: meta first, then the body."""
        self.write_object_meta(writer, tileable)
        self._write_object_body(writer, tileable, value)
124
+
125
+
126
@register_object_io_handler(ObjectData)
class ObjectIOHandler(AbstractObjectIOHandler):
    """IO handler for plain objects: the whole value is one pickled blob
    stored in a single volume file named ``data``."""

    def _write_object_body(
        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
    ):
        writer.write_file("data", pickle.dumps(value))

    def _read_object_body(
        self,
        reader: ODPSVolumeReader,
        tileable: TileableType,
        meta: Dict[str, Any],
        slices: SlicesType = None,
    ) -> Any:
        # meta and slices are unused: the object is a single pickle blob
        return pickle.loads(reader.read_file("data"))
@@ -0,0 +1,76 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import struct
16
+ from io import BytesIO
17
+ from typing import Any, Dict
18
+
19
+ import msgpack
20
+ import numpy as np
21
+
22
+ from ...lib import wrapped_pickle as pickle
23
+ from ...tensor.core import TensorData
24
+ from ...typing_ import SlicesType, TileableType
25
+ from ..odpsio import ODPSVolumeReader, ODPSVolumeWriter
26
+ from .core import AbstractObjectIOHandler, register_object_io_handler
27
+
28
+
29
@register_object_io_handler(TensorData)
class TensorIOHandler(AbstractObjectIOHandler):
    """IO handler for tensors, currently stored as one pickled chunk.

    Wire format of ``0,0.dat``: a 4-byte little-endian header length,
    a msgpack list ``[len(pickled), len(buf0), len(buf1), ...]``, the
    pickle payload, then the out-of-band pickle buffers in order.
    """

    def write_object_meta(
        self,
        writer: ODPSVolumeWriter,
        tileable: TileableType,
        extra_meta: Dict[str, Any] = None,
    ):
        # fixme upload in real slices when tensors are supported in DPE
        extra_meta = extra_meta or dict()
        extra_meta["nsplits"] = ((np.nan,),)

        super().write_object_meta(writer, tileable, extra_meta=extra_meta)

    def _read_object_body(
        self,
        reader: ODPSVolumeReader,
        tileable: TileableType,
        meta: Dict[str, Any],
        slices: SlicesType = None,
    ) -> Any:
        # fixme read data with slices when tensors are supported in DPE
        stream = BytesIO(reader.read_file("0,0.dat"))
        header_len = struct.unpack("<I", stream.read(4))[0]
        sizes = msgpack.loads(stream.read(header_len))

        # sizes[0] is the pickle payload; the rest are out-of-band buffers
        payload = stream.read(sizes[0])
        buffers = [stream.read(size) for size in sizes[1:]]
        return pickle.loads(payload, buffers=buffers)

    def _write_object_body(
        self, writer: ODPSVolumeWriter, tileable: TileableType, value: Any
    ):
        # fixme upload in real slices when tensors are supported in DPE
        def iter_chunks():
            out_of_band = []
            payload = pickle.dumps(value, buffer_callback=out_of_band.append)
            header = msgpack.dumps(
                [len(payload)] + [len(buf.raw()) for buf in out_of_band]
            )
            yield struct.pack("<I", len(header))
            yield header
            yield payload
            yield from out_of_band

        writer.write_file("0,0.dat", iter_chunks())
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,97 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import numpy as np
15
+ import pytest
16
+ from odps import ODPS
17
+
18
+ from ....core import OutputType
19
+ from ....core.operator import ObjectOperatorMixin, Operator
20
+ from ....tensor.datasource import ArrayDataSource
21
+ from ....tests.utils import tn
22
+ from ...odpsio import ODPSVolumeReader, ODPSVolumeWriter
23
+ from ..core import get_object_io_handler
24
+
25
+
26
class TestObjectOp(Operator, ObjectOperatorMixin):
    """Minimal operator that produces a single object-typed tileable,
    used only to obtain a tileable handle for IO round-trip tests."""

    def __call__(self):
        # declare one plain-object output, then build a tileable with no inputs
        self._output_types = [OutputType.object]
        return self.new_tileable([])
30
+
31
+
32
@pytest.fixture(scope="module")
def create_volume(request, oss_config):
    """Create an OSS-backed external ODPS volume for IO tests.

    Yields the volume name and deletes the volume afterwards. Skips the
    tests when no OSS configuration is available.

    Fix: the original used bare ``except:`` clauses, which also swallow
    ``KeyboardInterrupt``/``SystemExit``; narrowed to ``except Exception``
    while keeping the cleanup best-effort.
    """
    test_vol_name = tn("test_object_io_volume")
    odps_entry = ODPS.from_environments()

    # best-effort removal of leftovers from earlier failed runs
    try:
        odps_entry.delete_volume(test_vol_name, auto_remove_dir=True, recursive=True)
    except Exception:
        pass

    oss_test_dir_name = tn("test_oss_directory")
    if oss_config is None:
        pytest.skip("Need oss and its config to run this test")
    (
        oss_access_id,
        oss_secret_access_key,
        oss_bucket_name,
        oss_endpoint,
    ) = oss_config.oss_config
    test_location = (
        f"oss://{oss_access_id}:{oss_secret_access_key}"
        f"@{oss_endpoint}/{oss_bucket_name}/{oss_test_dir_name}"
    )
    # ensure the OSS "directory" object exists before mounting the volume
    oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
    odps_entry.create_external_volume(test_vol_name, location=test_location)

    try:
        yield test_vol_name
    finally:
        # best-effort cleanup; failures here must not mask test results
        try:
            odps_entry.delete_volume(
                test_vol_name, auto_remove_dir=True, recursive=True
            )
        except Exception:
            pass
70
+
71
+
72
def test_simple_object_io(create_volume):
    """Round-trip a plain string object through the object IO handler."""
    odps_entry = ODPS.from_environments()
    tileable = TestObjectOp()()
    payload = "abcdefg"

    volume_writer = ODPSVolumeWriter(odps_entry, create_volume, tileable.key)
    volume_reader = ODPSVolumeReader(odps_entry, create_volume, tileable.key)

    io_handler = get_object_io_handler(tileable)()
    io_handler.write_object(volume_writer, tileable, payload)
    assert io_handler.read_object(volume_reader, tileable) == payload
84
+
85
+
86
def test_tensor_object_io(create_volume):
    """Round-trip a small numpy array through the tensor IO handler."""
    expected = np.array([[4, 9, 2], [3, 5, 7], [8, 1, 6]])
    tileable = ArrayDataSource(expected, dtype=expected.dtype)(expected.shape)

    odps_entry = ODPS.from_environments()
    volume_reader = ODPSVolumeReader(odps_entry, create_volume, tileable.key)
    volume_writer = ODPSVolumeWriter(odps_entry, create_volume, tileable.key)

    io_handler = get_object_io_handler(tileable)()
    io_handler.write_object(volume_writer, tileable, expected)
    restored = io_handler.read_object(volume_reader, tileable)
    np.testing.assert_equal(expected, restored)
@@ -14,8 +14,10 @@
14
14
 
15
15
  from .arrow import arrow_to_pandas, pandas_to_arrow
16
16
  from .schema import (
17
+ arrow_schema_to_odps_schema,
17
18
  build_dataframe_table_meta,
18
19
  odps_schema_to_pandas_dtypes,
19
20
  pandas_to_odps_schema,
20
21
  )
21
- from .tableio import HaloTableIO
22
+ from .tableio import HaloTableIO, ODPSTableIO
23
+ from .volumeio import ODPSVolumeReader, ODPSVolumeWriter
@@ -17,10 +17,10 @@ from typing import Any, Tuple, Union
17
17
  import pandas as pd
18
18
  import pyarrow as pa
19
19
 
20
- from ..core import OutputType
21
- from ..protocol import DataFrameTableMeta
22
- from ..tensor.core import TENSOR_TYPE
23
- from ..typing_ import ArrowTableType, PandasObjectTypes
20
+ from ...core import OutputType
21
+ from ...protocol import DataFrameTableMeta
22
+ from ...tensor.core import TENSOR_TYPE
23
+ from ...typing_ import ArrowTableType, PandasObjectTypes
24
24
  from .schema import build_dataframe_table_meta
25
25
 
26
26
 
@@ -45,9 +45,13 @@ def _rebuild_dataframe(
45
45
 
46
46
  def _rebuild_index(df: pd.DataFrame, table_meta: DataFrameTableMeta) -> pd.Index:
47
47
  if df.shape[1] > 1:
48
- df.columns = pd.Index(table_meta.pd_index_level_names)
49
- return pd.MultiIndex.from_frame(df)
50
- return pd.Index(df.iloc[:, 0], name=table_meta.pd_index_level_names[0])
48
+ idx = pd.MultiIndex.from_frame(df)
49
+ idx.names = table_meta.pd_index_level_names
50
+ else:
51
+ # make sure even if None names are updated properly
52
+ idx = pd.Index(df.iloc[:, 0])
53
+ idx.name = table_meta.pd_index_level_names[0]
54
+ return idx
51
55
 
52
56
 
53
57
  def arrow_to_pandas(
@@ -75,7 +79,7 @@ def pandas_to_arrow(
75
79
  df.columns = pd.Index(table_meta.table_column_names)
76
80
  if not ignore_index:
77
81
  df = df.rename_axis(table_meta.table_index_column_names).reset_index()
78
- elif ignore_index:
82
+ elif ignore_index and table_meta.type != OutputType.index:
79
83
  df = pd.DataFrame([], columns=[])
80
84
  elif table_meta.type == OutputType.index:
81
85
  names = [f"_idx_{idx}" for idx in range(len(df.names))]
@@ -21,9 +21,9 @@ import pyarrow as pa
21
21
  from odps import types as odps_types
22
22
  from pandas.api import types as pd_types
23
23
 
24
- from ..core import TILEABLE_TYPE, OutputType
25
- from ..protocol import DataFrameTableMeta
26
- from ..tensor.core import TENSOR_TYPE
24
+ from ...core import TILEABLE_TYPE, OutputType
25
+ from ...protocol import DataFrameTableMeta
26
+ from ...tensor.core import TENSOR_TYPE
27
27
 
28
28
  _TEMP_TABLE_PREFIX = "tmp_mf_"
29
29
 
@@ -126,10 +126,15 @@ def odps_type_to_arrow_type(
126
126
  ]
127
127
  col_type = pa.struct(fields)
128
128
  elif isinstance(odps_type, odps_types.Decimal):
129
- col_type = pa.decimal128(
130
- odps_type.precision or odps_types.Decimal._max_precision,
131
- odps_type.scale or odps_types.Decimal._max_scale,
132
- )
129
+ if odps_type.name == "decimal":
130
+ # legacy decimal data without precision or scale
131
+ # precision data from internal compat mode
132
+ col_type = pa.decimal128(38, 18)
133
+ else:
134
+ col_type = pa.decimal128(
135
+ odps_type.precision or odps_types.Decimal._max_precision,
136
+ odps_type.scale or odps_types.Decimal._max_scale,
137
+ )
133
138
  elif isinstance(odps_type, (odps_types.Varchar, odps_types.Char)):
134
139
  col_type = pa.string()
135
140
  else:
@@ -179,7 +184,7 @@ def pandas_to_odps_schema(
179
184
  unknown_as_string: bool = False,
180
185
  ignore_index=False,
181
186
  ) -> Tuple[odps_types.OdpsSchema, DataFrameTableMeta]:
182
- from .. import dataframe as md
187
+ from ... import dataframe as md
183
188
  from .arrow import pandas_to_arrow
184
189
 
185
190
  if _is_scalar_object(df_obj):
@@ -273,7 +278,7 @@ def build_table_column_name(
273
278
  def build_dataframe_table_meta(
274
279
  df_obj: Any, ignore_index: bool = False
275
280
  ) -> DataFrameTableMeta:
276
- from .. import dataframe as md
281
+ from ... import dataframe as md
277
282
 
278
283
  col_to_count = defaultdict(lambda: 0)
279
284
  col_to_idx = defaultdict(lambda: 0)
@@ -289,8 +294,6 @@ def build_dataframe_table_meta(
289
294
  else: # pragma: no cover
290
295
  raise TypeError(f"Cannot accept type {type(df_obj)}")
291
296
 
292
- assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
293
-
294
297
  if obj_type == OutputType.scalar:
295
298
  pd_dtypes = pd.Series([])
296
299
  column_index_names = []
@@ -346,7 +349,7 @@ def build_dataframe_table_meta(
346
349
  else:
347
350
  index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
348
351
 
349
- if ignore_index:
352
+ if ignore_index and obj_type != OutputType.index:
350
353
  table_index_column_names = []
351
354
  pd_index_dtypes = pd.Series([], index=[])
352
355
  else: