maxframe 0.1.0b3__cp38-cp38-win_amd64.whl → 0.1.0b4__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
maxframe/config/config.py CHANGED
@@ -358,6 +358,9 @@ default_options.register_option(
358
358
  default_options.register_option(
359
359
  "show_progress", "auto", validator=any_validator(is_bool, is_string)
360
360
  )
361
+ default_options.register_option(
362
+ "dag.settings", value=dict(), validator=is_dict, remote=True
363
+ )
361
364
 
362
365
  ################
363
366
  # SPE Settings #
Binary file
@@ -57,6 +57,11 @@ try:
57
57
  except ImportError: # pragma: no cover
58
58
  pass
59
59
 
60
+ try:
61
+ from . import _internal
62
+ except ImportError: # pragma: no cover
63
+ pass
64
+
60
65
  del (
61
66
  arithmetic,
62
67
  datasource,
@@ -960,7 +960,9 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
960
960
  buf = StringIO()
961
961
  max_rows = pd.get_option("display.max_rows")
962
962
  corner_max_rows = (
963
- max_rows if self.shape[0] <= max_rows else corner_data.shape[0] - 1
963
+ max_rows
964
+ if self.shape[0] <= max_rows or corner_data.shape[0] == 0
965
+ else corner_data.shape[0] - 1
964
966
  ) # make sure max_rows < corner_data
965
967
 
966
968
  with pd.option_context("display.max_rows", corner_max_rows):
@@ -1605,7 +1607,7 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
1605
1607
  buf = StringIO()
1606
1608
  max_rows = pd.get_option("display.max_rows")
1607
1609
 
1608
- if self.shape[0] <= max_rows:
1610
+ if self.shape[0] <= max_rows or corner_data.shape[0] == 0:
1609
1611
  buf.write(repr(corner_data) if representation else str(corner_data))
1610
1612
  else:
1611
1613
  # remember we cannot directly call repr(df),
@@ -263,7 +263,9 @@ def read_odps_query(
263
263
  result: DataFrame
264
264
  DataFrame read from MaxCompute (ODPS) table
265
265
  """
266
- odps_entry = odps_entry or ODPS.from_environments()
266
+ odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
267
+ if odps_entry is None:
268
+ raise ValueError("Missing odps_entry parameter")
267
269
  inst = odps_entry.execute_sql(f"EXPLAIN {query}")
268
270
  explain_str = list(inst.get_task_results().values())[0]
269
271
 
@@ -164,6 +164,8 @@ def read_odps_table(
164
164
  DataFrame read from MaxCompute (ODPS) table
165
165
  """
166
166
  odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
167
+ if odps_entry is None:
168
+ raise ValueError("Missing odps_entry parameter")
167
169
  if isinstance(table_name, Table):
168
170
  table = table_name
169
171
  else:
@@ -0,0 +1,19 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
16
+
17
+
18
+ class DataFrameDataStore(DataFrameOperator, DataFrameOperatorMixin):
19
+ pass
@@ -23,11 +23,11 @@ from ...serialization.serializables import (
23
23
  ListField,
24
24
  StringField,
25
25
  )
26
- from ..operators import DataFrameOperator, DataFrameOperatorMixin
27
26
  from ..utils import parse_index
27
+ from .core import DataFrameDataStore
28
28
 
29
29
 
30
- class DataFrameToCSV(DataFrameOperator, DataFrameOperatorMixin):
30
+ class DataFrameToCSV(DataFrameDataStore):
31
31
  _op_type_ = opcodes.TO_CSV
32
32
 
33
33
  input = KeyField("input")
@@ -32,13 +32,13 @@ from ...serialization.serializables import (
32
32
  )
33
33
  from ...typing_ import TileableType
34
34
  from ..core import DataFrame # noqa: F401
35
- from ..operators import DataFrameOperator, DataFrameOperatorMixin
36
35
  from ..utils import parse_index
36
+ from .core import DataFrameDataStore
37
37
 
38
38
  logger = logging.getLogger(__name__)
39
39
 
40
40
 
41
- class DataFrameToODPSTable(DataFrameOperator, DataFrameOperatorMixin):
41
+ class DataFrameToODPSTable(DataFrameDataStore):
42
42
  _op_type_ = opcodes.TO_ODPS_TABLE
43
43
 
44
44
  dtypes = SeriesField("dtypes")
@@ -107,7 +107,6 @@ def df_reset_index(
107
107
  inplace=False,
108
108
  col_level=0,
109
109
  col_fill="",
110
- incremental_index=False,
111
110
  ):
112
111
  """
113
112
  Reset the index, or a level of it.
@@ -133,12 +132,6 @@ def df_reset_index(
133
132
  col_fill : object, default ''
134
133
  If the columns have multiple levels, determines how the other
135
134
  levels are named. If None then the index name is repeated.
136
- incremental_index: bool, default False
137
- Ensure RangeIndex incremental, when output DataFrame has multiple chunks,
138
- ensuring index incremental costs more computation,
139
- so by default, each chunk will have index which starts from 0,
140
- setting incremental_index=True,reset_index will guarantee that
141
- output DataFrame's index is from 0 to n - 1.
142
135
 
143
136
  Returns
144
137
  -------
@@ -264,7 +257,6 @@ def df_reset_index(
264
257
  drop=drop,
265
258
  col_level=col_level,
266
259
  col_fill=col_fill,
267
- incremental_index=incremental_index,
268
260
  output_types=[OutputType.dataframe],
269
261
  )
270
262
  ret = op(df)
@@ -280,7 +272,6 @@ def series_reset_index(
280
272
  drop=False,
281
273
  name=no_default,
282
274
  inplace=False,
283
- incremental_index=False,
284
275
  ):
285
276
  """
286
277
  Generate a new DataFrame or Series with the index reset.
@@ -303,12 +294,6 @@ def series_reset_index(
303
294
  when `drop` is True.
304
295
  inplace : bool, default False
305
296
  Modify the Series in place (do not create a new object).
306
- incremental_index: bool, default False
307
- Ensure RangeIndex incremental, when output Series has multiple chunks,
308
- ensuring index incremental costs more computation,
309
- so by default, each chunk will have index which starts from 0,
310
- setting incremental_index=True,reset_index will guarantee that
311
- output Series's index is from 0 to n - 1.
312
297
 
313
298
  Returns
314
299
  -------
@@ -406,8 +391,7 @@ def series_reset_index(
406
391
  level=level,
407
392
  drop=drop,
408
393
  name=name,
409
- incremental_index=incremental_index,
410
- output_types=[OutputType.series],
394
+ output_types=[OutputType.series if drop else OutputType.dataframe],
411
395
  )
412
396
  ret = op(series)
413
397
  if not inplace:
Binary file
maxframe/odpsio/arrow.py CHANGED
@@ -65,14 +65,19 @@ def arrow_to_pandas(
65
65
  raise ValueError(f"Does not support meta type {table_meta.type!r}")
66
66
 
67
67
 
68
- def pandas_to_arrow(df: Any, nthreads=1) -> Tuple[ArrowTableType, DataFrameTableMeta]:
69
- table_meta = build_dataframe_table_meta(df)
68
+ def pandas_to_arrow(
69
+ df: Any, nthreads=1, ignore_index=False
70
+ ) -> Tuple[ArrowTableType, DataFrameTableMeta]:
71
+ table_meta = build_dataframe_table_meta(df, ignore_index)
70
72
  df = df.copy() if callable(getattr(df, "copy", None)) else df
71
73
  if table_meta.type in (OutputType.dataframe, OutputType.series):
72
74
  if table_meta.type == OutputType.series:
73
75
  df = df.to_frame("_data" if df.name is None else df.name)
74
76
  df.columns = pd.Index(table_meta.table_column_names)
75
- df = df.rename_axis(table_meta.table_index_column_names).reset_index()
77
+ if not ignore_index:
78
+ df = df.rename_axis(table_meta.table_index_column_names).reset_index()
79
+ elif ignore_index:
80
+ df = pd.DataFrame([], columns=[])
76
81
  elif table_meta.type == OutputType.index:
77
82
  names = [f"_idx_{idx}" for idx in range(len(df.names))]
78
83
  df = df.to_frame(name=names[0] if len(names) == 1 else names)
maxframe/odpsio/schema.py CHANGED
@@ -175,7 +175,9 @@ def _scalar_as_index(df_obj: Any) -> pd.Index:
175
175
 
176
176
 
177
177
  def pandas_to_odps_schema(
178
- df_obj: Any, unknown_as_string: bool = False
178
+ df_obj: Any,
179
+ unknown_as_string: bool = False,
180
+ ignore_index=False,
179
181
  ) -> Tuple[odps_types.OdpsSchema, DataFrameTableMeta]:
180
182
  from .. import dataframe as md
181
183
  from .arrow import pandas_to_arrow
@@ -209,7 +211,7 @@ def pandas_to_odps_schema(
209
211
  else:
210
212
  empty_df_obj = df_obj
211
213
 
212
- arrow_data, table_meta = pandas_to_arrow(empty_df_obj)
214
+ arrow_data, table_meta = pandas_to_arrow(empty_df_obj, ignore_index=ignore_index)
213
215
  return (
214
216
  arrow_schema_to_odps_schema(
215
217
  arrow_data.schema, unknown_as_string=unknown_as_string
@@ -268,7 +270,9 @@ def build_table_column_name(
268
270
  return col_name
269
271
 
270
272
 
271
- def build_dataframe_table_meta(df_obj: Any) -> DataFrameTableMeta:
273
+ def build_dataframe_table_meta(
274
+ df_obj: Any, ignore_index: bool = False
275
+ ) -> DataFrameTableMeta:
272
276
  from .. import dataframe as md
273
277
 
274
278
  col_to_count = defaultdict(lambda: 0)
@@ -285,6 +289,8 @@ def build_dataframe_table_meta(df_obj: Any) -> DataFrameTableMeta:
285
289
  else: # pragma: no cover
286
290
  raise TypeError(f"Cannot accept type {type(df_obj)}")
287
291
 
292
+ assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
293
+
288
294
  if obj_type == OutputType.scalar:
289
295
  pd_dtypes = pd.Series([])
290
296
  column_index_names = []
@@ -340,12 +346,19 @@ def build_dataframe_table_meta(df_obj: Any) -> DataFrameTableMeta:
340
346
  else:
341
347
  index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
342
348
 
349
+ if ignore_index:
350
+ table_index_column_names = []
351
+ pd_index_dtypes = pd.Series([], index=[])
352
+ else:
353
+ table_index_column_names = [f"_idx_{i}" for i in range(len(index_obj.names))]
354
+ pd_index_dtypes = index_dtypes
355
+
343
356
  return DataFrameTableMeta(
344
357
  table_name=table_name,
345
358
  type=obj_type,
346
359
  table_column_names=final_sql_columns,
347
- table_index_column_names=[f"_idx_{i}" for i in range(len(index_obj.names))],
360
+ table_index_column_names=table_index_column_names,
348
361
  pd_column_dtypes=pd_dtypes,
349
362
  pd_column_level_names=column_index_names,
350
- pd_index_dtypes=index_dtypes,
363
+ pd_index_dtypes=pd_index_dtypes,
351
364
  )
@@ -61,6 +61,16 @@ def test_pandas_to_odps_schema_dataframe(wrap_obj):
61
61
  assert meta.pd_column_level_names == [None]
62
62
  assert meta.pd_index_level_names == [None]
63
63
 
64
+ test_df = _wrap_maxframe_obj(data, wrap=wrap_obj)
65
+ schema, meta = pandas_to_odps_schema(test_df, ignore_index=True)
66
+ assert [c.name for c in schema.columns] == list(test_df.dtypes.index.str.lower())
67
+ assert [c.type.name for c in schema.columns] == ["double"] * len(test_df.columns)
68
+ assert meta.type == OutputType.dataframe
69
+ assert meta.table_column_names == list(test_df.dtypes.index.str.lower())
70
+ assert meta.table_index_column_names == []
71
+ assert meta.pd_column_level_names == [None]
72
+ assert meta.pd_index_level_names == []
73
+
64
74
  data.columns = pd.MultiIndex.from_tuples(
65
75
  [("A", "A"), ("A", "B"), ("A", "C"), ("B", "A"), ("B", "B")], names=["c1", "c2"]
66
76
  )
@@ -99,6 +109,15 @@ def test_pandas_to_odps_schema_series(wrap_obj):
99
109
  assert meta.pd_column_level_names == [None]
100
110
  assert meta.pd_index_level_names == [None]
101
111
 
112
+ schema, meta = pandas_to_odps_schema(test_s, ignore_index=True)
113
+ assert [c.name for c in schema.columns] == ["_data"]
114
+ assert [c.type.name for c in schema.columns] == ["double"]
115
+ assert meta.type == OutputType.series
116
+ assert meta.table_column_names == ["_data"]
117
+ assert meta.table_index_column_names == []
118
+ assert meta.pd_column_level_names == [None]
119
+ assert meta.pd_index_level_names == []
120
+
102
121
  data.index = pd.MultiIndex.from_arrays(
103
122
  [np.random.choice(list("ABC"), 100), np.random.randint(0, 10, 100)],
104
123
  names=["c1", "c2"],
@@ -130,6 +149,9 @@ def test_pandas_to_odps_schema_index(wrap_obj):
130
149
  assert meta.pd_column_level_names == []
131
150
  assert meta.pd_index_level_names == [None]
132
151
 
152
+ with pytest.raises(AssertionError):
153
+ pandas_to_odps_schema(test_idx, unknown_as_string=True, ignore_index=True)
154
+
133
155
  data = pd.MultiIndex.from_arrays(
134
156
  [np.random.choice(list("ABC"), 100), np.random.randint(0, 10, 100)],
135
157
  names=["c1", "c2"],
@@ -159,6 +181,9 @@ def test_pandas_to_odps_schema_scalar(wrap_obj):
159
181
  assert meta.pd_column_level_names == []
160
182
  assert meta.pd_index_level_names == [None]
161
183
 
184
+ with pytest.raises(AssertionError):
185
+ pandas_to_odps_schema(test_scalar, unknown_as_string=True, ignore_index=True)
186
+
162
187
 
163
188
  def test_odps_arrow_schema_conversion():
164
189
  odps_schema = odps_types.OdpsSchema(
maxframe/opcodes.py CHANGED
@@ -564,6 +564,11 @@ CHOLESKY_FUSE = 999988
564
564
  # MaxFrame-dedicated functions
565
565
  DATAFRAME_RESHUFFLE = 10001
566
566
 
567
+ # MaxFrame internal operators
568
+ DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
569
+ GROUPBY_AGGR_SAME_INDEX_MERGE = 100002
570
+ DATAFRAME_ILOC_GET_AND_RENAME_ITEM = 100003
571
+
567
572
  # fetches
568
573
  FETCH_SHUFFLE = 999998
569
574
  FETCH = 999999
maxframe/session.py CHANGED
@@ -1211,7 +1211,7 @@ def new_session(
1211
1211
  # load third party extensions.
1212
1212
  ensure_isolation_created(kwargs)
1213
1213
 
1214
- odps_entry = odps_entry or ODPS.from_environments()
1214
+ odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
1215
1215
  if address is None:
1216
1216
  from maxframe_client.session.consts import ODPS_SESSION_INSECURE_SCHEME
1217
1217
 
@@ -1255,7 +1255,9 @@ def get_default_or_create(**kwargs):
1255
1255
  if session is None:
1256
1256
  # no session attached, try to create one
1257
1257
  warnings.warn(warning_msg)
1258
- session = new_session(ODPS.from_environments(), **kwargs)
1258
+ session = new_session(
1259
+ ODPS.from_global() or ODPS.from_environments(), **kwargs
1260
+ )
1259
1261
  session.as_default()
1260
1262
  if isinstance(session, IsolatedAsyncSession):
1261
1263
  session = SyncSession.from_isolated_session(session)
maxframe/utils.py CHANGED
@@ -381,6 +381,11 @@ def build_temp_table_name(session_id: str, tileable_key: str) -> str:
381
381
  return f"tmp_mf_{session_id}_{tileable_key}"
382
382
 
383
383
 
384
+ def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> str:
385
+ temp_table = build_temp_table_name(session_id, tileable_key)
386
+ return f"{temp_table}_intermediate"
387
+
388
+
384
389
  def build_session_volume_name(session_id: str) -> str:
385
390
  return f"mf_vol_{session_id}"
386
391
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 0.1.0b3
3
+ Version: 0.1.0b4
4
4
  Summary: MaxFrame operator-based data analyze framework
5
5
  Requires-Dist: numpy >=1.19.0
6
6
  Requires-Dist: pandas >=1.0.0
@@ -1,5 +1,5 @@
1
1
  maxframe/__init__.py,sha256=MgltwhBvnUQDKKmHDg9Y69TJkRGmQQ9m8-D9FC2pcLU,1007
2
- maxframe/_utils.cp38-win_amd64.pyd,sha256=v0SiHG8U69LbmO5jg_wMpsYNpTlx3bjMIsvfOeEDnxE,308736
2
+ maxframe/_utils.cp38-win_amd64.pyd,sha256=6iQnaOs0v1br0i1SjkUtFgrbof_YHT5dTJBQF65XPag,308736
3
3
  maxframe/_utils.pxd,sha256=_qHN-lCY1FQgDFIrrqA79Ys0SBdonp9kXRMS93xKSYk,1187
4
4
  maxframe/_utils.pyx,sha256=_3p6aJEJ6WZYLcNZ6o4DxoxsxqadTlJXFlgDeFPxqUQ,17564
5
5
  maxframe/codegen.py,sha256=S23hTTu2fIJTv5PMIItE_NPUnadUyoiMmb-1YAqbaWw,16329
@@ -8,14 +8,14 @@ maxframe/env.py,sha256=xY4wjMWIJ4qLsFAQ5F-X5CrVR7dDSWiryPXni0YSK5c,1435
8
8
  maxframe/errors.py,sha256=xBnvoJjjNcHVLhwj77Dux9ut8isGVmmJXFqefmmx8Ak,711
9
9
  maxframe/extension.py,sha256=o7yiS99LWTtLF7ZX6F78UUJAqUyd-LllOXA2l69np50,2455
10
10
  maxframe/mixin.py,sha256=QfX0KqVIWDlVDSFs0lwdzLexw7lS7W_IUuK7aY1Ib8c,3624
11
- maxframe/opcodes.py,sha256=L-BvSFEUOMrtGJFXIH6zk2Xv_-te5VZxv5eDDCNNt0U,10566
11
+ maxframe/opcodes.py,sha256=Gcqv1DIMPOXLHOt9yYSRisu2ihP1ez6QX0d5c_L06TA,10732
12
12
  maxframe/protocol.py,sha256=N4i0ggLY131gwnxOrCgKeZwzhLKSRB171cx1lWRvUcw,14605
13
- maxframe/session.py,sha256=Mme-jB2hioJC6ttrXfX4XSeHLCMYoFR0ikmmhx82-vc,36624
13
+ maxframe/session.py,sha256=ETR-n-DDyCYVh7I1juJfNBxcOrESJSjxW7JOHaSt9To,36700
14
14
  maxframe/typing_.py,sha256=pAgOhHHSM376N7PJLtNXvS5LHNYywz5dIjnA_hHRWSM,1133
15
15
  maxframe/udf.py,sha256=EFAAV2c8SpWKcF9_8Pocpjc4bXsEASf57Qy_Q30YH4Q,2315
16
- maxframe/utils.py,sha256=CpA4Cqf5Lg7LMKJeJFsmybqVutcSh3sSqMhdHnIi0h4,35017
16
+ maxframe/utils.py,sha256=fzlh5MTJcX9ZBEnIZ4tVMzf846X9YsUmVGTKG5GtZr4,35212
17
17
  maxframe/config/__init__.py,sha256=AHo3deaCm1JnbbRX_udboJEDYrYytdvivp9RFxJcumI,671
18
- maxframe/config/config.py,sha256=Top-9hUEkPvaTlAGrP9GzGjwvZk6YPuHtwy4TEryKqg,13475
18
+ maxframe/config/config.py,sha256=Za9cgJfSJ4noNEcOaQ8tI1lhjT0Lvd3uCURqR7vlmbg,13578
19
19
  maxframe/config/validators.py,sha256=pKnloh2kEOBRSsT8ks-zL8XVSaMMVIEvHvwNJlideeo,1672
20
20
  maxframe/config/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
21
21
  maxframe/config/tests/test_config.py,sha256=FWQZ6KBUG_jY1-KaR-GKXl7khhlTbuLlk3uaEV8koM8,2839
@@ -33,7 +33,7 @@ maxframe/core/entity/output_types.py,sha256=NnNeDBVAEhD8dtPBWzpM7n6s8neVFrahjd0z
33
33
  maxframe/core/entity/tileables.py,sha256=6jJyFscvb8sH5K_k2VaNGeUm8YrpevCtou3WSUl4Dw8,13973
34
34
  maxframe/core/entity/utils.py,sha256=454RYVbTMVW_8KnfDqUPec4kz1p98izVTC2OrzhOkao,966
35
35
  maxframe/core/graph/__init__.py,sha256=n1WiszgVu0VdXsk12oiAyggduNwu-1-9YKnfZqvmmXk,838
36
- maxframe/core/graph/core.cp38-win_amd64.pyd,sha256=Pxd3Gfcq9JBPGAfySG6YaTFnJiTphRtr1FNe3NZCBUo,250368
36
+ maxframe/core/graph/core.cp38-win_amd64.pyd,sha256=_JfoM2G1nDBu9EZTOm40LYHvpP34d37SWYM1JmYHvSg,250368
37
37
  maxframe/core/graph/core.pyx,sha256=WYlYtXXSs72vfhf2ttJO-4u85exYzy2J9mlALHOMqoA,16354
38
38
  maxframe/core/graph/entity.py,sha256=RT_xbP5niUN5D6gqZ5Pg1vUegHn8bqPk8G8A30quOVA,5730
39
39
  maxframe/core/graph/builder/__init__.py,sha256=vTRY5xRPOMHUsK0jAtNIb1BjSPGqi_6lv86AroiiiL4,718
@@ -54,9 +54,9 @@ maxframe/core/operator/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH
54
54
  maxframe/core/operator/tests/test_core.py,sha256=iqZk4AWubFLO24V_VeV6SEy5xrzBFLP9qKK6tKO0SGs,1755
55
55
  maxframe/core/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
56
56
  maxframe/core/tests/test_mode.py,sha256=fyRH-ksa6MogEs6kNhtXhCZyvhYqflgaXJYI3nSo-ps,2507
57
- maxframe/dataframe/__init__.py,sha256=t_Fnu2MNCgOr_N7mMqGU3LeZXvBF1yOHtLb5u1PoHt0,2187
57
+ maxframe/dataframe/__init__.py,sha256=BdHZ903J7SkZDOxHDxNZvDxxXjSW8Gj-fJiHG26p-kM,2275
58
58
  maxframe/dataframe/arrays.py,sha256=rOvhxMQars9E3SOYSu0ygBuuRVY0QV6xzengnMqKs4s,29616
59
- maxframe/dataframe/core.py,sha256=63TqgmOCTr5wtwbILJ6bLtHZQ6xnZd7LvVrMqR27GDQ,76034
59
+ maxframe/dataframe/core.py,sha256=mGlHzJqs9EZa0qLxxe-Qp0Kd5SnsVI3r6itDhLxVdGM,76126
60
60
  maxframe/dataframe/initializer.py,sha256=WW96yQjquofNFt6RPZvgWW4SBmH0OEDj8-BxpuyKThY,10552
61
61
  maxframe/dataframe/operators.py,sha256=jl611oPN5TGpf6UDuIwcLUsjmTcbVBNLLd6cvq8TvKo,8144
62
62
  maxframe/dataframe/utils.py,sha256=5bk441fXa9dpeT98g8OY8TtMypuoB3XSc3rLA0gjPL4,45371
@@ -119,15 +119,16 @@ maxframe/dataframe/datasource/from_records.py,sha256=ygpKOMXZnDdWzGxMxQ4KdGv-tJF
119
119
  maxframe/dataframe/datasource/from_tensor.py,sha256=mShHYi0fZcG7ZShFVgIezaphh8tSFqR9-nQMm5YKIhw,15146
120
120
  maxframe/dataframe/datasource/index.py,sha256=X_NShW67nYJGxaWp3qOrvyInNkz9L-XHjbApU4fHoes,4518
121
121
  maxframe/dataframe/datasource/read_csv.py,sha256=IvQihmpcZIdzSD7ziX92aTAHNyP5WnTgd2cZz_h43sQ,24668
122
- maxframe/dataframe/datasource/read_odps_query.py,sha256=QKU_7R6dd6GXzNX4IN9uwu3jB17MfhTr2nUtx0LFSNk,10116
123
- maxframe/dataframe/datasource/read_odps_table.py,sha256=ocSKQQ7SwIkzliFCFWNzy3e8J3CBZsf4971oRdlgdks,9294
122
+ maxframe/dataframe/datasource/read_odps_query.py,sha256=rvlFp35g0vYIRZszhMSjHt5tAR668ir1GVBb-T80TQk,10224
123
+ maxframe/dataframe/datasource/read_odps_table.py,sha256=r_VbiWWgBpJArBBB-NJCMRUbCTfUrGEOMJmq9a2TIC8,9380
124
124
  maxframe/dataframe/datasource/read_parquet.py,sha256=SZPrWoax2mwMBNvRk_3lkS72pZLe-_X_GwQ1JROBMs4,14952
125
125
  maxframe/dataframe/datasource/series.py,sha256=elQVupKETh-hUHI2fTu8TRxBE729Vyrmpjx17XlRV-8,1964
126
126
  maxframe/dataframe/datasource/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
127
127
  maxframe/dataframe/datasource/tests/test_datasource.py,sha256=UumRBjE-bIuCi7Z4_3t8qb58ZcF8ePRZf3xF7DTvqIA,15041
128
128
  maxframe/dataframe/datastore/__init__.py,sha256=MmlHYvFacMReOHDQMXF-z2bCsLyrSHYBVwIlCsZGOK4,810
129
- maxframe/dataframe/datastore/to_csv.py,sha256=lheaF3ZmBPrcwcWyhK5gEVAGIaLJbvTyVAzqZFGG7eM,8026
130
- maxframe/dataframe/datastore/to_odps.py,sha256=Ml_iF9AspqIgGbeOAjTF3ukAwv-7SizribuqXZdxuXo,5776
129
+ maxframe/dataframe/datastore/core.py,sha256=HCqrZN47RP-IC6zDqLX_RErDUAWkcTB58FHNU70V2b4,762
130
+ maxframe/dataframe/datastore/to_csv.py,sha256=sns4bBgNpq7Ihb-goNqaBRdiEtrG-V6jqhNkWGZ1YaE,7974
131
+ maxframe/dataframe/datastore/to_odps.py,sha256=NVHLccpNYbF6YPk3PKziStonkKlR0JaOFF0AxWlMhBw,5724
131
132
  maxframe/dataframe/extensions/__init__.py,sha256=x6QCVQIfpa8JP2Vu-nZwHJ1CzATnyPoKCBMqxjXwpO0,1439
132
133
  maxframe/dataframe/extensions/accessor.py,sha256=0OA8YPL3rofSvdU0z_1kMLImahrvow_vhxdQDYODki0,1497
133
134
  maxframe/dataframe/extensions/reshuffle.py,sha256=yOlJ-3R4v9CoiEKFA1zgCOvbocy00MxpFBbQuTn-uDw,2720
@@ -159,7 +160,7 @@ maxframe/dataframe/indexing/loc.py,sha256=senwgO_ijLJtbzaeqS_CMefV8nlf3guEQXKdSQ
159
160
  maxframe/dataframe/indexing/reindex.py,sha256=v4Rd85aNfh3onzcFqOhdUjiLrDv9QuNtGh-OaWpnG-4,19699
160
161
  maxframe/dataframe/indexing/rename.py,sha256=E7gI6lHGoBbMnldtErxv5StmS7jrGDdXGtpDusavihA,14009
161
162
  maxframe/dataframe/indexing/rename_axis.py,sha256=ugKcve4Kp8EuSmokQFUL-mVhGQ1cd6IDZ3UauHPiFeQ,6511
162
- maxframe/dataframe/indexing/reset_index.py,sha256=h77bfAHkh1mknUwOBsnTmickejKikW6IUMXmhn6rjcg,14410
163
+ maxframe/dataframe/indexing/reset_index.py,sha256=_NFQZTjHzc_IgiqC-aqFJUfjneyJUN42-ujxGPfBVnQ,13524
163
164
  maxframe/dataframe/indexing/sample.py,sha256=cVpmTV4q0Lo5dK3RdIpP3G5Yo6A6rwCRcqQ-rBEKnPs,8393
164
165
  maxframe/dataframe/indexing/set_axis.py,sha256=ECRV5rRfbsKAQ90nEZVWCtGyu_0hN8ZPTmWNRGIJ0zo,5724
165
166
  maxframe/dataframe/indexing/set_index.py,sha256=XHX9CA0nvPd0War2GTKgr_FKuir_Tiu1bfQ5qz3vBKo,2180
@@ -283,7 +284,7 @@ maxframe/learn/contrib/pytorch/tests/test_pytorch.py,sha256=GHP-oD5uMU8LD90Jt2cH
283
284
  maxframe/lib/__init__.py,sha256=_PB28W40qku6YiT8fJYqdmEdRMQfelOwGeksCOZJfCc,657
284
285
  maxframe/lib/compression.py,sha256=QQpNK79iUC9zck74I0HKMhapSRnLBXtTRyS91taEVIc,1497
285
286
  maxframe/lib/functools_compat.py,sha256=2LTrkSw5i-z5E9XCtZzfg9-0vPrYxicKvDjnnNrAL1Q,2697
286
- maxframe/lib/mmh3.cp38-win_amd64.pyd,sha256=HMgH8mFYmm50QB_5WtKQ7UIOgGnFUGJoPzGKKfApJVs,17408
287
+ maxframe/lib/mmh3.cp38-win_amd64.pyd,sha256=wkfm7AMJoM3aVDDpH2BKcmmDwkj55JLalxBr6TE0Vq8,17408
287
288
  maxframe/lib/version.py,sha256=VOVZu3KHS53YUsb_vQsT7AyHwcCWAgc-3bBqV5ANcbQ,18941
288
289
  maxframe/lib/wrapped_pickle.py,sha256=bzEaokhAZlkjXqw1xfeKO1KX2awhKIz_1RT81yPPoag,3949
289
290
  maxframe/lib/aio/__init__.py,sha256=xzIYnV42_7CYuDTTv8svscIXQeJMF0nn8AXMbpv173M,963
@@ -332,13 +333,13 @@ maxframe/lib/tblib/pickling_support.py,sha256=D9A0eX7gJeyqhXWxJJZ1GRwwcc5lj86wBR
332
333
  maxframe/lib/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
333
334
  maxframe/lib/tests/test_wrapped_pickle.py,sha256=WV0EJQ1hTSp8xjuosWWtEO7PeiBqdDUYgStxp72_c94,1575
334
335
  maxframe/odpsio/__init__.py,sha256=0SesD04XxFli4Gp23ipMkefFQ2ZTB0PItwZoSHpDC-k,820
335
- maxframe/odpsio/arrow.py,sha256=fYNQa_n-Dy-y3l9oAu50-fOsV3C1GaNZpBsOhlYO8i4,3732
336
- maxframe/odpsio/schema.py,sha256=clW2pgjGY6lsHSfyKx-M6xqvPJerCsqlrGm6ommFHmY,12068
336
+ maxframe/odpsio/arrow.py,sha256=vvi9g5IZ8Wn224obv3vOO8U1X6UlNNTxSOfDA4Dvx0M,3875
337
+ maxframe/odpsio/schema.py,sha256=Hba-eCXnBUS6NxHRsshaohzO1eThm4HeVzzvAF7E3Vg,12479
337
338
  maxframe/odpsio/tableio.py,sha256=r_Y47V5NhvLHsK7kypdswSDmOWag1Zoh9-FEjKe5MG0,9660
338
339
  maxframe/odpsio/volumeio.py,sha256=IT_OO-RG2rJZOEx8C8xRr0oNR358RSAJQAp6WGxeXzI,3838
339
340
  maxframe/odpsio/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
340
341
  maxframe/odpsio/tests/test_arrow.py,sha256=yeDWFzsm2IMS-k6jimlQ7uim5T6WW1Anuy8didaf4cs,3194
341
- maxframe/odpsio/tests/test_schema.py,sha256=cXQYEhETUSoJmQ7MW33bFyR2ho6WciU4ot9LnxFsARM,10839
342
+ maxframe/odpsio/tests/test_schema.py,sha256=BI2Eq_DkyCimJEjSylAm_z3SLVNdEzKto7nqu7uyiW0,12088
342
343
  maxframe/odpsio/tests/test_tableio.py,sha256=ZyQxBAVA5GG3j_NOPTTFs5vCQqQywhRKC9OAJx9LJxM,4789
343
344
  maxframe/odpsio/tests/test_volumeio.py,sha256=xvnrPZueZ76OAWK2zW_tHHI_cDxo7gJXTHiEe0lkmjk,3112
344
345
  maxframe/remote/__init__.py,sha256=Yu1ZDLICbehNfd1ur7_2bnIn2VFIsTxH_cILCbHAeZY,747
@@ -346,7 +347,7 @@ maxframe/remote/core.py,sha256=w_eTDEs0O7iIzLn1YrMGh2gcNAzzbqV0mx2bRT7su_U,7001
346
347
  maxframe/remote/run_script.py,sha256=k93-vaFLUanWoBRai4-78DX_SLeZ8_rbbxcCtOIXZO8,3677
347
348
  maxframe/serialization/__init__.py,sha256=nxxU7CI6MRcL3sjA1KmLkpTGKA3KG30FKl-MJJ0MCdI,947
348
349
  maxframe/serialization/arrow.py,sha256=OMeDjLcPgagqzokG7g3Vhwm6Xw1j-Kph1V2QsIwi6dw,3513
349
- maxframe/serialization/core.cp38-win_amd64.pyd,sha256=pm3uyk95Wnhn8B_LPdI9pV_gHACS9uBywUES38pY2OI,397824
350
+ maxframe/serialization/core.cp38-win_amd64.pyd,sha256=gBaf2kOFy6elDw4BCzayrqDqK84HpYn9vBrat_XEHdo,397824
350
351
  maxframe/serialization/core.pxd,sha256=Fymih3Wo-CrOY27_o_DRINdbRGR7mgiT-XCaXCXafxM,1347
351
352
  maxframe/serialization/core.pyx,sha256=Qmipu3LiJGIBVy_7d4tSJqcYWnG5xj2I7IaPv2PSq5E,35078
352
353
  maxframe/serialization/exception.py,sha256=e7bZyPlZ8XhSCdeOwlYreq0HazPXKOgOA6r9Q4Ecn2Y,3113
@@ -616,14 +617,14 @@ maxframe_client/clients/spe.py,sha256=uizNBejhU_FrMhsgsFgDnq7gL7Cxk803LeLYmr3nmx
616
617
  maxframe_client/session/__init__.py,sha256=9zFCd3zkSADESAFc4SPoQ2nkvRwsIhhpNNO2TtSaWbU,854
617
618
  maxframe_client/session/consts.py,sha256=nD-D0zHXumbQI8w3aUyltJS59K5ftipf3xCtHNLmtc8,1380
618
619
  maxframe_client/session/graph.py,sha256=GSZaJ-PV4DK8bTcNtoSoY5kDTyyIRAKleh4tOCSUbsI,4470
619
- maxframe_client/session/odps.py,sha256=RG7_28UaS_8tgJUOa4ohw2QtwX2fF4yqsGGy2MksQWI,16700
620
+ maxframe_client/session/odps.py,sha256=AZKFz1_Pl8-zJ_RwxBoN00uUDKf4MbzhvVZ5cTywURM,16781
620
621
  maxframe_client/session/task.py,sha256=R8x8OERIb673vTq-o0ig6Zy2NT4_jvi8AbLhyMaljo8,11409
621
622
  maxframe_client/session/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
622
623
  maxframe_client/session/tests/test_task.py,sha256=861usEURVXeTUzfJYZmBfwsHfZFexG23mMtT5IJOOm4,3364
623
624
  maxframe_client/tests/__init__.py,sha256=29eM5D4knhYwe3TF42naTuC5b4Ym3VeH4rK8KpdLWNY,609
624
625
  maxframe_client/tests/test_fetcher.py,sha256=7iYXLMIoCJLfgUkjB2HBkV-sqQ-xGlhtzfp9hRJz_kM,3605
625
- maxframe_client/tests/test_session.py,sha256=s8pxf0I6PjOd6ZZQ4IYjfOM_3F3wf6SGPRMz0tAZFmo,6514
626
- maxframe-0.1.0b3.dist-info/METADATA,sha256=aZIVKoKmfRSjIExk2K16jB0fJj5iApw9FhoHqv6hs_k,3147
627
- maxframe-0.1.0b3.dist-info/WHEEL,sha256=Wb4yjwIXVKEpht4JWFUZNCzpG7JLBNZnqtK2YNdqLkI,100
628
- maxframe-0.1.0b3.dist-info/top_level.txt,sha256=64x-fc2q59c_vXwNUkehyjF1vb8JWqFSdYmUqIFqoTM,31
629
- maxframe-0.1.0b3.dist-info/RECORD,,
626
+ maxframe_client/tests/test_session.py,sha256=_i6ICUTg6LMs-KD3w3pYNSt5_k9gCOkMx4gTfE20CF8,7130
627
+ maxframe-0.1.0b4.dist-info/METADATA,sha256=uH-sH39DV-QlQoIEKtD3W7Os0Z99jZgTs7uyKf-t30Q,3147
628
+ maxframe-0.1.0b4.dist-info/WHEEL,sha256=Wb4yjwIXVKEpht4JWFUZNCzpG7JLBNZnqtK2YNdqLkI,100
629
+ maxframe-0.1.0b4.dist-info/top_level.txt,sha256=64x-fc2q59c_vXwNUkehyjF1vb8JWqFSdYmUqIFqoTM,31
630
+ maxframe-0.1.0b4.dist-info/RECORD,,
@@ -115,7 +115,7 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
115
115
  ):
116
116
  super().__init__(address, session_id)
117
117
  self.timeout = timeout
118
- self._odps_entry = odps_entry or ODPS.from_environments()
118
+ self._odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
119
119
  self._tileable_to_infos = weakref.WeakKeyDictionary()
120
120
 
121
121
  self._caller = self._create_caller(odps_entry, address, **kwargs)
@@ -147,15 +147,16 @@ class MaxFrameSession(ToThreadMixin, IsolatedAsyncSession):
147
147
  data = t.op.get_data()
148
148
  batch_size = options.session.upload_batch_size
149
149
 
150
- halo_client = HaloTableIO(self._odps_entry)
151
- with halo_client.open_writer(table_obj.full_table_name) as writer:
152
- for batch_start in range(0, len(data), batch_size):
153
- if isinstance(data, pd.Index):
154
- batch = data[batch_start : batch_start + batch_size]
155
- else:
156
- batch = data.iloc[batch_start : batch_start + batch_size]
157
- arrow_batch, _ = pandas_to_arrow(batch)
158
- writer.write(arrow_batch)
150
+ if len(data):
151
+ halo_client = HaloTableIO(self._odps_entry)
152
+ with halo_client.open_writer(table_obj.full_table_name) as writer:
153
+ for batch_start in range(0, len(data), batch_size):
154
+ if isinstance(data, pd.Index):
155
+ batch = data[batch_start : batch_start + batch_size]
156
+ else:
157
+ batch = data.iloc[batch_start : batch_start + batch_size]
158
+ arrow_batch, _ = pandas_to_arrow(batch)
159
+ writer.write(arrow_batch)
159
160
 
160
161
  read_tileable = read_odps_table(
161
162
  table_obj.full_table_name,
@@ -28,6 +28,7 @@ from maxframe.lib.aio import stop_isolation
28
28
  from maxframe.protocol import ResultInfo
29
29
  from maxframe.serialization import RemoteException
30
30
  from maxframe.session import new_session
31
+ from maxframe.tests.utils import tn
31
32
  from maxframe.utils import build_temp_table_name
32
33
  from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F401
33
34
  framedriver_app,
@@ -115,6 +116,26 @@ def test_simple_run_dataframe(start_mock_session):
115
116
  assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
116
117
 
117
118
 
119
+ def test_run_empty_table(start_mock_session):
120
+ odps_entry = ODPS.from_environments()
121
+
122
+ table_name = tn("test_session_empty_table")
123
+ odps_entry.delete_table(table_name, if_exists=True)
124
+ empty_table = odps_entry.create_table(
125
+ table_name, "_idx_0 bigint, a double, b double", lifecycle=1
126
+ )
127
+ df = md.read_odps_table(table_name, index_col="_idx_0")
128
+ df["d"] = df["a"] + df["b"]
129
+
130
+ executed = df.execute()
131
+ assert "Index: []" in str(executed)
132
+
133
+ fetched = executed.fetch()
134
+ assert 0 == len(fetched)
135
+
136
+ empty_table.drop()
137
+
138
+
118
139
  def test_run_dataframe_with_pd_source(start_mock_session):
119
140
  odps_entry = ODPS.from_environments()
120
141