maxframe 1.0.0rc3-cp37-cp37m-win_amd64.whl → 1.0.0rc4-cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (57)
  1. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +1 -0
  3. maxframe/config/config.py +13 -1
  4. maxframe/conftest.py +43 -12
  5. maxframe/core/entity/executable.py +1 -1
  6. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  7. maxframe/dataframe/arithmetic/docstring.py +26 -2
  8. maxframe/dataframe/arithmetic/equal.py +4 -2
  9. maxframe/dataframe/arithmetic/greater.py +4 -2
  10. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  11. maxframe/dataframe/arithmetic/less.py +2 -2
  12. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  13. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  14. maxframe/dataframe/core.py +2 -0
  15. maxframe/dataframe/datasource/read_odps_query.py +66 -7
  16. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  17. maxframe/dataframe/datasource/tests/test_datasource.py +35 -6
  18. maxframe/dataframe/datastore/to_odps.py +7 -0
  19. maxframe/dataframe/extensions/__init__.py +3 -0
  20. maxframe/dataframe/extensions/flatmap.py +326 -0
  21. maxframe/dataframe/extensions/tests/test_extensions.py +62 -1
  22. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  23. maxframe/dataframe/indexing/rename.py +11 -0
  24. maxframe/dataframe/initializer.py +11 -1
  25. maxframe/dataframe/misc/drop_duplicates.py +18 -1
  26. maxframe/dataframe/tests/test_initializer.py +33 -2
  27. maxframe/io/odpsio/schema.py +5 -3
  28. maxframe/io/odpsio/tableio.py +44 -38
  29. maxframe/io/odpsio/tests/test_schema.py +0 -4
  30. maxframe/io/odpsio/volumeio.py +9 -3
  31. maxframe/learn/contrib/__init__.py +2 -1
  32. maxframe/learn/contrib/graph/__init__.py +15 -0
  33. maxframe/learn/contrib/graph/connected_components.py +215 -0
  34. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  35. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  36. maxframe/learn/contrib/xgboost/classifier.py +3 -3
  37. maxframe/learn/contrib/xgboost/predict.py +8 -39
  38. maxframe/learn/contrib/xgboost/train.py +4 -3
  39. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  40. maxframe/opcodes.py +3 -0
  41. maxframe/protocol.py +6 -1
  42. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  43. maxframe/session.py +9 -2
  44. maxframe/tensor/indexing/getitem.py +2 -0
  45. maxframe/tensor/merge/concatenate.py +23 -20
  46. maxframe/tensor/merge/vstack.py +5 -1
  47. maxframe/tensor/misc/transpose.py +1 -1
  48. maxframe/utils.py +34 -12
  49. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/METADATA +1 -1
  50. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/RECORD +57 -52
  51. maxframe_client/fetcher.py +10 -8
  52. maxframe_client/session/consts.py +3 -0
  53. maxframe_client/session/odps.py +84 -13
  54. maxframe_client/session/task.py +58 -20
  55. maxframe_client/tests/test_session.py +14 -2
  56. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/WHEEL +0 -0
  57. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/top_level.txt +0 -0
maxframe/_utils.cp37-win_amd64.pyd CHANGED (binary file, contents not shown)
maxframe/codegen.py CHANGED
@@ -347,6 +347,7 @@ BUILTIN_ENGINE_SPE = "SPE"
 BUILTIN_ENGINE_MCSQL = "MCSQL"
 
 FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
+ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
 
 
 class BigDagCodeGenerator(metaclass=abc.ABCMeta):
maxframe/config/config.py CHANGED
@@ -343,6 +343,9 @@ default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remo
 default_options.register_option(
     "sql.generate_comments", True, validator=is_bool, remote=True
 )
+default_options.register_option(
+    "sql.auto_use_common_image", True, validator=is_bool, remote=True
+)
 default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
 
 default_options.register_option("is_production", False, validator=is_bool, remote=True)
@@ -371,13 +374,22 @@ default_options.register_option(
     validator=is_numeric,
     remote=True,
 )
+default_options.register_option(
+    "session.quota_name", None, validator=is_null | is_string, remote=True
+)
+default_options.register_option(
+    "session.enable_schema", None, validator=is_null | is_bool, remote=True
+)
+default_options.register_option(
+    "session.default_schema", None, validator=is_null | is_string, remote=True
+)
 default_options.register_option(
     "session.upload_batch_size",
     _DEFAULT_UPLOAD_BATCH_SIZE,
     validator=is_integer,
 )
 default_options.register_option(
-    "session.table_lifecycle", None, validator=is_null | is_integer
+    "session.table_lifecycle", None, validator=is_null | is_integer, remote=True
 )
 default_options.register_option(
     "session.temp_table_lifecycle",
maxframe/conftest.py CHANGED
@@ -14,10 +14,11 @@
 
 import faulthandler
 import os
-from configparser import ConfigParser, NoOptionError
+from configparser import ConfigParser, NoOptionError, NoSectionError
 
 import pytest
 from odps import ODPS
+from odps.accounts import BearerTokenAccount
 
 from .config import options
 
@@ -34,12 +35,23 @@ def test_config():
     return config
 
 
-@pytest.fixture(scope="session", autouse=True)
-def odps_envs(test_config):
-    access_id = test_config.get("odps", "access_id")
-    secret_access_key = test_config.get("odps", "secret_access_key")
-    project = test_config.get("odps", "project")
-    endpoint = test_config.get("odps", "endpoint")
+def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
+    try:
+        access_id = test_config.get(section_name, "access_id")
+    except NoOptionError:
+        access_id = test_config.get("odps", "access_id")
+    try:
+        secret_access_key = test_config.get(section_name, "secret_access_key")
+    except NoOptionError:
+        secret_access_key = test_config.get("odps", "secret_access_key")
+    try:
+        project = test_config.get(section_name, "project")
+    except NoOptionError:
+        project = test_config.get("odps", "project")
+    try:
+        endpoint = test_config.get(section_name, "endpoint")
+    except NoOptionError:
+        endpoint = test_config.get("odps", "endpoint")
     try:
         tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
     except NoOptionError:
@@ -55,12 +67,31 @@ def odps_envs(test_config):
         ],
     }
     token = entry.get_project().generate_auth_token(policy, "bearer", 5)
+    return ODPS(
+        account=BearerTokenAccount(token, 5),
+        project=project,
+        endpoint=endpoint,
+        tunnel_endpoint=tunnel_endpoint,
+    )
+
+
+@pytest.fixture(scope="session")
+def odps_with_schema(test_config):
+    try:
+        return _get_odps_env(test_config, "odps_with_schema")
+    except NoSectionError:
+        pytest.skip("Need to specify odps_with_schema section in test.conf")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def odps_envs(test_config):
+    entry = _get_odps_env(test_config, "odps")
 
-    os.environ["ODPS_BEARER_TOKEN"] = token
-    os.environ["ODPS_PROJECT_NAME"] = project
-    os.environ["ODPS_ENDPOINT"] = endpoint
-    if tunnel_endpoint:
-        os.environ["ODPS_TUNNEL_ENDPOINT"] = tunnel_endpoint
+    os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
+    os.environ["ODPS_PROJECT_NAME"] = entry.project
+    os.environ["ODPS_ENDPOINT"] = entry.endpoint
+    if entry.tunnel_endpoint:
+        os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
 
     try:
         yield
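Note: the new odps_with_schema fixture expects a section of the same name in test.conf; _get_odps_env falls back to the [odps] section for any missing key. A hypothetical test.conf sketch:

    [odps]
    access_id = <access-id>
    secret_access_key = <secret>
    project = test_project
    endpoint = <endpoint-url>

    [odps_with_schema]
    ; only overrides are needed; other keys fall back to [odps]
    project = schema_enabled_project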
maxframe/core/entity/executable.py CHANGED
@@ -46,7 +46,7 @@ class DecrefRunner:
                 break
 
             session = session_ref()
-            if session is None:
+            if session is None or session.closed:
                 fut.set_result(None)
                 continue
             try:
maxframe/core/graph/core.cp37-win_amd64.pyd CHANGED (binary file, contents not shown)
maxframe/dataframe/arithmetic/docstring.py CHANGED
@@ -185,7 +185,6 @@ e NaN
 dtype: float64
 """
 
-# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/48
 _flex_comp_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
@@ -291,7 +290,7 @@ C True False
 
 Compare to a DataFrame of different shape.
 
->>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
+>>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
 ...                      index=['A', 'B', 'C', 'D'])
 >>> other.execute()
    revenue
@@ -306,6 +305,31 @@ A False False
 B False False
 C False True
 D False False
+
+Compare to a MultiIndex by level.
+
+>>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
+...                               'revenue': [100, 250, 300, 200, 175, 225]}},
+...                              index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
+...                                     ['A', 'B', 'C', 'A', 'B', 'C']])
+>>> df_multindex.execute()
+      cost  revenue
+Q1 A   250      100
+   B   150      250
+   C   100      300
+Q2 A   150      200
+   B   300      175
+   C   220      225
+
+>>> df.le(df_multindex, level=1).execute()
+       cost  revenue
+Q1 A   True     True
+   B   True     True
+   C   True     True
+Q2 A  False     True
+   B   True    False
+   C   True    False
+
 """
 
 
maxframe/dataframe/arithmetic/equal.py CHANGED
@@ -51,6 +51,8 @@ dtype: bool
 
 
 @bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
-def eq(df, other, axis="columns", level=None):
-    op = DataFrameEqual(axis=axis, level=level, lhs=df, rhs=other)
+def eq(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
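Note: the same fill_value keyword lands on all six flexible comparison wrappers (eq here; ne, lt, le, gt, ge below). A minimal usage sketch, assuming pandas-style semantics where fill_value substitutes for missing values before the comparison (data is illustrative):

    import maxframe.dataframe as md

    left = md.DataFrame({"a": [1.0, None, 3.0]})
    right = md.DataFrame({"a": [1.0, 2.0, None]})

    # NaNs on either side are replaced with 0 before comparing
    left.eq(right, fill_value=0).execute()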
maxframe/dataframe/arithmetic/greater.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
 
 
 @bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
-def gt(df, other, axis="columns", level=None):
-    op = DataFrameGreater(axis=axis, level=level, lhs=df, rhs=other)
+def gt(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameGreater(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/arithmetic/greater_equal.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
 
 
 @bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
-def ge(df, other, axis="columns", level=None):
-    op = DataFrameGreaterEqual(axis=axis, level=level, lhs=df, rhs=other)
+def ge(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameGreaterEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/arithmetic/less.py CHANGED
@@ -52,6 +52,6 @@ dtype: bool
 
 
 @bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
-def lt(df, other, axis="columns", level=None):
-    op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
+def lt(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
     return op(df, other)
maxframe/dataframe/arithmetic/less_equal.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
 
 
 @bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
-def le(df, other, axis="columns", level=None):
-    op = DataFrameLessEqual(axis=axis, level=level, lhs=df, rhs=other)
+def le(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameLessEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/arithmetic/not_equal.py CHANGED
@@ -51,6 +51,8 @@ dtype: bool
 
 
 @bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
-def ne(df, other, axis="columns", level=None):
-    op = DataFrameNotEqual(axis=axis, level=level, lhs=df, rhs=other)
+def ne(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameNotEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/core.py CHANGED
@@ -1666,6 +1666,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
             raise NotImplementedError
 
         corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
+        if corner_data is None:
+            return
 
         buf = StringIO()
         max_rows = pd.get_option("display.max_rows")
maxframe/dataframe/datasource/read_odps_query.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import dataclasses
+import logging
 import re
 from typing import Dict, List, Optional, Tuple, Union
 
@@ -22,12 +23,14 @@ from odps import ODPS
 from odps.types import Column, OdpsSchema, validate_data_type
 
 from ... import opcodes
+from ...config import options
 from ...core import OutputType
 from ...core.graph import DAG
 from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
     AnyField,
     BoolField,
+    DictField,
     FieldTypes,
     Int64Field,
     ListField,
@@ -37,6 +40,10 @@ from ...serialization.serializables import (
 from ..utils import parse_index
 from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
 
+logger = logging.getLogger(__name__)
+
+_DEFAULT_ANONYMOUS_COL_PREFIX = "_anon_col_"
+
 _EXPLAIN_DEPENDS_REGEX = re.compile(r"([^\s]+) depends on: ([^\n]+)")
 _EXPLAIN_JOB_REGEX = re.compile(r"(\S+) is root job")
 _EXPLAIN_TASKS_HEADER_REGEX = re.compile(r"In Job ([^:]+):")
@@ -46,8 +53,11 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
     r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
     re.MULTILINE,
 )
-_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
-_ANONYMOUS_COL_REGEX = re.compile(r"^_c\d+$")
+_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^\n]+)\)(?:| AS ([^ ]+))(?:\n|$)")
+_ANONYMOUS_COL_REGEX = re.compile(r"^_c(\d+)$")
+
+_SIMPLE_SCHEMA_COLS_REGEX = re.compile(r"SELECT (([^:]+:[^, ]+[, ]*)+)FROM")
+_SIMPLE_SCHEMA_COL_REGEX = re.compile(r"([^\.]+):([^, ]+)")
 
 
 @dataclasses.dataclass
@@ -152,7 +162,7 @@ def _resolve_task_sector(job_name: str, sector: str) -> TaskSector:
     return TaskSector(job_name, task_name, out_target, schemas)
 
 
-def _parse_explained_schema(explain_string: str) -> OdpsSchema:
+def _parse_full_explain(explain_string: str) -> OdpsSchema:
     sectors = _split_explain_string(explain_string)
     jobs_sector = tasks_sector = None
 
@@ -191,6 +201,25 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
     return OdpsSchema(cols)
 
 
+def _parse_simple_explain(explain_string: str) -> OdpsSchema:
+    fields_match = _SIMPLE_SCHEMA_COLS_REGEX.search(explain_string)
+    if not fields_match:
+        raise ValueError("Cannot detect output table schema")
+
+    fields_str = fields_match.group(1)
+    cols = []
+    for field, type_name in _SIMPLE_SCHEMA_COL_REGEX.findall(fields_str):
+        cols.append(Column(field, validate_data_type(type_name)))
+    return OdpsSchema(cols)
+
+
+def _parse_explained_schema(explain_string: str) -> OdpsSchema:
+    if explain_string.startswith("AdhocSink"):
+        return _parse_simple_explain(explain_string)
+    else:
+        return _parse_full_explain(explain_string)
+
+
 class DataFrameReadODPSQuery(
     IncrementalIndexDatasource,
     ColumnPruneSupportedDataSourceMixin,
@@ -205,6 +234,7 @@ class DataFrameReadODPSQuery(
     string_as_binary = BoolField("string_as_binary", default=None)
     index_columns = ListField("index_columns", FieldTypes.string, default=None)
     index_dtypes = SeriesField("index_dtypes", default=None)
+    column_renames = DictField("column_renames", default=None)
 
     def get_columns(self):
         return self.columns
@@ -246,6 +276,8 @@ def read_odps_query(
     odps_entry: ODPS = None,
     index_col: Union[None, str, List[str]] = None,
     string_as_binary: bool = None,
+    sql_hints: Dict[str, str] = None,
+    anonymous_col_prefix: str = _DEFAULT_ANONYMOUS_COL_PREFIX,
     **kw,
 ):
     """
@@ -260,25 +292,51 @@ def read_odps_query(
         MaxCompute SQL statement.
     index_col: Union[None, str, List[str]]
         Columns to be specified as indexes.
+    string_as_binary: bool, optional
+        Whether to convert string columns to binary.
+    sql_hints: Dict[str, str], optional
+        User specified SQL hints.
+    anonymous_col_prefix: str, optional
+        Prefix for anonymous columns, '_anon_col_' by default.
 
     Returns
     -------
     result: DataFrame
         DataFrame read from MaxCompute (ODPS) table
     """
+    hints = options.sql.settings.copy() or {}
+    if sql_hints:
+        hints.update(sql_hints)
+
     odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+
+    if options.session.enable_schema or odps_entry.is_schema_namespace_enabled():
+        hints["odps.namespace.schema"] = "true"
+        hints["odps.sql.allow.namespace.schema"] = "true"
+
+    # fixme workaround for multi-stage split process
+    hints["odps.sql.object.table.split.by.object.size.enabled"] = "false"
+
     if odps_entry is None:
         raise ValueError("Missing odps_entry parameter")
-    inst = odps_entry.execute_sql(f"EXPLAIN {query}")
+    inst = odps_entry.execute_sql(f"EXPLAIN {query}", hints=hints)
+    logger.debug("Explain instance ID: %s", inst.id)
    explain_str = list(inst.get_task_results().values())[0]
 
     odps_schema = _parse_explained_schema(explain_str)
 
+    new_columns = []
+    col_renames = {}
     for col in odps_schema.columns:
-        if _ANONYMOUS_COL_REGEX.match(col.name) and col.name not in query:
-            raise ValueError("Need to specify names for all columns in SELECT clause.")
+        anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
+        if anon_match and col.name not in query:
+            new_name = anonymous_col_prefix + anon_match.group(1)
+            col_renames[col.name] = new_name
+            new_columns.append(Column(new_name, col.type))
+        else:
+            new_columns.append(col)
 
-    dtypes = odps_schema_to_pandas_dtypes(odps_schema)
+    dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
 
     if not index_col:
         index_dtypes = None
@@ -301,5 +359,6 @@ def read_odps_query(
         string_as_binary=string_as_binary,
         index_columns=index_col,
         index_dtypes=index_dtypes,
+        column_renames=col_renames,
     )
     return op(chunk_bytes=chunk_bytes, chunk_size=chunk_size)
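Net effect: a query whose SELECT list contains unnamed expressions no longer raises "Need to specify names for all columns"; anonymous columns such as _c1 are renamed with the configurable prefix, and the mapping travels on the operator as column_renames. A hedged usage sketch (table name, query, and hint key are illustrative, not taken from this diff):

    import maxframe.dataframe as md

    # "col2 + col3" has no alias, so its output column (_c1) would surface
    # as "_anon_col_1" by default, or "expr_1" with the prefix below
    df = md.read_odps_query(
        "SELECT col1, col2 + col3 FROM my_table",   # hypothetical query
        sql_hints={"some.odps.hint": "value"},      # hypothetical hint, merged over options.sql.settings
        anonymous_col_prefix="expr_",
    )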
maxframe/dataframe/datasource/read_odps_table.py CHANGED
@@ -22,6 +22,7 @@ from odps.models import Table
 from odps.utils import to_timestamp
 
 from ... import opcodes
+from ...config import options
 from ...core import OutputType
 from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
@@ -167,12 +168,13 @@ def read_odps_table(
         DataFrame read from MaxCompute (ODPS) table
     """
     odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+    schema = options.session.default_schema or odps_entry.schema
     if odps_entry is None:
         raise ValueError("Missing odps_entry parameter")
     if isinstance(table_name, Table):
         table = table_name
     else:
-        table = odps_entry.get_table(table_name)
+        table = odps_entry.get_table(table_name, schema=schema)
 
     if not table.table_schema.partitions and (
         partitions is not None or append_partitions
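A short sketch of how the new schema resolution interacts with options (names are illustrative):

    import maxframe.dataframe as md
    from maxframe.config import options

    options.session.default_schema = "my_schema"  # hypothetical schema name

    # the table lookup now behaves like
    # odps_entry.get_table("my_table", schema="my_schema")
    df = md.read_odps_table("my_table")           # hypothetical table name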
maxframe/dataframe/datasource/tests/test_datasource.py CHANGED
@@ -19,6 +19,7 @@ import numpy as np
 import pandas as pd
 import pytest
 from odps import ODPS
+from odps import types as odps_types
 
 from .... import tensor as mt
 from ....core import OutputType
@@ -35,7 +36,7 @@ from ..from_tensor import (
 )
 from ..index import from_pandas as from_pandas_index
 from ..index import from_tileable
-from ..read_odps_query import ColumnSchema, _resolve_task_sector
+from ..read_odps_query import ColumnSchema, _parse_simple_explain, _resolve_task_sector
 from ..series import from_pandas as from_pandas_series
 
 ray = lazy_import("ray")
@@ -329,10 +330,6 @@ def test_from_odps_query():
         read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
     assert "instant query" in err_info.value.args[0]
 
-    with pytest.raises(ValueError) as err_info:
-        read_odps_query(f"SELECT col1, col2 + col3 FROM {table1_name}")
-    assert "names" in err_info.value.args[0]
-
     query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
     df = read_odps_query(query1)
     assert df.op.query == query1
@@ -401,7 +398,9 @@ def test_date_range():
 
 
 def test_resolve_task_sector():
-    input_path = os.path.join(os.path.dirname(__file__), "test-data", "task-input.txt")
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-full.txt"
+    )
     with open(input_path, "r") as f:
         sector = f.read()
     actual_sector = _resolve_task_sector("job0", sector)
@@ -413,3 +412,33 @@ def test_resolve_task_sector():
     assert actual_sector.schema[0] == ColumnSchema("unnamed: 0", "bigint", "")
     assert actual_sector.schema[1] == ColumnSchema("id", "bigint", "id_alias")
     assert actual_sector.schema[2] == ColumnSchema("listing_url", "string", "")
+
+
+def test_resolve_task_odps2():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-odps2.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+    actual_sector = _resolve_task_sector("job0", sector)
+
+    assert actual_sector.job_name == "job0"
+    assert actual_sector.task_name == "M1"
+    assert actual_sector.output_target == "Screen"
+    assert len(actual_sector.schema) == 2
+    assert actual_sector.schema[0] == ColumnSchema("key", "varchar(2048)", "")
+    assert actual_sector.schema[1] == ColumnSchema("data", "binary", "")
+
+
+def test_resolve_simple_explain():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-simple.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+
+    schema = _parse_simple_explain(sector)
+    assert schema.columns[0].name == "memberid"
+    assert schema.columns[0].type == odps_types.string
+    assert schema.columns[1].name == "createdate"
+    assert schema.columns[1].type == odps_types.bigint
maxframe/dataframe/datastore/to_odps.py CHANGED
@@ -17,6 +17,7 @@
 import logging
 from typing import List, Optional, Union
 
+from odps import ODPS
 from odps.models import Table as ODPSTable
 from odps.types import PartitionSpec
 
@@ -136,8 +137,14 @@ def to_odps_table(
     --------
 
     """
+    odps_entry = ODPS.from_global() or ODPS.from_environments()
     if isinstance(table, ODPSTable):
         table = table.full_table_name
+    elif options.session.enable_schema and "." not in table:
+        default_schema = (
+            options.session.default_schema or odps_entry.schema or "default"
+        )
+        table = default_schema + "." + table
 
     if isinstance(index_label, str):
         index_label = [index_label]
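With schema support enabled, a bare table name passed to to_odps_table is now qualified with the default schema. Sketch (assuming an existing DataFrame df and a global ODPS entry; the table name is illustrative):

    from maxframe.config import options

    options.session.enable_schema = True
    # with no default_schema set and none on the entry, "default" is used,
    # so the data lands in "default.result_table"
    df.to_odps_table("result_table").execute()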
maxframe/dataframe/extensions/__init__.py CHANGED
@@ -18,6 +18,7 @@ from .accessor import (
     IndexMaxFrameAccessor,
     SeriesMaxFrameAccessor,
 )
+from .flatmap import df_flatmap, series_flatmap
 from .reshuffle import DataFrameReshuffle, df_reshuffle
 
 
@@ -25,6 +26,8 @@ def _install():
     from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
 
     DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
+    DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
+    SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
 
     if DataFrameMaxFrameAccessor._api_count:
         for t in DATAFRAME_TYPE:
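flatmap.py itself (326 added lines) is not among the hunks shown here, so only the registration is certain: flatmap becomes available on the MaxFrame accessor of both DataFrame and Series. The call below is a hypothetical sketch of where the new API hangs; the real signature lives in maxframe/dataframe/extensions/flatmap.py:

    import maxframe.dataframe as md

    df = md.DataFrame({"text": ["a b", "c d e"]})

    # hypothetical: expand each row into zero or more output rows;
    # the callable and dtypes arguments are assumptions, not confirmed by this diff
    df.mf.flatmap(
        lambda row: row["text"].split(" "),
        dtypes={"token": "string"},
    )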