maxframe 1.0.0rc3-cp37-cp37m-win_amd64.whl → 1.0.0rc4-cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (57)
  1. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +1 -0
  3. maxframe/config/config.py +13 -1
  4. maxframe/conftest.py +43 -12
  5. maxframe/core/entity/executable.py +1 -1
  6. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  7. maxframe/dataframe/arithmetic/docstring.py +26 -2
  8. maxframe/dataframe/arithmetic/equal.py +4 -2
  9. maxframe/dataframe/arithmetic/greater.py +4 -2
  10. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  11. maxframe/dataframe/arithmetic/less.py +2 -2
  12. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  13. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  14. maxframe/dataframe/core.py +2 -0
  15. maxframe/dataframe/datasource/read_odps_query.py +66 -7
  16. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  17. maxframe/dataframe/datasource/tests/test_datasource.py +35 -6
  18. maxframe/dataframe/datastore/to_odps.py +7 -0
  19. maxframe/dataframe/extensions/__init__.py +3 -0
  20. maxframe/dataframe/extensions/flatmap.py +326 -0
  21. maxframe/dataframe/extensions/tests/test_extensions.py +62 -1
  22. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  23. maxframe/dataframe/indexing/rename.py +11 -0
  24. maxframe/dataframe/initializer.py +11 -1
  25. maxframe/dataframe/misc/drop_duplicates.py +18 -1
  26. maxframe/dataframe/tests/test_initializer.py +33 -2
  27. maxframe/io/odpsio/schema.py +5 -3
  28. maxframe/io/odpsio/tableio.py +44 -38
  29. maxframe/io/odpsio/tests/test_schema.py +0 -4
  30. maxframe/io/odpsio/volumeio.py +9 -3
  31. maxframe/learn/contrib/__init__.py +2 -1
  32. maxframe/learn/contrib/graph/__init__.py +15 -0
  33. maxframe/learn/contrib/graph/connected_components.py +215 -0
  34. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  35. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  36. maxframe/learn/contrib/xgboost/classifier.py +3 -3
  37. maxframe/learn/contrib/xgboost/predict.py +8 -39
  38. maxframe/learn/contrib/xgboost/train.py +4 -3
  39. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  40. maxframe/opcodes.py +3 -0
  41. maxframe/protocol.py +6 -1
  42. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  43. maxframe/session.py +9 -2
  44. maxframe/tensor/indexing/getitem.py +2 -0
  45. maxframe/tensor/merge/concatenate.py +23 -20
  46. maxframe/tensor/merge/vstack.py +5 -1
  47. maxframe/tensor/misc/transpose.py +1 -1
  48. maxframe/utils.py +34 -12
  49. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/METADATA +1 -1
  50. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/RECORD +57 -52
  51. maxframe_client/fetcher.py +10 -8
  52. maxframe_client/session/consts.py +3 -0
  53. maxframe_client/session/odps.py +84 -13
  54. maxframe_client/session/task.py +58 -20
  55. maxframe_client/tests/test_session.py +14 -2
  56. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/WHEEL +0 -0
  57. {maxframe-1.0.0rc3.dist-info → maxframe-1.0.0rc4.dist-info}/top_level.txt +0 -0
maxframe/_utils.cp37-win_amd64.pyd CHANGED (binary file, contents not shown)
maxframe/codegen.py CHANGED
@@ -347,6 +347,7 @@ BUILTIN_ENGINE_SPE = "SPE"
 BUILTIN_ENGINE_MCSQL = "MCSQL"
 
 FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
+ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
 
 
 class BigDagCodeGenerator(metaclass=abc.ABCMeta):
maxframe/config/config.py CHANGED
@@ -343,6 +343,9 @@ default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remo
 default_options.register_option(
     "sql.generate_comments", True, validator=is_bool, remote=True
 )
+default_options.register_option(
+    "sql.auto_use_common_image", True, validator=is_bool, remote=True
+)
 default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
 
 default_options.register_option("is_production", False, validator=is_bool, remote=True)
@@ -371,13 +374,22 @@ default_options.register_option(
     validator=is_numeric,
     remote=True,
 )
+default_options.register_option(
+    "session.quota_name", None, validator=is_null | is_string, remote=True
+)
+default_options.register_option(
+    "session.enable_schema", None, validator=is_null | is_bool, remote=True
+)
+default_options.register_option(
+    "session.default_schema", None, validator=is_null | is_string, remote=True
+)
 default_options.register_option(
     "session.upload_batch_size",
     _DEFAULT_UPLOAD_BATCH_SIZE,
     validator=is_integer,
 )
 default_options.register_option(
-    "session.table_lifecycle", None, validator=is_null | is_integer
+    "session.table_lifecycle", None, validator=is_null | is_integer, remote=True
 )
 default_options.register_option(
     "session.temp_table_lifecycle",
maxframe/conftest.py CHANGED
@@ -14,10 +14,11 @@
 
 import faulthandler
 import os
-from configparser import ConfigParser, NoOptionError
+from configparser import ConfigParser, NoOptionError, NoSectionError
 
 import pytest
 from odps import ODPS
+from odps.accounts import BearerTokenAccount
 
 from .config import options
 
@@ -34,12 +35,23 @@ def test_config():
     return config
 
 
-@pytest.fixture(scope="session", autouse=True)
-def odps_envs(test_config):
-    access_id = test_config.get("odps", "access_id")
-    secret_access_key = test_config.get("odps", "secret_access_key")
-    project = test_config.get("odps", "project")
-    endpoint = test_config.get("odps", "endpoint")
+def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
+    try:
+        access_id = test_config.get(section_name, "access_id")
+    except NoOptionError:
+        access_id = test_config.get("odps", "access_id")
+    try:
+        secret_access_key = test_config.get(section_name, "secret_access_key")
+    except NoOptionError:
+        secret_access_key = test_config.get("odps", "secret_access_key")
+    try:
+        project = test_config.get(section_name, "project")
+    except NoOptionError:
+        project = test_config.get("odps", "project")
+    try:
+        endpoint = test_config.get(section_name, "endpoint")
+    except NoOptionError:
+        endpoint = test_config.get("odps", "endpoint")
     try:
         tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
     except NoOptionError:
@@ -55,12 +67,31 @@ def odps_envs(test_config):
         ],
     }
     token = entry.get_project().generate_auth_token(policy, "bearer", 5)
+    return ODPS(
+        account=BearerTokenAccount(token, 5),
+        project=project,
+        endpoint=endpoint,
+        tunnel_endpoint=tunnel_endpoint,
+    )
+
+
+@pytest.fixture(scope="session")
+def odps_with_schema(test_config):
+    try:
+        return _get_odps_env(test_config, "odps_with_schema")
+    except NoSectionError:
+        pytest.skip("Need to specify odps_with_schema section in test.conf")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def odps_envs(test_config):
+    entry = _get_odps_env(test_config, "odps")
 
-    os.environ["ODPS_BEARER_TOKEN"] = token
-    os.environ["ODPS_PROJECT_NAME"] = project
-    os.environ["ODPS_ENDPOINT"] = endpoint
-    if tunnel_endpoint:
-        os.environ["ODPS_TUNNEL_ENDPOINT"] = tunnel_endpoint
+    os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
+    os.environ["ODPS_PROJECT_NAME"] = entry.project
+    os.environ["ODPS_ENDPOINT"] = entry.endpoint
+    if entry.tunnel_endpoint:
+        os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
 
     try:
         yield
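Note: the new odps_with_schema fixture expects a section of the same name in test.conf; _get_odps_env falls back to the [odps] section for any missing key. A hypothetical test.conf sketch:

    [odps]
    access_id = <access-id>
    secret_access_key = <secret>
    project = test_project
    endpoint = <endpoint-url>

    [odps_with_schema]
    ; only overrides are needed; other keys fall back to [odps]
    project = schema_enabled_project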
maxframe/core/entity/executable.py CHANGED
@@ -46,7 +46,7 @@ class DecrefRunner:
                 break
 
             session = session_ref()
-            if session is None:
+            if session is None or session.closed:
                 fut.set_result(None)
                 continue
             try:
maxframe/core/graph/core.cp37-win_amd64.pyd CHANGED (binary file, contents not shown)
maxframe/dataframe/arithmetic/docstring.py CHANGED
@@ -185,7 +185,6 @@ e NaN
 dtype: float64
 """
 
-# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/48
 _flex_comp_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
@@ -291,7 +290,7 @@ C True False
 
 Compare to a DataFrame of different shape.
 
->>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
+>>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
 ...                      index=['A', 'B', 'C', 'D'])
 >>> other.execute()
    revenue
@@ -306,6 +305,31 @@ A False False
 B False False
 C False True
 D False False
+
+Compare to a MultiIndex by level.
+
+>>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
+...                               'revenue': [100, 250, 300, 200, 175, 225]}},
+...                              index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
+...                                     ['A', 'B', 'C', 'A', 'B', 'C']])
+>>> df_multindex.execute()
+      cost  revenue
+Q1 A   250      100
+   B   150      250
+   C   100      300
+Q2 A   150      200
+   B   300      175
+   C   220      225
+
+>>> df.le(df_multindex, level=1).execute()
+       cost  revenue
+Q1 A   True     True
+   B   True     True
+   C   True     True
+Q2 A  False     True
+   B   True    False
+   C   True    False
+
 """
 
 
maxframe/dataframe/arithmetic/equal.py CHANGED
@@ -51,6 +51,8 @@ dtype: bool
 
 
 @bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
-def eq(df, other, axis="columns", level=None):
-    op = DataFrameEqual(axis=axis, level=level, lhs=df, rhs=other)
+def eq(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
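Note: the same fill_value keyword lands on all six flexible comparison wrappers (eq here; ne, lt, le, gt, ge below). A minimal usage sketch, assuming pandas-style semantics where fill_value substitutes for missing values before the comparison (data is illustrative):

    import maxframe.dataframe as md

    left = md.DataFrame({"a": [1.0, None, 3.0]})
    right = md.DataFrame({"a": [1.0, 2.0, None]})

    # NaNs on either side are replaced with 0 before comparing
    left.eq(right, fill_value=0).execute()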
maxframe/dataframe/arithmetic/greater.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
 
 
 @bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
-def gt(df, other, axis="columns", level=None):
-    op = DataFrameGreater(axis=axis, level=level, lhs=df, rhs=other)
+def gt(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameGreater(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/arithmetic/greater_equal.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
 
 
 @bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
-def ge(df, other, axis="columns", level=None):
-    op = DataFrameGreaterEqual(axis=axis, level=level, lhs=df, rhs=other)
+def ge(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameGreaterEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/arithmetic/less.py CHANGED
@@ -52,6 +52,6 @@ dtype: bool
 
 
 @bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
-def lt(df, other, axis="columns", level=None):
-    op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
+def lt(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
     return op(df, other)
maxframe/dataframe/arithmetic/less_equal.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
 
 
 @bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
-def le(df, other, axis="columns", level=None):
-    op = DataFrameLessEqual(axis=axis, level=level, lhs=df, rhs=other)
+def le(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameLessEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/arithmetic/not_equal.py CHANGED
@@ -51,6 +51,8 @@ dtype: bool
 
 
 @bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
-def ne(df, other, axis="columns", level=None):
-    op = DataFrameNotEqual(axis=axis, level=level, lhs=df, rhs=other)
+def ne(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameNotEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)
maxframe/dataframe/core.py CHANGED
@@ -1666,6 +1666,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
             raise NotImplementedError
 
         corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
+        if corner_data is None:
+            return
 
         buf = StringIO()
         max_rows = pd.get_option("display.max_rows")
maxframe/dataframe/datasource/read_odps_query.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import dataclasses
+import logging
 import re
 from typing import Dict, List, Optional, Tuple, Union
 
@@ -22,12 +23,14 @@ from odps import ODPS
 from odps.types import Column, OdpsSchema, validate_data_type
 
 from ... import opcodes
+from ...config import options
 from ...core import OutputType
 from ...core.graph import DAG
 from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
     AnyField,
     BoolField,
+    DictField,
     FieldTypes,
     Int64Field,
     ListField,
@@ -37,6 +40,10 @@ from ...serialization.serializables import (
 from ..utils import parse_index
 from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
 
+logger = logging.getLogger(__name__)
+
+_DEFAULT_ANONYMOUS_COL_PREFIX = "_anon_col_"
+
 _EXPLAIN_DEPENDS_REGEX = re.compile(r"([^\s]+) depends on: ([^\n]+)")
 _EXPLAIN_JOB_REGEX = re.compile(r"(\S+) is root job")
 _EXPLAIN_TASKS_HEADER_REGEX = re.compile(r"In Job ([^:]+):")
@@ -46,8 +53,11 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
     r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
     re.MULTILINE,
 )
-_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
-_ANONYMOUS_COL_REGEX = re.compile(r"^_c\d+$")
+_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^\n]+)\)(?:| AS ([^ ]+))(?:\n|$)")
+_ANONYMOUS_COL_REGEX = re.compile(r"^_c(\d+)$")
+
+_SIMPLE_SCHEMA_COLS_REGEX = re.compile(r"SELECT (([^:]+:[^, ]+[, ]*)+)FROM")
+_SIMPLE_SCHEMA_COL_REGEX = re.compile(r"([^\.]+):([^, ]+)")
 
 
 @dataclasses.dataclass
@@ -152,7 +162,7 @@ def _resolve_task_sector(job_name: str, sector: str) -> TaskSector:
     return TaskSector(job_name, task_name, out_target, schemas)
 
 
-def _parse_explained_schema(explain_string: str) -> OdpsSchema:
+def _parse_full_explain(explain_string: str) -> OdpsSchema:
     sectors = _split_explain_string(explain_string)
     jobs_sector = tasks_sector = None
 
@@ -191,6 +201,25 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
     return OdpsSchema(cols)
 
 
+def _parse_simple_explain(explain_string: str) -> OdpsSchema:
+    fields_match = _SIMPLE_SCHEMA_COLS_REGEX.search(explain_string)
+    if not fields_match:
+        raise ValueError("Cannot detect output table schema")
+
+    fields_str = fields_match.group(1)
+    cols = []
+    for field, type_name in _SIMPLE_SCHEMA_COL_REGEX.findall(fields_str):
+        cols.append(Column(field, validate_data_type(type_name)))
+    return OdpsSchema(cols)
+
+
+def _parse_explained_schema(explain_string: str) -> OdpsSchema:
+    if explain_string.startswith("AdhocSink"):
+        return _parse_simple_explain(explain_string)
+    else:
+        return _parse_full_explain(explain_string)
+
+
 class DataFrameReadODPSQuery(
     IncrementalIndexDatasource,
     ColumnPruneSupportedDataSourceMixin,
@@ -205,6 +234,7 @@ class DataFrameReadODPSQuery(
     string_as_binary = BoolField("string_as_binary", default=None)
     index_columns = ListField("index_columns", FieldTypes.string, default=None)
     index_dtypes = SeriesField("index_dtypes", default=None)
+    column_renames = DictField("column_renames", default=None)
 
     def get_columns(self):
         return self.columns
@@ -246,6 +276,8 @@ def read_odps_query(
     odps_entry: ODPS = None,
     index_col: Union[None, str, List[str]] = None,
     string_as_binary: bool = None,
+    sql_hints: Dict[str, str] = None,
+    anonymous_col_prefix: str = _DEFAULT_ANONYMOUS_COL_PREFIX,
     **kw,
 ):
     """
@@ -260,25 +292,51 @@ def read_odps_query(
         MaxCompute SQL statement.
     index_col: Union[None, str, List[str]]
         Columns to be specified as indexes.
+    string_as_binary: bool, optional
+        Whether to convert string columns to binary.
+    sql_hints: Dict[str, str], optional
+        User specified SQL hints.
+    anonymous_col_prefix: str, optional
+        Prefix for anonymous columns, '_anon_col_' by default.
 
     Returns
     -------
     result: DataFrame
         DataFrame read from MaxCompute (ODPS) table
     """
+    hints = options.sql.settings.copy() or {}
+    if sql_hints:
+        hints.update(sql_hints)
+
     odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+
+    if options.session.enable_schema or odps_entry.is_schema_namespace_enabled():
+        hints["odps.namespace.schema"] = "true"
+        hints["odps.sql.allow.namespace.schema"] = "true"
+
+    # fixme workaround for multi-stage split process
+    hints["odps.sql.object.table.split.by.object.size.enabled"] = "false"
+
     if odps_entry is None:
         raise ValueError("Missing odps_entry parameter")
-    inst = odps_entry.execute_sql(f"EXPLAIN {query}")
+    inst = odps_entry.execute_sql(f"EXPLAIN {query}", hints=hints)
+    logger.debug("Explain instance ID: %s", inst.id)
    explain_str = list(inst.get_task_results().values())[0]
 
     odps_schema = _parse_explained_schema(explain_str)
 
+    new_columns = []
+    col_renames = {}
     for col in odps_schema.columns:
-        if _ANONYMOUS_COL_REGEX.match(col.name) and col.name not in query:
-            raise ValueError("Need to specify names for all columns in SELECT clause.")
+        anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
+        if anon_match and col.name not in query:
+            new_name = anonymous_col_prefix + anon_match.group(1)
+            col_renames[col.name] = new_name
+            new_columns.append(Column(new_name, col.type))
+        else:
+            new_columns.append(col)
 
-    dtypes = odps_schema_to_pandas_dtypes(odps_schema)
+    dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
 
     if not index_col:
         index_dtypes = None
@@ -301,5 +359,6 @@ def read_odps_query(
         string_as_binary=string_as_binary,
         index_columns=index_col,
         index_dtypes=index_dtypes,
+        column_renames=col_renames,
     )
     return op(chunk_bytes=chunk_bytes, chunk_size=chunk_size)
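Net effect: a query whose SELECT list contains unnamed expressions no longer raises "Need to specify names for all columns"; anonymous columns such as _c1 are renamed with the configurable prefix, and the mapping travels on the operator as column_renames. A hedged usage sketch (table name, query, and hint key are illustrative, not taken from this diff):

    import maxframe.dataframe as md

    # "col2 + col3" has no alias, so its output column (_c1) would surface
    # as "_anon_col_1" by default, or "expr_1" with the prefix below
    df = md.read_odps_query(
        "SELECT col1, col2 + col3 FROM my_table",   # hypothetical query
        sql_hints={"some.odps.hint": "value"},      # hypothetical hint, merged over options.sql.settings
        anonymous_col_prefix="expr_",
    )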
maxframe/dataframe/datasource/read_odps_table.py CHANGED
@@ -22,6 +22,7 @@ from odps.models import Table
 from odps.utils import to_timestamp
 
 from ... import opcodes
+from ...config import options
 from ...core import OutputType
 from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
@@ -167,12 +168,13 @@ def read_odps_table(
         DataFrame read from MaxCompute (ODPS) table
     """
     odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+    schema = options.session.default_schema or odps_entry.schema
     if odps_entry is None:
         raise ValueError("Missing odps_entry parameter")
     if isinstance(table_name, Table):
         table = table_name
     else:
-        table = odps_entry.get_table(table_name)
+        table = odps_entry.get_table(table_name, schema=schema)
 
     if not table.table_schema.partitions and (
         partitions is not None or append_partitions
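A short sketch of how the new schema resolution interacts with options (names are illustrative):

    import maxframe.dataframe as md
    from maxframe.config import options

    options.session.default_schema = "my_schema"  # hypothetical schema name

    # the table lookup now behaves like
    # odps_entry.get_table("my_table", schema="my_schema")
    df = md.read_odps_table("my_table")           # hypothetical table name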
maxframe/dataframe/datasource/tests/test_datasource.py CHANGED
@@ -19,6 +19,7 @@ import numpy as np
 import pandas as pd
 import pytest
 from odps import ODPS
+from odps import types as odps_types
 
 from .... import tensor as mt
 from ....core import OutputType
@@ -35,7 +36,7 @@ from ..from_tensor import (
 )
 from ..index import from_pandas as from_pandas_index
 from ..index import from_tileable
-from ..read_odps_query import ColumnSchema, _resolve_task_sector
+from ..read_odps_query import ColumnSchema, _parse_simple_explain, _resolve_task_sector
 from ..series import from_pandas as from_pandas_series
 
 ray = lazy_import("ray")
@@ -329,10 +330,6 @@ def test_from_odps_query():
         read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
     assert "instant query" in err_info.value.args[0]
 
-    with pytest.raises(ValueError) as err_info:
-        read_odps_query(f"SELECT col1, col2 + col3 FROM {table1_name}")
-    assert "names" in err_info.value.args[0]
-
     query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
     df = read_odps_query(query1)
     assert df.op.query == query1
@@ -401,7 +398,9 @@ def test_date_range():
 
 
 def test_resolve_task_sector():
-    input_path = os.path.join(os.path.dirname(__file__), "test-data", "task-input.txt")
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-full.txt"
+    )
     with open(input_path, "r") as f:
         sector = f.read()
     actual_sector = _resolve_task_sector("job0", sector)
@@ -413,3 +412,33 @@ def test_resolve_task_sector():
     assert actual_sector.schema[0] == ColumnSchema("unnamed: 0", "bigint", "")
     assert actual_sector.schema[1] == ColumnSchema("id", "bigint", "id_alias")
     assert actual_sector.schema[2] == ColumnSchema("listing_url", "string", "")
+
+
+def test_resolve_task_odps2():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-odps2.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+    actual_sector = _resolve_task_sector("job0", sector)
+
+    assert actual_sector.job_name == "job0"
+    assert actual_sector.task_name == "M1"
+    assert actual_sector.output_target == "Screen"
+    assert len(actual_sector.schema) == 2
+    assert actual_sector.schema[0] == ColumnSchema("key", "varchar(2048)", "")
+    assert actual_sector.schema[1] == ColumnSchema("data", "binary", "")
+
+
+def test_resolve_simple_explain():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-simple.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+
+    schema = _parse_simple_explain(sector)
+    assert schema.columns[0].name == "memberid"
+    assert schema.columns[0].type == odps_types.string
+    assert schema.columns[1].name == "createdate"
+    assert schema.columns[1].type == odps_types.bigint
maxframe/dataframe/datastore/to_odps.py CHANGED
@@ -17,6 +17,7 @@
 import logging
 from typing import List, Optional, Union
 
+from odps import ODPS
 from odps.models import Table as ODPSTable
 from odps.types import PartitionSpec
 
@@ -136,8 +137,14 @@ def to_odps_table(
     --------
 
     """
+    odps_entry = ODPS.from_global() or ODPS.from_environments()
     if isinstance(table, ODPSTable):
         table = table.full_table_name
+    elif options.session.enable_schema and "." not in table:
+        default_schema = (
+            options.session.default_schema or odps_entry.schema or "default"
+        )
+        table = default_schema + "." + table
 
     if isinstance(index_label, str):
         index_label = [index_label]
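With schema support enabled, a bare table name passed to to_odps_table is now qualified with the default schema. Sketch (assuming an existing DataFrame df and a global ODPS entry; the table name is illustrative):

    from maxframe.config import options

    options.session.enable_schema = True
    # with no default_schema set and none on the entry, "default" is used,
    # so the data lands in "default.result_table"
    df.to_odps_table("result_table").execute()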
maxframe/dataframe/extensions/__init__.py CHANGED
@@ -18,6 +18,7 @@ from .accessor import (
     IndexMaxFrameAccessor,
     SeriesMaxFrameAccessor,
 )
+from .flatmap import df_flatmap, series_flatmap
 from .reshuffle import DataFrameReshuffle, df_reshuffle
 
 
@@ -25,6 +26,8 @@ def _install():
     from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
 
     DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
+    DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
+    SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
 
     if DataFrameMaxFrameAccessor._api_count:
         for t in DATAFRAME_TYPE:
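flatmap.py itself (326 added lines) is not among the hunks shown here, so only the registration is certain: flatmap becomes available on the MaxFrame accessor of both DataFrame and Series. The call below is a hypothetical sketch of where the new API hangs; the real signature lives in maxframe/dataframe/extensions/flatmap.py:

    import maxframe.dataframe as md

    df = md.DataFrame({"text": ["a b", "c d e"]})

    # hypothetical: expand each row into zero or more output rows;
    # the callable and dtypes arguments are assumptions, not confirmed by this diff
    df.mf.flatmap(
        lambda row: row["text"].split(" "),
        dtypes={"token": "string"},
    )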