maxframe-1.0.0rc3-cp38-cp38-win32.whl → maxframe-1.1.0-cp38-cp38-win32.whl
Note: this release has been flagged as potentially problematic in its registry.
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/codegen.py +1 -0
- maxframe/config/config.py +16 -1
- maxframe/conftest.py +52 -14
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/docstring.py +26 -2
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +26 -2
- maxframe/dataframe/datasource/read_odps_query.py +116 -28
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
- maxframe/dataframe/datastore/to_odps.py +7 -0
- maxframe/dataframe/extensions/__init__.py +8 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +314 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +11 -0
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +23 -2
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/arrow.py +30 -2
- maxframe/io/odpsio/schema.py +28 -8
- maxframe/io/odpsio/tableio.py +55 -133
- maxframe/io/odpsio/tests/test_schema.py +40 -4
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +36 -6
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +3 -3
- maxframe/learn/contrib/xgboost/predict.py +8 -39
- maxframe/learn/contrib/xgboost/train.py +4 -3
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +10 -1
- maxframe/protocol.py +6 -1
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +24 -5
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +8 -1
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +19 -7
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/concatenate.py +23 -20
- maxframe/tensor/merge/vstack.py +5 -1
- maxframe/tensor/misc/transpose.py +1 -1
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +64 -14
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +112 -96
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +28 -10
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/odps.py +104 -20
- maxframe_client/session/task.py +42 -26
- maxframe_client/session/tests/test_task.py +0 -4
- maxframe_client/tests/test_session.py +44 -12
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
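Among the user-facing additions in this list, maxframe/dataframe/reduction/median.py introduces a median reduction. A minimal usage sketch, assuming it mirrors the pandas-compatible reduction API used elsewhere in the package (the data below is illustrative):

    import pandas as pd
    import maxframe.dataframe as md

    df = md.DataFrame(pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]}))
    # median() is assumed to follow pandas' reduction semantics
    print(df.median().execute())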
maxframe/dataframe/datasource/read_odps_query.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import dataclasses
+import logging
 import re
 from typing import Dict, List, Optional, Tuple, Union
 
@@ -22,12 +23,14 @@ from odps import ODPS
 from odps.types import Column, OdpsSchema, validate_data_type
 
 from ... import opcodes
+from ...config import options
 from ...core import OutputType
 from ...core.graph import DAG
 from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
     AnyField,
     BoolField,
+    DictField,
     FieldTypes,
     Int64Field,
     ListField,
@@ -37,6 +40,10 @@ from ...serialization.serializables import (
 from ..utils import parse_index
 from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
 
+logger = logging.getLogger(__name__)
+
+_DEFAULT_ANONYMOUS_COL_PREFIX = "_anon_col_"
+
 _EXPLAIN_DEPENDS_REGEX = re.compile(r"([^\s]+) depends on: ([^\n]+)")
 _EXPLAIN_JOB_REGEX = re.compile(r"(\S+) is root job")
 _EXPLAIN_TASKS_HEADER_REGEX = re.compile(r"In Job ([^:]+):")
@@ -46,8 +53,11 @@ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
     r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
     re.MULTILINE,
 )
-_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([
-_ANONYMOUS_COL_REGEX = re.compile(r"^_c\d
+_EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^\n]+)\)(?:| AS ([^ ]+))(?:\n|$)")
+_ANONYMOUS_COL_REGEX = re.compile(r"^_c(\d+)$")
+
+_SIMPLE_SCHEMA_COLS_REGEX = re.compile(r"SELECT (([^:]+:[^, ]+[, ]*)+)FROM")
+_SIMPLE_SCHEMA_COL_REGEX = re.compile(r"([^ \.\)]+):([^ ]+)")
 
 
 @dataclasses.dataclass
@@ -152,7 +162,7 @@ def _resolve_task_sector(job_name: str, sector: str) -> TaskSector:
     return TaskSector(job_name, task_name, out_target, schemas)
 
 
-def _parse_explained_schema(explain_string: str) -> OdpsSchema:
+def _parse_full_explain(explain_string: str) -> OdpsSchema:
     sectors = _split_explain_string(explain_string)
     jobs_sector = tasks_sector = None
 
@@ -170,27 +180,53 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
 
     job_dag = jobs_sector.build_dag()
     indep_job_names = list(job_dag.iter_indep(reverse=True))
-
-
-
-
-
-
-
+    schema_signatures = dict()
+    for job_name in indep_job_names:
+        tasks_sector = jobs_sector.jobs[job_name]
+        task_dag = tasks_sector.build_dag()
+        indep_task_names = list(task_dag.iter_indep(reverse=True))
+        for task_name in indep_task_names:
+            task_sector = tasks_sector.tasks[task_name]
+            if not task_sector.schema:  # pragma: no cover
+                raise ValueError("Cannot detect output schema")
+            if task_sector.output_target != "Screen":
+                raise ValueError("The SQL statement should be an instant query")
+            sig_tuples = sorted(
+                [
+                    (c.column_alias or c.column_name, c.column_type)
+                    for c in task_sector.schema
+                ]
+            )
+            schema_signatures[hash(tuple(sig_tuples))] = task_sector.schema
+    if len(schema_signatures) != 1:
         raise ValueError("Only one final task is allowed in SQL statement")
-
-    task_sector = tasks_sector.tasks[indep_task_names[0]]
-    if not task_sector.schema:  # pragma: no cover
-        raise ValueError("Cannot detect output schema")
-    if task_sector.output_target != "Screen":
-        raise ValueError("The SQL statement should be an instant query")
+    schema = list(schema_signatures.values())[0]
     cols = [
         Column(c.column_alias or c.column_name, validate_data_type(c.column_type))
-        for c in task_sector.schema
+        for c in schema
     ]
     return OdpsSchema(cols)
 
 
+def _parse_simple_explain(explain_string: str) -> OdpsSchema:
+    fields_match = _SIMPLE_SCHEMA_COLS_REGEX.search(explain_string)
+    if not fields_match:
+        raise ValueError("Cannot detect output table schema")
+
+    fields_str = fields_match.group(1)
+    cols = []
+    for field, type_name in _SIMPLE_SCHEMA_COL_REGEX.findall(fields_str):
+        cols.append(Column(field, validate_data_type(type_name.rstrip(","))))
+    return OdpsSchema(cols)
+
+
+def _parse_explained_schema(explain_string: str) -> OdpsSchema:
+    if explain_string.startswith("AdhocSink"):
+        return _parse_simple_explain(explain_string)
+    else:
+        return _parse_full_explain(explain_string)
+
+
 class DataFrameReadODPSQuery(
     IncrementalIndexDatasource,
     ColumnPruneSupportedDataSourceMixin,
@@ -205,6 +241,7 @@ class DataFrameReadODPSQuery(
     string_as_binary = BoolField("string_as_binary", default=None)
     index_columns = ListField("index_columns", FieldTypes.string, default=None)
     index_dtypes = SeriesField("index_dtypes", default=None)
+    column_renames = DictField("column_renames", default=None)
 
     def get_columns(self):
         return self.columns
@@ -227,12 +264,18 @@ class DataFrameReadODPSQuery(
         )
         index_value = parse_index(idx)
 
-
+        if self.dtypes is not None:
+            columns_value = parse_index(self.dtypes.index, store_data=True)
+            shape = (np.nan, len(self.dtypes))
+        else:
+            columns_value = None
+            shape = (np.nan, np.nan)
+
         self.output_types = [OutputType.dataframe]
         return self.new_tileable(
             [],
             None,
-            shape=
+            shape=shape,
             dtypes=self.dtypes,
             index_value=index_value,
             columns_value=columns_value,
@@ -246,6 +289,9 @@ def read_odps_query(
     odps_entry: ODPS = None,
     index_col: Union[None, str, List[str]] = None,
     string_as_binary: bool = None,
+    sql_hints: Dict[str, str] = None,
+    anonymous_col_prefix: str = _DEFAULT_ANONYMOUS_COL_PREFIX,
+    skip_schema: bool = False,
     **kw,
 ):
     """
@@ -260,29 +306,70 @@ def read_odps_query(
         MaxCompute SQL statement.
     index_col: Union[None, str, List[str]]
         Columns to be specified as indexes.
+    string_as_binary: bool, optional
+        Whether to convert string columns to binary.
+    sql_hints: Dict[str, str], optional
+        User specified SQL hints.
+    anonymous_col_prefix: str, optional
+        Prefix for anonymous columns, '_anon_col_' by default.
+    skip_schema: bool, optional
+        Skip resolving output schema before execution. Once this is configured,
+        the output DataFrame cannot be inputs of other DataFrame operators
+        before execution.
 
     Returns
    -------
     result: DataFrame
         DataFrame read from MaxCompute (ODPS) table
     """
+    hints = options.sql.settings.copy() or {}
+    if sql_hints:
+        hints.update(sql_hints)
+
     odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
-    if odps_entry is None:
-        raise ValueError("Missing odps_entry parameter")
-    inst = odps_entry.execute_sql(f"EXPLAIN {query}")
-    explain_str = list(inst.get_task_results().values())[0]
 
-
+    if options.session.enable_schema or odps_entry.is_schema_namespace_enabled():
+        hints["odps.namespace.schema"] = "true"
+        hints["odps.sql.allow.namespace.schema"] = "true"
+
+    # fixme workaround for multi-stage split process
+    hints["odps.sql.object.table.split.by.object.size.enabled"] = "false"
 
-
-
-        raise ValueError("Need to specify names for all columns in SELECT clause.")
+    if odps_entry is None:
+        raise ValueError("Missing odps_entry parameter")
 
-
+    col_renames = {}
+    if not skip_schema:
+        inst = odps_entry.execute_sql(f"EXPLAIN {query}", hints=hints)
+        logger.debug("Explain instance ID: %s", inst.id)
+        explain_str = list(inst.get_task_results().values())[0]
+
+        try:
+            odps_schema = _parse_explained_schema(explain_str)
+        except ValueError as ex:
+            exc = ValueError(str(ex) + "\nExplain instance ID: " + inst.id)
+            raise exc.with_traceback(ex.__traceback__) from None
+
+        new_columns = []
+        for col in odps_schema.columns:
+            anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
+            if anon_match and col.name not in query:
+                new_name = anonymous_col_prefix + anon_match.group(1)
+                col_renames[col.name] = new_name
+                new_columns.append(Column(new_name, col.type))
+            else:
+                new_columns.append(col)
+
+        dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
+    else:
+        dtypes = None
 
     if not index_col:
         index_dtypes = None
     else:
+        if dtypes is None:
+            raise ValueError("Cannot configure index_col when skip_schema is True")
+
         if isinstance(index_col, str):
            index_col = [index_col]
        index_col_set = set(index_col)
@@ -301,5 +388,6 @@ def read_odps_query(
         string_as_binary=string_as_binary,
         index_columns=index_col,
         index_dtypes=index_dtypes,
+        column_renames=col_renames,
     )
     return op(chunk_bytes=chunk_bytes, chunk_size=chunk_size)
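The new read_odps_query parameters shown above can be combined; a hedged usage sketch (the project and table names are illustrative, and the hint key is only an example of a pass-through setting):

    from maxframe.dataframe import read_odps_query

    # pass extra SQL hints through to the EXPLAIN and execution stages
    df = read_odps_query(
        "SELECT * FROM my_project.my_table WHERE col1 > 10",
        sql_hints={"odps.sql.type.system.odps2": "true"},
    )

    # skip the schema-resolving EXPLAIN round trip; dtypes stay unknown until
    # execution, so the result cannot feed other DataFrame operators beforehand
    lazy_df = read_odps_query(
        "SELECT * FROM my_project.my_table",
        skip_schema=True,
    )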
maxframe/dataframe/datasource/read_odps_table.py

@@ -22,6 +22,7 @@ from odps.models import Table
 from odps.utils import to_timestamp
 
 from ... import opcodes
+from ...config import options
 from ...core import OutputType
 from ...io.odpsio import odps_schema_to_pandas_dtypes
 from ...serialization.serializables import (
@@ -167,12 +168,13 @@ def read_odps_table(
         DataFrame read from MaxCompute (ODPS) table
     """
     odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+    schema = options.session.default_schema or odps_entry.schema
     if odps_entry is None:
         raise ValueError("Missing odps_entry parameter")
     if isinstance(table_name, Table):
         table = table_name
     else:
-        table = odps_entry.get_table(table_name)
+        table = odps_entry.get_table(table_name, schema=schema)
 
     if not table.table_schema.partitions and (
         partitions is not None or append_partitions
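read_odps_table now resolves the target table against options.session.default_schema, falling back to the entry's own schema. A sketch, assuming options is importable from the package root as configured in maxframe/config (schema and table names are illustrative):

    from maxframe import options
    from maxframe.dataframe import read_odps_table

    options.session.default_schema = "my_schema"  # illustrative
    df = read_odps_table("my_table", index_col="id")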
maxframe/dataframe/datasource/tests/test_datasource.py

@@ -13,19 +13,28 @@
 # limitations under the License.
 
 import os
+import uuid
 from collections import OrderedDict
 
 import numpy as np
 import pandas as pd
 import pytest
 from odps import ODPS
+from odps import types as odps_types
 
 from .... import tensor as mt
 from ....core import OutputType
 from ....tests.utils import tn
 from ....utils import lazy_import
 from ... import read_odps_query, read_odps_table
-from ...core import
+from ...core import (
+    DatetimeIndex,
+    Float64Index,
+    Index,
+    IndexValue,
+    Int64Index,
+    MultiIndex,
+)
 from ..dataframe import from_pandas as from_pandas_df
 from ..date_range import date_range
 from ..from_tensor import (
@@ -35,7 +44,12 @@ from ..from_tensor import (
 )
 from ..index import from_pandas as from_pandas_index
 from ..index import from_tileable
-from ..read_odps_query import
+from ..read_odps_query import (
+    ColumnSchema,
+    _parse_full_explain,
+    _parse_simple_explain,
+    _resolve_task_sector,
+)
 from ..series import from_pandas as from_pandas_series
 
 ray = lazy_import("ray")
@@ -113,18 +127,22 @@ def test_from_tileable_index():
 
     for o in [df, df[0]]:
         index = o.index
-        assert isinstance(index, Int64Index)
+        assert isinstance(index, (Index, Int64Index))
         assert index.dtype == np.int64
         assert index.name == pd_df.index.name
-        assert isinstance(
+        assert isinstance(
+            index.index_value.value, (IndexValue.Int64Index, IndexValue.Index)
+        )
 
     t = mt.random.rand(10, chunk_size=6)
     index = from_tileable(t, name="new_name")
 
-    assert isinstance(index, Float64Index)
+    assert isinstance(index, (Index, Float64Index))
     assert index.dtype == np.float64
     assert index.name == "new_name"
-    assert isinstance(
+    assert isinstance(
+        index.index_value.value, (IndexValue.Float64Index, IndexValue.Index)
+    )
 
 
 def test_from_tensor():
@@ -326,13 +344,12 @@ def test_from_odps_query():
     odps_entry.write_table(test_table2, [["A", 10, 4.5]])
 
     with pytest.raises(ValueError) as err_info:
-        read_odps_query(
+        read_odps_query(
+            f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
+            f"AS SELECT * FROM {table1_name}"
+        )
     assert "instant query" in err_info.value.args[0]
 
-    with pytest.raises(ValueError) as err_info:
-        read_odps_query(f"SELECT col1, col2 + col3 FROM {table1_name}")
-    assert "names" in err_info.value.args[0]
-
     query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
     df = read_odps_query(query1)
     assert df.op.query == query1
@@ -346,6 +363,10 @@ def test_from_odps_query():
         ),
     )
 
+    df = read_odps_query(query1, skip_schema=True)
+    assert df.dtypes is None
+    assert df.columns_value is None
+
     df = read_odps_query(query1, index_col="col1")
     assert df.op.query == query1
     assert df.index_value.name == "col1"
@@ -401,7 +422,9 @@ def test_date_range():
 
 
 def test_resolve_task_sector():
-    input_path = os.path.join(
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-full.txt"
+    )
     with open(input_path, "r") as f:
         sector = f.read()
     actual_sector = _resolve_task_sector("job0", sector)
@@ -413,3 +436,61 @@ def test_resolve_task_sector():
     assert actual_sector.schema[0] == ColumnSchema("unnamed: 0", "bigint", "")
     assert actual_sector.schema[1] == ColumnSchema("id", "bigint", "id_alias")
     assert actual_sector.schema[2] == ColumnSchema("listing_url", "string", "")
+
+
+def test_resolve_task_odps2():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-odps2.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+    actual_sector = _resolve_task_sector("job0", sector)
+
+    assert actual_sector.job_name == "job0"
+    assert actual_sector.task_name == "M1"
+    assert actual_sector.output_target == "Screen"
+    assert len(actual_sector.schema) == 2
+    assert actual_sector.schema[0] == ColumnSchema("key", "varchar(2048)", "")
+    assert actual_sector.schema[1] == ColumnSchema("data", "binary", "")
+
+
+def test_resolve_simple_explain():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-simple.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+
+    schema = _parse_simple_explain(sector)
+    assert schema.columns[0].name == "memberid"
+    assert schema.columns[0].type == odps_types.string
+    assert schema.columns[1].name == "createdate"
+    assert schema.columns[1].type == odps_types.bigint
+
+
+def test_resolve_conditional():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-multi-cond.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+
+    expected_col_types = {
+        "cs1": "string",
+        "cs2": "string",
+        "ci1": "bigint",
+        "cs3": "string",
+        "cs4": "string",
+        "cs5": "string",
+        "cs6": "string",
+        "cs7": "string",
+        "cs8": "string",
+        "ci2": "int",
+        "ci3": "bigint",
+        "cs9": "string",
+    }
+
+    schema = _parse_full_explain(sector)
+    for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
+        assert col.name == exp_nm
+        assert col.type == odps_types.validate_data_type(exp_tp)
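Note the removed assertion above: a SELECT containing an unaliased expression no longer raises. Per the renaming loop added to read_odps_query, columns that come back with MaxCompute's positional _c<n> names are rewritten with the anonymous-column prefix instead. A sketch of the expected behavior (table name illustrative; the _c1 name assumes MaxCompute's usual positional naming for the second column):

    # previously raised: "Need to specify names for all columns in SELECT clause."
    df = read_odps_query("SELECT col1, col2 + col3 FROM my_table")
    list(df.dtypes.index)  # expected: ["col1", "_anon_col_1"]

    # the prefix is configurable
    df = read_odps_query(
        "SELECT col1, col2 + col3 FROM my_table", anonymous_col_prefix="expr_"
    )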
maxframe/dataframe/datastore/to_odps.py

@@ -17,6 +17,7 @@
 import logging
 from typing import List, Optional, Union
 
+from odps import ODPS
 from odps.models import Table as ODPSTable
 from odps.types import PartitionSpec
 
@@ -136,8 +137,14 @@ def to_odps_table(
     --------
 
     """
+    odps_entry = ODPS.from_global() or ODPS.from_environments()
     if isinstance(table, ODPSTable):
         table = table.full_table_name
+    elif options.session.enable_schema and "." not in table:
+        default_schema = (
+            options.session.default_schema or odps_entry.schema or "default"
+        )
+        table = default_schema + "." + table
 
     if isinstance(index_label, str):
         index_label = [index_label]
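With schema namespaces enabled, to_odps_table now prefixes unqualified table names with the default schema before writing. A sketch (names illustrative; df is any MaxFrame DataFrame):

    from maxframe import options

    options.session.enable_schema = True
    options.session.default_schema = "my_schema"  # illustrative

    # "result_table" is resolved as "my_schema.result_table"
    df.to_odps_table("result_table").execute()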
maxframe/dataframe/extensions/__init__.py

@@ -18,6 +18,9 @@ from .accessor import (
     IndexMaxFrameAccessor,
     SeriesMaxFrameAccessor,
 )
+from .apply_chunk import df_apply_chunk, series_apply_chunk
+from .flatjson import series_flatjson
+from .flatmap import df_flatmap, series_flatmap
 from .reshuffle import DataFrameReshuffle, df_reshuffle
 
 
@@ -25,6 +28,11 @@ def _install():
     from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
 
     DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
+    DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
+    DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
+    SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
+    SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
+    SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
 
     if DataFrameMaxFrameAccessor._api_count:
         for t in DATAFRAME_TYPE: