PyPI - maxframe - Versions diffs - 1.0.0rc4__cp38-cp38-macosx_10_9_universal2.whl → 1.1.0__cp38-cp38-macosx_10_9_universal2.whl - Mend

maxframe 1.0.0rc4__cp38-cp38-macosx_10_9_universal2.whl → 1.1.0__cp38-cp38-macosx_10_9_universal2.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (83) hide show

maxframe/_utils.cpython-38-darwin.so +0 -0
maxframe/config/config.py +3 -0
maxframe/conftest.py +9 -2
maxframe/core/graph/core.cpython-38-darwin.so +0 -0
maxframe/core/operator/base.py +2 -0
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
maxframe/dataframe/core.py +24 -2
maxframe/dataframe/datasource/read_odps_query.py +63 -34
maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
maxframe/dataframe/extensions/__init__.py +5 -0
maxframe/dataframe/extensions/apply_chunk.py +649 -0
maxframe/dataframe/extensions/flatjson.py +131 -0
maxframe/dataframe/extensions/flatmap.py +28 -40
maxframe/dataframe/extensions/reshuffle.py +1 -1
maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
maxframe/dataframe/groupby/__init__.py +1 -0
maxframe/dataframe/groupby/aggregation.py +1 -0
maxframe/dataframe/groupby/apply.py +9 -1
maxframe/dataframe/groupby/core.py +1 -1
maxframe/dataframe/groupby/fill.py +4 -1
maxframe/dataframe/groupby/getitem.py +6 -0
maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
maxframe/dataframe/groupby/transform.py +8 -2
maxframe/dataframe/indexing/loc.py +6 -4
maxframe/dataframe/merge/__init__.py +9 -1
maxframe/dataframe/merge/concat.py +41 -31
maxframe/dataframe/merge/merge.py +1 -1
maxframe/dataframe/merge/tests/test_merge.py +3 -1
maxframe/dataframe/misc/apply.py +3 -0
maxframe/dataframe/misc/drop_duplicates.py +5 -1
maxframe/dataframe/misc/map.py +3 -1
maxframe/dataframe/misc/tests/test_misc.py +24 -2
maxframe/dataframe/misc/transform.py +22 -13
maxframe/dataframe/reduction/__init__.py +3 -0
maxframe/dataframe/reduction/aggregation.py +1 -0
maxframe/dataframe/reduction/median.py +56 -0
maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
maxframe/dataframe/statistics/quantile.py +8 -2
maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
maxframe/dataframe/tests/test_utils.py +60 -0
maxframe/dataframe/utils.py +110 -7
maxframe/dataframe/window/expanding.py +5 -3
maxframe/dataframe/window/tests/test_expanding.py +2 -2
maxframe/io/objects/tests/test_object_io.py +39 -12
maxframe/io/odpsio/arrow.py +30 -2
maxframe/io/odpsio/schema.py +23 -5
maxframe/io/odpsio/tableio.py +26 -110
maxframe/io/odpsio/tests/test_schema.py +40 -0
maxframe/io/odpsio/tests/test_tableio.py +5 -5
maxframe/io/odpsio/tests/test_volumeio.py +35 -11
maxframe/io/odpsio/volumeio.py +27 -3
maxframe/learn/contrib/__init__.py +3 -2
maxframe/learn/contrib/llm/__init__.py +16 -0
maxframe/learn/contrib/llm/core.py +54 -0
maxframe/learn/contrib/llm/models/__init__.py +14 -0
maxframe/learn/contrib/llm/models/dashscope.py +73 -0
maxframe/learn/contrib/llm/multi_modal.py +42 -0
maxframe/learn/contrib/llm/text.py +42 -0
maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
maxframe/lib/sparse/tests/test_sparse.py +15 -15
maxframe/opcodes.py +7 -1
maxframe/serialization/core.cpython-38-darwin.so +0 -0
maxframe/serialization/core.pyx +13 -1
maxframe/serialization/pandas.py +50 -20
maxframe/serialization/serializables/core.py +24 -5
maxframe/serialization/serializables/field_type.py +4 -1
maxframe/serialization/serializables/tests/test_serializable.py +8 -1
maxframe/serialization/tests/test_serial.py +2 -1
maxframe/tensor/__init__.py +19 -7
maxframe/tests/utils.py +16 -0
maxframe/udf.py +27 -0
maxframe/utils.py +36 -8
{maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
{maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/RECORD +83 -72
{maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
maxframe_client/clients/framedriver.py +4 -1
maxframe_client/fetcher.py +18 -2
maxframe_client/session/odps.py +23 -10
maxframe_client/session/task.py +2 -24
maxframe_client/session/tests/test_task.py +0 -4
maxframe_client/tests/test_session.py +30 -10
{maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0

maxframe/_utils.cpython-38-darwin.so CHANGED Viewed

Binary file

maxframe/config/config.py CHANGED Viewed

@@ -380,6 +380,9 @@ default_options.register_option(
 default_options.register_option(
     "session.enable_schema", None, validator=is_null | is_bool, remote=True
 )
+default_options.register_option(
+    "session.enable_high_availability", None, validator=is_null | is_bool, remote=True
+)
 default_options.register_option(
     "session.default_schema", None, validator=is_null | is_string, remote=True
 )

maxframe/conftest.py CHANGED Viewed

@@ -126,7 +126,14 @@ def oss_config():
         oss_rolearn = config.get("oss", "rolearn")
         options.service_role_arn = oss_rolearn
-        options.object_cache_url = f"oss://{oss_endpoint}/{oss_bucket_name}"
+        if "test" in oss_endpoint:
+            oss_svc_endpoint = oss_endpoint
+        else:
+            endpoint_parts = oss_endpoint.split(".", 1)
+            if "-internal" not in endpoint_parts[0]:
+                endpoint_parts[0] += "-internal"
+            oss_svc_endpoint = ".".join(endpoint_parts)
+        options.object_cache_url = f"oss://{oss_svc_endpoint}/{oss_bucket_name}"
         config.oss_config = (
             oss_access_id,
@@ -141,7 +148,7 @@ def oss_config():
         config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
         config.oss_rolearn = oss_rolearn
         yield config
-    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ImportError):
+    except (NoSectionError, NoOptionError, ImportError):
         return None
     finally:
         options.service_role_arn = old_role_arn

maxframe/core/graph/core.cpython-38-darwin.so CHANGED Viewed

Binary file

maxframe/core/operator/base.py CHANGED Viewed

@@ -86,6 +86,8 @@ class SchedulingHint(Serializable):
     # `gpu` indicates that if the operator should be executed on the GPU.
     gpu = BoolField("gpu", default=None)
     priority = Int32Field("priority", default=None)
+    expect_engine = StringField("expect_engine", default=None)
+    expect_resources = DictField("expect_resources", FieldTypes.string, default=None)
     @classproperty
     @lru_cache(1)

maxframe/dataframe/arithmetic/tests/test_arithmetic.py CHANGED Viewed

@@ -22,6 +22,7 @@ import pandas as pd
 import pytest
 from ....core import OperatorType
+from ....tests.utils import assert_mf_index_dtype
 from ....utils import dataslots
 from ...core import IndexValue
 from ...datasource.dataframe import from_pandas
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -370,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -403,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -433,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
     )
-    assert isinstance(df6.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df6.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -468,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -501,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -534,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -558,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
     pd.testing.assert_index_equal(
         df2.columns_value.to_pandas(), func_opts.func(data, data).columns
     )
-    assert isinstance(df2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df2.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -590,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
     pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
     pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
-    assert isinstance(result.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result.index_value.value, np.int64)
     pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
-    assert isinstance(result2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result2.index_value.value, np.int64)
     pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
-    assert isinstance(result3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result3.index_value.value, np.int64)
     pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
-    assert isinstance(result4.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result4.index_value.value, np.int64)
     pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
-    assert isinstance(result5.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result5.index_value.value, np.int64)
     if "builtin_function_or_method" not in str(type(func_opts.func)):
         # skip NotImplemented test for comparison function
@@ -679,7 +680,7 @@ def test_abs():
     pd.testing.assert_index_equal(
         df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
     )
-    assert isinstance(df2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df2.index_value.value, np.int64)
     assert df2.shape == (10, 10)
@@ -697,7 +698,7 @@ def test_not():
     pd.testing.assert_index_equal(
         df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
     )
-    assert isinstance(df2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df2.index_value.value, np.int64)
     assert df2.shape == (10, 10)

maxframe/dataframe/core.py CHANGED Viewed

@@ -142,6 +142,14 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
+        @property
+        def inferred_type(self):
+            return "floating" if self.dtype.kind == "f" else "integer"
     class RangeIndex(IndexBase):
         _name = AnyField("name")
         _slice = SliceField("slice")
@@ -243,6 +251,10 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
         @property
         def inferred_type(self):
             return "integer"
@@ -254,6 +266,10 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
         @property
         def inferred_type(self):
             return "integer"
@@ -265,6 +281,10 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
         @property
         def inferred_type(self):
             return "floating"
@@ -1514,8 +1534,7 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
         refresh_index_value(self)
         refresh_dtypes(self)
-    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
-        dtypes = table_meta.pd_column_dtypes
+    def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
         self._dtypes = dtypes
         self._columns_value = parse_index(dtypes.index, store_data=True)
         self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
@@ -1523,6 +1542,9 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
         new_shape[-1] = len(dtypes)
         self._shape = tuple(new_shape)
+    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
+        self.refresh_from_dtypes(table_meta.pd_column_dtypes)
     @property
     def dtypes(self):
         dt = getattr(self, "_dtypes", None)

maxframe/dataframe/datasource/read_odps_query.py CHANGED Viewed

@@ -57,7 +57,7 @@ _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^\n]+)\)(?:| AS ([^ ]+))(?:\n|
 _ANONYMOUS_COL_REGEX = re.compile(r"^_c(\d+)$")
 _SIMPLE_SCHEMA_COLS_REGEX = re.compile(r"SELECT (([^:]+:[^, ]+[, ]*)+)FROM")
-_SIMPLE_SCHEMA_COL_REGEX = re.compile(r"([^\.]+):([^, ]+)")
+_SIMPLE_SCHEMA_COL_REGEX = re.compile(r"([^ \.\)]+):([^ ]+)")
 @dataclasses.dataclass
@@ -180,23 +180,30 @@ def _parse_full_explain(explain_string: str) -> OdpsSchema:
     job_dag = jobs_sector.build_dag()
     indep_job_names = list(job_dag.iter_indep(reverse=True))
-    if len(indep_job_names) > 1:  # pragma: no cover
-        raise ValueError("Only one final job is allowed in SQL statement")
-    tasks_sector = jobs_sector.jobs[indep_job_names[0]]
-    task_dag = tasks_sector.build_dag()
-    indep_task_names = list(task_dag.iter_indep(reverse=True))
-    if len(indep_task_names) > 1:  # pragma: no cover
+    schema_signatures = dict()
+    for job_name in indep_job_names:
+        tasks_sector = jobs_sector.jobs[job_name]
+        task_dag = tasks_sector.build_dag()
+        indep_task_names = list(task_dag.iter_indep(reverse=True))
+        for task_name in indep_task_names:
+            task_sector = tasks_sector.tasks[task_name]
+            if not task_sector.schema:  # pragma: no cover
+                raise ValueError("Cannot detect output schema")
+            if task_sector.output_target != "Screen":
+                raise ValueError("The SQL statement should be an instant query")
+            sig_tuples = sorted(
+                [
+                    (c.column_alias or c.column_name, c.column_type)
+                    for c in task_sector.schema
+                ]
+            )
+            schema_signatures[hash(tuple(sig_tuples))] = task_sector.schema
+    if len(schema_signatures) != 1:
         raise ValueError("Only one final task is allowed in SQL statement")
-    task_sector = tasks_sector.tasks[indep_task_names[0]]
-    if not task_sector.schema:  # pragma: no cover
-        raise ValueError("Cannot detect output schema")
-    if task_sector.output_target != "Screen":
-        raise ValueError("The SQL statement should be an instant query")
+    schema = list(schema_signatures.values())[0]
     cols = [
         Column(c.column_alias or c.column_name, validate_data_type(c.column_type))
-        for c in task_sector.schema
+        for c in schema
     ]
     return OdpsSchema(cols)
@@ -209,7 +216,7 @@ def _parse_simple_explain(explain_string: str) -> OdpsSchema:
     fields_str = fields_match.group(1)
     cols = []
     for field, type_name in _SIMPLE_SCHEMA_COL_REGEX.findall(fields_str):
-        cols.append(Column(field, validate_data_type(type_name)))
+        cols.append(Column(field, validate_data_type(type_name.rstrip(","))))
     return OdpsSchema(cols)
@@ -257,12 +264,18 @@ class DataFrameReadODPSQuery(
             )
             index_value = parse_index(idx)
-        columns_value = parse_index(self.dtypes.index, store_data=True)
+        if self.dtypes is not None:
+            columns_value = parse_index(self.dtypes.index, store_data=True)
+            shape = (np.nan, len(self.dtypes))
+        else:
+            columns_value = None
+            shape = (np.nan, np.nan)
         self.output_types = [OutputType.dataframe]
         return self.new_tileable(
             [],
             None,
-            shape=(len(self.dtypes), np.nan),
+            shape=shape,
             dtypes=self.dtypes,
             index_value=index_value,
             columns_value=columns_value,
@@ -278,6 +291,7 @@ def read_odps_query(
     string_as_binary: bool = None,
     sql_hints: Dict[str, str] = None,
     anonymous_col_prefix: str = _DEFAULT_ANONYMOUS_COL_PREFIX,
+    skip_schema: bool = False,
     **kw,
 ):
     """
@@ -298,6 +312,10 @@ def read_odps_query(
         User specified SQL hints.
     anonymous_col_prefix: str, optional
         Prefix for anonymous columns, '_anon_col_' by default.
+    skip_schema: bool, optional
+        Skip resolving output schema before execution. Once this is configured,
+        the output DataFrame cannot be inputs of other DataFrame operators
+        before execution.
     Returns
     -------
@@ -319,28 +337,39 @@ def read_odps_query(
     if odps_entry is None:
         raise ValueError("Missing odps_entry parameter")
-    inst = odps_entry.execute_sql(f"EXPLAIN {query}", hints=hints)
-    logger.debug("Explain instance ID: %s", inst.id)
-    explain_str = list(inst.get_task_results().values())[0]
-    odps_schema = _parse_explained_schema(explain_str)
-    new_columns = []
     col_renames = {}
-    for col in odps_schema.columns:
-        anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
-        if anon_match and col.name not in query:
-            new_name = anonymous_col_prefix + anon_match.group(1)
-            col_renames[col.name] = new_name
-            new_columns.append(Column(new_name, col.type))
-        else:
-            new_columns.append(col)
-    dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
+    if not skip_schema:
+        inst = odps_entry.execute_sql(f"EXPLAIN {query}", hints=hints)
+        logger.debug("Explain instance ID: %s", inst.id)
+        explain_str = list(inst.get_task_results().values())[0]
+        try:
+            odps_schema = _parse_explained_schema(explain_str)
+        except ValueError as ex:
+            exc = ValueError(str(ex) + "\nExplain instance ID: " + inst.id)
+            raise exc.with_traceback(ex.__traceback__) from None
+        new_columns = []
+        for col in odps_schema.columns:
+            anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
+            if anon_match and col.name not in query:
+                new_name = anonymous_col_prefix + anon_match.group(1)
+                col_renames[col.name] = new_name
+                new_columns.append(Column(new_name, col.type))
+            else:
+                new_columns.append(col)
+        dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
+    else:
+        dtypes = None
     if not index_col:
         index_dtypes = None
     else:
+        if dtypes is None:
+            raise ValueError("Cannot configure index_col when skip_schema is True")
         if isinstance(index_col, str):
             index_col = [index_col]
         index_col_set = set(index_col)

maxframe/dataframe/datasource/tests/test_datasource.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
+import uuid
 from collections import OrderedDict
 import numpy as np
@@ -26,7 +27,14 @@ from ....core import OutputType
 from ....tests.utils import tn
 from ....utils import lazy_import
 from ... import read_odps_query, read_odps_table
-from ...core import DatetimeIndex, Float64Index, IndexValue, Int64Index, MultiIndex
+from ...core import (
+    DatetimeIndex,
+    Float64Index,
+    Index,
+    IndexValue,
+    Int64Index,
+    MultiIndex,
+)
 from ..dataframe import from_pandas as from_pandas_df
 from ..date_range import date_range
 from ..from_tensor import (
@@ -36,7 +44,12 @@ from ..from_tensor import (
 )
 from ..index import from_pandas as from_pandas_index
 from ..index import from_tileable
-from ..read_odps_query import ColumnSchema, _parse_simple_explain, _resolve_task_sector
+from ..read_odps_query import (
+    ColumnSchema,
+    _parse_full_explain,
+    _parse_simple_explain,
+    _resolve_task_sector,
+)
 from ..series import from_pandas as from_pandas_series
 ray = lazy_import("ray")
@@ -114,18 +127,22 @@ def test_from_tileable_index():
     for o in [df, df[0]]:
         index = o.index
-        assert isinstance(index, Int64Index)
+        assert isinstance(index, (Index, Int64Index))
         assert index.dtype == np.int64
         assert index.name == pd_df.index.name
-        assert isinstance(index.index_value.value, IndexValue.Int64Index)
+        assert isinstance(
+            index.index_value.value, (IndexValue.Int64Index, IndexValue.Index)
+        )
     t = mt.random.rand(10, chunk_size=6)
     index = from_tileable(t, name="new_name")
-    assert isinstance(index, Float64Index)
+    assert isinstance(index, (Index, Float64Index))
     assert index.dtype == np.float64
     assert index.name == "new_name"
-    assert isinstance(index.index_value.value, IndexValue.Float64Index)
+    assert isinstance(
+        index.index_value.value, (IndexValue.Float64Index, IndexValue.Index)
+    )
 def test_from_tensor():
@@ -327,7 +344,10 @@ def test_from_odps_query():
     odps_entry.write_table(test_table2, [["A", 10, 4.5]])
     with pytest.raises(ValueError) as err_info:
-        read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
+        read_odps_query(
+            f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
+            f"AS SELECT * FROM {table1_name}"
+        )
     assert "instant query" in err_info.value.args[0]
     query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
@@ -343,6 +363,10 @@ def test_from_odps_query():
         ),
     )
+    df = read_odps_query(query1, skip_schema=True)
+    assert df.dtypes is None
+    assert df.columns_value is None
     df = read_odps_query(query1, index_col="col1")
     assert df.op.query == query1
     assert df.index_value.name == "col1"
@@ -442,3 +466,31 @@ def test_resolve_simple_explain():
     assert schema.columns[0].type == odps_types.string
     assert schema.columns[1].name == "createdate"
     assert schema.columns[1].type == odps_types.bigint
+def test_resolve_conditional():
+    input_path = os.path.join(
+        os.path.dirname(__file__), "test-data", "task-input-multi-cond.txt"
+    )
+    with open(input_path, "r") as f:
+        sector = f.read()
+    expected_col_types = {
+        "cs1": "string",
+        "cs2": "string",
+        "ci1": "bigint",
+        "cs3": "string",
+        "cs4": "string",
+        "cs5": "string",
+        "cs6": "string",
+        "cs7": "string",
+        "cs8": "string",
+        "ci2": "int",
+        "ci3": "bigint",
+        "cs9": "string",
+    }
+    schema = _parse_full_explain(sector)
+    for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
+        assert col.name == exp_nm
+        assert col.type == odps_types.validate_data_type(exp_tp)

maxframe/dataframe/extensions/__init__.py CHANGED Viewed

@@ -18,6 +18,8 @@ from .accessor import (
     IndexMaxFrameAccessor,
     SeriesMaxFrameAccessor,
 )
+from .apply_chunk import df_apply_chunk, series_apply_chunk
+from .flatjson import series_flatjson
 from .flatmap import df_flatmap, series_flatmap
 from .reshuffle import DataFrameReshuffle, df_reshuffle
@@ -27,7 +29,10 @@ def _install():
     DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
     DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
+    DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
     SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
+    SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
+    SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
     if DataFrameMaxFrameAccessor._api_count:
         for t in DATAFRAME_TYPE: