maxframe 2.2.0__cp38-cp38-macosx_10_9_universal2.whl → 2.3.0rc1__cp38-cp38-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- maxframe/_utils.cpython-38-darwin.so +0 -0
- maxframe/codegen/core.py +3 -2
- maxframe/codegen/spe/dataframe/merge.py +4 -0
- maxframe/codegen/spe/dataframe/misc.py +2 -0
- maxframe/codegen/spe/dataframe/reduction.py +18 -0
- maxframe/codegen/spe/dataframe/sort.py +9 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/config/config.py +3 -0
- maxframe/conftest.py +10 -0
- maxframe/core/base.py +2 -1
- maxframe/core/entity/tileables.py +2 -0
- maxframe/core/graph/core.cpython-38-darwin.so +0 -0
- maxframe/core/graph/entity.py +7 -1
- maxframe/core/mode.py +6 -1
- maxframe/dataframe/__init__.py +2 -2
- maxframe/dataframe/arithmetic/__init__.py +4 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/core.py +98 -106
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/read_csv.py +19 -11
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datastore/__init__.py +6 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_odps.py +6 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/groupby/__init__.py +5 -1
- maxframe/dataframe/groupby/aggregation.py +10 -6
- maxframe/dataframe/groupby/apply_chunk.py +1 -3
- maxframe/dataframe/groupby/core.py +20 -4
- maxframe/dataframe/indexing/__init__.py +2 -1
- maxframe/dataframe/indexing/insert.py +45 -17
- maxframe/dataframe/merge/__init__.py +3 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/misc/__init__.py +14 -3
- maxframe/dataframe/misc/check_unique.py +41 -10
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/map.py +31 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +35 -1
- maxframe/dataframe/missing/checkna.py +3 -2
- maxframe/dataframe/reduction/__init__.py +10 -5
- maxframe/dataframe/reduction/aggregation.py +6 -6
- maxframe/dataframe/reduction/argmax.py +7 -4
- maxframe/dataframe/reduction/argmin.py +7 -4
- maxframe/dataframe/reduction/core.py +18 -9
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +10 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/sort/__init__.py +9 -2
- maxframe/dataframe/sort/argsort.py +7 -1
- maxframe/dataframe/sort/core.py +1 -1
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/utils.py +30 -26
- maxframe/learn/contrib/llm/core.py +16 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +31 -7
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +2 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +1 -0
- maxframe/learn/utils/extmath.py +42 -9
- maxframe/learn/utils/odpsio.py +80 -11
- maxframe/lib/filesystem/_oss_lib/common.py +2 -0
- maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
- maxframe/opcodes.py +9 -1
- maxframe/remote/core.py +4 -0
- maxframe/serialization/core.cpython-38-darwin.so +0 -0
- maxframe/serialization/tests/test_serial.py +2 -2
- maxframe/tensor/arithmetic/__init__.py +1 -1
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
- maxframe/tensor/core.py +3 -0
- maxframe/tensor/misc/copyto.py +1 -1
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +8 -5
- maxframe/udf.py +103 -7
- maxframe/utils.py +61 -8
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_session.py +24 -0
- maxframe/dataframe/arrays.py +0 -864
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/dataframe/datasource/read_parquet.py

@@ -32,6 +32,7 @@ except ImportError:
 
 from ... import opcodes
 from ...config import options
+from ...lib.dtypes_extension import ArrowDtype
 from ...lib.filesystem import FileSystem, get_fs, glob, open_file
 from ...serialization.serializables import (
     AnyField,
@@ -43,10 +44,13 @@ from ...serialization.serializables import (
     StringField,
 )
 from ...utils import lazy_import
-from ..arrays import ArrowStringDtype
 from ..operators import OutputType
 from ..utils import parse_index, to_arrow_dtypes
-from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
+from .core import (
+    ColumnPruneSupportedDataSourceMixin,
+    DtypeBackendCompatibleMixin,
+    IncrementalIndexDatasource,
+)
 
 PARQUET_MEMORY_SCALE = 15
 STRING_FIELD_OVERHEAD = 50
@@ -89,13 +93,11 @@ class ParquetEngine:
     def read_dtypes(self, f, **kwargs):
         raise NotImplementedError
 
-    def read_to_pandas(
-        self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
-    ):
+    def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
         raise NotImplementedError
 
     def read_group_to_pandas(
-        self, f, group_index, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
+        self, f, group_index, columns=None, nrows=None, dtype_backend=None, **kwargs
     ):
         raise NotImplementedError
 
@@ -106,11 +108,11 @@ class ParquetEngine:
         partition_keys: Dict,
         columns=None,
         nrows=None,
-        use_arrow_dtype=None,
+        dtype_backend=None,
         **kwargs,
     ):
         raw_df = self.read_to_pandas(
-            f, columns=columns, nrows=nrows, use_arrow_dtype=use_arrow_dtype, **kwargs
+            f, columns=columns, nrows=nrows, dtype_backend=dtype_backend, **kwargs
         )
         for col, value in partition_keys.items():
             dictionary = partitions[col]
@@ -169,28 +171,26 @@ class ArrowEngine(ParquetEngine):
         return file.schema_arrow.empty_table().to_pandas().dtypes
 
     @classmethod
-    def _table_to_pandas(cls, t, nrows=None, use_arrow_dtype=None):
+    def _table_to_pandas(cls, t, nrows=None, dtype_backend=None):
         if nrows is not None:
             t = t.slice(0, nrows)
-        if use_arrow_dtype:
-            df = t.to_pandas(types_mapper={pa.string(): ArrowStringDtype()}.get)
+        if dtype_backend == "pyarrow":
+            df = t.to_pandas(types_mapper={pa.string(): ArrowDtype(pa.string())}.get)
         else:
             df = t.to_pandas()
         return df
 
-    def read_to_pandas(
-        self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
-    ):
+    def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
         file = pq.ParquetFile(f)
         t = file.read(columns=columns, **kwargs)
-        return self._table_to_pandas(t, nrows=nrows, use_arrow_dtype=use_arrow_dtype)
+        return self._table_to_pandas(t, nrows=nrows, dtype_backend=dtype_backend)
 
     def read_group_to_pandas(
-        self, f, group_index, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
+        self, f, group_index, columns=None, nrows=None, dtype_backend=None, **kwargs
     ):
         file = pq.ParquetFile(f)
         t = file.read_row_group(group_index, columns=columns, **kwargs)
-        return self._table_to_pandas(t, nrows=nrows, use_arrow_dtype=use_arrow_dtype)
+        return self._table_to_pandas(t, nrows=nrows, dtype_backend=dtype_backend)
 
 
 class FastpaquetEngine(ParquetEngine):
@@ -203,14 +203,12 @@ class FastpaquetEngine(ParquetEngine):
         dtypes_dict = file._dtypes()
         return pd.Series(dict((c, dtypes_dict[c]) for c in file.columns))
 
-    def read_to_pandas(
-        self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
-    ):
+    def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
         file = fastparquet.ParquetFile(f)
         df = file.to_pandas(columns, **kwargs)
         if nrows is not None:
             df = df.head(nrows)
-        if use_arrow_dtype:
+        if dtype_backend == "pyarrow":
             df = df.astype(to_arrow_dtypes(df.dtypes).to_dict())
         return df
 
@@ -265,29 +263,30 @@ class CudfEngine:
 class DataFrameReadParquet(
     IncrementalIndexDatasource,
     ColumnPruneSupportedDataSourceMixin,
+    DtypeBackendCompatibleMixin,
 ):
     _op_type_ = opcodes.READ_PARQUET
 
     path = AnyField("path")
     engine = StringField("engine")
     columns = ListField("columns")
-    use_arrow_dtype = BoolField("use_arrow_dtype")
-    groups_as_chunks = BoolField("groups_as_chunks")
-    group_index = Int32Field("group_index")
-    read_kwargs = DictField("read_kwargs")
-    incremental_index = BoolField("incremental_index")
-    storage_options = DictField("storage_options")
-    is_partitioned = BoolField("is_partitioned")
-    merge_small_files = BoolField("merge_small_files")
-    merge_small_file_options = DictField("merge_small_file_options")
+    dtype_backend = StringField("dtype_backend", default=None)
+    groups_as_chunks = BoolField("groups_as_chunks", default=None)
+    group_index = Int32Field("group_index", default=None)
+    read_kwargs = DictField("read_kwargs", default=None)
+    incremental_index = BoolField("incremental_index", default=None)
+    storage_options = DictField("storage_options", default=None)
+    is_partitioned = BoolField("is_partitioned", default=None)
+    merge_small_files = BoolField("merge_small_files", default=None)
+    merge_small_file_options = DictField("merge_small_file_options", default=None)
     # for chunk
     partitions = DictField("partitions", default=None)
    partition_keys = DictField("partition_keys", default=None)
     num_group_rows = Int64Field("num_group_rows", default=None)
     # as read meta may be too time-consuming when number of files is large,
     # thus we only read first file to get row number and raw file size
-    first_chunk_row_num = Int64Field("first_chunk_row_num")
-    first_chunk_raw_bytes = Int64Field("first_chunk_raw_bytes")
+    first_chunk_row_num = Int64Field("first_chunk_row_num", default=None)
+    first_chunk_raw_bytes = Int64Field("first_chunk_raw_bytes", default=None)
 
     def get_columns(self):
         return self.columns
@@ -319,7 +318,7 @@ def read_parquet(
     engine: str = "auto",
     columns: list = None,
     groups_as_chunks: bool = False,
-    use_arrow_dtype: bool = None,
+    dtype_backend: str = None,
     incremental_index: bool = False,
     storage_options: dict = None,
     memory_scale: int = None,
@@ -356,8 +355,8 @@ def read_parquet(
     incremental_index: bool, default False
         If index_col not specified, ensure range index incremental,
         gain a slightly better performance if setting False.
-    use_arrow_dtype: bool, default None
-        If True, use arrow dtype to store columns.
+    dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
+        Back-end data type applied to the resultant DataFrame (still experimental).
     storage_options: dict, optional
         Options for storage connection.
     memory_scale: int, optional
@@ -401,9 +400,9 @@ def read_parquet(
     if columns:
         dtypes = dtypes[columns]
 
-    if use_arrow_dtype is None:
-        use_arrow_dtype = options.dataframe.use_arrow_dtype
-    if use_arrow_dtype:
+    if dtype_backend is None:
+        dtype_backend = options.dataframe.dtype_backend
+    if dtype_backend == "pyarrow":
        dtypes = to_arrow_dtypes(dtypes)
 
     index_value = parse_index(pd.RangeIndex(-1))
@@ -413,7 +412,7 @@ def read_parquet(
         engine=engine_type,
         columns=columns,
         groups_as_chunks=groups_as_chunks,
-        use_arrow_dtype=use_arrow_dtype,
+        dtype_backend=dtype_backend,
         read_kwargs=kwargs,
         incremental_index=incremental_index,
         storage_options=storage_options,
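
The practical upshot: read_parquet drops the boolean use_arrow_dtype flag in favor of the pandas-2.x-style dtype_backend string, with the default taken from options.dataframe.dtype_backend when the argument is None. A minimal usage sketch of the new parameter (the OSS path below is illustrative, not from the diff):

    import maxframe.dataframe as md
    from maxframe.config import options

    # per call: request pyarrow-backed dtypes for the result
    df = md.read_parquet("oss://bucket/path/data.parquet", dtype_backend="pyarrow")

    # or set the global default that read_parquet consults when
    # dtype_backend is left as None
    options.dataframe.dtype_backend = "pyarrow"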

maxframe/dataframe/datastore/__init__.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .direct import df_to_dict, series_to_dict, series_to_list, to_clipboard
 from .to_csv import to_csv
 from .to_odps import to_odps_table
 
@@ -20,10 +21,15 @@ def _install():
     from ..core import DATAFRAME_TYPE, SERIES_TYPE
 
     for t in DATAFRAME_TYPE:
+        t.to_clipboard = to_clipboard
         t.to_csv = to_csv
+        t.to_dict = df_to_dict
         t.to_odps_table = to_odps_table
     for t in SERIES_TYPE:
+        t.to_clipboard = to_clipboard
         t.to_csv = to_csv
+        t.to_dict = series_to_dict
+        t.to_list = series_to_list
 
 
 _install()
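
For context, _install() works by plain attribute assignment: binding a module-level function onto the DataFrame and Series classes makes it a bound method on every instance through Python's descriptor protocol. A self-contained sketch of the same pattern (names here are illustrative, not maxframe's):

    class Frame:
        pass

    def to_dict(self):
        # `self` receives the instance once the function is a class attribute
        return {"type": type(self).__name__}

    Frame.to_dict = to_dict      # same trick _install() uses
    print(Frame().to_dict())     # {'type': 'Frame'}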

maxframe/dataframe/datastore/direct.py (new file)

@@ -0,0 +1,268 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...utils import pd_release_version
+
+_to_dict_has_index = pd_release_version[0] >= 2
+
+
+def df_to_dict(
+    df, orient="dict", into=dict, index=True, batch_size=10000, session=None
+):
+    """
+    Convert the DataFrame to a dictionary.
+
+    The type of the key-value pairs can be customized with the parameters
+    (see below).
+
+    Parameters
+    ----------
+    orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
+        Determines the type of the values of the dictionary.
+
+        - 'dict' (default) : dict like {column -> {index -> value}}
+        - 'list' : dict like {column -> [values]}
+        - 'series' : dict like {column -> Series(values)}
+        - 'split' : dict like
+          {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
+        - 'tight' : dict like
+          {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
+          'index_names' -> [index.names], 'column_names' -> [column.names]}
+        - 'records' : list like
+          [{column -> value}, ... , {column -> value}]
+        - 'index' : dict like {index -> {column -> value}}
+
+    into : class, default dict
+        The collections.abc.MutableMapping subclass used for all Mappings
+        in the return value. Can be the actual class or an empty
+        instance of the mapping type you want. If you want a
+        collections.defaultdict, you must pass it initialized.
+
+    index : bool, default True
+        Whether to include the index item (and index_names item if `orient`
+        is 'tight') in the returned dictionary. Can only be ``False``
+        when `orient` is 'split' or 'tight'.
+
+    Returns
+    -------
+    dict, list or collections.abc.MutableMapping
+        Return a collections.abc.MutableMapping object representing the
+        DataFrame. The resulting transformation depends on the `orient`
+        parameter.
+
+    See Also
+    --------
+    DataFrame.from_dict: Create a DataFrame from a dictionary.
+    DataFrame.to_json: Convert a DataFrame to JSON format.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({'col1': [1, 2],
+    ...                    'col2': [0.5, 0.75]},
+    ...                   index=['row1', 'row2'])
+    >>> df.execute()
+          col1  col2
+    row1     1  0.50
+    row2     2  0.75
+    >>> df.to_dict()
+    {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
+
+    You can specify the return orientation.
+
+    >>> df.to_dict('series')
+    {'col1': row1    1
+             row2    2
+    Name: col1, dtype: int64,
+    'col2': row1    0.50
+            row2    0.75
+    Name: col2, dtype: float64}
+
+    >>> df.to_dict('split')
+    {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
+     'data': [[1, 0.5], [2, 0.75]]}
+
+    >>> df.to_dict('records')
+    [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]
+
+    >>> df.to_dict('index')
+    {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
+
+    >>> df.to_dict('tight')
+    {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
+     'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
+
+    You can also specify the mapping type.
+
+    >>> from collections import OrderedDict, defaultdict
+    >>> df.to_dict(into=OrderedDict)
+    OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
+                 ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])
+
+    If you want a `defaultdict`, you need to initialize it:
+
+    >>> dd = defaultdict(list)
+    >>> df.to_dict('records', into=dd)
+    [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
+     defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
+    """
+    fetch_kwargs = dict(batch_size=batch_size)
+    to_dict_kw = dict(orient=orient, into=into)
+    if _to_dict_has_index:
+        to_dict_kw["index"] = index
+    return df.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
+        **to_dict_kw
+    )
+
+
+def series_to_dict(series, into=dict, batch_size=10000, session=None):
+    """
+    Convert Series to {label -> value} dict or dict-like object.
+
+    Parameters
+    ----------
+    into : class, default dict
+        The collections.abc.Mapping subclass to use as the return
+        object. Can be the actual class or an empty
+        instance of the mapping type you want. If you want a
+        collections.defaultdict, you must pass it initialized.
+
+    Returns
+    -------
+    collections.abc.Mapping
+        Key-value representation of Series.
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series([1, 2, 3, 4])
+    >>> s.to_dict()
+    {0: 1, 1: 2, 2: 3, 3: 4}
+    >>> from collections import OrderedDict, defaultdict
+    >>> s.to_dict(OrderedDict)
+    OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+    >>> dd = defaultdict(list)
+    >>> s.to_dict(dd)
+    defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
+    """
+    fetch_kwargs = dict(batch_size=batch_size)
+    return series.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
+        into=into
+    )
+
+
+def series_to_list(series, batch_size=10000, session=None):
+    """
+    Return a list of the values.
+
+    These are each a scalar type, which is a Python scalar
+    (for str, int, float) or a pandas scalar
+    (for Timestamp/Timedelta/Interval/Period)
+
+    Returns
+    -------
+    list
+
+    See Also
+    --------
+    numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
+        nested list of Python scalars.
+
+    Examples
+    --------
+    For Series
+
+    >>> import maxframe.dataframe as md
+    >>> s = md.Series([1, 2, 3])
+    >>> s.to_list()
+    [1, 2, 3]
+
+    For Index:
+
+    >>> idx = md.Index([1, 2, 3])
+    >>> idx.execute()
+    Index([1, 2, 3], dtype='int64')
+
+    >>> idx.to_list()
+    [1, 2, 3]
+    """
+    fetch_kwargs = dict(batch_size=batch_size)
+    return series.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_list()
+
+
+def to_clipboard(
+    obj, *, excel=True, sep=None, batch_size=10000, session=None, **kwargs
+):
+    """
+    Copy object to the system clipboard.
+
+    Write a text representation of object to the system clipboard.
+    This can be pasted into Excel, for example.
+
+    Parameters
+    ----------
+    excel : bool, default True
+        Produce output in a csv format for easy pasting into excel.
+
+        - True, use the provided separator for csv pasting.
+        - False, write a string representation of the object to the clipboard.
+
+    sep : str, default ``'\t'``
+        Field delimiter.
+    **kwargs
+        These parameters will be passed to DataFrame.to_csv.
+
+    See Also
+    --------
+    DataFrame.to_csv : Write a DataFrame to a comma-separated values
+        (csv) file.
+    read_clipboard : Read text from clipboard and pass to read_csv.
+
+    Notes
+    -----
+    Requirements for your platform.
+
+    - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
+    - Windows : none
+    - macOS : none
+
+    This method uses the processes developed for the package `pyperclip`. A
+    solution to render any output string format is given in the examples.
+
+    Examples
+    --------
+    Copy the contents of a DataFrame to the clipboard.
+
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
+
+    >>> df.to_clipboard(sep=',')  # doctest: +SKIP
+    ... # Wrote the following to the system clipboard:
+    ... # ,A,B,C
+    ... # 0,1,2,3
+    ... # 1,4,5,6
+
+    We can omit the index by passing the keyword `index` and setting
+    it to false.
+
+    >>> df.to_clipboard(sep=',', index=False)  # doctest: +SKIP
+    ... # Wrote the following to the system clipboard:
+    ... # A,B,C
+    ... # 1,2,3
+    ... # 4,5,6
+    """
+    fetch_kwargs = dict(batch_size=batch_size)
+    return obj.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_clipboard(
+        excel=excel, sep=sep, **kwargs
+    )
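
All four helpers funnel through to_pandas(session=..., fetch_kwargs=...): the remote result is pulled to the client in batches (10000 rows by default) and the plain pandas method then runs locally. A hedged sketch of the tuning knobs:

    import maxframe.dataframe as md

    s = md.Series(range(100000))
    # larger batches mean fewer fetch round trips; both arguments are optional
    values = s.to_list(batch_size=50000)
    mapping = s.to_dict(batch_size=50000)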

maxframe/dataframe/datastore/to_odps.py

@@ -57,10 +57,16 @@ class DataFrameToODPSTable(DataFrameDataStore):
     lifecycle = Int64Field("lifecycle", default=None)
     table_properties = DictField("table_properties", default=None)
     primary_key = ListField("primary_key", FieldTypes.string, default=None)
+    use_generated_table_meta = BoolField("use_generated_table_meta", default=False)
 
     def __init__(self, **kw):
         super().__init__(_output_types=[OutputType.dataframe], **kw)
 
+    def check_inputs(self, inputs: List[TileableType]):
+        if self.use_generated_table_meta:
+            return None
+        return super().check_inputs(inputs)
+
     def __call__(self, x):
         shape = (0,) * len(x.shape)
         index_value = parse_index(x.index_value.to_pandas()[:0], x.key, "index")
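
The check_inputs override is a guard: when use_generated_table_meta is set, the operator skips the base class's input validation so the store step can rely on table metadata generated at run time. A minimal standalone sketch of the pattern (class names here are illustrative, not maxframe's):

    class BaseOp:
        def check_inputs(self, inputs):
            if not inputs:
                raise ValueError("at least one input is required")

    class StoreOp(BaseOp):
        use_generated_meta = False

        def check_inputs(self, inputs):
            # bypass validation when metadata is produced later
            if self.use_generated_meta:
                return None
            return super().check_inputs(inputs)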

maxframe/dataframe/extensions/flatjson.py

@@ -39,12 +39,13 @@ class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
             name=name,
             dtype=make_dtype(dtype),
         )
+        dtypes = make_dtypes(dtypes)
         return self.new_dataframe(
             [series],
             shape=(series.shape[0], len(dtypes)),
             index_value=series.index_value,
             columns_value=parse_index(dtypes.index, store_data=True),
-            dtypes=make_dtypes(dtypes),
+            dtypes=dtypes,
         )
 
 
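
The point of the change: dtypes is normalized through make_dtypes once, up front, so len(dtypes), dtypes.index, and the dtypes= keyword all see the same pandas Series rather than whatever mapping the caller passed. A small pandas-only illustration of why that matters (make_dtypes is internal to maxframe; this stand-in mimics its dict-to-Series behavior):

    import numpy as np
    import pandas as pd

    def make_dtypes_standin(dtypes):
        # mimic the helper: accept a mapping, return a pd.Series of dtypes
        if isinstance(dtypes, dict):
            return pd.Series({k: np.dtype(v) for k, v in dtypes.items()})
        return dtypes

    raw = {"a": "int64", "b": "float64"}
    dtypes = make_dtypes_standin(raw)
    print(len(dtypes), list(dtypes.index))  # 2 ['a', 'b'] -- a plain dict has no .index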

maxframe/dataframe/groupby/__init__.py

@@ -14,7 +14,7 @@
 
 # noinspection PyUnresolvedReferences
 from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
-from .core import NamedAgg
+from .core import _make_named_agg_compat
 from .expanding import ExpandingGroupby
 from .rolling import RollingGroupby
 
@@ -99,3 +99,7 @@ def _install():
 
 _install()
 del _install
+
+
+__getattr__ = _make_named_agg_compat
+del _make_named_agg_compat

maxframe/dataframe/groupby/aggregation.py

@@ -21,7 +21,7 @@ import pandas as pd
 
 from ... import opcodes
 from ...config import options
-from ...core import ENTITY_TYPE, EntityData, OutputType
+from ...core import ENTITY_TYPE, EntityData, OutputType, enter_mode
 from ...serialization import PickleContainer
 from ...serialization.serializables import (
     AnyField,
@@ -34,7 +34,7 @@ from ...serialization.serializables import (
     StringField,
 )
 from ...udf import BuiltinFunction
-from ...utils import find_objects, lazy_import, pd_release_version
+from ...utils import find_objects, get_pd_option, lazy_import, pd_release_version
 from ..core import GROUPBY_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..reduction.aggregation import (
@@ -116,7 +116,10 @@ def build_mock_agg_result(
     **raw_func_kw,
 ):
     try:
-        agg_result = groupby.op.build_mock_groupby().aggregate(raw_func, **raw_func_kw)
+        with enter_mode(mock=True):
+            agg_result = groupby.op.build_mock_groupby().aggregate(
+                raw_func, **raw_func_kw
+            )
     except ValueError:
         if (
             groupby_params.get("as_index") or _support_get_group_without_as_index
@@ -377,9 +380,10 @@ def agg(groupby, func=None, method="auto", *args, **kwargs):
     1  1  2  0.590715
     2  3  4  0.704907
 
-    To control the output names with different aggregations per column,
+    To control the output names with different aggregations per column,
+    MaxFrame supports “named aggregation”
 
-    >>> from maxframe.dataframe
+    >>> from maxframe.dataframe import NamedAgg
     >>> df.groupby("A").agg(
     ...     b_min=NamedAgg(column="B", aggfunc="min"),
     ...     c_sum=NamedAgg(column="C", aggfunc="sum")).execute()
@@ -432,6 +436,6 @@ def agg(groupby, func=None, method="auto", *args, **kwargs):
         groupby_params=groupby.op.groupby_params,
         combine_size=combine_size,
         chunk_store_limit=options.chunk_store_limit,
-        use_inf_as_na=
+        use_inf_as_na=get_pd_option("mode.use_inf_as_na", False),
     )
     return agg_op(groupby)
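
Net effect for users: NamedAgg keeps working in groupby aggregation, but its documented import path moves to the top-level dataframe package (the old location is shimmed; see groupby/core.py below). A minimal usage sketch, following the docstring above:

    import maxframe.dataframe as md
    from maxframe.dataframe import NamedAgg

    df = md.DataFrame({"A": [1, 1, 2], "B": [1, 2, 3], "C": [0.5, 0.25, 0.75]})
    result = df.groupby("A").agg(
        b_min=NamedAgg(column="B", aggfunc="min"),
        c_sum=NamedAgg(column="C", aggfunc="sum"),
    ).execute()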

maxframe/dataframe/groupby/apply_chunk.py

@@ -29,7 +29,7 @@ from ...serialization.serializables import (
     TupleField,
 )
 from ...udf import BuiltinFunction, MarkedFunction
-from ...utils import copy_if_possible
+from ...utils import copy_if_possible, make_dtype, make_dtypes
 from ..core import (
     DATAFRAME_GROUPBY_TYPE,
     GROUPBY_TYPE,
@@ -45,8 +45,6 @@ from ..utils import (
     copy_func_scheduling_hints,
     infer_dataframe_return_value,
     make_column_list,
-    make_dtype,
-    make_dtypes,
     parse_index,
     validate_output_types,
 )

maxframe/dataframe/groupby/core.py

@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from collections import namedtuple
+import os
+import warnings
 from typing import Any, Dict, List
 
 import pandas as pd
@@ -20,6 +21,7 @@ import pandas as pd
 from ... import opcodes
 from ...core import ENTITY_TYPE, Entity, EntityData, OutputType
 from ...core.operator import MapReduceOperator
+from ...env import MAXFRAME_INSIDE_TASK
 from ...serialization import PickleContainer
 from ...serialization.serializables import AnyField, BoolField, DictField, Int32Field
 from ...udf import BuiltinFunction
@@ -38,9 +40,6 @@ from ..utils import (
 cudf = lazy_import("cudf")
 
 
-NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
-
-
 class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
     _op_type_ = opcodes.GROUPBY
     _legacy_name = "DataFrameGroupByOperator"  # since v2.0.0
@@ -324,3 +323,20 @@ class BaseGroupByWindowOp(DataFrameOperatorMixin, DataFrameOperator):
         name, dtype = out_dtypes
         kw.update(dtype=dtype, name=name, shape=(groupby.shape[0],))
         return self.new_tileable([in_df], **kw)
+
+
+def _make_named_agg_compat(name):  # pragma: no cover
+    # to make imports compatible
+    from ..reduction import NamedAgg
+
+    if name == "NamedAgg":
+        if MAXFRAME_INSIDE_TASK not in os.environ:
+            warnings.warn(
+                "Please import NamedAgg from maxframe.dataframe",
+                DeprecationWarning,
+            )
+        return NamedAgg
+    raise AttributeError(f"module {__name__} has no attribute {name}")
+
+
+__getattr__ = _make_named_agg_compat
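
_make_named_agg_compat is a module-level __getattr__ (PEP 562): legacy imports of NamedAgg from the old groupby modules still resolve, but emit a DeprecationWarning unless running inside a MaxFrame task, while the object itself now lives in the reduction package. A self-contained sketch of the same mechanism (module contents here are illustrative):

    # compat.py -- PEP 562: module __getattr__ intercepts missing attributes
    import warnings
    from collections import namedtuple

    _NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])  # stand-in object

    def __getattr__(name):
        if name == "NamedAgg":
            warnings.warn(
                "Please import NamedAgg from its new location", DeprecationWarning
            )
            return _NamedAgg
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")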

maxframe/dataframe/indexing/__init__.py

@@ -29,7 +29,7 @@ def _install():
     from .getitem import dataframe_getitem, series_getitem
     from .iat import iat
     from .iloc import head, iloc, index_getitem, index_setitem, tail
-    from .insert import df_insert
+    from .insert import df_insert, index_insert
     from .loc import loc
     from .reindex import reindex, reindex_like
     from .rename import df_rename, index_rename, index_set_names, series_rename
@@ -94,6 +94,7 @@ def _install():
     setattr(cls, "droplevel", index_droplevel)
     setattr(cls, "get_level_values", get_level_values)
     setattr(cls, "__getitem__", index_getitem)
+    setattr(cls, "insert", index_insert)
     setattr(cls, "rename", index_rename)
     setattr(cls, "__setitem__", index_setitem)
     setattr(cls, "set_names", index_set_names)
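
The newly wired index_insert gives maxframe Index objects a pandas-style Index.insert(loc, item) surface. A hedged usage sketch, assuming it mirrors pandas semantics as the installation above suggests:

    import maxframe.dataframe as md

    idx = md.Index([1, 2, 4])
    # insert the value 3 before position 2, returning a new Index
    new_idx = idx.insert(2, 3)
    print(new_idx.execute())  # expected: Index([1, 2, 3, 4], dtype='int64')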