PyPI - maxframe - Versions diffs - 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl - Mend

maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (214)

maxframe/__init__.py +1 -0
maxframe/_utils.cp39-win32.pyd +0 -0
maxframe/codegen.py +56 -5
maxframe/config/config.py +78 -10
maxframe/config/validators.py +42 -11
maxframe/conftest.py +58 -14
maxframe/core/__init__.py +2 -16
maxframe/core/entity/__init__.py +1 -12
maxframe/core/entity/executable.py +1 -1
maxframe/core/entity/objects.py +46 -45
maxframe/core/entity/output_types.py +0 -3
maxframe/core/entity/tests/test_objects.py +43 -0
maxframe/core/entity/tileables.py +5 -78
maxframe/core/graph/__init__.py +2 -2
maxframe/core/graph/builder/__init__.py +0 -1
maxframe/core/graph/builder/base.py +5 -4
maxframe/core/graph/builder/tileable.py +4 -4
maxframe/core/graph/builder/utils.py +4 -8
maxframe/core/graph/core.cp39-win32.pyd +0 -0
maxframe/core/graph/core.pyx +4 -4
maxframe/core/graph/entity.py +9 -33
maxframe/core/operator/__init__.py +2 -9
maxframe/core/operator/base.py +3 -5
maxframe/core/operator/objects.py +0 -9
maxframe/core/operator/utils.py +55 -0
maxframe/dataframe/__init__.py +2 -1
maxframe/dataframe/arithmetic/around.py +5 -17
maxframe/dataframe/arithmetic/core.py +15 -7
maxframe/dataframe/arithmetic/docstring.py +7 -33
maxframe/dataframe/arithmetic/equal.py +4 -2
maxframe/dataframe/arithmetic/greater.py +4 -2
maxframe/dataframe/arithmetic/greater_equal.py +4 -2
maxframe/dataframe/arithmetic/less.py +2 -2
maxframe/dataframe/arithmetic/less_equal.py +4 -2
maxframe/dataframe/arithmetic/not_equal.py +4 -2
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
maxframe/dataframe/core.py +58 -12
maxframe/dataframe/datasource/date_range.py +2 -2
maxframe/dataframe/datasource/read_odps_query.py +120 -24
maxframe/dataframe/datasource/read_odps_table.py +9 -4
maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
maxframe/dataframe/datastore/to_odps.py +28 -0
maxframe/dataframe/extensions/__init__.py +5 -0
maxframe/dataframe/extensions/flatjson.py +131 -0
maxframe/dataframe/extensions/flatmap.py +317 -0
maxframe/dataframe/extensions/reshuffle.py +1 -1
maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
maxframe/dataframe/groupby/core.py +1 -1
maxframe/dataframe/groupby/cum.py +0 -1
maxframe/dataframe/groupby/fill.py +4 -1
maxframe/dataframe/groupby/getitem.py +6 -0
maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
maxframe/dataframe/groupby/transform.py +5 -1
maxframe/dataframe/indexing/align.py +1 -1
maxframe/dataframe/indexing/loc.py +6 -4
maxframe/dataframe/indexing/rename.py +5 -28
maxframe/dataframe/indexing/sample.py +0 -1
maxframe/dataframe/indexing/set_index.py +68 -1
maxframe/dataframe/initializer.py +11 -1
maxframe/dataframe/merge/__init__.py +9 -1
maxframe/dataframe/merge/concat.py +41 -31
maxframe/dataframe/merge/merge.py +237 -3
maxframe/dataframe/merge/tests/test_merge.py +126 -1
maxframe/dataframe/misc/__init__.py +4 -0
maxframe/dataframe/misc/apply.py +6 -11
maxframe/dataframe/misc/case_when.py +141 -0
maxframe/dataframe/misc/describe.py +2 -2
maxframe/dataframe/misc/drop_duplicates.py +8 -8
maxframe/dataframe/misc/eval.py +4 -0
maxframe/dataframe/misc/memory_usage.py +2 -2
maxframe/dataframe/misc/pct_change.py +1 -83
maxframe/dataframe/misc/pivot_table.py +262 -0
maxframe/dataframe/misc/tests/test_misc.py +93 -1
maxframe/dataframe/misc/transform.py +1 -30
maxframe/dataframe/misc/value_counts.py +4 -17
maxframe/dataframe/missing/dropna.py +1 -1
maxframe/dataframe/missing/fillna.py +5 -5
maxframe/dataframe/operators.py +1 -17
maxframe/dataframe/plotting/core.py +2 -2
maxframe/dataframe/reduction/core.py +4 -3
maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
maxframe/dataframe/sort/sort_values.py +1 -11
maxframe/dataframe/statistics/corr.py +3 -3
maxframe/dataframe/statistics/quantile.py +13 -19
maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
maxframe/dataframe/tests/test_initializer.py +33 -2
maxframe/dataframe/utils.py +33 -11
maxframe/dataframe/window/expanding.py +5 -3
maxframe/dataframe/window/tests/test_expanding.py +2 -2
maxframe/errors.py +13 -0
maxframe/extension.py +12 -0
maxframe/io/__init__.py +13 -0
maxframe/io/objects/__init__.py +24 -0
maxframe/io/objects/core.py +140 -0
maxframe/io/objects/tensor.py +76 -0
maxframe/io/objects/tests/__init__.py +13 -0
maxframe/io/objects/tests/test_object_io.py +97 -0
maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
maxframe/{odpsio → io/odpsio}/schema.py +38 -16
maxframe/io/odpsio/tableio.py +719 -0
maxframe/io/odpsio/tests/__init__.py +13 -0
maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
maxframe/io/odpsio/volumeio.py +63 -0
maxframe/learn/contrib/__init__.py +3 -1
maxframe/learn/contrib/graph/__init__.py +15 -0
maxframe/learn/contrib/graph/connected_components.py +215 -0
maxframe/learn/contrib/graph/tests/__init__.py +13 -0
maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
maxframe/learn/contrib/llm/__init__.py +16 -0
maxframe/learn/contrib/llm/core.py +54 -0
maxframe/learn/contrib/llm/models/__init__.py +14 -0
maxframe/learn/contrib/llm/models/dashscope.py +73 -0
maxframe/learn/contrib/llm/multi_modal.py +42 -0
maxframe/learn/contrib/llm/text.py +42 -0
maxframe/learn/contrib/utils.py +52 -0
maxframe/learn/contrib/xgboost/__init__.py +26 -0
maxframe/learn/contrib/xgboost/classifier.py +110 -0
maxframe/learn/contrib/xgboost/core.py +241 -0
maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
maxframe/learn/contrib/xgboost/predict.py +121 -0
maxframe/learn/contrib/xgboost/regressor.py +71 -0
maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
maxframe/learn/contrib/xgboost/train.py +132 -0
maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
maxframe/learn/utils/__init__.py +15 -0
maxframe/learn/utils/core.py +29 -0
maxframe/lib/mmh3.cp39-win32.pyd +0 -0
maxframe/lib/mmh3.pyi +43 -0
maxframe/lib/sparse/tests/test_sparse.py +15 -15
maxframe/lib/wrapped_pickle.py +2 -1
maxframe/opcodes.py +11 -0
maxframe/protocol.py +154 -27
maxframe/remote/core.py +4 -8
maxframe/serialization/__init__.py +1 -0
maxframe/serialization/core.cp39-win32.pyd +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +64 -0
maxframe/serialization/core.pyx +67 -26
maxframe/serialization/exception.py +1 -1
maxframe/serialization/pandas.py +52 -17
maxframe/serialization/serializables/core.py +180 -15
maxframe/serialization/serializables/field_type.py +4 -1
maxframe/serialization/serializables/tests/test_serializable.py +54 -5
maxframe/serialization/tests/test_serial.py +2 -1
maxframe/session.py +37 -2
maxframe/tensor/__init__.py +81 -2
maxframe/tensor/arithmetic/isclose.py +1 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
maxframe/tensor/core.py +5 -136
maxframe/tensor/datasource/array.py +7 -2
maxframe/tensor/datasource/full.py +1 -1
maxframe/tensor/datasource/scalar.py +1 -1
maxframe/tensor/datasource/tests/test_datasource.py +1 -1
maxframe/tensor/indexing/flatnonzero.py +1 -1
maxframe/tensor/indexing/getitem.py +2 -0
maxframe/tensor/merge/__init__.py +2 -0
maxframe/tensor/merge/concatenate.py +101 -0
maxframe/tensor/merge/tests/test_merge.py +30 -1
maxframe/tensor/merge/vstack.py +74 -0
maxframe/tensor/{base → misc}/__init__.py +4 -0
maxframe/tensor/misc/atleast_1d.py +72 -0
maxframe/tensor/misc/atleast_2d.py +70 -0
maxframe/tensor/misc/atleast_3d.py +85 -0
maxframe/tensor/misc/tests/__init__.py +13 -0
maxframe/tensor/{base → misc}/transpose.py +22 -18
maxframe/tensor/misc/unique.py +205 -0
maxframe/tensor/operators.py +1 -7
maxframe/tensor/random/core.py +1 -1
maxframe/tensor/reduction/count_nonzero.py +2 -1
maxframe/tensor/reduction/mean.py +1 -0
maxframe/tensor/reduction/nanmean.py +1 -0
maxframe/tensor/reduction/nanvar.py +2 -0
maxframe/tensor/reduction/tests/test_reduction.py +12 -1
maxframe/tensor/reduction/var.py +2 -0
maxframe/tensor/statistics/quantile.py +2 -2
maxframe/tensor/utils.py +2 -22
maxframe/tests/test_protocol.py +34 -0
maxframe/tests/test_utils.py +0 -12
maxframe/tests/utils.py +17 -2
maxframe/typing_.py +4 -1
maxframe/udf.py +62 -3
maxframe/utils.py +112 -86
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
maxframe_client/__init__.py +0 -1
maxframe_client/clients/framedriver.py +4 -1
maxframe_client/fetcher.py +123 -54
maxframe_client/session/consts.py +3 -0
maxframe_client/session/graph.py +8 -2
maxframe_client/session/odps.py +223 -40
maxframe_client/session/task.py +108 -80
maxframe_client/tests/test_fetcher.py +21 -3
maxframe_client/tests/test_session.py +136 -8
maxframe/core/entity/chunks.py +0 -68
maxframe/core/entity/fuse.py +0 -73
maxframe/core/graph/builder/chunk.py +0 -430
maxframe/odpsio/tableio.py +0 -300
maxframe/odpsio/volumeio.py +0 -95
maxframe_client/clients/spe.py +0 -104
/maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
/maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
/maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
/maxframe/tensor/{base → misc}/astype.py +0 -0
/maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
/maxframe/tensor/{base → misc}/ravel.py +0 -0
/maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
/maxframe/tensor/{base → misc}/where.py +0 -0
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/indexing/align.py CHANGED Viewed

@@ -138,7 +138,7 @@ class DataFrameAlign(DataFrameOperator, DataFrameOperatorMixin):
                 series_index = rhs.index_value.to_pandas()
                 dtypes = lhs.dtypes.reindex(
                     lhs.dtypes.index.join(series_index, how=self.join)
-                ).fillna(np.dtype(np.float_))
+                ).fillna(np.dtype(float))
                 l_shape[1] = r_size = len(dtypes)
                 col_val = r_idx_val = parse_index(dtypes.index, store_data=True)

maxframe/dataframe/indexing/loc.py CHANGED Viewed

@@ -25,13 +25,14 @@ from ...core import ENTITY_TYPE, OutputType
 from ...serialization.serializables import AnyField, KeyField, ListField
 from ...tensor.datasource import asarray
 from ...tensor.utils import calc_sliced_size, filter_inputs
-from ...utils import is_full_slice, lazy_import
+from ...utils import is_full_slice, lazy_import, pd_release_version
 from ..core import DATAFRAME_TYPE, IndexValue
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import parse_index
 from .iloc import DataFrameIlocSetItem
 cudf = lazy_import("cudf")
+with_slice_locs_kind = pd_release_version < (1, 4, 0)
 def process_loc_indexes(inp, indexes, fetch_index: bool = True):
@@ -210,9 +211,10 @@ class DataFrameLocGetItem(DataFrameOperator, DataFrameOperatorMixin):
             if axis == 1:
                 param["dtypes"] = inp.dtypes
         elif input_index_value.has_value():
-            start, end = pd_index.slice_locs(
-                index.start, index.stop, index.step, kind="loc"
-            )
+            kw = {}
+            if with_slice_locs_kind:
+                kw["kind"] = "loc"
+            start, end = pd_index.slice_locs(index.start, index.stop, index.step, **kw)
             slc = slice(start, end, index.step)
             size = calc_sliced_size(inp.shape[axis], slc)
             param["shape"] = size

maxframe/dataframe/indexing/rename.py CHANGED Viewed

@@ -17,7 +17,7 @@ import warnings
 from ... import opcodes
 from ...core import get_output_types
 from ...serialization.serializables import AnyField, StringField
-from ..core import SERIES_TYPE
+from ..core import INDEX_TYPE, SERIES_TYPE
 from ..operators import DataFrameOperator, DataFrameOperatorMixin
 from ..utils import build_df, build_series, parse_index, validate_axis
@@ -73,6 +73,8 @@ class DataFrameRename(DataFrameOperator, DataFrameOperatorMixin):
             params["index_value"] = parse_index(new_index)
         if df.ndim == 1:
             params["name"] = new_df.name
+            if isinstance(df, INDEX_TYPE):
+                params["names"] = new_df.names
         return self.new_tileable([df], **params)
@@ -246,6 +248,7 @@ def df_rename(
     )
+# fixme https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/58
 def series_rename(
     series,
     index=None,
@@ -303,11 +306,6 @@ def series_rename(
     1    2
     2    3
     Name: my_name, dtype: int64
-    >>> s.rename(lambda x: x ** 2).execute()  # function, changes labels.execute()
-    0    1
-    1    2
-    4    3
-    dtype: int64
     >>> s.rename({1: 3, 2: 5}).execute()  # mapping, changes labels.execute()
     0    1
     3    2
@@ -385,6 +383,7 @@ def index_rename(index, name, inplace=False):
         return ret
+# fixme https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/59
 def index_set_names(index, names, level=None, inplace=False):
     """
     Set Index or MultiIndex name.
@@ -419,28 +418,6 @@ def index_set_names(index, names, level=None, inplace=False):
     Int64Index([1, 2, 3, 4], dtype='int64')
     >>> idx.set_names('quarter').execute()
     Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')
-    >>> idx = md.MultiIndex.from_product([['python', 'cobra'],
-    ...                                   [2018, 2019]])
-    >>> idx.execute()
-    MultiIndex([('python', 2018),
-                ('python', 2019),
-                ( 'cobra', 2018),
-                ( 'cobra', 2019)],
-               )
-    >>> idx.set_names(['kind', 'year'], inplace=True)
-    >>> idx.execute()
-    MultiIndex([('python', 2018),
-                ('python', 2019),
-                ( 'cobra', 2018),
-                ( 'cobra', 2019)],
-               names=['kind', 'year'])
-    >>> idx.set_names('species', level=0).execute()
-    MultiIndex([('python', 2018),
-                ('python', 2019),
-                ( 'cobra', 2018),
-                ( 'cobra', 2019)],
-               names=['species', 'year'])
     """
     op = DataFrameRename(
         index_mapper=names, level=level, output_types=get_output_types(index)

maxframe/dataframe/indexing/sample.py CHANGED Viewed

@@ -195,7 +195,6 @@ def sample(
             num_legs  num_wings  num_specimen_seen
     falcon         2          2                 10
     fish           0          0                  8
     """
     axis = validate_axis(axis or 0, df_or_series)
     if axis == 1:

maxframe/dataframe/indexing/set_index.py CHANGED Viewed

@@ -31,7 +31,7 @@ class DataFrameSetIndex(DataFrameOperator, DataFrameOperatorMixin):
         super().__init__(_output_types=output_types, **kw)
     def __call__(self, df):
-        new_df = build_empty_df(df.dtypes).set_index(
+        new_df = build_empty_df(df.dtypes, index=df.index_value.to_pandas()).set_index(
             keys=self.keys,
             drop=self.drop,
             append=self.append,
@@ -47,6 +47,73 @@ class DataFrameSetIndex(DataFrameOperator, DataFrameOperatorMixin):
 def set_index(df, keys, drop=True, append=False, inplace=False, verify_integrity=False):
+    # TODO add support for set index by series, index, mt.ndarray, etc.
+    """
+    Set the DataFrame index using existing columns.
+    Set the DataFrame index (row labels) using one or more existing
+    columns. The index can replace the existing index or expand on it.
+    Parameters
+    ----------
+    keys : label or array-like or list of labels
+        This parameter can be either a single column key, or a list containing column keys.
+    drop : bool, default True
+        Delete columns to be used as the new index.
+    append : bool, default False
+        Whether to append columns to existing index.
+    inplace : bool, default False
+        If True, modifies the DataFrame in place (do not create a new object).
+    verify_integrity : bool, default False
+        Check the new index for duplicates. Otherwise defer the check until
+        necessary. Setting to False will improve the performance of this
+        method.
+    Returns
+    -------
+    DataFrame or None
+        Changed row labels or None if ``inplace=True``.
+    See Also
+    --------
+    DataFrame.reset_index : Opposite of set_index.
+    DataFrame.reindex : Change to new indices or expand indices.
+    DataFrame.reindex_like : Change to same indices as other DataFrame.
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame({'month': [1, 4, 7, 10],
+    ...                    'year': [2012, 2014, 2013, 2014],
+    ...                    'sale': [55, 40, 84, 31]})
+    >>> df
+       month  year  sale
+    0      1  2012    55
+    1      4  2014    40
+    2      7  2013    84
+    3     10  2014    31
+    Set the index to become the 'month' column:
+    >>> df.set_index('month')
+           year  sale
+    month
+    1      2012    55
+    4      2014    40
+    7      2013    84
+    10     2014    31
+    Create a MultiIndex using columns 'year' and 'month':
+    >>> df.set_index(['year', 'month'])
+                sale
+    year  month
+    2012  1     55
+    2014  4     40
+    2013  7     84
+    2014  10    31
+    """
     op = DataFrameSetIndex(
         keys=keys,
         drop=drop,

maxframe/dataframe/initializer.py CHANGED Viewed

@@ -15,6 +15,7 @@
 from typing import Union
 import pandas as pd
+from pandas.api.types import is_list_like
 from pandas.core.dtypes.common import pandas_dtype
 from ..core import ENTITY_TYPE
@@ -61,6 +62,8 @@ class DataFrame(_Frame, metaclass=InitializerMeta):
         num_partitions=None,
     ):
         need_repart = False
+        if columns is not None and not is_list_like(columns):
+            raise ValueError("columns must be a list-like object")
         if isinstance(data, TENSOR_TYPE):
             if chunk_size is not None:
                 data = data.rechunk(chunk_size)
@@ -69,7 +72,10 @@ class DataFrame(_Frame, metaclass=InitializerMeta):
             )
             need_repart = num_partitions is not None
         elif isinstance(data, SERIES_TYPE):
-            df = data.to_frame()
+            if columns is not None and len(columns) != 1:
+                raise ValueError("columns' length must be 1 when data is Series")
+            col_name = columns[0] if columns else None
+            df = data.to_frame(name=col_name)
             need_repart = num_partitions is not None
         elif isinstance(data, DATAFRAME_TYPE):
             if not hasattr(data, "data"):
@@ -77,6 +83,10 @@ class DataFrame(_Frame, metaclass=InitializerMeta):
                 df = _Frame(data)
             else:
                 df = data
+            if columns is not None:
+                if len(df.columns) != len(columns):
+                    raise ValueError("columns' length must be equal to the data's")
+                df.columns = columns
             need_repart = num_partitions is not None
         elif isinstance(data, dict) and self._can_process_by_1d_tileables(data):
             # data is a dict and some value is tensor

maxframe/dataframe/merge/__init__.py CHANGED Viewed

@@ -14,7 +14,15 @@
 from .append import DataFrameAppend, append
 from .concat import DataFrameConcat, concat
-from .merge import DataFrameMerge, DataFrameMergeAlign, join, merge
+from .merge import (
+    DataFrameMerge,
+    DataFrameMergeAlign,
+    DistributedMapJoinHint,
+    MapJoinHint,
+    SkewJoinHint,
+    join,
+    merge,
+)
 def _install():

maxframe/dataframe/merge/concat.py CHANGED Viewed

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import List, Union
 import pandas as pd
@@ -24,6 +25,7 @@ from ...serialization.serializables import (
     StringField,
 )
 from ...utils import lazy_import
+from ..core import DataFrame, Series
 from ..operators import SERIES_TYPE, DataFrameOperator, DataFrameOperatorMixin
 from ..utils import build_empty_df, build_empty_series, parse_index, validate_axis
@@ -55,41 +57,53 @@ class DataFrameConcat(DataFrameOperator, DataFrameOperatorMixin):
         return self.names
     @classmethod
-    def _concat_index(cls, prev_index: pd.Index, cur_index: pd.Index):
-        if isinstance(prev_index, pd.RangeIndex) and isinstance(
-            cur_index, pd.RangeIndex
-        ):
-            # handle RangeIndex that append may generate huge amount of data
-            # e.g. pd.RangeIndex(10_000) and pd.RangeIndex(10_000)
-            # will generate a Int64Index full of data
-            # for details see GH#1647
-            prev_stop = prev_index.start + prev_index.size * prev_index.step
-            cur_start = cur_index.start
-            if prev_stop == cur_start and prev_index.step == cur_index.step:
-                # continuous RangeIndex, still return RangeIndex
-                return prev_index.append(cur_index)
-            else:
-                # otherwise, return an empty index
-                return pd.Index([], dtype=prev_index.dtype)
-        elif isinstance(prev_index, pd.RangeIndex):
-            return pd.Index([], prev_index.dtype).append(cur_index)
-        elif isinstance(cur_index, pd.RangeIndex):
-            return prev_index.append(pd.Index([], cur_index.dtype))
-        return prev_index.append(cur_index)
+    def _concat_index(cls, df_or_series_list: Union[List[DataFrame], List[Series]]):
+        concat_index = None
+        all_indexes_have_value = all(
+            input.index_value.has_value() for input in df_or_series_list
+        )
+        def _concat(prev_index: pd.Index, cur_index: pd.Index):
+            if prev_index is None:
+                return cur_index
+            if (
+                all_indexes_have_value
+                and isinstance(prev_index, pd.RangeIndex)
+                and isinstance(cur_index, pd.RangeIndex)
+            ):
+                # handle RangeIndex that append may generate huge amount of data
+                # e.g. pd.RangeIndex(10_000) and pd.RangeIndex(10_000)
+                # will generate a Int64Index full of data
+                # for details see GH#1647
+                prev_stop = prev_index.start + prev_index.size * prev_index.step
+                cur_start = cur_index.start
+                if prev_stop == cur_start and prev_index.step == cur_index.step:
+                    # continuous RangeIndex, still return RangeIndex
+                    return prev_index.append(cur_index)
+                else:
+                    # otherwise, return an empty index
+                    return pd.Index([], dtype=prev_index.dtype)
+            elif isinstance(prev_index, pd.RangeIndex):
+                return pd.Index([], prev_index.dtype).append(cur_index)
+            elif isinstance(cur_index, pd.RangeIndex):
+                return prev_index.append(pd.Index([], cur_index.dtype))
+            return prev_index.append(cur_index)
+        for input in df_or_series_list:
+            concat_index = _concat(concat_index, input.index_value.to_pandas())
+        return concat_index
     def _call_series(self, objs):
         if self.axis == 0:
             row_length = 0
-            index = None
             for series in objs:
-                if index is None:
-                    index = series.index_value.to_pandas()
-                else:
-                    index = self._concat_index(index, series.index_value.to_pandas())
                 row_length += series.shape[0]
             if self.ignore_index:  # pragma: no cover
                 index_value = parse_index(pd.RangeIndex(row_length))
             else:
+                index = self._concat_index(objs)
                 index_value = parse_index(index, objs)
             obj_names = {obj.name for obj in objs}
             return self.new_series(
@@ -130,13 +144,8 @@ class DataFrameConcat(DataFrameOperator, DataFrameOperatorMixin):
     def _call_dataframes(self, objs):
         if self.axis == 0:
             row_length = 0
-            index = None
             empty_dfs = []
             for df in objs:
-                if index is None:
-                    index = df.index_value.to_pandas()
-                else:
-                    index = self._concat_index(index, df.index_value.to_pandas())
                 row_length += df.shape[0]
                 if df.ndim == 2:
                     empty_dfs.append(build_empty_df(df.dtypes))
@@ -153,6 +162,7 @@ class DataFrameConcat(DataFrameOperator, DataFrameOperatorMixin):
             if self.ignore_index:  # pragma: no cover
                 index_value = parse_index(pd.RangeIndex(row_length))
             else:
+                index = self._concat_index(objs)
                 index_value = parse_index(index, objs)
             new_objs = []

maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl

Potentially problematic release.

maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl