PyPI - maxframe - Versions diffs - 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl - Mend

maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of maxframe might be problematic. Click here for more details.

Files changed (214) hide show

maxframe/__init__.py +1 -0
maxframe/_utils.cp39-win32.pyd +0 -0
maxframe/codegen.py +56 -5
maxframe/config/config.py +78 -10
maxframe/config/validators.py +42 -11
maxframe/conftest.py +58 -14
maxframe/core/__init__.py +2 -16
maxframe/core/entity/__init__.py +1 -12
maxframe/core/entity/executable.py +1 -1
maxframe/core/entity/objects.py +46 -45
maxframe/core/entity/output_types.py +0 -3
maxframe/core/entity/tests/test_objects.py +43 -0
maxframe/core/entity/tileables.py +5 -78
maxframe/core/graph/__init__.py +2 -2
maxframe/core/graph/builder/__init__.py +0 -1
maxframe/core/graph/builder/base.py +5 -4
maxframe/core/graph/builder/tileable.py +4 -4
maxframe/core/graph/builder/utils.py +4 -8
maxframe/core/graph/core.cp39-win32.pyd +0 -0
maxframe/core/graph/core.pyx +4 -4
maxframe/core/graph/entity.py +9 -33
maxframe/core/operator/__init__.py +2 -9
maxframe/core/operator/base.py +3 -5
maxframe/core/operator/objects.py +0 -9
maxframe/core/operator/utils.py +55 -0
maxframe/dataframe/__init__.py +2 -1
maxframe/dataframe/arithmetic/around.py +5 -17
maxframe/dataframe/arithmetic/core.py +15 -7
maxframe/dataframe/arithmetic/docstring.py +7 -33
maxframe/dataframe/arithmetic/equal.py +4 -2
maxframe/dataframe/arithmetic/greater.py +4 -2
maxframe/dataframe/arithmetic/greater_equal.py +4 -2
maxframe/dataframe/arithmetic/less.py +2 -2
maxframe/dataframe/arithmetic/less_equal.py +4 -2
maxframe/dataframe/arithmetic/not_equal.py +4 -2
maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
maxframe/dataframe/core.py +58 -12
maxframe/dataframe/datasource/date_range.py +2 -2
maxframe/dataframe/datasource/read_odps_query.py +120 -24
maxframe/dataframe/datasource/read_odps_table.py +9 -4
maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
maxframe/dataframe/datastore/to_odps.py +28 -0
maxframe/dataframe/extensions/__init__.py +5 -0
maxframe/dataframe/extensions/flatjson.py +131 -0
maxframe/dataframe/extensions/flatmap.py +317 -0
maxframe/dataframe/extensions/reshuffle.py +1 -1
maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
maxframe/dataframe/groupby/core.py +1 -1
maxframe/dataframe/groupby/cum.py +0 -1
maxframe/dataframe/groupby/fill.py +4 -1
maxframe/dataframe/groupby/getitem.py +6 -0
maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
maxframe/dataframe/groupby/transform.py +5 -1
maxframe/dataframe/indexing/align.py +1 -1
maxframe/dataframe/indexing/loc.py +6 -4
maxframe/dataframe/indexing/rename.py +5 -28
maxframe/dataframe/indexing/sample.py +0 -1
maxframe/dataframe/indexing/set_index.py +68 -1
maxframe/dataframe/initializer.py +11 -1
maxframe/dataframe/merge/__init__.py +9 -1
maxframe/dataframe/merge/concat.py +41 -31
maxframe/dataframe/merge/merge.py +237 -3
maxframe/dataframe/merge/tests/test_merge.py +126 -1
maxframe/dataframe/misc/__init__.py +4 -0
maxframe/dataframe/misc/apply.py +6 -11
maxframe/dataframe/misc/case_when.py +141 -0
maxframe/dataframe/misc/describe.py +2 -2
maxframe/dataframe/misc/drop_duplicates.py +8 -8
maxframe/dataframe/misc/eval.py +4 -0
maxframe/dataframe/misc/memory_usage.py +2 -2
maxframe/dataframe/misc/pct_change.py +1 -83
maxframe/dataframe/misc/pivot_table.py +262 -0
maxframe/dataframe/misc/tests/test_misc.py +93 -1
maxframe/dataframe/misc/transform.py +1 -30
maxframe/dataframe/misc/value_counts.py +4 -17
maxframe/dataframe/missing/dropna.py +1 -1
maxframe/dataframe/missing/fillna.py +5 -5
maxframe/dataframe/operators.py +1 -17
maxframe/dataframe/plotting/core.py +2 -2
maxframe/dataframe/reduction/core.py +4 -3
maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
maxframe/dataframe/sort/sort_values.py +1 -11
maxframe/dataframe/statistics/corr.py +3 -3
maxframe/dataframe/statistics/quantile.py +13 -19
maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
maxframe/dataframe/tests/test_initializer.py +33 -2
maxframe/dataframe/utils.py +33 -11
maxframe/dataframe/window/expanding.py +5 -3
maxframe/dataframe/window/tests/test_expanding.py +2 -2
maxframe/errors.py +13 -0
maxframe/extension.py +12 -0
maxframe/io/__init__.py +13 -0
maxframe/io/objects/__init__.py +24 -0
maxframe/io/objects/core.py +140 -0
maxframe/io/objects/tensor.py +76 -0
maxframe/io/objects/tests/__init__.py +13 -0
maxframe/io/objects/tests/test_object_io.py +97 -0
maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
maxframe/{odpsio → io/odpsio}/schema.py +38 -16
maxframe/io/odpsio/tableio.py +719 -0
maxframe/io/odpsio/tests/__init__.py +13 -0
maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
maxframe/io/odpsio/volumeio.py +63 -0
maxframe/learn/contrib/__init__.py +3 -1
maxframe/learn/contrib/graph/__init__.py +15 -0
maxframe/learn/contrib/graph/connected_components.py +215 -0
maxframe/learn/contrib/graph/tests/__init__.py +13 -0
maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
maxframe/learn/contrib/llm/__init__.py +16 -0
maxframe/learn/contrib/llm/core.py +54 -0
maxframe/learn/contrib/llm/models/__init__.py +14 -0
maxframe/learn/contrib/llm/models/dashscope.py +73 -0
maxframe/learn/contrib/llm/multi_modal.py +42 -0
maxframe/learn/contrib/llm/text.py +42 -0
maxframe/learn/contrib/utils.py +52 -0
maxframe/learn/contrib/xgboost/__init__.py +26 -0
maxframe/learn/contrib/xgboost/classifier.py +110 -0
maxframe/learn/contrib/xgboost/core.py +241 -0
maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
maxframe/learn/contrib/xgboost/predict.py +121 -0
maxframe/learn/contrib/xgboost/regressor.py +71 -0
maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
maxframe/learn/contrib/xgboost/train.py +132 -0
maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
maxframe/learn/utils/__init__.py +15 -0
maxframe/learn/utils/core.py +29 -0
maxframe/lib/mmh3.cp39-win32.pyd +0 -0
maxframe/lib/mmh3.pyi +43 -0
maxframe/lib/sparse/tests/test_sparse.py +15 -15
maxframe/lib/wrapped_pickle.py +2 -1
maxframe/opcodes.py +11 -0
maxframe/protocol.py +154 -27
maxframe/remote/core.py +4 -8
maxframe/serialization/__init__.py +1 -0
maxframe/serialization/core.cp39-win32.pyd +0 -0
maxframe/serialization/core.pxd +3 -0
maxframe/serialization/core.pyi +64 -0
maxframe/serialization/core.pyx +67 -26
maxframe/serialization/exception.py +1 -1
maxframe/serialization/pandas.py +52 -17
maxframe/serialization/serializables/core.py +180 -15
maxframe/serialization/serializables/field_type.py +4 -1
maxframe/serialization/serializables/tests/test_serializable.py +54 -5
maxframe/serialization/tests/test_serial.py +2 -1
maxframe/session.py +37 -2
maxframe/tensor/__init__.py +81 -2
maxframe/tensor/arithmetic/isclose.py +1 -0
maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
maxframe/tensor/core.py +5 -136
maxframe/tensor/datasource/array.py +7 -2
maxframe/tensor/datasource/full.py +1 -1
maxframe/tensor/datasource/scalar.py +1 -1
maxframe/tensor/datasource/tests/test_datasource.py +1 -1
maxframe/tensor/indexing/flatnonzero.py +1 -1
maxframe/tensor/indexing/getitem.py +2 -0
maxframe/tensor/merge/__init__.py +2 -0
maxframe/tensor/merge/concatenate.py +101 -0
maxframe/tensor/merge/tests/test_merge.py +30 -1
maxframe/tensor/merge/vstack.py +74 -0
maxframe/tensor/{base → misc}/__init__.py +4 -0
maxframe/tensor/misc/atleast_1d.py +72 -0
maxframe/tensor/misc/atleast_2d.py +70 -0
maxframe/tensor/misc/atleast_3d.py +85 -0
maxframe/tensor/misc/tests/__init__.py +13 -0
maxframe/tensor/{base → misc}/transpose.py +22 -18
maxframe/tensor/misc/unique.py +205 -0
maxframe/tensor/operators.py +1 -7
maxframe/tensor/random/core.py +1 -1
maxframe/tensor/reduction/count_nonzero.py +2 -1
maxframe/tensor/reduction/mean.py +1 -0
maxframe/tensor/reduction/nanmean.py +1 -0
maxframe/tensor/reduction/nanvar.py +2 -0
maxframe/tensor/reduction/tests/test_reduction.py +12 -1
maxframe/tensor/reduction/var.py +2 -0
maxframe/tensor/statistics/quantile.py +2 -2
maxframe/tensor/utils.py +2 -22
maxframe/tests/test_protocol.py +34 -0
maxframe/tests/test_utils.py +0 -12
maxframe/tests/utils.py +17 -2
maxframe/typing_.py +4 -1
maxframe/udf.py +62 -3
maxframe/utils.py +112 -86
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
maxframe_client/__init__.py +0 -1
maxframe_client/clients/framedriver.py +4 -1
maxframe_client/fetcher.py +123 -54
maxframe_client/session/consts.py +3 -0
maxframe_client/session/graph.py +8 -2
maxframe_client/session/odps.py +223 -40
maxframe_client/session/task.py +108 -80
maxframe_client/tests/test_fetcher.py +21 -3
maxframe_client/tests/test_session.py +136 -8
maxframe/core/entity/chunks.py +0 -68
maxframe/core/entity/fuse.py +0 -73
maxframe/core/graph/builder/chunk.py +0 -430
maxframe/odpsio/tableio.py +0 -300
maxframe/odpsio/volumeio.py +0 -95
maxframe_client/clients/spe.py +0 -104
/maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
/maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
/maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
/maxframe/tensor/{base → misc}/astype.py +0 -0
/maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
/maxframe/tensor/{base → misc}/ravel.py +0 -0
/maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
/maxframe/tensor/{base → misc}/where.py +0 -0
{maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0

maxframe/dataframe/__init__.py CHANGED Viewed

@@ -46,6 +46,7 @@ from .misc.cut import cut
 from .misc.eval import maxframe_eval as eval  # pylint: disable=redefined-builtin
 from .misc.get_dummies import get_dummies
 from .misc.melt import melt
+from .misc.pivot_table import pivot_table
 from .misc.qcut import qcut
 from .misc.to_numeric import to_numeric
 from .missing import isna, isnull, notna, notnull
@@ -53,7 +54,7 @@ from .reduction import CustomReduction, unique
 from .tseries.to_datetime import to_datetime
 try:
-    from pandas import NA, Timestamp
+    from pandas import NA, NaT, Timestamp
 except ImportError:  # pragma: no cover
     pass

maxframe/dataframe/arithmetic/around.py CHANGED Viewed

@@ -43,20 +43,20 @@ def around(df, decimals=0, *args, **kwargs):
     return op(df)
+# FIXME Series input of decimals not supported yet
 around.__frame_doc__ = """
 Round a DataFrame to a variable number of decimal places.
 Parameters
 ----------
-decimals : int, dict, Series
+decimals : int, dict
     Number of decimal places to round each column to. If an int is
     given, round each column to the same number of places.
     Otherwise dict and Series round to variable numbers of places.
     Column names should be in the keys if `decimals` is a
-    dict-like, or in the index if `decimals` is a Series. Any
-    columns not included in `decimals` will be left as is. Elements
-    of `decimals` which are not columns of the input will be
-    ignored.
+    dict-like. Any columns not included in `decimals` will be left
+    as is. Elements of `decimals` which are not columns of the
+    input will be ignored.
 *args
     Additional keywords have no effect but might be accepted for
     compatibility with numpy.
@@ -107,18 +107,6 @@ places as value
 1   0.0   1.0
 2   0.7   0.0
 3   0.2   0.0
-Using a Series, the number of places for specific columns can be
-specified with the column names as index and the number of
-decimal places as value
->>> decimals = md.Series([0, 1], index=['cats', 'dogs'])
->>> df.round(decimals).execute()
-    dogs  cats
-0   0.2   0.0
-1   0.0   1.0
-2   0.7   0.0
-3   0.2   0.0
 """
 around.__series_doc__ = """
 Round each value in a Series to the given number of decimals.

maxframe/dataframe/arithmetic/core.py CHANGED Viewed

@@ -39,7 +39,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
         raise NotImplementedError
     @classmethod
-    def _calc_properties(cls, x1, x2=None, axis="columns"):
+    def _calc_properties(cls, x1, x2=None, axis="columns", level=None):
         if isinstance(x1, DATAFRAME_TYPE) and (
             x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
         ):
@@ -108,7 +108,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                     index = copy.copy(x1.index_value)
                     index_shape = x1.shape[0]
                 else:
-                    index = infer_index_value(x1.index_value, x2.index_value)
+                    index = infer_index_value(
+                        x1.index_value, x2.index_value, level=level
+                    )
                     if index.key == x1.index_value.key == x2.index_value.key and (
                         not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
                     ):
@@ -141,7 +143,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                         column_shape = len(dtypes)
                     else:  # pragma: no cover
                         dtypes = x1.dtypes  # FIXME
-                        columns = infer_index_value(x1.columns_value, x2.index_value)
+                        columns = infer_index_value(
+                            x1.columns_value, x2.index_value, level=level
+                        )
                         column_shape = np.nan
             else:
                 assert axis == "index" or axis == 0
@@ -169,7 +173,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                                 ],
                                 index=x1.dtypes.index,
                             )
-                        index = infer_index_value(x1.index_value, x2.index_value)
+                        index = infer_index_value(
+                            x1.index_value, x2.index_value, level=level
+                        )
                         index_shape = np.nan
             return {
                 "shape": (index_shape, column_shape),
@@ -187,7 +193,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
                     index = copy.copy(x1.index_value)
                     index_shape = x1.shape[0]
                 else:
-                    index = infer_index_value(x1.index_value, x2.index_value)
+                    index = infer_index_value(
+                        x1.index_value, x2.index_value, level=level
+                    )
                     if index.key == x1.index_value.key == x2.index_value.key and (
                         not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
                     ):
@@ -237,14 +245,14 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
         self._check_inputs(x1, x2)
         if isinstance(x1, DATAFRAME_TYPE) or isinstance(x2, DATAFRAME_TYPE):
             df1, df2 = (x1, x2) if isinstance(x1, DATAFRAME_TYPE) else (x2, x1)
-            kw = self._calc_properties(df1, df2, axis=self.axis)
+            kw = self._calc_properties(df1, df2, axis=self.axis, level=self.level)
             if not pd.api.types.is_scalar(df2):
                 return self.new_dataframe([x1, x2], **kw)
             else:
                 return self.new_dataframe([df1], **kw)
         if isinstance(x1, SERIES_TYPE) or isinstance(x2, SERIES_TYPE):
             s1, s2 = (x1, x2) if isinstance(x1, SERIES_TYPE) else (x2, x1)
-            kw = self._calc_properties(s1, s2)
+            kw = self._calc_properties(s1, s2, level=self.level)
             if not pd.api.types.is_scalar(s2):
                 return self.new_series([x1, x2], **kw)
             else:

maxframe/dataframe/arithmetic/docstring.py CHANGED Viewed

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# FIXME：https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/17
 _flex_doc_FRAME = """
 Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
 Equivalent to ``{equiv}``, but with support to substitute a fill_value
@@ -127,44 +128,15 @@ circle          0
 triangle        3
 rectangle       4
->>> (df * other).execute()
-           angles  degrees
-circle          0      NaN
-triangle        9      NaN
-rectangle      16      NaN
 >>> df.mul(other, fill_value=0).execute()
            angles  degrees
 circle          0      0.0
 triangle        9      0.0
 rectangle      16      0.0
-Divide by a MultiIndex by level.
->>> df_multindex = md.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
-...                              'degrees': [360, 180, 360, 360, 540, 720]}},
-...                             index=[['A', 'A', 'A', 'B', 'B', 'B'],
-...                                    ['circle', 'triangle', 'rectangle',
-...                                     'square', 'pentagon', 'hexagon']])
->>> df_multindex.execute()
-             angles  degrees
-A circle          0      360
-  triangle        3      180
-  rectangle       4      360
-B square          4      360
-  pentagon        5      540
-  hexagon         6      720
->>> df.div(df_multindex, level=1, fill_value=0).execute()
-             angles  degrees
-A circle        NaN      1.0
-  triangle      1.0      1.0
-  rectangle     1.0      1.0
-B square        0.0      0.0
-  pentagon      0.0      0.0
-  hexagon       0.0      0.0
 """
+# FIXME：https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/28
 _flex_doc_SERIES = """
 Return {desc} of series and other, element-wise (binary operator `{op_name}`).
@@ -257,7 +229,8 @@ Mismatched indices will be unioned together.
 Examples
 --------
->>> df = pd.DataFrame({{'cost': [250, 150, 100],
+>>> import maxframe.dataframe as md
+>>> df = md.DataFrame({{'cost': [250, 150, 100],
 ...                    'revenue': [100, 250, 300]}},
 ...                   index=['A', 'B', 'C'])
 >>> df.execute()
@@ -317,7 +290,7 @@ C   True    False
 Compare to a DataFrame of different shape.
->>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
+>>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
 ...                      index=['A', 'B', 'C', 'D'])
 >>> other.execute()
    revenue
@@ -335,7 +308,7 @@ D  False    False
 Compare to a MultiIndex by level.
->>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
+>>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
 ...                              'revenue': [100, 250, 300, 200, 175, 225]}},
 ...                             index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
 ...                                    ['A', 'B', 'C', 'A', 'B', 'C']])
@@ -356,6 +329,7 @@ Q1 A   True     True
 Q2 A  False     True
    B   True    False
    C   True    False
 """

maxframe/dataframe/arithmetic/equal.py CHANGED Viewed

@@ -51,6 +51,8 @@ dtype: bool
 @bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
-def eq(df, other, axis="columns", level=None):
-    op = DataFrameEqual(axis=axis, level=level, lhs=df, rhs=other)
+def eq(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)

maxframe/dataframe/arithmetic/greater.py CHANGED Viewed

@@ -52,6 +52,8 @@ dtype: bool
 @bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
-def gt(df, other, axis="columns", level=None):
-    op = DataFrameGreater(axis=axis, level=level, lhs=df, rhs=other)
+def gt(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameGreater(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)

maxframe/dataframe/arithmetic/greater_equal.py CHANGED Viewed

@@ -52,6 +52,8 @@ dtype: bool
 @bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
-def ge(df, other, axis="columns", level=None):
-    op = DataFrameGreaterEqual(axis=axis, level=level, lhs=df, rhs=other)
+def ge(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameGreaterEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)

maxframe/dataframe/arithmetic/less.py CHANGED Viewed

@@ -52,6 +52,6 @@ dtype: bool
 @bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
-def lt(df, other, axis="columns", level=None):
-    op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
+def lt(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
     return op(df, other)

maxframe/dataframe/arithmetic/less_equal.py CHANGED Viewed

@@ -52,6 +52,8 @@ dtype: bool
 @bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
-def le(df, other, axis="columns", level=None):
-    op = DataFrameLessEqual(axis=axis, level=level, lhs=df, rhs=other)
+def le(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameLessEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)

maxframe/dataframe/arithmetic/not_equal.py CHANGED Viewed

@@ -51,6 +51,8 @@ dtype: bool
 @bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
-def ne(df, other, axis="columns", level=None):
-    op = DataFrameNotEqual(axis=axis, level=level, lhs=df, rhs=other)
+def ne(df, other, axis="columns", level=None, fill_value=None):
+    op = DataFrameNotEqual(
+        axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
+    )
     return op(df, other)

maxframe/dataframe/arithmetic/tests/test_arithmetic.py CHANGED Viewed

@@ -22,6 +22,7 @@ import pandas as pd
 import pytest
 from ....core import OperatorType
+from ....tests.utils import assert_mf_index_dtype
 from ....utils import dataslots
 from ...core import IndexValue
 from ...datasource.dataframe import from_pandas
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -239,6 +240,28 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
     assert df2.columns_value.key != df1.columns_value.key
+@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
+def test_dataframe_and_series_with_multiindex(func_name, func_opts):
+    data1 = pd.DataFrame(
+        np.random.rand(10, 10),
+        index=pd.MultiIndex.from_arrays(
+            [list("AAAAABBBBB"), [4, 9, 3, 2, 1, 5, 8, 6, 7, 10]]
+        ),
+        columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7],
+    )
+    data1 = to_boolean_if_needed(func_opts.func_name, data1)
+    df1 = from_pandas(data1, chunk_size=5)
+    s1 = from_pandas_series(data1[10].reset_index(level=0, drop=True), chunk_size=6)
+    df2 = getattr(df1, func_opts.func_name)(s1, level=1, axis=0)
+    # test df2's index and columns
+    assert df2.shape == (np.nan, df1.shape[1])
+    assert df2.index_value.key != df1.index_value.key
+    assert df2.index_value.names == df1.index_value.names
+    assert df2.columns_value.key == df1.columns_value.key
 @pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
 def test_series_and_series_with_align_map(func_name, func_opts):
     data1 = pd.DataFrame(
@@ -348,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -381,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -411,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
     pd.testing.assert_index_equal(
         df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
     )
-    assert isinstance(df6.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df6.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -446,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -479,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -512,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
     pd.testing.assert_index_equal(
         df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
     )
-    assert isinstance(df3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df3.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -536,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
     pd.testing.assert_index_equal(
         df2.columns_value.to_pandas(), func_opts.func(data, data).columns
     )
-    assert isinstance(df2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df2.index_value.value, np.int64)
     pd.testing.assert_index_equal(
         df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
@@ -568,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
     pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
     pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
-    assert isinstance(result.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result.index_value.value, np.int64)
     pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
-    assert isinstance(result2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result2.index_value.value, np.int64)
     pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
-    assert isinstance(result3.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result3.index_value.value, np.int64)
     pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
-    assert isinstance(result4.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result4.index_value.value, np.int64)
     pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
-    assert isinstance(result5.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(result5.index_value.value, np.int64)
     if "builtin_function_or_method" not in str(type(func_opts.func)):
         # skip NotImplemented test for comparison function
@@ -657,7 +680,7 @@ def test_abs():
     pd.testing.assert_index_equal(
         df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
     )
-    assert isinstance(df2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df2.index_value.value, np.int64)
     assert df2.shape == (10, 10)
@@ -675,7 +698,7 @@ def test_not():
     pd.testing.assert_index_equal(
         df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
     )
-    assert isinstance(df2.index_value.value, IndexValue.Int64Index)
+    assert_mf_index_dtype(df2.index_value.value, np.int64)
     assert df2.shape == (10, 10)

maxframe/dataframe/core.py CHANGED Viewed

@@ -35,6 +35,7 @@ from ..core import (
     register_output_types,
 )
 from ..core.entity.utils import refresh_tileable_shape
+from ..protocol import DataFrameTableMeta
 from ..serialization.serializables import (
     AnyField,
     BoolField,
@@ -59,7 +60,13 @@ from ..utils import (
     on_serialize_numpy_type,
     tokenize,
 )
-from .utils import ReprSeries, fetch_corner_data, merge_index_value, parse_index
+from .utils import (
+    ReprSeries,
+    apply_if_callable,
+    fetch_corner_data,
+    merge_index_value,
+    parse_index,
+)
 class IndexValue(Serializable):
@@ -135,6 +142,14 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
+        @property
+        def inferred_type(self):
+            return "floating" if self.dtype.kind == "f" else "integer"
     class RangeIndex(IndexBase):
         _name = AnyField("name")
         _slice = SliceField("slice")
@@ -236,6 +251,10 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
         @property
         def inferred_type(self):
             return "integer"
@@ -247,6 +266,10 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
         @property
         def inferred_type(self):
             return "integer"
@@ -258,6 +281,10 @@ class IndexValue(Serializable):
         _data = NDArrayField("data")
         _dtype = DataTypeField("dtype")
+        @property
+        def dtype(self):
+            return getattr(self, "_dtype", None)
         @property
         def inferred_type(self):
             return "floating"
@@ -616,6 +643,9 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
         if self._name is None:
             self._name = self.chunks[0].name
+    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
+        pass
     def _to_str(self, representation=False):
         if is_build_mode() or len(self._executed_sessions) == 0:
             # in build mode, or not executed, just return representation
@@ -945,6 +975,9 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
         if self._name is None:
             self._name = self.chunks[0].name
+    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
+        pass
     def _to_str(self, representation=False):
         if is_build_mode() or len(self._executed_sessions) == 0:
             # in build mode, or not executed, just return representation
@@ -978,7 +1011,7 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
         return self._to_str(representation=False)
     def __repr__(self):
-        return self._to_str(representation=False)
+        return self._to_str(representation=True)
     @property
     def dtype(self):
@@ -1073,11 +1106,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
         --------
         >>> import maxframe.dataframe as md
         >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
-        >>> s.ndim.execute()
+        >>> s.ndim
         1
         >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-        >>> df.ndim.execute()
+        >>> df.ndim
         2
         """
         return super().ndim
@@ -1501,6 +1534,17 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
         refresh_index_value(self)
         refresh_dtypes(self)
+    def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
+        self._dtypes = dtypes
+        self._columns_value = parse_index(dtypes.index, store_data=True)
+        self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
+        new_shape = list(self._shape)
+        new_shape[-1] = len(dtypes)
+        self._shape = tuple(new_shape)
+    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
+        self.refresh_from_dtypes(table_meta.pd_column_dtypes)
     @property
     def dtypes(self):
         dt = getattr(self, "_dtypes", None)
@@ -1644,6 +1688,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
             raise NotImplementedError
         corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
+        if corner_data is None:
+            return
         buf = StringIO()
         max_rows = pd.get_option("display.max_rows")
@@ -1739,11 +1785,11 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
         --------
         >>> import maxframe.dataframe as md
         >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
-        >>> s.ndim.execute()
+        >>> s.ndim
         1
         >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
-        >>> df.ndim.execute()
+        >>> df.ndim
         2
         """
         return super().ndim
@@ -1997,12 +2043,6 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
         Berkeley    25.0    77.0  298.15
         """
-        def apply_if_callable(maybe_callable, obj, **kwargs):
-            if callable(maybe_callable):
-                return maybe_callable(obj, **kwargs)
-            return maybe_callable
         data = self.copy()
         for k, v in kwargs.items():
@@ -2197,6 +2237,9 @@ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
                 pd.Categorical(categories).categories, store_data=True
             )
+    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
+        pass
     def _to_str(self, representation=False):
         if is_build_mode() or len(self._executed_sessions) == 0:
             # in build mode, or not executed, just return representation
@@ -2347,6 +2390,9 @@ class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
             data_params["name"] = self.chunks[0].name
         self._data_params.update(data_params)
+    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
+        pass
     def ensure_data(self):
         from .fetch.core import DataFrameFetch

maxframe/dataframe/datasource/date_range.py CHANGED Viewed

@@ -22,7 +22,7 @@ from pandas._libs.tslibs import timezones
 from pandas.tseries.frequencies import to_offset
 from pandas.tseries.offsets import Tick
-from ... import opcodes as OperandDef
+from ... import opcodes
 from ...core import OutputType
 from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField
 from ...utils import no_default, pd_release_version
@@ -117,7 +117,7 @@ def generate_range_count(
 class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
-    _op_type_ = OperandDef.DATE_RANGE
+    _op_type_ = opcodes.DATE_RANGE
     start = AnyField("start")
     end = AnyField("end")

maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl

Potentially problematic release.

maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl