maxframe 0.1.0b5__cp38-cp38-win_amd64.whl → 1.0.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/17
|
|
15
16
|
_flex_doc_FRAME = """
|
|
16
17
|
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
|
|
17
18
|
Equivalent to ``{equiv}``, but with support to substitute a fill_value
|
|
@@ -127,44 +128,15 @@ circle 0
|
|
|
127
128
|
triangle 3
|
|
128
129
|
rectangle 4
|
|
129
130
|
|
|
130
|
-
>>> (df * other).execute()
|
|
131
|
-
angles degrees
|
|
132
|
-
circle 0 NaN
|
|
133
|
-
triangle 9 NaN
|
|
134
|
-
rectangle 16 NaN
|
|
135
|
-
|
|
136
131
|
>>> df.mul(other, fill_value=0).execute()
|
|
137
132
|
angles degrees
|
|
138
133
|
circle 0 0.0
|
|
139
134
|
triangle 9 0.0
|
|
140
135
|
rectangle 16 0.0
|
|
141
136
|
|
|
142
|
-
Divide by a MultiIndex by level.
|
|
143
|
-
|
|
144
|
-
>>> df_multindex = md.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
|
|
145
|
-
... 'degrees': [360, 180, 360, 360, 540, 720]}},
|
|
146
|
-
... index=[['A', 'A', 'A', 'B', 'B', 'B'],
|
|
147
|
-
... ['circle', 'triangle', 'rectangle',
|
|
148
|
-
... 'square', 'pentagon', 'hexagon']])
|
|
149
|
-
>>> df_multindex.execute()
|
|
150
|
-
angles degrees
|
|
151
|
-
A circle 0 360
|
|
152
|
-
triangle 3 180
|
|
153
|
-
rectangle 4 360
|
|
154
|
-
B square 4 360
|
|
155
|
-
pentagon 5 540
|
|
156
|
-
hexagon 6 720
|
|
157
|
-
|
|
158
|
-
>>> df.div(df_multindex, level=1, fill_value=0).execute()
|
|
159
|
-
angles degrees
|
|
160
|
-
A circle NaN 1.0
|
|
161
|
-
triangle 1.0 1.0
|
|
162
|
-
rectangle 1.0 1.0
|
|
163
|
-
B square 0.0 0.0
|
|
164
|
-
pentagon 0.0 0.0
|
|
165
|
-
hexagon 0.0 0.0
|
|
166
137
|
"""
|
|
167
138
|
|
|
139
|
+
# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/28
|
|
168
140
|
_flex_doc_SERIES = """
|
|
169
141
|
Return {desc} of series and other, element-wise (binary operator `{op_name}`).
|
|
170
142
|
|
|
@@ -257,7 +229,8 @@ Mismatched indices will be unioned together.
|
|
|
257
229
|
|
|
258
230
|
Examples
|
|
259
231
|
--------
|
|
260
|
-
>>>
|
|
232
|
+
>>> import maxframe.dataframe as md
|
|
233
|
+
>>> df = md.DataFrame({{'cost': [250, 150, 100],
|
|
261
234
|
... 'revenue': [100, 250, 300]}},
|
|
262
235
|
... index=['A', 'B', 'C'])
|
|
263
236
|
>>> df.execute()
|
|
@@ -317,7 +290,7 @@ C True False
|
|
|
317
290
|
|
|
318
291
|
Compare to a DataFrame of different shape.
|
|
319
292
|
|
|
320
|
-
>>> other =
|
|
293
|
+
>>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
|
|
321
294
|
... index=['A', 'B', 'C', 'D'])
|
|
322
295
|
>>> other.execute()
|
|
323
296
|
revenue
|
|
@@ -335,7 +308,7 @@ D False False
|
|
|
335
308
|
|
|
336
309
|
Compare to a MultiIndex by level.
|
|
337
310
|
|
|
338
|
-
>>> df_multindex =
|
|
311
|
+
>>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
|
|
339
312
|
... 'revenue': [100, 250, 300, 200, 175, 225]}},
|
|
340
313
|
... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
|
|
341
314
|
... ['A', 'B', 'C', 'A', 'B', 'C']])
|
|
@@ -356,6 +329,7 @@ Q1 A True True
|
|
|
356
329
|
Q2 A False True
|
|
357
330
|
B True False
|
|
358
331
|
C True False
|
|
332
|
+
|
|
359
333
|
"""
|
|
360
334
|
|
|
361
335
|
|
|
@@ -51,6 +51,8 @@ dtype: bool
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
@bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
|
|
54
|
-
def eq(df, other, axis="columns", level=None):
|
|
55
|
-
op = DataFrameEqual(
|
|
54
|
+
def eq(df, other, axis="columns", level=None, fill_value=None):
|
|
55
|
+
op = DataFrameEqual(
|
|
56
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
57
|
+
)
|
|
56
58
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
|
|
55
|
-
def gt(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameGreater(
|
|
55
|
+
def gt(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameGreater(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
|
|
55
|
-
def ge(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameGreaterEqual(
|
|
55
|
+
def ge(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameGreaterEqual(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -52,6 +52,6 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
|
|
55
|
-
def lt(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
|
|
55
|
+
def lt(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
|
|
57
57
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
|
|
55
|
-
def le(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameLessEqual(
|
|
55
|
+
def le(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameLessEqual(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -51,6 +51,8 @@ dtype: bool
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
@bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
|
|
54
|
-
def ne(df, other, axis="columns", level=None):
|
|
55
|
-
op = DataFrameNotEqual(
|
|
54
|
+
def ne(df, other, axis="columns", level=None, fill_value=None):
|
|
55
|
+
op = DataFrameNotEqual(
|
|
56
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
57
|
+
)
|
|
56
58
|
return op(df, other)
|
|
@@ -22,6 +22,7 @@ import pandas as pd
|
|
|
22
22
|
import pytest
|
|
23
23
|
|
|
24
24
|
from ....core import OperatorType
|
|
25
|
+
from ....tests.utils import assert_mf_index_dtype
|
|
25
26
|
from ....utils import dataslots
|
|
26
27
|
from ...core import IndexValue
|
|
27
28
|
from ...datasource.dataframe import from_pandas
|
|
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
164
165
|
pd.testing.assert_index_equal(
|
|
165
166
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
166
167
|
)
|
|
167
|
-
|
|
168
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
168
169
|
pd.testing.assert_index_equal(
|
|
169
170
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
170
171
|
)
|
|
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
176
177
|
pd.testing.assert_index_equal(
|
|
177
178
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
178
179
|
)
|
|
179
|
-
|
|
180
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
180
181
|
pd.testing.assert_index_equal(
|
|
181
182
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
182
183
|
)
|
|
@@ -239,6 +240,28 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
|
|
|
239
240
|
assert df2.columns_value.key != df1.columns_value.key
|
|
240
241
|
|
|
241
242
|
|
|
243
|
+
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
|
|
244
|
+
def test_dataframe_and_series_with_multiindex(func_name, func_opts):
|
|
245
|
+
data1 = pd.DataFrame(
|
|
246
|
+
np.random.rand(10, 10),
|
|
247
|
+
index=pd.MultiIndex.from_arrays(
|
|
248
|
+
[list("AAAAABBBBB"), [4, 9, 3, 2, 1, 5, 8, 6, 7, 10]]
|
|
249
|
+
),
|
|
250
|
+
columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7],
|
|
251
|
+
)
|
|
252
|
+
data1 = to_boolean_if_needed(func_opts.func_name, data1)
|
|
253
|
+
df1 = from_pandas(data1, chunk_size=5)
|
|
254
|
+
s1 = from_pandas_series(data1[10].reset_index(level=0, drop=True), chunk_size=6)
|
|
255
|
+
|
|
256
|
+
df2 = getattr(df1, func_opts.func_name)(s1, level=1, axis=0)
|
|
257
|
+
|
|
258
|
+
# test df2's index and columns
|
|
259
|
+
assert df2.shape == (np.nan, df1.shape[1])
|
|
260
|
+
assert df2.index_value.key != df1.index_value.key
|
|
261
|
+
assert df2.index_value.names == df1.index_value.names
|
|
262
|
+
assert df2.columns_value.key == df1.columns_value.key
|
|
263
|
+
|
|
264
|
+
|
|
242
265
|
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
|
|
243
266
|
def test_series_and_series_with_align_map(func_name, func_opts):
|
|
244
267
|
data1 = pd.DataFrame(
|
|
@@ -348,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
|
|
|
348
371
|
pd.testing.assert_index_equal(
|
|
349
372
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
350
373
|
)
|
|
351
|
-
|
|
374
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
352
375
|
pd.testing.assert_index_equal(
|
|
353
376
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
354
377
|
)
|
|
@@ -381,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
|
|
|
381
404
|
pd.testing.assert_index_equal(
|
|
382
405
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
383
406
|
)
|
|
384
|
-
|
|
407
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
385
408
|
pd.testing.assert_index_equal(
|
|
386
409
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
387
410
|
)
|
|
@@ -411,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
|
|
|
411
434
|
pd.testing.assert_index_equal(
|
|
412
435
|
df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
|
|
413
436
|
)
|
|
414
|
-
|
|
437
|
+
assert_mf_index_dtype(df6.index_value.value, np.int64)
|
|
415
438
|
pd.testing.assert_index_equal(
|
|
416
439
|
df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
417
440
|
)
|
|
@@ -446,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
|
|
|
446
469
|
pd.testing.assert_index_equal(
|
|
447
470
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
448
471
|
)
|
|
449
|
-
|
|
472
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
450
473
|
pd.testing.assert_index_equal(
|
|
451
474
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
452
475
|
)
|
|
@@ -479,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
|
|
|
479
502
|
pd.testing.assert_index_equal(
|
|
480
503
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
481
504
|
)
|
|
482
|
-
|
|
505
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
483
506
|
pd.testing.assert_index_equal(
|
|
484
507
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
485
508
|
)
|
|
@@ -512,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
|
|
|
512
535
|
pd.testing.assert_index_equal(
|
|
513
536
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
514
537
|
)
|
|
515
|
-
|
|
538
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
516
539
|
pd.testing.assert_index_equal(
|
|
517
540
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
518
541
|
)
|
|
@@ -536,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
|
|
|
536
559
|
pd.testing.assert_index_equal(
|
|
537
560
|
df2.columns_value.to_pandas(), func_opts.func(data, data).columns
|
|
538
561
|
)
|
|
539
|
-
|
|
562
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
540
563
|
pd.testing.assert_index_equal(
|
|
541
564
|
df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
542
565
|
)
|
|
@@ -568,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
|
|
|
568
591
|
pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
|
|
569
592
|
|
|
570
593
|
pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
|
|
571
|
-
|
|
594
|
+
assert_mf_index_dtype(result.index_value.value, np.int64)
|
|
572
595
|
|
|
573
596
|
pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
|
|
574
|
-
|
|
597
|
+
assert_mf_index_dtype(result2.index_value.value, np.int64)
|
|
575
598
|
|
|
576
599
|
pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
|
|
577
|
-
|
|
600
|
+
assert_mf_index_dtype(result3.index_value.value, np.int64)
|
|
578
601
|
|
|
579
602
|
pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
|
|
580
|
-
|
|
603
|
+
assert_mf_index_dtype(result4.index_value.value, np.int64)
|
|
581
604
|
|
|
582
605
|
pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
|
|
583
|
-
|
|
606
|
+
assert_mf_index_dtype(result5.index_value.value, np.int64)
|
|
584
607
|
|
|
585
608
|
if "builtin_function_or_method" not in str(type(func_opts.func)):
|
|
586
609
|
# skip NotImplemented test for comparison function
|
|
@@ -657,7 +680,7 @@ def test_abs():
|
|
|
657
680
|
pd.testing.assert_index_equal(
|
|
658
681
|
df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
|
|
659
682
|
)
|
|
660
|
-
|
|
683
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
661
684
|
assert df2.shape == (10, 10)
|
|
662
685
|
|
|
663
686
|
|
|
@@ -675,7 +698,7 @@ def test_not():
|
|
|
675
698
|
pd.testing.assert_index_equal(
|
|
676
699
|
df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
|
|
677
700
|
)
|
|
678
|
-
|
|
701
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
679
702
|
assert df2.shape == (10, 10)
|
|
680
703
|
|
|
681
704
|
|
maxframe/dataframe/core.py
CHANGED
|
@@ -142,6 +142,14 @@ class IndexValue(Serializable):
|
|
|
142
142
|
_data = NDArrayField("data")
|
|
143
143
|
_dtype = DataTypeField("dtype")
|
|
144
144
|
|
|
145
|
+
@property
|
|
146
|
+
def dtype(self):
|
|
147
|
+
return getattr(self, "_dtype", None)
|
|
148
|
+
|
|
149
|
+
@property
|
|
150
|
+
def inferred_type(self):
|
|
151
|
+
return "floating" if self.dtype.kind == "f" else "integer"
|
|
152
|
+
|
|
145
153
|
class RangeIndex(IndexBase):
|
|
146
154
|
_name = AnyField("name")
|
|
147
155
|
_slice = SliceField("slice")
|
|
@@ -243,6 +251,10 @@ class IndexValue(Serializable):
|
|
|
243
251
|
_data = NDArrayField("data")
|
|
244
252
|
_dtype = DataTypeField("dtype")
|
|
245
253
|
|
|
254
|
+
@property
|
|
255
|
+
def dtype(self):
|
|
256
|
+
return getattr(self, "_dtype", None)
|
|
257
|
+
|
|
246
258
|
@property
|
|
247
259
|
def inferred_type(self):
|
|
248
260
|
return "integer"
|
|
@@ -254,6 +266,10 @@ class IndexValue(Serializable):
|
|
|
254
266
|
_data = NDArrayField("data")
|
|
255
267
|
_dtype = DataTypeField("dtype")
|
|
256
268
|
|
|
269
|
+
@property
|
|
270
|
+
def dtype(self):
|
|
271
|
+
return getattr(self, "_dtype", None)
|
|
272
|
+
|
|
257
273
|
@property
|
|
258
274
|
def inferred_type(self):
|
|
259
275
|
return "integer"
|
|
@@ -265,6 +281,10 @@ class IndexValue(Serializable):
|
|
|
265
281
|
_data = NDArrayField("data")
|
|
266
282
|
_dtype = DataTypeField("dtype")
|
|
267
283
|
|
|
284
|
+
@property
|
|
285
|
+
def dtype(self):
|
|
286
|
+
return getattr(self, "_dtype", None)
|
|
287
|
+
|
|
268
288
|
@property
|
|
269
289
|
def inferred_type(self):
|
|
270
290
|
return "floating"
|
|
@@ -1086,11 +1106,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1086
1106
|
--------
|
|
1087
1107
|
>>> import maxframe.dataframe as md
|
|
1088
1108
|
>>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
|
|
1089
|
-
>>> s.ndim
|
|
1109
|
+
>>> s.ndim
|
|
1090
1110
|
1
|
|
1091
1111
|
|
|
1092
1112
|
>>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
|
|
1093
|
-
>>> df.ndim
|
|
1113
|
+
>>> df.ndim
|
|
1094
1114
|
2
|
|
1095
1115
|
"""
|
|
1096
1116
|
return super().ndim
|
|
@@ -1514,15 +1534,17 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1514
1534
|
refresh_index_value(self)
|
|
1515
1535
|
refresh_dtypes(self)
|
|
1516
1536
|
|
|
1517
|
-
def
|
|
1518
|
-
dtypes = table_meta.pd_column_dtypes
|
|
1537
|
+
def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
|
|
1519
1538
|
self._dtypes = dtypes
|
|
1520
1539
|
self._columns_value = parse_index(dtypes.index, store_data=True)
|
|
1521
1540
|
self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
|
|
1522
1541
|
new_shape = list(self._shape)
|
|
1523
|
-
new_shape[
|
|
1542
|
+
new_shape[-1] = len(dtypes)
|
|
1524
1543
|
self._shape = tuple(new_shape)
|
|
1525
1544
|
|
|
1545
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
1546
|
+
self.refresh_from_dtypes(table_meta.pd_column_dtypes)
|
|
1547
|
+
|
|
1526
1548
|
@property
|
|
1527
1549
|
def dtypes(self):
|
|
1528
1550
|
dt = getattr(self, "_dtypes", None)
|
|
@@ -1666,6 +1688,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
|
|
|
1666
1688
|
raise NotImplementedError
|
|
1667
1689
|
|
|
1668
1690
|
corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
|
|
1691
|
+
if corner_data is None:
|
|
1692
|
+
return
|
|
1669
1693
|
|
|
1670
1694
|
buf = StringIO()
|
|
1671
1695
|
max_rows = pd.get_option("display.max_rows")
|
|
@@ -1761,11 +1785,11 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1761
1785
|
--------
|
|
1762
1786
|
>>> import maxframe.dataframe as md
|
|
1763
1787
|
>>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
|
|
1764
|
-
>>> s.ndim
|
|
1788
|
+
>>> s.ndim
|
|
1765
1789
|
1
|
|
1766
1790
|
|
|
1767
1791
|
>>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
|
|
1768
|
-
>>> df.ndim
|
|
1792
|
+
>>> df.ndim
|
|
1769
1793
|
2
|
|
1770
1794
|
"""
|
|
1771
1795
|
return super().ndim
|
|
@@ -22,7 +22,7 @@ from pandas._libs.tslibs import timezones
|
|
|
22
22
|
from pandas.tseries.frequencies import to_offset
|
|
23
23
|
from pandas.tseries.offsets import Tick
|
|
24
24
|
|
|
25
|
-
from ... import opcodes
|
|
25
|
+
from ... import opcodes
|
|
26
26
|
from ...core import OutputType
|
|
27
27
|
from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField
|
|
28
28
|
from ...utils import no_default, pd_release_version
|
|
@@ -117,7 +117,7 @@ def generate_range_count(
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
|
|
120
|
-
_op_type_ =
|
|
120
|
+
_op_type_ = opcodes.DATE_RANGE
|
|
121
121
|
|
|
122
122
|
start = AnyField("start")
|
|
123
123
|
end = AnyField("end")
|